Script File To Upgrade Cluster and Node Pool

Use the script file in Oracle WebLogic Server for OKE to upgrade cluster and node pools. You can upgrade either the cluster or the node pool, or both.

Note:

Ensure that you stop the domain before upgrading the node pool that hosts the domain pods. Run the following command to stop the domain:
/u01/scripts/wls-domain-lifecycle/stopDomain.sh -d <domain-name> -n <domain-name>-ns
After you upgrade the nodepool, start the domain. Run the following command to start the domain:
/u01/scripts/wls-domain-lifecycle/startDomain.sh -d <domain-name> -n <domain-name>-ns

Copy the following script into a file named upgrade_cluster.py, and then run the script on the administration instance. See Upgrade Cluster and Node Pool Using Script.

#
# Copyright (c) 2023, Oracle Corporation and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
#

import oci
import sys
import re
sys.path.append('/u01/scripts')
from metadata import databag

'''
A python class for upgrading kubernetes cluster and the node pools along with the nodes to a given kubernetes version.

Note:
 - User is responsible to provide correct target Kubernetes version to upgrade.

Prerequisites:
 - Requires python OCI SDK 2.90 or above. To install python OCI SDK, run the following command as 'root' user in the 
   WebLogic for OKE admin host
   python3 -m pip install oci==2.90

Description:
 - Recursively upgrades the kubernetes cluster till the target version (to be provided by user) is reached 
   (stops at the previous version if the target version is not available; does not rollback to original version)
 - For each node pool ...
    -- Get the nodes and delete them
    -- Upgrade the node pool to target kubernetes version
    -- Create the nodes (the same number) in the node pool  
 This script can be used to upgrade only the cluster, only the node pools, or both. Take care to provide the
 correct target kubernetes version. Currently supported targets are 1.24.x, 1.25.x and 1.26.x
'''

class UpgradeCluster():
    """Upgrade an OKE cluster and its node pools to a target Kubernetes version.

    The cluster OCID is read from the metadata databag and every OCI call is signed with the
    instance principal.

    Attributes:
        k8s_version: target version string in the form ``v<major>.<minor>.<patch>``.
        cluster_id: OCID of the cluster to upgrade.
        ce_client: OCI Container Engine client.
        ce_client_ops: composite-operations wrapper around ``ce_client`` (call + wait for work request).
        upgrade_complete: set to True once the cluster upgrade has nothing more to do.
    """

    def __init__(self, target_k8s_version):
        """Create the Container Engine clients for the cluster recorded in the databag.

        :param target_k8s_version: target version string, e.g. ``v1.26.2``.
        """
        self.k8s_version = target_k8s_version
        self.cluster_id = databag.get_oke_cluster_id()
        principal = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
        self.ce_client = oci.container_engine.ContainerEngineClient(config={}, signer=principal)
        self.ce_client_ops = oci.container_engine.ContainerEngineClientCompositeOperations(self.ce_client)
        self.upgrade_complete = False

    @staticmethod
    def _work_request_failed(response):
        """Return True when the work request carried by *response* ended in the FAILED state.

        The ``*_and_wait_for_state`` composite operations return a Response object. Its ``status``
        attribute is the HTTP status code; the work-request outcome is in ``response.data.status``.
        """
        work_request = getattr(response, 'data', None)
        return (getattr(work_request, 'status', None)
                == oci.container_engine.models.WorkRequest.STATUS_FAILED)

    @staticmethod
    def _parse_version(version):
        """Convert ``v<major>.<minor>.<patch>`` into a tuple of ints.

        :return: ``(major, minor, patch)`` or None when *version* is not in that form.
        """
        parts = version.strip().lstrip('vV').split('.')
        if len(parts) != 3:
            return None
        try:
            return tuple(int(part) for part in parts)
        except ValueError:
            return None

    def upgrade_cluster(self):
        """Upgrade the cluster step by step until the target version is reached.

        Each iteration re-reads the cluster, picks the next version to move to and waits for the
        update work request to finish. The process exits with status 1 when the cluster details
        cannot be read, when the next available version would overshoot the target, or when an
        update work request fails.
        """
        while True:
            if self.upgrade_complete:
                print("OKE cluster upgrade completed.")
                return

            print("Getting cluster information ...")
            response = self.ce_client.get_cluster(self.cluster_id)
            if response.status != 200:
                print("Failed to get the kubernetes cluster details. Response [%s] " % response.status)
                sys.exit(1)

            cluster = response.data

            # Nothing to do when the cluster is already at the requested version (e.g. on a re-run).
            if cluster.kubernetes_version == self.k8s_version:
                print("The kubernetes cluster is already at version %s" % self.k8s_version)
                self.upgrade_complete = True
                return

            available_k8s_upgrades = cluster.available_kubernetes_upgrades or []
            if not available_k8s_upgrades:
                print("The kubernetes cluster is already at the highest available version")
                self.upgrade_complete = True
                return

            # Go straight to the target when the service offers it; otherwise take the first
            # offered version as an intermediate step.
            if self.k8s_version in available_k8s_upgrades:
                upgrade_version = self.k8s_version
            else:
                upgrade_version = available_k8s_upgrades[0]

            print("Upgrading cluster to version %s" % upgrade_version)
            if not self.check_versions(upgrade_version):
                print("The version %s is not available for upgrade for this cluster." % self.k8s_version)
                sys.exit(1)

            # Upgrade the cluster
            update_cluster_details = oci.container_engine.models.UpdateClusterDetails(
                kubernetes_version=upgrade_version)
            update_cluster_response = self.ce_client_ops.update_cluster_and_wait_for_state(
                self.cluster_id, update_cluster_details,
                wait_for_states=[oci.container_engine.models.WorkRequest.STATUS_SUCCEEDED,
                                 oci.container_engine.models.WorkRequest.STATUS_FAILED])

            if self._work_request_failed(update_cluster_response):
                print("Failed to upgrade cluster. %s" % update_cluster_response.data)
                self.upgrade_complete = True
                sys.exit(1)

            print("Upgraded cluster to version %s" % upgrade_version)
            if upgrade_version == self.k8s_version:
                self.upgrade_complete = True

    def check_versions(self, available_version):
        """Decide whether *available_version* is an acceptable next step towards the target.

        Versions have the format ``v<major>.<minor>.<patch>`` and are compared numerically.

        :param available_version: a version offered by the cluster as an upgrade.
        :return: True when ``<major>.<minor>`` is below the target (intermediate step) or when the
                 version equals the target exactly; False when it is beyond the target, has the
                 same ``<major>.<minor>`` but a different patch, or cannot be parsed.
        """
        available = self._parse_version(available_version)
        target = self._parse_version(self.k8s_version)
        if available is None or target is None:
            return False

        # Lower <major>.<minor> than the target: upgrade to it and continue with the next iteration.
        if available[:2] < target[:2]:
            return True

        # Same <major>.<minor> requires the exact patch; anything higher overshoots the target.
        return available == target

    # Upgrade all the node pools  to the target k8s version. This should be done after the cluster upgrade.
    def upgrade_nodepools(self):
        """Upgrade every node pool of this cluster that is not yet at the target version.

        For each such node pool the active nodes are deleted and the node pool is then updated to
        the target version with its original size and image. A failed node deletion is reported
        and the upgrade continues; a failed node pool update exits with status 1.
        """
        print("Upgrading node pools ...")
        cluster = self.ce_client.get_cluster(self.cluster_id).data

        # Follow pagination so that node pools beyond the first page are not silently skipped.
        node_pools_list = oci.pagination.list_call_get_all_results(
            self.ce_client.list_node_pools, cluster.compartment_id,
            cluster_id=self.cluster_id).data

        work_request_end_states = [oci.container_engine.models.WorkRequest.STATUS_SUCCEEDED,
                                   oci.container_engine.models.WorkRequest.STATUS_FAILED]

        for node_pool_summary in node_pools_list:
            # Skip node pools of other clusters and node pools already at the target version.
            if node_pool_summary.cluster_id != self.cluster_id:
                continue
            if node_pool_summary.kubernetes_version == self.k8s_version:
                continue

            # Delete the nodes in the nodepool. Update Node Pool will create new nodes with the updated k8s version
            node_pool = self.ce_client.get_node_pool(node_pool_summary.id).data
            for node in node_pool.nodes:
                print("Deleting node %s from node pool %s" % (node.name, node_pool.name))
                if node.lifecycle_state != oci.container_engine.models.Node.LIFECYCLE_STATE_ACTIVE:
                    continue

                response = self.ce_client_ops.delete_node_and_wait_for_state(
                    node_pool.id, node.id,
                    wait_for_states=work_request_end_states)

                if self._work_request_failed(response):
                    print("Failed to delete nodes in the node pool %s. Continuing with the upgrade..." % node_pool.name)

            # Keep the node pool at its current size
            node_config_details = oci.container_engine.models.UpdateNodePoolNodeConfigDetails(
                size=node_pool_summary.node_config_details.size
            )
            # Keep the node pool on its current image
            node_source_details = oci.container_engine.models.NodeSourceViaImageDetails(
                source_type=oci.container_engine.models.NodeSourceDetails.SOURCE_TYPE_IMAGE,
                image_id=node_pool_summary.node_image_id
            )
            update_nodepool_details = oci.container_engine.models.UpdateNodePoolDetails(
                kubernetes_version=self.k8s_version,
                node_config_details=node_config_details,
                node_source_details=node_source_details
            )
            print("Upgrading kuberenetes for node pool %s " % node_pool.name)
            response = self.ce_client_ops.update_node_pool_and_wait_for_state(
                node_pool_summary.id, update_nodepool_details,
                wait_for_states=work_request_end_states)

            if self._work_request_failed(response):
                print("Failed to upgrade node pool %s" % node_pool.name)
                sys.exit(1)

        print("Successfully upgraded node pools  in cluster %s" % cluster.name)
        return


def is_supported_k8s_version(version):
    """Return True when *version* is ``v1.24.x``, ``v1.25.x`` or ``v1.26.x`` (x = 1 to 5 digits)."""
    # Raw string with every dot escaped, so that only a literal '.' separates the components.
    return re.fullmatch(r"v1\.2[4-6]\.[0-9]{1,5}", version) is not None


# Command line: python3 upgrade_cluster.py <target_k8s_version> [cluster|nodepool]
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python3 upgrade_cluster.py <target_k8s_version> [<cluster, nodepool>]")
        sys.exit(1)

    k8s_version = sys.argv[1]
    if not is_supported_k8s_version(k8s_version):
        print ("Invalid/Unsupported kubernetes version provided for upgrade. Supported versions are "
               "[v1.24.1, v1.25.4, v1.26.2].")
        sys.exit(1)

    # Optional second argument restricts the upgrade to one component. Validate it before any
    # OCI client is created.
    component = sys.argv[2] if len(sys.argv) > 2 else None
    if component is not None and component.lower() not in ('cluster', 'nodepool'):
        print ("unrecognized parameter %s. Provide one of [cluster, nodepool]." % component)
        sys.exit(1)

    upgrade = UpgradeCluster(k8s_version)

    # If option 'cluster' is provided, just upgrade the cluster, if 'nodepool' is provided, upgrade only the
    # nodepool. If second argument is not provided, upgrade both: the cluster first, then the node pools.
    if component is None or component.lower() == 'cluster':
        upgrade.upgrade_cluster()
    if component is None or component.lower() == 'nodepool':
        upgrade.upgrade_nodepools()