Sun Cluster Data Service for Sun Grid Engine Guide for Solaris OS

Appendix A Files for Configuring and Removing Sun Cluster HA for Sun Grid Engine Resources

The /opt/SUNWscsge/util directory contains files that automate the process of configuring and removing Sun Cluster HA for Sun Grid Engine resources. Listings of these files are provided in the following sections:

Listing of sge_config

#
# Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#
# This file is sourced by sge_register and sge_remove, which use the
# parameters listed below.
#
# Set each parameter in key=value form (shell syntax, so no spaces
# around the "=").
#
#	COMMDRS    - name of the resource for the sge_commd daemon.
#	             Needed only for SGE 5.3.
#	QMASTERRS  - name of the resource for the sge_qmaster daemon.
#	             Always needed.
#	SCHEDDRS   - name of the resource for the sge_schedd daemon.
#	             Always needed.
#	MASTERRG   - name of the resource group containing the resources
#	             for sge_commd (5.3 only), sge_qmaster and sge_schedd.
#	             Always needed.
#	MASTERPORT - port number; sge_register references it when setting
#	             the Port_list property (<port>/tcp).  Any port number
#	             will do, as the value is otherwise ignored.
#	MASTERLH   - name of the LogicalHostname SC resource within MASTERRG.
#	             Always needed.
#	SGE_ROOT   - SGE_ROOT of this Sun Grid Engine installation.
#	SGE_CELL   - SGE_CELL of this Sun Grid Engine installation.
#	SGE_VER    - version of this Sun Grid Engine installation.
#	             Must be either 5.3 or 6.0; sge_register exits with an
#	             error for any other value.
#

COMMDRS=
QMASTERRS=
SCHEDDRS=
MASTERRG=
MASTERPORT=536
MASTERLH=
SGE_ROOT=
SGE_CELL=
SGE_VER=

Listing of sge_register

#
# Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#

# Load the site parameters (resource names, SGE_ROOT, SGE_VER, ...) from the
# sge_config file that lives next to this script.
. `dirname $0`/sge_config

# Resource type handed to "scrgadm -t" for every resource created below.
GDSTYPE=SUNW.gds

# Non-empty when the cluster already lists a resource type whose name
# starts with ${GDSTYPE}.
GDSINSTALLED=`/usr/cluster/bin/scha_cluster_get -O ALL_RESOURCETYPES | grep "^${GDSTYPE}"`

# Register the resource type only when it is not known yet; give up with
# exit status 1 if that registration fails.
if [ -z "${GDSINSTALLED}" ]; then
	/usr/cluster/bin/scrgadm -a -t "${GDSTYPE}" || {
		echo "Error: Unable to register resource type ${GDSTYPE}, please check!"
		exit 1
	}
fi

# Register the SGE daemons as cluster resources of type ${GDSTYPE} in the
# resource group ${MASTERRG}.  Which resources are created depends on SGE_VER:
#   5.3 - sge_commd, sge_qmaster (depends on sge_commd) and
#         sge_schedd (depends on sge_qmaster)
#   6.0 - sge_qmaster and sge_schedd (depends on sge_qmaster)
# Any other SGE_VER value is fatal.  The script exits with status 1 as soon
# as a registration fails, so later resources are not attempted.
#
# NOTE: the continuation lines inside the quoted Start/Stop/Probe commands
# and inside the error messages deliberately start in column 0; leading
# whitespace there would become part of the string.
if [ "${SGE_VER}" = "5.3" ]; then
	# Disable SGE launch at boot if same exists.
	# A marker is left to restore boot launch
	# if SGE cluster agents are removed with:
	# sge_remove.
	if [ -L /etc/rc2.d/S95rcsge ]; then
		touch "${SGE_ROOT}/sge_boot_launch_disabled"
		rm /etc/rc2.d/S95rcsge
	fi

	# Port used for Port_list in this branch.  The sge_config template
	# defines MASTERPORT but not PORT, so fall back to MASTERPORT when PORT
	# is unset or empty; otherwise Port_list would expand to just "/tcp".
	# A PORT value supplied by the caller is still honoured.
	SGE53_PORT=${PORT:-${MASTERPORT}}

	# Register resource for sge_commd
	/usr/cluster/bin/scrgadm -a -j ${COMMDRS} -g ${MASTERRG} -t ${GDSTYPE} \
-x Start_command="/opt/SUNWscsge/bin/sge_commd/start_sge_commd \
-R ${COMMDRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-x Stop_command="/opt/SUNWscsge/bin/sge_commd/stop_sge_commd \
-R ${COMMDRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-x Probe_command="/opt/SUNWscsge/bin/sge_commd/probe_sge_commd \
-R ${COMMDRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-y Port_list=${SGE53_PORT}/tcp -y Network_resources_used=${MASTERLH} \
-x Stop_signal=9 \
-y retry_count=5 -y retry_interval=300
	St=$?
	if [ "${St}" -ne 0 ]; then
		echo "Error: Registration of resource ${COMMDRS} failed, \
please correct the wrong parameters"
		exit 1
	else
		echo "Registration of resource ${COMMDRS} succeeded"
	fi

	# Register resource for sge_qmaster (depends on the sge_commd resource)
	/usr/cluster/bin/scrgadm -a -j ${QMASTERRS} -g ${MASTERRG} -t ${GDSTYPE} \
-x Start_command="/opt/SUNWscsge/bin/sge_qmaster/start_sge_qmaster \
-R ${QMASTERRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-x Stop_command="/opt/SUNWscsge/bin/sge_qmaster/stop_sge_qmaster \
-R ${QMASTERRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-x Probe_command="/opt/SUNWscsge/bin/sge_qmaster/probe_sge_qmaster \
-R ${QMASTERRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-y Port_list=${SGE53_PORT}/tcp -y Network_resources_used=${MASTERLH} \
-x Stop_signal=9 \
-x probe_timeout=90 -y Thorough_probe_interval=120 \
-y retry_count=2 -y retry_interval=900 \
-y Resource_dependencies=${COMMDRS}
	St=$?
	if [ "${St}" -ne 0 ]; then
		echo "Error: Registration of resource ${QMASTERRS} failed, \
please correct the wrong parameters"
		exit 1
	else
		echo "Registration of resource ${QMASTERRS} succeeded"
	fi

	# Register resource for sge_schedd (depends on the sge_qmaster resource)
	/usr/cluster/bin/scrgadm -a -j ${SCHEDDRS} -g ${MASTERRG} -t ${GDSTYPE} \
-x Start_command="/opt/SUNWscsge/bin/sge_schedd/start_sge_schedd \
-R ${SCHEDDRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-x Stop_command="/opt/SUNWscsge/bin/sge_schedd/stop_sge_schedd \
-R ${SCHEDDRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-x Probe_command="/opt/SUNWscsge/bin/sge_schedd/probe_sge_schedd \
-R ${SCHEDDRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-y Port_list=${SGE53_PORT}/tcp -y Network_resources_used=${MASTERLH} \
-x Stop_signal=9 \
-x probe_timeout=90 -y Thorough_probe_interval=120 \
-y retry_count=2 -y retry_interval=900 \
-y Resource_dependencies=${QMASTERRS}
	St=$?
	if [ "${St}" -ne 0 ]; then
		echo "Error: Registration of resource ${SCHEDDRS} failed, \
please correct the wrong parameters"
		exit 1
	else
		echo "Registration of resource ${SCHEDDRS} succeeded"
	fi
elif [ "${SGE_VER}" = "6.0" ]; then
	# SGE 6.0 has no sge_commd resource; the agent scripts for this
	# version live in the sge_qmaster6 and sge_schedd6 directories.

	# Register resource for sge_qmaster
	/usr/cluster/bin/scrgadm -a -j ${QMASTERRS} -g ${MASTERRG} -t ${GDSTYPE} \
-x Start_command="/opt/SUNWscsge/bin/sge_qmaster6/start_sge_qmaster \
-R ${QMASTERRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-x Stop_command="/opt/SUNWscsge/bin/sge_qmaster6/stop_sge_qmaster \
-R ${QMASTERRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-x Probe_command="/opt/SUNWscsge/bin/sge_qmaster6/probe_sge_qmaster \
-R ${QMASTERRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-y Port_list=${MASTERPORT}/tcp -y Network_resources_used=${MASTERLH} \
-x Stop_signal=9 \
-x probe_timeout=90 -y Thorough_probe_interval=120 \
-y retry_count=2 -y retry_interval=900
	St=$?
	if [ "${St}" -ne 0 ]; then
		echo "Error: Registration of resource ${QMASTERRS} failed, \
please correct the wrong parameters"
		exit 1
	else
		echo "Registration of resource ${QMASTERRS} succeeded"
	fi

	# Register resource for sge_schedd (depends on the sge_qmaster resource)
	/usr/cluster/bin/scrgadm -a -j ${SCHEDDRS} -g ${MASTERRG} -t ${GDSTYPE} \
-x Start_command="/opt/SUNWscsge/bin/sge_schedd6/start_sge_schedd \
-R ${SCHEDDRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-x Stop_command="/opt/SUNWscsge/bin/sge_schedd6/stop_sge_schedd \
-R ${SCHEDDRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-x Probe_command="/opt/SUNWscsge/bin/sge_schedd6/probe_sge_schedd \
-R ${SCHEDDRS} -G ${MASTERRG} -S ${SGE_ROOT} -C ${SGE_CELL}" \
-y Port_list=${MASTERPORT}/tcp -y Network_resources_used=${MASTERLH} \
-x Stop_signal=9 \
-x probe_timeout=90 -y Thorough_probe_interval=120 \
-y retry_count=2 -y retry_interval=900 \
-y Resource_dependencies=${QMASTERRS}
	St=$?
	if [ "${St}" -ne 0 ]; then
		echo "Error: Registration of resource ${SCHEDDRS} failed, \
please correct the wrong parameters"
		exit 1
	else
		echo "Registration of resource ${SCHEDDRS} succeeded"
	fi
else
	# SGE_VER is empty or names a version this script does not handle.
	echo "Fatal: Please set variable SGE_VER properly in `dirname $0`/sge_config!"
	exit 1
fi

Listing of sge_remove

#
# Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#

# Load the site parameters (resource names, SGE_ROOT, SGE_VER) from the
# sge_config file that lives next to this script.
. `dirname $0`/sge_config

# Disable the resources first: sge_schedd, then sge_qmaster, then (SGE 5.3
# only) sge_commd.
/usr/cluster/bin/scswitch -n -j ${SCHEDDRS}
/usr/cluster/bin/scswitch -n -j ${QMASTERRS}
if [ "${SGE_VER}" = "5.3" ]; then
	/usr/cluster/bin/scswitch -n -j ${COMMDRS}
fi

# Remove the resources in the same order.
/usr/cluster/bin/scrgadm -r -j  ${SCHEDDRS}
/usr/cluster/bin/scrgadm -r -j  ${QMASTERRS}
if [ "${SGE_VER}" = "5.3" ]; then
	/usr/cluster/bin/scrgadm -r -j ${COMMDRS}

	# SGE launch at boot re-enabled, and
	# run-level script link re-established.
	# Done only when the marker file ${SGE_ROOT}/sge_boot_launch_disabled
	# exists and the init script is still installed.
	# Each file check needs its own [ ... ]: "&&" inside a single pair of
	# brackets splits the command in two, so the test could never succeed.
	if [ -e "${SGE_ROOT}/sge_boot_launch_disabled" ] && [ -e /etc/init.d/rcsge ]; then
		rm "${SGE_ROOT}/sge_boot_launch_disabled"
		ln -s /etc/init.d/rcsge /etc/rc2.d/S95rcsge
		# NOTE(review): chmod on a symbolic link normally changes the
		# link's target -- confirm that mode 111 on /etc/init.d/rcsge is
		# what is intended here.
		chmod 111 /etc/rc2.d/S95rcsge
	fi
fi