decide_restart_or_failover() 调用 restart_service() 函数,以尝试在同一节点上重新启动数据服务。 该函数执行以下逻辑:
确定数据服务是否仍在 PMF 之下注册。如果服务仍处于注册状态,函数将执行以下操作:
为数据服务获取 Stop 方法名称和 Stop_timeout 值
使用 hatimerun 为数据服务启动 Stop 方法,传递 Stop_timeout 值
如果数据服务成功停止,则为数据服务获取 Start 方法名称和 Start_timeout 值
使用 hatimerun 为数据服务启动 Start 方法,传递 Start_timeout 值
如果数据服务已不在 PMF 之下注册,则表示数据服务已超过了 PMF 下允许的最大重试次数。系统将使用 GIVEOVER 选项调用 scha_control() 函数,以将数据服务故障转移到其他节点。
function restart_service
{
# To restart the data service, first verify that the
# data service itself is still registered under PMF.
pmfadm -q $PMF_TAG
if [[ $? -eq 0 ]]; then
# Since the TAG for the data service is still registered under
# PMF, first stop the data service and start it back up again.
# Obtain the Stop method name and the STOP_TIMEOUT value for
# this resource.
STOP_TIMEOUT=`scha_resource_get -O STOP_TIMEOUT \
-R $RESOURCE_NAME -G $RESOURCEGROUP_NAM?
STOP_METHOD=`scha_resource_get -O STOP \
-R $RESOURCE_NAME -G $RESOURCEGROUP_NAM?
hatimerun -t $STOP_TIMEOUT $RT_BASEDIR/$STOP_METHOD \
-R $RESOURCE_NAME -G $RESOURCEGROUP_NAME \
-T $RESOURCETYPE_NAME
if [[ $? -ne 0 ]]; then
logger-p ${SYSLOG_FACILITY}.err -t [$SYSLOG_TAG] \
“${ARGV0} Stop method failed.”
return 1
fi
# Obtain the START method name and the START_TIMEOUT value for
# this resource.
START_TIMEOUT=`scha_resource_get -O START_TIMEOUT \
-R $RESOURCE_NAME -G $RESOURCEGROUP_NAM?
START_METHOD=`scha_resource_get -O START \
-R $RESOURCE_NAME -G $RESOURCEGROUP_NAM?
hatimerun -t $START_TIMEOUT $RT_BASEDIR/$START_METHOD \
-R $RESOURCE_NAME -G $RESOURCEGROUP_NAME \
-T $RESOURCETYPE_NAME
if [[ $? -ne 0 ]]; then
logger-p ${SYSLOG_FACILITY}.err -t [$SYSLOG_TAG] \
“${ARGV0} Start method failed.”
return 1
fi
else
# The absence of the TAG for the dataservice
# implies that the data service has already
# exceeded the maximum retries allowed under PMF.
# Therefore, do not attempt to restart the
# data service again, but try to failover
# to another node in the cluster.
scha_control -O GIVEOVER -G $RESOURCEGROUP_NAME \
-R $RESOURCE_NAME
fi
return 0
}