resources/OCF/drbd

13 months ago

author
Dejan Muhamedagic <dejan@hello-penguin.com>
date
Sun Jun 07 14:42:00 2009 +0200
changeset 12422
3317a881ba21
parent 12412
2bc573423043
permissions
-rw-r--r--

Dev: RA: drbd: fix metadata

     1 #!/bin/bash
     2 #
     3 #
     4 #       OCF Resource Agent compliant drbd resource script.
     5 #
     6 # Copyright (c) 2004 - 2007 SUSE LINUX Products GmbH, Lars Marowsky-Bree
     7 #                    All Rights Reserved.
     8 #
     9 # This program is free software; you can redistribute it and/or modify
    10 # it under the terms of version 2 of the GNU General Public License as
    11 # published by the Free Software Foundation.
    12 #
    13 # This program is distributed in the hope that it would be useful, but
    14 # WITHOUT ANY WARRANTY; without even the implied warranty of
    15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
    16 #
    17 # Further, this software is distributed without any warranty that it is
    18 # free of the rightful claim of any third person regarding infringement
    19 # or the like.  Any license provided herein, whether implied or
    20 # otherwise, applies only to this software file.  Patent licenses, if
    21 # any, provided herein do not apply to combinations of this program with
    22 # other software, or any other product whatsoever.
    23 #
    24 # You should have received a copy of the GNU General Public License
    25 # along with this program; if not, write the Free Software Foundation,
    26 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
    27 #
    28 #
    30 # OCF instance parameters
    31 #	OCF_RESKEY_drbd_resource
    32 #	OCF_RESKEY_drbdconf
    34 #######################################################################
    35 # Initialization:
    37 if [ -n "$OCF_DEBUG_LIBRARY" ]; then
    38 	. $OCF_DEBUG_LIBRARY
    39 else
    40 	. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
    41 fi
    43 #######################################################################
    45 meta_data() {
    46 	cat <<END
    47 <?xml version="1.0"?>
    48 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
    49 <resource-agent name="drbd">
    50 <version>1.1</version>
    52 <longdesc lang="en">
    53 Master/Slave OCF Resource Agent for DRBD
    54 </longdesc>
    56 <shortdesc lang="en">This resource agent manages a Distributed
    57 Replicated Block Device (DRBD) object as a master/slave
    58 resource. DRBD is a mechanism for replicating storage; please see the
    59 documentation for setup details.</shortdesc>
    61 <parameters>
    62 <parameter name="drbd_resource" unique="1" required="1">
    63 <longdesc lang="en">
    64 The name of the drbd resource from the drbd.conf file.
    65 </longdesc>
    66 <shortdesc lang="en">drbd resource name</shortdesc>
    67 <content type="string" default="drbd0" />
    68 </parameter>
    70 <parameter name="drbdconf">
    71 <longdesc lang="en">
    72 Full path to the drbd.conf file.
    73 </longdesc>
    74 <shortdesc lang="en">Path to drbd.conf</shortdesc>
    75 <content type="string" default="/etc/drbd.conf"/>
    76 </parameter>
    78 <parameter name="clone_overrides_hostname">
    79 <longdesc lang="en">
    80 Whether or not to override the hostname with the clone number. This can
    81 be used to create floating peer configurations; drbd will be told to
    82 use node_&#60;cloneno&#62; as the hostname instead of the real uname,
    83 which can then be used in drbd.conf.
    84 </longdesc>
    85 <shortdesc lang="en">Override drbd hostname</shortdesc>
    86 <content type="boolean" default="no"/>
    87 </parameter>
    88 </parameters>
    90 <actions>
    91 <action name="start"   timeout="240" />
    92 <action name="promote"   timeout="90" />
    93 <action name="demote"   timeout="90" />
    94 <action name="notify"   timeout="90" />
    95 <action name="stop"    timeout="100" />
    96 <action name="monitor" depth="0"  timeout="20" interval="20" start-delay="0" role="Slave" />
    97 <action name="monitor" depth="0"  timeout="20" interval="10" start-delay="0" role="Master" />
    98 <action name="meta-data"  timeout="5" />
    99 <action name="validate-all"  timeout="30" />
   100 </actions>
   101 </resource-agent>
   102 END
   104 	exit $OCF_SUCCESS
   105 }
   107 do_cmd() {
   108 	local cmd="$*"
   109 	ocf_log debug "$RESOURCE: Calling $cmd"
   110 	local cmd_out
   111 	cmd_out=$($cmd 2>&1)
   112 	ret=$?
   114 	if [ $ret -ne 0 ]; then
   115 		ocf_log err "$RESOURCE: Called $cmd"
   116 		ocf_log err "$RESOURCE: Exit code $ret"
   117 		ocf_log err "$RESOURCE: Command output: $cmd_out"
   118 	else
   119 		ocf_log debug "$RESOURCE: Exit code $ret"
   120 		ocf_log debug "$RESOURCE: Command output: $cmd_out"
   121 	fi
   123 	echo $cmd_out
   125 	return $ret
   126 }
   128 do_drbdadm() {
   129 	local cmd="$DRBDADM -c $DRBDCONF $*"
   130 	ocf_log debug "$RESOURCE: Calling $cmd"
   131 	local cmd_out
   132 	cmd_out=$($cmd 2>&1)
   133 	ret=$?
   134 	# Trim the garbage drbdadm likes to print when using the node
   135 	# override feature:
   136 	local cmd_ret=$(echo $cmd_out | sed -e 's/found __DRBD_NODE__.*<<//;')
   138 	if [ $ret -ne 0 ]; then
   139 		ocf_log err "$RESOURCE: Called $cmd"
   140 		ocf_log err "$RESOURCE: Exit code $ret"
   141 		ocf_log err "$RESOURCE: Command output: $cmd_ret"
   142 	else
   143 		ocf_log debug "$RESOURCE: Exit code $ret"
   144 		ocf_log debug "$RESOURCE: Command output: $cmd_ret"
   145 	fi
   147 	echo $cmd_ret
   149 	return $ret
   150 }
   152 drbd_init() {
   153 	check_binary $DRBDADM
   154 	CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
   156 	RESOURCE="$OCF_RESKEY_drbd_resource"
   157 	CLONE_NO="$OCF_RESKEY_CRM_meta_clone"
   158 	DRBDCONF="${OCF_RESKEY_drbdconf:=/etc/drbd.conf}"
   160 	if [ ! -f "$DRBDCONF" ]; then
   161 		ocf_log err "drbd.conf not installed."
   162 		if [ "$ACTION" = 'monitor' ]; then
   163 			exit $OCF_NOT_RUNNING
   164 		else
   165 			exit $OCF_ERR_INSTALLED
   166 		fi
   167 	fi
   169 	case "$OCF_RESKEY_clone_overrides_hostname" in
   170 	[Yy][Ee][Ss]|[Tt][Rr][Uu][Ee]|[Oo][Nn]|1)
   171 		__DRBD_NODE__="node_${CLONE_NO}"
   172 		export __DRBD_NODE__
   173 		ocf_log info "$RESOURCE: Using hostname $__DRBD_NODE__"
   174 		;;
   175 	esac
   177 }
   180 #######################################################################
   182 drbd_usage() {
   183 	cat <<END
   184 usage: $0 {start|stop|monitor|validate-all|promote|demote|notify|meta-data}
   186 Expects to have a fully populated OCF RA-compliant environment set.
   187 END
   188 }
   190 is_drbd_enabled () {
   191 	if [ -f /proc/drbd ]; then
   192 		return 0
   193 	fi
   194 	return 1
   195 }
   197 get_drbd_ver() {
   198 	# returns 2 for unsupported (not 0.7.x or < 8.x)
   199 	# returns 1 for versions prior to 8.3.x
   200 	# returns 0 otherwise
   201 	drbdadm  | grep Version | awk '{print $2}' |
   202 	awk -F. '
   203 		{
   204 			if( $1 == "0" ) # 0.7.x
   205 				{ v1 = $2; v2 = $3; }
   206 			else
   207 				{ v1 = $1; v2 = $2; }
   208 		}
   209 		v1 < 7 { exit 2; }
   210 		v1 == 7 || (v1 == 8 && v2 < 3) { exit 1; } # use state
   211 		# otherwise use role
   212 	'
   213 }
   214 get_status_cmd() {
   215 	get_drbd_ver
   216 	rc=$?
   217 	if [ $rc -ge 2 ]; then
   218 		ocf_log err "Cannot parse output of 'drbdadm | grep Version'"
   219 		exit $OCF_ERR_GENERIC 
   220 	elif [ $rc -eq 1 ]; then
   221 		echo state
   222 	else
   223 		echo role
   224 	fi
   225 }
   226 drbd_get_status() {
   227 	cmd=`get_status_cmd`
   228 	DRBD_STATE=$(do_drbdadm "$cmd" $RESOURCE)
   229 	DRBD_STATE_LOCAL=$(echo $DRBD_STATE | sed -e 's#/.*##')
   230 	DRBD_STATE_REMOTE=$(echo $DRBD_STATE | sed -e 's#.*/##')
   231 	DRBD_CSTATE=$(do_drbdadm cstate $RESOURCE)
   233 	# Sanitize the various states, drbdadm is quite annoying; so if it 
   234 	# outputs something which doesn't make sense, translate it into
   235 	# a harmless state:
   237 	case "$DRBD_STATE_LOCAL" in
   238 		"Not configured"|"Primary"|"Secondary") ;;
   239 		*)	DRBD_STATE_LOCAL="Not configured" ;;
   240 	esac
   242 	case "$DRBD_STATE_REMOTE" in
   243 		"Primary"|"Secondary"|"Unknown") ;;
   244 		*)	DRBD_STATE_REMOTE="Not configured" ;;
   245 	esac
   247 	case "$DRBD_CSTATE" in
   248 		Unconfigured|StandAlone|Unconnected|Timeout|BrokenPipe) ;;
   249 		NetworkFailure|WFConnection|WFReportParams|Connected|SkippedSyncS) ;;
   250 		SkippedSyncT|WFBitMapS|WFBitMapT|SyncSource|SyncTarget) ;; 
   251 		PausedSyncS|PausedSyncT) ;;
   252 		*) DRBD_CSTATE="Unconfigured" ;;
   253 	esac
   255 	ocf_log debug "$RESOURCE status: $DRBD_STATE $DRBD_STATE_LOCAL $DRBD_STATE_REMOTE $DRBD_CSTATE"
   256 }
   258 drbd_start() {
   259 	if is_drbd_enabled; then
   260 	    : OK
   261 	else
   262 	    do_cmd modprobe -s drbd `$DRBDADM sh-mod-parms` || { 
   263 		    ocf_log err "Cannot load the drbd module."$'\n'; 
   264 		    return $OCF_ERR_GENERIC 
   265 	    }
   266 	    ocf_log debug "$RESOURCE start: Module loaded."
   267 	fi
   269 	drbd_get_status
   271 	if [ "$DRBD_STATE_LOCAL" != "Not configured" ]; then
   272 		ocf_log debug "$RESOURCE start: already configured."
   273 		return $OCF_SUCCESS
   274 	fi
   276 	if do_drbdadm up $RESOURCE ; then
   277 		drbd_get_status
   278 		if [ "$DRBD_STATE_LOCAL" != "Secondary" ]; then
   279 			ocf_log err "$RESOURCE start: not in Secondary mode after start."
   280 			return $OCF_ERR_GENERIC
   281 		fi
   283 		ocf_log debug "$RESOURCE start: succeeded."
   284 		return $OCF_SUCCESS
   285 	else
   286 		ocf_log err "$RESOURCE: Failed to start up."
   287 		return $OCF_ERR_GENERIC
   288 	fi
   289 }
   291 drbd_update_prefs() {
   292 	drbd_get_status
   294 	# TODO: This is probably way too complex. 
   295 	case $DRBD_CSTATE in
   296 	Connected) 
   297 		do_cmd $CRM_MASTER -v 75
   298 		;;
   299 	SyncSource|PausedSyncS|WFBitMapS|SkippedSyncS)
   300 		do_cmd $CRM_MASTER -v 100
   301 		;;
   302         # TODO:
   303         # (Inconsistent || Diskless && WFConnection) should be -infinity
   304 	# This one implies we'll try to promote even on disconnected
   305 	# nodes, but that might not work.
   306 	WFConnection) 
   307 		do_cmd $CRM_MASTER -v 10
   308 		;;
   309 	*)
   310 		do_cmd $CRM_MASTER -v 5
   311 		;;
   312 	esac
   314 	return $OCF_SUCCESS
   315 }
   317 drbd_stop() {
   318 	# Do not bother if drbd is not enabled
   319 	if is_drbd_enabled; then
   320 		drbd_get_status
   322 		# Clear preference for becoming master
   323 		do_cmd $CRM_MASTER -D
   325 		if [ "$DRBD_STATE_LOCAL" = "Not configured" ]; then
   326 			ocf_log debug "$RESOURCE stop: already unconfigured."
   327 			return $OCF_SUCCESS
   328 		fi
   330 		# TODO: this is a _force_ operation. we may need to kill higher
   331 		# levels to be able to down drbd. figure out how...
   332 		if do_drbdadm down $RESOURCE ; then
   333 			ocf_log debug "$RESOURCE stop: drbdadm down succeeded."
   335 			# TODO: If drbdadm propagated error codes, this
   336 			# wouldn't be needed.
   337 			drbd_get_status
   339 			if [ "$DRBD_STATE_LOCAL" = "Not configured" ]; then
   340 				return $OCF_SUCCESS
   341 			else
   342 				ocf_log err "$RESOURCE stop: Not stopped."
   343 			fi
   344 		else
   345 			ocf_log err "$RESOURCE stop: Failed with exit code: $?"
   346 		fi
   347 		return $OCF_ERR_GENERIC
   348 	else
   349 		ocf_log debug "$RESOURCE stop: drbd not loaded."
   350 	fi
   352 	return $OCF_SUCCESS
   353 }
   355 drbd_monitor() {
   356 	# TODO: Think about how to monitor drbd and what constitutes
   357 	# failure cases...
   358 	# diskless etc?
   359 	# A secondary node which is supposed to be primary?
   361 	# TODO: we ought to update the preferences here occasionally,
   362 	# but that causes transitions right now ...
   364 	if is_drbd_enabled; then
   365 	    : OK
   366 	else
   367 	    ocf_log warn "$RESOURCE monitor: drbd module not loaded"
   368 	    return $OCF_NOT_RUNNING
   369 	fi
   371 	drbd_get_status
   373 	if [ "$DRBD_STATE_LOCAL" = "Not configured" ]; then
   374 	    ocf_log debug "$RESOURCE monitor: resource not configured"
   375 	    return $OCF_NOT_RUNNING
   376 	elif [ "$DRBD_STATE_LOCAL" = "Primary" ]; then
   377 		if [ "$OCF_RESKEY_CRM_meta_interval" -eq 0 ]; then
   378 			# Restore the master setting during probes 
   379 			ocf_log debug "$RESOURCE monitor: restoring master setting during probe"
   380 			drbd_update_prefs
   381 		fi
   382 	    return $OCF_RUNNING_MASTER
   383 	elif [ "$DRBD_STATE_LOCAL" = "Secondary" ]; then
   384 #	    drbd_update_prefs
   385 	    return $OCF_SUCCESS
   386 	else
   387 	    ocf_log err "$RESOURCE monitor: unexpected local state: $DRBD_STATE_LOCAL"
   388 	fi
   390 	return $OCF_ERR_GENERIC
   391 }
   393 drbd_promote() {
   394 	if is_drbd_enabled; then
   395 	    : OK
   396 	else
   397 	    ocf_log err "drbd is not enabled"
   398 	    return $OCF_ERR_GENERIC
   399 	fi
   401 	drbd_get_status
   403 	if [ "$DRBD_STATE_LOCAL" = "Primary" ]; then
   404 		ocf_log info "$RESOURCE promote: already primary"
   405 		return $OCF_SUCCESS
   406 	fi
   408 	if [ "$DRBD_STATE_LOCAL" != "Secondary" ]; then
   409 		ocf_log warn "$RESOURCE promote: Not secondary to start with."
   410 		return $OCF_ERR_GENERIC
   411 	fi
   412 	if do_drbdadm primary $RESOURCE ; then
   413 		# TODO: WORK AROUND because drbdadm has a bug and
   414 		# reports success even if it failed :-(
   415 		drbd_get_status
   417 		if [ "$DRBD_STATE_LOCAL" = "Primary" ]; then
   418 			ocf_log info "$RESOURCE promote: primary succeeded"
   419 			return $OCF_SUCCESS
   420 		else 
   421 			ocf_log err "$RESOURCE promote: Not primary despite drbdadm call."
   422 		fi
   424 	else
   425 		ocf_log err "$RESOURCE promote: Failed with exit code $?."
   426 	fi
   427 	return $OCF_ERR_GENERIC
   428 }
   430 drbd_demote() {
   431 	if is_drbd_enabled; then
   432 	    : OK
   433 	else
   434 	    # A stopped resource also is demoted.
   435 	    ocf_log debug "$RESOURCE demote: module not loaded"
   436 	    return $OCF_SUCCESS
   437 	fi
   439 	if [ "$DRBD_STATE_LOCAL" = "Secondary" ]; then
   440 		ocf_log debug "$RESOURCE demote: already secondary"
   441 		return $OCF_SUCCESS
   442 	fi
   443 	if [ "$DRBD_STATE_LOCAL" = "Not configured" ]; then
   444 		ocf_log debug "$RESOURCE demote: already stopped"
   445 		return $OCF_NOT_RUNNING
   446 	fi
   448 	# TODO: this is a _force_ operation. we may need to kill higher
   449 	# levels (or switch them to r/o) to be able to demote drbd.
   450 	# figure out how...
   451 	if do_drbdadm secondary $RESOURCE ; then
   452 		if [ "$DRBD_STATE_LOCAL" = "Primary" ]; then
   453 			ocf_log err "$RESOURCE demote: still primary!"
   454 			return $OCF_ERR_GENERIC
   455 		fi
   457 	        ocf_log debug "$RESOURCE demote: succeeded"
   458 		return $OCF_SUCCESS
   459 	else
   460 	        ocf_log err "$RESOURCE demote: Failed with exit code $?."
   461 		return $OCF_ERR_GENERIC
   462 	fi
   463 }
   465 drbd_notify() {
   466 	local n_type="$OCF_RESKEY_CRM_meta_notify_type"
   467 	local n_op="$OCF_RESKEY_CRM_meta_notify_operation"
   468 	set -- $OCF_RESKEY_CRM_meta_notify_active_resource
   469 	local n_active="$#"
   470 	set -- $OCF_RESKEY_CRM_meta_notify_stop_resource
   471 	local n_stop="$#"
   472 	set -- $OCF_RESKEY_CRM_meta_notify_start_resource
   473 	local n_start="$#"
   475 	ocf_log debug "$RESOURCE notify: $n_type for $n_op - counts: active $n_active - starting $n_start - stopping $n_stop"
   477 	case $n_type in
   478 	pre)
   479 		case $n_op in
   480 		promote) # TODO:
   481 			 # Resist promotion of the other side in case we
   482 			 # are already primary - though the CRM should
   483 			 # not even attempt that.
   484 			;;
   485 		esac
   486 		;;
   487 	post)	# TODO: Entire case statement which follows redundant?
   488 		case $n_op in
   489 		start)
   490 			if [ "$n_active" -eq 2 ]; then
   491 				# The other side is running, so we ought
   492 				# to connect and wait for that.
   493 				do_drbdadm connect $RESOURCE
   494 				do_drbdadm wait_connect $RESOURCE
   495 				# TODO: If this can cause a hang if the
   496 				# other side isn't connected or goes
   497 				# away during that, maybe just sleep
   498 				# here for 5-10s or take out the entire
   499 				# case statement
   500 			fi
   501 			;;
   502 		stop)
   503 			# TODO BUG: disconnect seems to force
   504 			# non-primary mode?!?
   505 			#### do_drbdadm disconnect $RESOURCE
   507 			# TODO: If we are secondary, do we need to do
   508 			# anything about a stopped primary in case we
   509 			# had an outdated flag...?
   510 			;;
   511 		esac
   513 		# After something has been done is a good time to
   514 		# recheck our status:
   515 		drbd_update_prefs
   516 		;;
   517 	esac
   519 	return $OCF_SUCCESS
   520 }
   522 drbd_validate_all () {
   523 	# First check the configuration file
   524 	if [ -n "$DRBDCONF" ] && [ ! -f "$DRBDCONF" ]; then
   525 	    ocf_log err "Configuration file does not exist: $DRBDCONF"
   526 	    return $OCF_ERR_CONFIGURED
   527 	fi
   529 	# Check the resource name, it should appear in DRBDCONF
   530 	if [ -z "$RESOURCE" ]; then
   531 	    ocf_log err "No resource name specified!"
   532 	    return $OCF_ERR_ARGS
   533 	fi
   535 	if do_drbdadm dump $RESOURCE 2>/dev/null ; then
   536 	    :
   537 	else
   538 	    ocf_log err "Invalid configuration file $DRBDCONF"
   539 	    return $OCF_ERR_CONFIGURED
   540 	fi
   542 	if [ "$OCF_RESKEY_CRM_meta_clone_max" -ne 2 ] \
   543 	 || [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] \
   544 	 || [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ] \
   545 	 || [ "$OCF_RESKEY_CRM_meta_master_max" -ne 1 ] ; then
   546 		ocf_log err "Clone options misconfigured."
   547 		exit $OCF_ERR_CONFIGURED
   548 	fi
   550 	return $OCF_SUCCESS
   551 }
   554 if [ $# -ne 1 ]; then
   555 	echo "Incorrect parameter count."
   556 	drbd_usage
   557 	exit $OCF_ERR_ARGS
   558 fi
   560 : ${OCF_RESKEY_CRM_meta_interval=0}
   562 ACTION=$1
   563 case $ACTION in
   564 meta-data)	meta_data
   565 		;;
   566 validate-all)	drbd_init
   567 		drbd_validate_all
   568 		;;
   569 start|stop|monitor|promote|demote|notify)
   570 		if ocf_is_root ; then : ; else
   571 			ocf_log err "You must be root to perform this operation."
   572 			exit $OCF_ERR_PERM
   573 		fi
   575 		drbd_init
   576 		drbd_$ACTION
   577 		exit $?
   578 		;;
   579 usage|help)	drbd_usage
   580 		exit $OCF_SUCCESS
   581 		;;
   582 *)		drbd_usage
   583 		exit $OCF_ERR_ARGS
   584 		;;
   585 esac

mercurial