13 months ago
Dev: RA: drbd: fix metadata
1 #!/bin/bash
2 #
3 #
4 # OCF Resource Agent compliant drbd resource script.
5 #
6 # Copyright (c) 2004 - 2007 SUSE LINUX Products GmbH, Lars Marowsky-Bree
7 # All Rights Reserved.
8 #
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of version 2 of the GNU General Public License as
11 # published by the Free Software Foundation.
12 #
13 # This program is distributed in the hope that it would be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 #
17 # Further, this software is distributed without any warranty that it is
18 # free of the rightful claim of any third person regarding infringement
19 # or the like. Any license provided herein, whether implied or
20 # otherwise, applies only to this software file. Patent licenses, if
21 # any, provided herein do not apply to combinations of this program with
22 # other software, or any other product whatsoever.
23 #
24 # You should have received a copy of the GNU General Public License
25 # along with this program; if not, write the Free Software Foundation,
26 # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
27 #
28 #
30 # OCF instance parameters
31 # OCF_RESKEY_drbd_resource
32 # OCF_RESKEY_drbdconf
34 #######################################################################
35 # Initialization:
# Source the OCF shell function library. When OCF_DEBUG_LIBRARY is set and
# non-empty it names the file to source instead of the stock library under
# OCF_ROOT. Both paths are quoted so that a location containing whitespace
# or glob characters is sourced as a single file name.
if [ -n "$OCF_DEBUG_LIBRARY" ]; then
	. "$OCF_DEBUG_LIBRARY"
else
	. "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"
fi
43 #######################################################################
# Print the resource agent metadata (XML) on stdout, then terminate the
# script with $OCF_SUCCESS (0 if that variable is not set).
#
# The here-document delimiter is quoted so the XML is emitted literally.
# The placeholder in the clone_overrides_hostname description is written
# with &lt;/&gt; entities; a raw <cloneno> would be an unclosed element and
# make the document malformed. The one-line summary lives in <shortdesc>
# and the multi-line explanation in <longdesc>.
meta_data() {
	cat <<'END'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="drbd">
<version>1.1</version>

<longdesc lang="en">
This resource agent manages a Distributed
Replicated Block Device (DRBD) object as a master/slave
resource. DRBD is a mechanism for replicating storage; please see the
documentation for setup details.
</longdesc>

<shortdesc lang="en">Master/Slave OCF Resource Agent for DRBD</shortdesc>

<parameters>
<parameter name="drbd_resource" unique="1" required="1">
<longdesc lang="en">
The name of the drbd resource from the drbd.conf file.
</longdesc>
<shortdesc lang="en">drbd resource name</shortdesc>
<content type="string" default="drbd0" />
</parameter>

<parameter name="drbdconf">
<longdesc lang="en">
Full path to the drbd.conf file.
</longdesc>
<shortdesc lang="en">Path to drbd.conf</shortdesc>
<content type="string" default="/etc/drbd.conf"/>
</parameter>

<parameter name="clone_overrides_hostname">
<longdesc lang="en">
Whether or not to override the hostname with the clone number. This can
be used to create floating peer configurations; drbd will be told to
use node_&lt;cloneno&gt; as the hostname instead of the real uname,
which can then be used in drbd.conf.
</longdesc>
<shortdesc lang="en">Override drbd hostname</shortdesc>
<content type="boolean" default="no"/>
</parameter>
</parameters>

<actions>
<action name="start" timeout="240" />
<action name="promote" timeout="90" />
<action name="demote" timeout="90" />
<action name="notify" timeout="90" />
<action name="stop" timeout="100" />
<action name="monitor" depth="0" timeout="20" interval="20" start-delay="0" role="Slave" />
<action name="monitor" depth="0" timeout="20" interval="10" start-delay="0" role="Master" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END

	exit "${OCF_SUCCESS:-0}"
}
# Run a command, log it, and relay its output.
#
# Arguments: the command and its arguments, one word per argument.
# Globals:   RESOURCE (read, used as log prefix).
# Outputs:   the command's combined stdout/stderr, verbatim, on stdout.
# Returns:   the command's exit status.
#
# The command is executed via "$@" so that arguments keep their boundaries
# and are not subject to a second round of word splitting or globbing.
# The joined string in $cmd is used for log messages only.
do_cmd() {
	local cmd="$*"
	local cmd_out
	local ret

	ocf_log debug "$RESOURCE: Calling $cmd"
	cmd_out=$("$@" 2>&1)
	ret=$?

	if [ $ret -ne 0 ]; then
		ocf_log err "$RESOURCE: Called $cmd"
		ocf_log err "$RESOURCE: Exit code $ret"
		ocf_log err "$RESOURCE: Command output: $cmd_out"
	else
		ocf_log debug "$RESOURCE: Exit code $ret"
		ocf_log debug "$RESOURCE: Command output: $cmd_out"
	fi

	# printf with a quoted argument: no glob expansion of the output and
	# no surprises if it happens to start with an echo option.
	printf '%s\n' "$cmd_out"

	return $ret
}
# Run "$DRBDADM -c $DRBDCONF <args...>", log it, and relay cleaned output.
#
# Arguments: drbdadm sub-command followed by its arguments.
# Globals:   DRBDADM, DRBDCONF, RESOURCE (all read).
# Outputs:   the command's combined stdout/stderr collapsed onto a single
#            line, with the "found __DRBD_NODE__ ... <<" notice removed and
#            leading/trailing blanks trimmed.
# Returns:   the exit status of drbdadm.
do_drbdadm() {
	# Joined form is for log messages only.
	local cmd="$DRBDADM -c $DRBDCONF $*"
	local cmd_out cmd_ret ret

	ocf_log debug "$RESOURCE: Calling $cmd"
	# $DRBDADM is left unquoted on purpose so a value carrying extra words
	# keeps working; the config path and the arguments are passed intact.
	cmd_out=$($DRBDADM -c "$DRBDCONF" "$@" 2>&1)
	ret=$?

	# Trim the garbage drbdadm likes to print when using the node
	# override feature. All whitespace (including newlines) is squeezed to
	# single blanks first so the pattern can span what were separate
	# lines; this is done with tr rather than an unquoted echo so that the
	# output is never glob-expanded.
	cmd_ret=$(
		{
			printf '%s' "$cmd_out" | tr -s '[:space:]' ' '
			echo
		} | sed -e 's/found __DRBD_NODE__.*<<//' -e 's/^ *//' -e 's/ *$//'
	)

	if [ $ret -ne 0 ]; then
		ocf_log err "$RESOURCE: Called $cmd"
		ocf_log err "$RESOURCE: Exit code $ret"
		ocf_log err "$RESOURCE: Command output: $cmd_ret"
	else
		ocf_log debug "$RESOURCE: Exit code $ret"
		ocf_log debug "$RESOURCE: Command output: $cmd_ret"
	fi

	printf '%s\n' "$cmd_ret"

	return $ret
}
# Set up the globals used by every action.
#
# Globals written: RESOURCE, CLONE_NO, CRM_MASTER, DRBDCONF and, when the
#                  hostname override is enabled, the exported __DRBD_NODE__.
# Globals read:    DRBDADM, HA_SBIN_DIR, ACTION and the OCF_RESKEY_* inputs.
#
# Exits the script if the configuration file is missing: with
# $OCF_NOT_RUNNING for the monitor action, $OCF_ERR_INSTALLED otherwise.
drbd_init() {
	check_binary $DRBDADM

	RESOURCE="$OCF_RESKEY_drbd_resource"
	CLONE_NO="$OCF_RESKEY_CRM_meta_clone"
	CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
	# ":=" also stores the default back into OCF_RESKEY_drbdconf.
	DRBDCONF="${OCF_RESKEY_drbdconf:=/etc/drbd.conf}"

	if [ ! -f "$DRBDCONF" ]; then
		ocf_log err "drbd.conf not installed."
		[ "$ACTION" = 'monitor' ] && exit $OCF_NOT_RUNNING
		exit $OCF_ERR_INSTALLED
	fi

	# Anything other than yes/true/on/1 (case-insensitive for the words)
	# leaves the node name alone.
	case "$OCF_RESKEY_clone_overrides_hostname" in
	[Yy][Ee][Ss] | [Tt][Rr][Uu][Ee] | [Oo][Nn] | 1) ;;
	*) return 0 ;;
	esac

	export __DRBD_NODE__="node_${CLONE_NO}"
	ocf_log info "$RESOURCE: Using hostname $__DRBD_NODE__"
}
180 #######################################################################
# Print a short usage summary on stdout; $0 is shown as the program name.
drbd_usage() {
	printf '%s\n' \
		"usage: $0 {start|stop|monitor|validate-all|promote|demote|notify|meta-data}" \
		"Expects to have a fully populated OCF RA-compliant environment set."
}
# Succeeds (0) when /proc/drbd exists as a regular file, fails (1) otherwise.
is_drbd_enabled() {
	test -f /proc/drbd
}
# Classify the installed drbd version from the "Version" line printed by
# drbdadm when it is run without arguments.
#
# Globals: DRBDADM (read) - the same binary the rest of the agent uses;
#          falls back to plain "drbdadm" when unset or empty.
# Returns (exit status of the final awk stage):
#   2  unsupported: older than 0.7
#   1  0.7.x, or 8.x before 8.3
#   0  anything else - including the case where no "Version" line was
#      seen at all, since awk then never reaches an exit statement
get_drbd_ver() {
	${DRBDADM:-drbdadm} |
		awk '/Version/ { print $2 }' |
		awk -F. '
		{
			# "0.7.x" style numbering carries the real major/minor one
			# field further to the right.
			if ($1 == "0") { v1 = $2; v2 = $3 }
			else           { v1 = $1; v2 = $2 }
		}
		v1 < 7 { exit 2 }
		v1 == 7 || (v1 == 8 && v2 < 3) { exit 1 }  # use state
		# otherwise use role
		'
}
# Print the drbdadm sub-command that reports the resource role for the
# installed version: "state" when get_drbd_ver returns 1, "role" when it
# returns 0.
#
# For a get_drbd_ver result of 2 or more an error is logged and the shell
# exits with $OCF_ERR_GENERIC. Note that when this function is called
# inside a command substitution, that exit only ends the substitution's
# subshell; the caller sees empty output and a non-zero status.
get_status_cmd() {
	local rc

	get_drbd_ver
	rc=$?
	if [ $rc -ge 2 ]; then
		ocf_log err "Cannot parse output of 'drbdadm | grep Version'"
		exit $OCF_ERR_GENERIC
	elif [ $rc -eq 1 ]; then
		echo state
	else
		echo role
	fi
}
# Query drbdadm and publish the resource state in globals.
#
# Globals read:    RESOURCE
# Globals written: DRBD_STATE        raw "local/remote" answer from drbdadm
#                  DRBD_STATE_LOCAL  text before the first "/"
#                  DRBD_STATE_REMOTE text after the last "/"
#                  DRBD_CSTATE       connection state
# Values that are not in the known lists below are replaced by a harmless
# default, so callers only ever see recognised states in the last three.
drbd_get_status() {
	local cmd

	cmd=$(get_status_cmd)
	DRBD_STATE=$(do_drbdadm "$cmd" "$RESOURCE")
	# Split on "/" with parameter expansion: no extra processes and no
	# globbing of the drbdadm output.
	DRBD_STATE_LOCAL=${DRBD_STATE%%/*}
	DRBD_STATE_REMOTE=${DRBD_STATE##*/}
	DRBD_CSTATE=$(do_drbdadm cstate "$RESOURCE")

	# Sanitize the various states, drbdadm is quite annoying; so if it
	# outputs something which doesn't make sense, translate it into
	# a harmless state:

	case "$DRBD_STATE_LOCAL" in
	"Not configured"|"Primary"|"Secondary") ;;
	*) DRBD_STATE_LOCAL="Not configured" ;;
	esac

	case "$DRBD_STATE_REMOTE" in
	"Primary"|"Secondary"|"Unknown") ;;
	*) DRBD_STATE_REMOTE="Not configured" ;;
	esac

	case "$DRBD_CSTATE" in
	Unconfigured|StandAlone|Unconnected|Timeout|BrokenPipe) ;;
	NetworkFailure|WFConnection|WFReportParams|Connected|SkippedSyncS) ;;
	SkippedSyncT|WFBitMapS|WFBitMapT|SyncSource|SyncTarget) ;;
	PausedSyncS|PausedSyncT) ;;
	*) DRBD_CSTATE="Unconfigured" ;;
	esac

	ocf_log debug "$RESOURCE status: $DRBD_STATE $DRBD_STATE_LOCAL $DRBD_STATE_REMOTE $DRBD_CSTATE"
}
# Start action: make sure the module is loaded and bring the resource up.
#
# Returns $OCF_SUCCESS if the resource was already configured or ends up
# Secondary after "drbdadm up"; $OCF_ERR_GENERIC if the module cannot be
# loaded, "up" fails, or the resource is not Secondary afterwards.
drbd_start() {
	if ! is_drbd_enabled; then
		# Module parameters come from "drbdadm sh-mod-parms"; the result
		# is intentionally word-split into separate modprobe arguments.
		if ! do_cmd modprobe -s drbd $($DRBDADM sh-mod-parms); then
			ocf_log err "Cannot load the drbd module."$'\n'
			return $OCF_ERR_GENERIC
		fi
		ocf_log debug "$RESOURCE start: Module loaded."
	fi

	drbd_get_status
	if [ "$DRBD_STATE_LOCAL" != "Not configured" ]; then
		ocf_log debug "$RESOURCE start: already configured."
		return $OCF_SUCCESS
	fi

	if ! do_drbdadm up $RESOURCE; then
		ocf_log err "$RESOURCE: Failed to start up."
		return $OCF_ERR_GENERIC
	fi

	# Re-read the state to confirm the resource really came up.
	drbd_get_status
	if [ "$DRBD_STATE_LOCAL" != "Secondary" ]; then
		ocf_log err "$RESOURCE start: not in Secondary mode after start."
		return $OCF_ERR_GENERIC
	fi

	ocf_log debug "$RESOURCE start: succeeded."
	return $OCF_SUCCESS
}
# Refresh the state and set the master preference via $CRM_MASTER -v <score>.
# The score is chosen from the connection state (DRBD_CSTATE):
#   Connected                                  75
#   SyncSource/PausedSyncS/WFBitMapS/SkippedSyncS 100
#   WFConnection                               10
#   anything else                               5
# Always returns $OCF_SUCCESS.
drbd_update_prefs() {
	local score

	drbd_get_status

	# TODO: This is probably way too complex.
	case $DRBD_CSTATE in
	Connected)
		score=75
		;;
	SyncSource|PausedSyncS|WFBitMapS|SkippedSyncS)
		score=100
		;;
	# TODO:
	# (Inconsistent || Diskless && WFConnection) should be -infinity
	# This one implies we'll try to promote even on disconnected
	# nodes, but that might not work.
	WFConnection)
		score=10
		;;
	*)
		score=5
		;;
	esac

	# $CRM_MASTER holds a command plus options and must be word-split.
	do_cmd $CRM_MASTER -v $score

	return $OCF_SUCCESS
}
# Stop action: drop the master preference and take the resource down.
#
# Returns $OCF_SUCCESS when the module is not loaded, the resource is
# already unconfigured, or it is unconfigured after "drbdadm down";
# $OCF_ERR_GENERIC when "down" fails or the resource is still configured.
drbd_stop() {
	local down_rc

	# Do not bother if drbd is not enabled
	if ! is_drbd_enabled; then
		ocf_log debug "$RESOURCE stop: drbd not loaded."
		return $OCF_SUCCESS
	fi

	drbd_get_status

	# Clear preference for becoming master
	do_cmd $CRM_MASTER -D

	if [ "$DRBD_STATE_LOCAL" = "Not configured" ]; then
		ocf_log debug "$RESOURCE stop: already unconfigured."
		return $OCF_SUCCESS
	fi

	# TODO: this is a _force_ operation. we may need to kill higher
	# levels to be able to down drbd. figure out how...
	do_drbdadm down $RESOURCE
	down_rc=$?
	if [ $down_rc -ne 0 ]; then
		ocf_log err "$RESOURCE stop: Failed with exit code: $down_rc"
		return $OCF_ERR_GENERIC
	fi
	ocf_log debug "$RESOURCE stop: drbdadm down succeeded."

	# TODO: If drbdadm propagated error codes, this
	# wouldn't be needed.
	drbd_get_status
	if [ "$DRBD_STATE_LOCAL" = "Not configured" ]; then
		return $OCF_SUCCESS
	fi

	ocf_log err "$RESOURCE stop: Not stopped."
	return $OCF_ERR_GENERIC
}
# Monitor action: map the local drbd state onto an OCF status code.
#
#   module not loaded / "Not configured"  -> $OCF_NOT_RUNNING
#   Primary                               -> $OCF_RUNNING_MASTER
#   Secondary                             -> $OCF_SUCCESS
#   anything else                         -> $OCF_ERR_GENERIC
#
# When the resource is Primary and the monitor interval is 0, the master
# preference is refreshed as well.
drbd_monitor() {
	# TODO: Think about how to monitor drbd and what constitutes
	# failure cases...
	# diskless etc?
	# A secondary node which is supposed to be primary?

	# TODO: we ought to update the preferences here occasionally,
	# but that causes transitions right now ...

	if ! is_drbd_enabled; then
		ocf_log warn "$RESOURCE monitor: drbd module not loaded"
		return $OCF_NOT_RUNNING
	fi

	drbd_get_status

	case "$DRBD_STATE_LOCAL" in
	"Not configured")
		ocf_log debug "$RESOURCE monitor: resource not configured"
		return $OCF_NOT_RUNNING
		;;
	"Primary")
		if [ "$OCF_RESKEY_CRM_meta_interval" -eq 0 ]; then
			# Restore the master setting during probes
			ocf_log debug "$RESOURCE monitor: restoring master setting during probe"
			drbd_update_prefs
		fi
		return $OCF_RUNNING_MASTER
		;;
	"Secondary")
		# drbd_update_prefs
		return $OCF_SUCCESS
		;;
	esac

	ocf_log err "$RESOURCE monitor: unexpected local state: $DRBD_STATE_LOCAL"
	return $OCF_ERR_GENERIC
}
# Promote action: switch the resource to Primary.
#
# Returns $OCF_SUCCESS if the resource already is, or becomes, Primary.
# Returns $OCF_ERR_GENERIC if the module is not loaded, the resource is
# not Secondary to begin with, "drbdadm primary" fails, or the resource
# is not Primary afterwards.
drbd_promote() {
	local primary_rc

	if ! is_drbd_enabled; then
		ocf_log err "drbd is not enabled"
		return $OCF_ERR_GENERIC
	fi

	drbd_get_status

	case "$DRBD_STATE_LOCAL" in
	"Primary")
		ocf_log info "$RESOURCE promote: already primary"
		return $OCF_SUCCESS
		;;
	"Secondary")
		;;
	*)
		ocf_log warn "$RESOURCE promote: Not secondary to start with."
		return $OCF_ERR_GENERIC
		;;
	esac

	do_drbdadm primary $RESOURCE
	primary_rc=$?
	if [ $primary_rc -ne 0 ]; then
		ocf_log err "$RESOURCE promote: Failed with exit code $primary_rc."
		return $OCF_ERR_GENERIC
	fi

	# TODO: WORK AROUND because drbdadm has a bug and
	# reports success even if it failed :-(
	drbd_get_status
	if [ "$DRBD_STATE_LOCAL" = "Primary" ]; then
		ocf_log info "$RESOURCE promote: primary succeeded"
		return $OCF_SUCCESS
	fi

	ocf_log err "$RESOURCE promote: Not primary despite drbdadm call."
	return $OCF_ERR_GENERIC
}
# Demote action: switch the resource to Secondary.
#
# Returns $OCF_SUCCESS when the module is not loaded, the resource is
# already Secondary, or "drbdadm secondary" succeeded and the resource is
# no longer Primary; $OCF_NOT_RUNNING when the resource is not configured;
# $OCF_ERR_GENERIC when "drbdadm secondary" fails or the resource is still
# Primary afterwards.
#
# The state is read with drbd_get_status before the checks and again after
# the drbdadm call; without those calls DRBD_STATE_LOCAL would be unset or
# stale and none of the comparisons below could ever be meaningful.
drbd_demote() {
	if is_drbd_enabled; then
		: OK
	else
		# A stopped resource also is demoted.
		ocf_log debug "$RESOURCE demote: module not loaded"
		return $OCF_SUCCESS
	fi

	drbd_get_status

	if [ "$DRBD_STATE_LOCAL" = "Secondary" ]; then
		ocf_log debug "$RESOURCE demote: already secondary"
		return $OCF_SUCCESS
	fi
	if [ "$DRBD_STATE_LOCAL" = "Not configured" ]; then
		ocf_log debug "$RESOURCE demote: already stopped"
		return $OCF_NOT_RUNNING
	fi

	# TODO: this is a _force_ operation. we may need to kill higher
	# levels (or switch them to r/o) to be able to demote drbd.
	# figure out how...
	if do_drbdadm secondary "$RESOURCE"; then
		# Verify the outcome instead of trusting the exit status alone.
		drbd_get_status
		if [ "$DRBD_STATE_LOCAL" = "Primary" ]; then
			ocf_log err "$RESOURCE demote: still primary!"
			return $OCF_ERR_GENERIC
		fi

		ocf_log debug "$RESOURCE demote: succeeded"
		return $OCF_SUCCESS
	else
		ocf_log err "$RESOURCE demote: Failed with exit code $?."
		return $OCF_ERR_GENERIC
	fi
}
# Notify action.
#
# Reads the notification type/operation and the active/start/stop resource
# lists from the OCF_RESKEY_CRM_meta_notify_* variables. Only "post"
# notifications do anything: after a "start" with exactly two active
# resources the peer connection is requested and waited for, and after any
# post notification the master preference is refreshed.
# Always returns $OCF_SUCCESS.
drbd_notify() {
	local n_type="$OCF_RESKEY_CRM_meta_notify_type"
	local n_op="$OCF_RESKEY_CRM_meta_notify_operation"
	local n_active n_stop n_start

	# Count list entries by letting the shell split them into the
	# positional parameters.
	set -- $OCF_RESKEY_CRM_meta_notify_start_resource
	n_start="$#"
	set -- $OCF_RESKEY_CRM_meta_notify_stop_resource
	n_stop="$#"
	set -- $OCF_RESKEY_CRM_meta_notify_active_resource
	n_active="$#"

	ocf_log debug "$RESOURCE notify: $n_type for $n_op - counts: active $n_active - starting $n_start - stopping $n_stop"

	# "pre" notifications are deliberately ignored.
	# TODO (pre/promote): Resist promotion of the other side in case we
	# are already primary - though the CRM should not even attempt that.
	if [ "$n_type" = "post" ]; then
		# TODO: Entire operation handling which follows redundant?
		if [ "$n_op" = "start" ] && [ "$n_active" -eq 2 ]; then
			# The other side is running, so we ought
			# to connect and wait for that.
			do_drbdadm connect $RESOURCE
			do_drbdadm wait_connect $RESOURCE
			# TODO: If this can cause a hang if the
			# other side isn't connected or goes
			# away during that, maybe just sleep
			# here for 5-10s or take out the entire
			# connect handling
		fi

		# post/stop:
		# TODO BUG: disconnect seems to force
		# non-primary mode?!?
		#### do_drbdadm disconnect $RESOURCE
		# TODO: If we are secondary, do we need to do
		# anything about a stopped primary in case we
		# had an outdated flag...?

		# After something has been done is a good time to
		# recheck our status:
		drbd_update_prefs
	fi

	return $OCF_SUCCESS
}
# Validate-all action: check configuration file, resource name and clone
# meta attributes.
#
# Globals read: DRBDCONF, RESOURCE, OCF_RESKEY_CRM_meta_clone_max,
#               OCF_RESKEY_CRM_meta_clone_node_max,
#               OCF_RESKEY_CRM_meta_master_node_max,
#               OCF_RESKEY_CRM_meta_master_max.
# Returns: $OCF_SUCCESS         everything checked out
#          $OCF_ERR_ARGS        no resource name
#          $OCF_ERR_CONFIGURED  missing config file, "drbdadm dump" failure,
#                               or clone options other than 2/1/1/1
#
# Every failure path returns (none exits), so the function behaves the
# same wherever it is called from.
drbd_validate_all() {
	# First check the configuration file
	if [ -n "$DRBDCONF" ] && [ ! -f "$DRBDCONF" ]; then
		ocf_log err "Configuration file does not exist: $DRBDCONF"
		return $OCF_ERR_CONFIGURED
	fi

	# Check the resource name, it should appear in DRBDCONF
	if [ -z "$RESOURCE" ]; then
		ocf_log err "No resource name specified!"
		return $OCF_ERR_ARGS
	fi

	# do_drbdadm folds stderr into the text it prints on stdout, so both
	# streams are discarded here; only the exit status matters.
	if ! do_drbdadm dump "$RESOURCE" >/dev/null 2>&1; then
		ocf_log err "Invalid configuration file $DRBDCONF"
		return $OCF_ERR_CONFIGURED
	fi

	# An unset/empty meta attribute is not treated as a misconfiguration;
	# defaulting it to the expected value keeps that outcome without
	# making "[" complain about a non-integer operand.
	if [ "${OCF_RESKEY_CRM_meta_clone_max:-2}" -ne 2 ] \
		|| [ "${OCF_RESKEY_CRM_meta_clone_node_max:-1}" -ne 1 ] \
		|| [ "${OCF_RESKEY_CRM_meta_master_node_max:-1}" -ne 1 ] \
		|| [ "${OCF_RESKEY_CRM_meta_master_max:-1}" -ne 1 ]; then
		ocf_log err "Clone options misconfigured."
		return $OCF_ERR_CONFIGURED
	fi

	return $OCF_SUCCESS
}
# Entry point: exactly one argument, the action name, is expected.
[ $# -eq 1 ] || {
	echo "Incorrect parameter count."
	drbd_usage
	exit $OCF_ERR_ARGS
}

# A monitor interval of 0 is assumed when the variable is not set at all.
: ${OCF_RESKEY_CRM_meta_interval=0}

ACTION=$1
case $ACTION in
meta-data)
	meta_data
	;;
validate-all)
	# The script's exit status is that of drbd_validate_all, the last
	# command executed.
	drbd_init
	drbd_validate_all
	;;
start|stop|monitor|promote|demote|notify)
	if ! ocf_is_root; then
		ocf_log err "You must be root to perform this operation."
		exit $OCF_ERR_PERM
	fi

	drbd_init
	# Dispatch to the function named after the action.
	drbd_$ACTION
	exit $?
	;;
usage|help)
	drbd_usage
	exit $OCF_SUCCESS
	;;
*)
	drbd_usage
	exit $OCF_ERR_ARGS
	;;
esac