6 days ago
Low: init script: don't use echo -e
The -e argument to echo is not supported by dash, which treats it as a literal
string and prints it. None of the strings printed contain backslash escape
sequences anyway, not even those sourced from /etc/rc.status on SuSE-like
systems -- they are already expanded.
Reported-by: Frederik Schüler <fs@debian.org>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
1 /*
2 * This program is free software; you can redistribute it and/or modify it under
3 * the terms of the GNU General Public License as published by the Free Software
4 * Foundation; either version 2 of the License, or (at your option) any later
5 * version.
6 *
7 * This program is distributed in the hope that it will be useful, but WITHOUT
8 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
9 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
10 * details.
11 *
12 * You should have received a copy of the GNU General Public License along with
13 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
14 * Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 ******************************************************************************
17 * TODO:
18 * 1) Man page
19 * 2) Add the "cl_respawn recover" function, for combining with recovery
20 * manager. But what's its strategy ?
21 * The pid will be passed via the environment.
22 * 3) Add the function for "-l" option ?
23 ******************************************************************************
24 *
25 * File: cl_respawn.c
26 * Description:
27 * A small respawn tool which will start a program as a child process, and
28 * unless it exits with the "magic" exit code, will restart the program again
29 * if it exits(dies). It is intended that this respawn program should be usable
30 * in resource agent scripts and other places. The respawn tool should properly
31 * log all restarts, and all exits which it doesn't respawn, and run itself as a
32 * client of the apphb application heartbeating program, so that it can be
33 * restarted if the program it is monitoring dies.
34 *
35 * Author: Sun Jiang Dong <sunjd@cn.ibm.com>
36 * Copyright (c) 2004 International Business Machines
37 */
39 #include <lha_internal.h>
40 #include <stdio.h>
41 #include <unistd.h>
42 #include <stdlib.h>
43 #include <fcntl.h>
44 #include <signal.h>
45 #ifdef HAVE_GETOPT_H
46 #include <getopt.h>
47 #endif
48 #include <stdarg.h>
49 #include <errno.h>
50 #include <string.h>
51 #include <sys/types.h>
52 #include <sys/stat.h>
53 #include <sys/wait.h>
54 #include <glib.h>
55 #include <clplumbing/cl_log.h>
56 #include <clplumbing/cl_signal.h>
57 #include <clplumbing/uids.h>
58 #include <clplumbing/lsb_exitcodes.h>
59 #include <clplumbing/GSource.h>
60 #include <clplumbing/proctrack.h>
61 #include <clplumbing/Gmain_timeout.h>
62 #include <clplumbing/cl_pidfile.h>
63 #include <apphb.h>
/*
 * Usage text. main() prints it when no arguments are given, for -h, for an
 * unknown option, and when no monitored program could be determined.
 */
static const char * Simple_helpscreen =
"Usage cl_respawn [<options>] <monitored_program> [<arg1>] [<arg2>] ...\n"
"Options are as below:\n"
"-m magic_exit_code\n"
" When monitored_program exits with this magic_exit_code, then cl_respawn\n"
" will not try to respawn it.\n"
"-i interval\n"
" Set the interval(ms) of application heartbeat or plumbing its client.\n"
"-w warntime\n"
" Set the warning time (ms) of application heartbeat.\n"
"-p pidfile\n"
" Set the name of a pid file to use.\n"
"-r Recover itself from crash. Only called by other monitor programs like"
" recovery manager.\n"
"-l List the program monitored by cl_respawn.\n"
" Notice: not supported yet.\n"
"-h Display this simple help.\n";
84 static void become_daemon(void);
85 static int run_client_as_child(char * client_argv[]);
86 static gboolean plumb_client_and_emit_apphb(gpointer data);
87 static gboolean cl_respawn_quit(int signo, gpointer user_data);
88 static void separate_argv(int * argc_p, char *** argv_p, char *** client_argv);
89 static int cmd_str_to_argv(char * cmd_str, char *** argv);
90 static void free_argv(char ** argv);
92 /* Functions for handling the child quit/abort event
93 */
94 static void monitoredProcessDied(ProcTrack* p, int status, int signo
95 , int exitcode, int waslogged);
96 static void monitoredProcessRegistered(ProcTrack* p);
97 static const char * monitoredProcessName(ProcTrack* p);
99 static ProcTrack_ops MonitoredProcessTrackOps = {
100 monitoredProcessDied,
101 monitoredProcessRegistered,
102 monitoredProcessName
103 };
/* Size of the buffer main() formats the apphb instance name into. */
static const int INSTANCE_NAME_LEN = 20;

/*
 * Milliseconds subtracted from the heartbeat interval when main() arms
 * the periodic timer. Avoid the incorrect warning message.
 */
static const int APPHB_INTVL_DETLA = 30;

/* Defaults, in milliseconds, used when -i / -w are not given. */
static const unsigned long DEFAULT_APPHB_INTERVAL = 2000;
static const unsigned long DEFAULT_APPHB_WARNTIME = 6000;
113 static int MAGIC_EXIT_CODE = 100;
115 static const char * app_name = "cl_respawn";
116 static gboolean REGTO_APPHBD = FALSE;
117 static char * pidfile = NULL;
119 /*
120 * This pid will equal to the PID of the process who was ever the child of
121 * that dead cl_respawn.
122 */
123 static pid_t monitored_PID = 0;
125 static const char * optstr = "rm:i:w:p:lh";
126 static GMainLoop * mainloop = NULL;
127 static gboolean IS_RECOVERY = FALSE;
129 static gboolean shutting_down = FALSE;
131 int main(int argc, char * argv[])
132 {
133 char app_instance[INSTANCE_NAME_LEN];
134 int option_char;
135 int interval = DEFAULT_APPHB_INTERVAL;
136 int apphb_warntime = DEFAULT_APPHB_WARNTIME;
137 char ** client_argv = NULL;
138 pid_t child_tmp = 0;
140 cl_log_set_entity(app_name);
141 cl_log_enable_stderr(TRUE);
142 cl_log_set_facility(HA_LOG_FACILITY);
144 if (argc == 1) { /* no arguments */
145 printf("%s\n", Simple_helpscreen);
146 exit(LSB_EXIT_EINVAL);
147 }
149 /*
150 * Try to separate the option parameter between myself and the client.
151 * Maybe rewrite the argc and argv.
152 */
153 separate_argv(&argc, &argv, &client_argv);
155 /* code for debug */
156 #if 0
157 {
158 int j;
159 cl_log(LOG_INFO, "client_argv: 0x%08lx", (unsigned long) client_argv);
160 cl_log(LOG_INFO, "Called arg");
162 for (j=0; argv[j] != NULL; ++j) {
163 cl_log(LOG_INFO, "argv[%d]: %s", j, argv[j]);
164 }
166 for (j=0; client_argv && client_argv[j] != NULL; ++j) {
167 if (ANYDEBUG) {
168 cl_log(LOG_INFO, "client_argv[%d]: %s", j, client_argv[j]);
169 }
170 }
171 }
172 #endif
174 do {
175 option_char = getopt(argc, argv, optstr);
177 if (option_char == -1) {
178 break;
179 }
181 switch (option_char) {
182 case 'r':
183 IS_RECOVERY = TRUE;
184 break;
186 case 'm':
187 if (optarg) {
188 MAGIC_EXIT_CODE = atoi(optarg);
189 }
190 break;
192 case 'i':
193 if (optarg) {
194 interval = atoi(optarg);
195 } else {
196 printf("error.\n");
197 }
198 break;
200 case 'p':
201 if (optarg) {
202 pidfile = optarg;
203 }
204 break;
205 case 'w':
206 if (optarg) {
207 apphb_warntime = atoi(optarg);
208 }
209 break;
211 case 'l':
212 break;
213 /* information */
214 return LSB_EXIT_OK;
216 case 'h':
217 printf("%s\n",Simple_helpscreen);
218 return LSB_EXIT_OK;
220 default:
221 cl_log(LOG_ERR, "getopt returned"
222 "character code %c.", option_char);
223 printf("%s\n",Simple_helpscreen);
224 return LSB_EXIT_EINVAL;
225 }
226 } while (1);
229 /*
230 * Now I suppose recovery program only pass the client name via
231 * environment variables.
232 */
233 if ( (IS_RECOVERY == FALSE) && (client_argv == NULL) ) {
234 cl_log(LOG_ERR, "Please give the program name which will be "
235 "run as a child process of cl_respawn.");
236 printf("%s\n", Simple_helpscreen);
237 exit(LSB_EXIT_EINVAL);
238 }
240 if ((IS_RECOVERY == TRUE ) && ( client_argv == NULL)) {
241 /*
242 * Here the client_argv must be NULL. At least now just
243 * suppose so.
244 */
245 /*
246 * From the environment variables to acquire the necessary
247 * information set by other daemons like recovery manager.
248 * RSP_PID: the PID of the process which need to be monitored.
249 * RSP_CMD: the command line to restart the program, which is
250 * the same as the input in command line as above.
251 */
252 if ( getenv("RSP_PID") == NULL ) {
253 cl_log(LOG_ERR, "cannot get monitored PID from the "
254 "environment variable which should be set by "
255 "the recovery program.");
256 exit(LSB_EXIT_EINVAL);
257 } else {
258 monitored_PID = atoi(getenv("RSP_PID"));
259 }
261 /*
262 * client_argv == NULL" indicates no client program passed as
263 * a parameter by others such as a recovery manager, so expect
264 * it will be passed by environment variable RSP_CMD, see as
265 * below. If cannot get it, quit.
266 */
267 if (client_argv == NULL) {
268 if (getenv("RSP_CMD") == NULL) {
269 cl_log(LOG_ERR, "cannot get the argument of the "
270 "monitored program from the environment "
271 "variable, which should be set by the "
272 "recovery program.");
273 }
275 if (0!=cmd_str_to_argv(getenv("RSP_CMD"), &client_argv)) {
276 cl_log(LOG_ERR, "Failed to transfer the CLI "
277 "string to the argv[] style.");
278 exit(LSB_EXIT_EINVAL);
279 }
280 }
281 }
283 /* Not use the API 'daemon' since it's not a POSIX's */
284 become_daemon();
286 /* Code for debug
287 int k = 0;
288 do {
289 cl_log(LOG_INFO,"%s", execv_argv[k]);
290 } while (execv_argv[++k] != NULL);
291 */
293 set_sigchld_proctrack(G_PRIORITY_HIGH,DEFAULT_MAXDISPATCHTIME);
295 if (( IS_RECOVERY == FALSE )) {
296 child_tmp = run_client_as_child(client_argv);
297 if (child_tmp > 0 ) {
298 cl_log(LOG_NOTICE, "started the monitored program %s, "
299 "whose PID is %d", client_argv[0], child_tmp);
300 } else {
301 exit(LSB_EXIT_GENERIC);
302 }
303 }
305 snprintf(app_instance, INSTANCE_NAME_LEN, "%s_%ldd"
306 , app_name, (long)getpid());
308 if (apphb_register(app_name, app_instance) != 0) {
309 cl_log(LOG_WARNING, "Failed to register with apphbd.");
310 cl_log(LOG_WARNING, "Maybe apphd isn't running.");
311 REGTO_APPHBD = FALSE;
312 } else {
313 REGTO_APPHBD = TRUE;
314 cl_log(LOG_INFO, "Registered with apphbd.");
315 apphb_setinterval(interval);
316 apphb_setwarn(apphb_warntime);
317 /* To avoid the warning when app_interval is very small. */
318 apphb_hb();
319 }
320 Gmain_timeout_add(interval - APPHB_INTVL_DETLA
321 , plumb_client_and_emit_apphb, client_argv);
323 mainloop = g_main_new(FALSE);
324 g_main_run(mainloop);
326 if ( REGTO_APPHBD == TRUE ) {
327 apphb_hb();
328 apphb_unregister();
329 }
331 return LSB_EXIT_OK;
332 }
334 static int
335 run_client_as_child(char * execv_argv[])
336 {
337 long pid;
338 int i;
340 if (execv_argv[0] == NULL) {
341 cl_log(LOG_ERR, "Null pointer to program name which need to"
342 "be executed.");
343 return LSB_EXIT_EINVAL;
344 }
346 pid = fork();
348 if (pid < 0) {
349 cl_log(LOG_ERR, "cannot start monitor program %s.",
350 execv_argv[0]);
351 return -1;
352 } else if (pid > 0) { /* in the parent process */
353 NewTrackedProc( pid, 1, PT_LOGVERBOSE
354 , execv_argv, &MonitoredProcessTrackOps);
355 monitored_PID = pid;
356 return pid;
357 }
359 /* Now in child process */
360 execvp(execv_argv[0], execv_argv);
361 /* if go here, there must be something wrong */
362 cl_log(LOG_ERR, "%s",strerror(errno));
363 cl_log(LOG_ERR, "execving monitored program %s failed.", execv_argv[0]);
365 i = 0;
366 do {
367 free(execv_argv[i]);
368 } while (execv_argv[++i] != NULL);
370 /* Since parameter error, donnot need to be respawned */
371 exit(MAGIC_EXIT_CODE);
372 }
374 /*
375 * Notes: Since the work dir is changed to "/", the client name should include
376 * pathname or it's located in the system PATH
377 */
378 static void
379 become_daemon(void)
380 {
382 int j;
384 if (pidfile) {
385 int runningpid;
386 if ((runningpid=cl_read_pidfile(pidfile)) > 0) {
387 cl_log(LOG_WARNING, "pidfile [%s] says we're already running as pid [%d]"
388 , pidfile, runningpid);
389 exit(LSB_EXIT_OK);
390 }
391 if (cl_lock_pidfile(pidfile) != 0) {
392 cl_log(LOG_ERR, "Cannot create pidfile [%s]"
393 , pidfile);
394 exit(LSB_EXIT_GENERIC);
395 }
396 }
397 #if 0
398 pid_t pid;
400 pid = fork();
402 if (pid < 0) {
403 cl_log(LOG_ERR, "cannot start daemon.");
404 exit(LSB_EXIT_GENERIC);
405 } else if (pid > 0) {
406 exit(LSB_EXIT_OK);
407 }
408 #endif
410 if (chdir("/") < 0) {
411 cl_log(LOG_ERR, "cannot chroot to /.");
412 exit(LSB_EXIT_GENERIC);
413 }
415 umask(022);
416 setsid();
418 for (j=0; j < 3; ++j) {
419 close(j);
420 (void)open("/dev/null", j == 0 ? O_RDONLY : O_RDWR);
421 }
423 CL_IGNORE_SIG(SIGINT);
424 CL_IGNORE_SIG(SIGHUP);
426 G_main_add_SignalHandler(G_PRIORITY_DEFAULT, SIGTERM, cl_respawn_quit, NULL, NULL);
427 }
429 static gboolean
430 plumb_client_and_emit_apphb(gpointer data)
431 {
432 pid_t new_pid;
433 char ** client_argv = (char **) data;
435 if ( REGTO_APPHBD == TRUE ) {
436 apphb_hb();
437 }
438 if (shutting_down) {
439 return TRUE;
440 }
441 /* cl_log(LOG_NOTICE,"donnot emit hb for test."); */
442 if ( IS_RECOVERY == TRUE && !(CL_PID_EXISTS(monitored_PID)) ) {
443 cl_log(LOG_INFO, "process %d exited.", monitored_PID);
445 new_pid = run_client_as_child(client_argv);
446 if (new_pid > 0 ) {
447 cl_log(LOG_NOTICE, "restart the monitored program %s,"
448 " whose PID is %d", client_argv[0], new_pid);
449 } else {
450 /*
451 * donnot let recovery manager restart me again, avoid
452 * infinite loop
453 */
454 cl_log(LOG_ERR, "Failed to restart the monitored "
455 "program %s, will exit.", client_argv[0]);
456 cl_respawn_quit(SIGTERM, NULL);
457 }
458 }
460 return TRUE;
461 }
463 static gboolean
464 cl_respawn_quit(int signo, gpointer user_data)
465 {
466 shutting_down = TRUE;
467 if (monitored_PID != 0) {
468 cl_log(LOG_INFO, "Killing pid [%d] with SIGTERM"
469 , monitored_PID);
470 /* DisableProcLogging(); */
471 if (kill(monitored_PID, SIGTERM) < 0) {
472 monitored_PID=0;
473 }else{
474 return TRUE;
475 }
476 }
478 if (mainloop != NULL && g_main_is_running(mainloop)) {
479 DisableProcLogging();
480 g_main_quit(mainloop);
481 } else {
482 apphb_unregister();
483 DisableProcLogging();
484 exit(LSB_EXIT_OK);
485 }
486 return TRUE;
487 }
/*
 * Split the command line into cl_respawn's own options and the monitored
 * program's argument vector.
 *
 * The monitored program is taken to be the first argument that does not
 * start with '-' and names an existing regular file with at least one
 * execute permission bit set. Everything from that argument on becomes
 * the client's argv.
 *
 * argc_p, argv_p: in/out; on success *argc_p is reduced to the index of
 *	the program and (*argv_p)[that index] is set to NULL, so getopt()
 *	only sees cl_respawn's own options.
 * client_argv_p: out; receives a newly allocated, NULL-terminated vector
 *	whose strings are heap copies, so that it may be released with
 *	free_argv(). Set to NULL when no program was found.
 *
 * Exits with status 1 if memory cannot be allocated.
 */
static void
separate_argv(int * argc_p, char *** argv_p, char *** client_argv_p)
{
	int		i, j;
	struct stat	buf;

	*client_argv_p = NULL;

	/* Search the first no-option parameter */
	for (i = 1; i < *argc_p; i++) {
		if ( ((*argv_p)[i][0] != '-')
		&&	(0 == stat((*argv_p)[i], &buf))
		&&	S_ISREG(buf.st_mode)
		&&	((S_IXUSR | S_IXGRP | S_IXOTH) & buf.st_mode) ) {
			break;
		}
	}

	/*
	 * Cannot find a valid program name which will be run as a child
	 * process of cl_respawn, may be a recovery.
	 */
	if (*argc_p == i) {
		return;
	}

	*client_argv_p = calloc(*argc_p - i + 1, sizeof(char*));
	if (*client_argv_p == NULL) {
		cl_perror("separate_argv:calloc: ");
		exit(1);
	}

	/*
	 * Copy the strings rather than aliasing the process's argv[]:
	 * elsewhere in this file the vector is released with free_argv(),
	 * which calls free() on every element, and free() on memory that
	 * did not come from malloc/calloc is undefined behaviour.
	 */
	for (j = i; j < *argc_p; j++) {
		size_t	len = strlen((*argv_p)[j]) + 1;
		char *	copy = malloc(len);

		if (copy == NULL) {
			cl_perror("separate_argv:malloc: ");
			exit(1);
		}
		memcpy(copy, (*argv_p)[j], len);
		(*client_argv_p)[j-i] = copy;
	}

	(*argv_p)[i] = NULL;
	*argc_p = i;
}
531 static int
532 cmd_str_to_argv(char * cmd_str, char *** client_argv_p)
533 {
534 const int MAX_NUM_OF_PARAMETER = 80;
535 char *pre, *next;
536 int index = 0;
537 int i, len_tmp;
539 if (cmd_str == NULL) {
540 return LSB_EXIT_EINVAL;
541 }
543 *client_argv_p = calloc(MAX_NUM_OF_PARAMETER, sizeof(char *));
544 if (*client_argv_p == NULL) {
545 cl_perror("cmd_str_to_argv:calloc: ");
546 return LSB_EXIT_GENERIC;
547 }
549 pre = cmd_str;
550 do {
551 next = strchr(pre,' ');
553 if (next == NULL) {
554 len_tmp = strnlen(pre, 80);
555 (*client_argv_p)[index] = calloc(len_tmp+1, sizeof(char));
556 if (((*client_argv_p)[index]) == NULL ) {
557 cl_perror("cmd_str_to_argv:calloc: ");
558 return LSB_EXIT_GENERIC;
559 }
560 strncpy((*client_argv_p)[index], pre, len_tmp);
561 break;
562 }
564 (*client_argv_p)[index] = calloc(next-pre+1, sizeof(char));
565 if (((*client_argv_p)[index]) == NULL ) {
566 cl_perror("cmd_str_to_argv:calloc: ");
567 return LSB_EXIT_GENERIC;
568 }
569 strncpy((*client_argv_p)[index], pre, next-pre);
571 /* remove redundant spaces between parametes */
572 while ((char)(*next)==' ') {
573 next++;
574 }
576 pre = next;
577 if (++index >= MAX_NUM_OF_PARAMETER - 1) {
578 break;
579 }
580 } while (1==1);
582 if (index >= MAX_NUM_OF_PARAMETER - 1) {
583 for (i = 0; i < MAX_NUM_OF_PARAMETER; i++) {
584 free((*client_argv_p)[i]);
585 }
586 free(*client_argv_p);
587 return LSB_EXIT_EINVAL;
588 }
590 (*client_argv_p)[index+1] = NULL;
592 return 0;
593 }
595 static void
596 monitoredProcessDied(ProcTrack* p, int status, int signo
597 , int exitcode, int waslogged)
598 {
599 pid_t new_pid;
600 char ** client_argv = (char **) p->privatedata;
601 const char * pname = p->ops->proctype(p);
603 if (shutting_down) {
604 cl_respawn_quit(SIGTERM, NULL);
605 p->privatedata = NULL;
606 return;
607 }
609 if ( exitcode == MAGIC_EXIT_CODE) {
610 cl_log(LOG_INFO, "Don't restart the monitored program"
611 " %s [%d], since we got the magic exit code."
612 , pname, p->pid);
613 free_argv(client_argv);
614 cl_respawn_quit(SIGTERM, NULL); /* Does NOT always exit */
615 return;
616 }
618 cl_log(LOG_INFO, "process %s[%d] exited, and its exit code is %d"
619 , pname, p->pid, exitcode);
620 if ( 0 < (new_pid = run_client_as_child(client_argv)) ) {
621 cl_log(LOG_NOTICE, "restarted the monitored program, whose PID "
622 " is %d", new_pid);
623 } else {
624 cl_log(LOG_ERR, "Failed to restart the monitored program %s ,"
625 "will exit.", pname );
626 free_argv(client_argv);
627 cl_respawn_quit(SIGTERM, NULL); /* Does NOT always exit */
628 return;
629 }
631 p->privatedata = NULL;
632 }
634 static void
635 monitoredProcessRegistered(ProcTrack* p)
636 {
637 cl_log(LOG_INFO, "Child process [%s] started [ pid: %d ]."
638 , p->ops->proctype(p), p->pid);
639 }
641 static const char *
642 monitoredProcessName(ProcTrack* p)
643 {
644 char ** argv = p->privatedata;
645 return argv[0];
646 }
/*
 * Release a NULL-terminated argument vector: every string up to the
 * terminating NULL, then the vector itself. A NULL argv is ignored.
 */
static void
free_argv(char ** argv)
{
	char **	cursor;

	if (argv == NULL) {
		return;
	}

	for (cursor = argv; *cursor != NULL; cursor++) {
		free(*cursor);
	}
	free(argv);
}