/* path: root/jobmon/jobmon.c
 * blob: baed742cd1745aad65339335a62d488ce799a1e1 */




























































































































































































































































                                                                                                                                                          
/**
 * Simple job monitor
 *
 * Usage:
 *    jobmon [-f] [-v] [jobs]
 *
 */

#include <sys/wait.h>

#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <time.h>
#include <unistd.h>

/* Set by -f/--foreground: stay attached instead of calling daemon(). */
bool foreground = false;
/* Set once daemonized: do_log() then writes to syslog instead of stdio. */
bool use_syslog = false;
/* Set by -v/--verbose: lets LOG_DEBUG messages through do_log(). */
bool verbose = false;
/*
 * Main-loop run flag, cleared by the SIGINT/SIGTERM handler. It is a
 * `volatile sig_atomic_t` because that is the only object type a signal
 * handler is guaranteed to be able to write safely.
 */
volatile sig_atomic_t keep_going = 1;

/*
 * Outcome of running rcctl against a job.
 * The numeric values correspond to `rcctl check` exit codes.
 */
enum job_status {
   job_ok      = 0,
   job_stopped = 1,
   job_error   = 2
};


/**
 * Emit one printf-style log message.
 *
 * LOG_DEBUG messages are dropped unless `verbose` is set. When `use_syslog`
 * is set the message goes to syslog; otherwise it is written to stdout
 * (notice/info/debug) or stderr (everything else), followed by a newline.
 * In foreground mode the line is prefixed with a local timestamp and a
 * severity label.
 *
 * @param[in] priority  syslog priority (LOG_DEBUG, LOG_INFO, ...)
 * @param[in] msg       printf-style format string
 * @param[in] ...       arguments for `msg`
 */
void
do_log (int priority, const char *msg, ...) {
   if (priority == LOG_DEBUG && ! verbose) return;

   va_list ap;
   va_start (ap, msg);

   if (use_syslog) {
      vsyslog (priority, msg, ap);
   } else {
      FILE *out = (priority == LOG_NOTICE ||
                   priority == LOG_INFO ||
                   priority == LOG_DEBUG) ? stdout : stderr;
      if (foreground) {
         char buf[4+1+2+1+2 + 1 + 2+1+2+1+2 + 1] = {0}; /* YYYY-MM-DD HH:MM:SS */
         time_t now = time (NULL);
         struct tm *tm = localtime (&now);
         const char *label;

         /* localtime() may fail, and a zero return from strftime() leaves
          * the buffer contents unspecified: fall back to an empty stamp. */
         if (tm == NULL ||
             strftime (buf, sizeof (buf), "%Y-%m-%d %H:%M:%S", tm) == 0) {
            buf[0] = '\0';
         }

         switch (priority) {
         case LOG_DEBUG:   label = "debug";   break;
         case LOG_INFO:    label = "info";    break;
         case LOG_NOTICE:  label = "notice";  break;
         case LOG_WARNING: label = "warning"; break;
         default:          label = "error";   break;
         }
         fprintf (out, "%s %s", buf, label);
      }
      vfprintf (out, msg, ap);
      fputc ('\n', out);
   }

   va_end (ap);
}

/*
 * Per-severity wrappers around do_log(). Each one prepends "[<line>]: ",
 * where <line> is the source line of the call site (__LINE__), to the
 * message. `m` must be a string literal, since it is concatenated with the
 * prefix at compile time. `##__VA_ARGS__` (a GNU extension) drops the
 * trailing comma when no extra arguments are supplied.
 */
#define debug(m, ...) do_log (LOG_DEBUG, "[%u]: " m, __LINE__, ##__VA_ARGS__)
#define info(m, ...) do_log (LOG_INFO, "[%u]: " m, __LINE__, ##__VA_ARGS__)
#define warning(m, ...) do_log (LOG_WARNING, "[%u]: " m, __LINE__, ##__VA_ARGS__)
#define error(m, ...) do_log (LOG_ERR, "[%u]: " m, __LINE__, ##__VA_ARGS__)

/**
 * Signal handler installed for SIGINT and SIGTERM.
 *
 * Only clears the `keep_going` flag; the main loop notices it and
 * winds down on its own.
 *
 * @param[in] sig  delivered signal number (unused)
 */
void
term (int sig) {
   (void) sig;
   keep_going = false;
}

/**
 * Launch an `rcctl` process against a job.
 *
 * @param[in] job  name of job
 * @param[in] arg  either `"check"` or `"start"`
 * @return status of the job
 */
enum job_status
run_rcctl (char *const arg, char *const job) {
   pid_t pid = fork ();

   if (pid < 0) {
      error ("fork: %s", strerror (errno));
      return job_error;
   }

   if (pid == 0) {
      char *const args[] = { "/usr/sbin/rcctl", arg, job, NULL };

      int fd = open ("/dev/null", O_RDWR);
      if (fd >= 0) {
         dup2 (fd, 0);
         dup2 (fd, 1);
         dup2 (fd, 2);
      }

      execv (args[0], args);
      error ("exec(rcctl) failed: %s", strerror (errno));
      exit (job_error);
   }

   int status;
   pid_t w = waitpid (pid, &status, 0);
   if (w < 0 && errno == EINTR) {
      return job_ok; /* most likely have received signal to exit, so do less work */
   } else if (w <= 0) {
      error ("waitpid(%d)", pid);
      return job_error;
   } else if (WIFEXITED (status)) {
      if (WEXITSTATUS (status) >= job_ok && WEXITSTATUS (status) <= job_error) {
         return WEXITSTATUS (status);
      }
      return job_error;
   }

   return job_stopped;
}

/* Shorthands for the two rcctl sub-commands passed to run_rcctl();
 * both evaluate to the enum job_status it returns. */
#define check_job(job) run_rcctl ("check", (job))
#define start_job(job) run_rcctl ("start", (job))

/**
 * Write the usage line to stdout and terminate the process.
 *
 * @param[in] ec  exit code handed to exit(); this function does not return
 */
void
help (int ec) {
   fputs ("usage: jobmon [-f|--foreground] [-v|--verbose] [job ...]\n", stdout);
   exit (ec);
}

int
main (int argc, char *argv[]) {
   struct option options[] = {
      { "verbose",    no_argument,       NULL, 'v' },
      { "foreground", no_argument,       NULL, 'f' },
      { "help",       no_argument,       NULL, 'h' },
      { 0, 0, 0, 0 }
   };
   char **jobs;
   int job_count, i, opt;
   unsigned int sleep_interval = 10;
   unsigned int logged_every = 0;
   unsigned int log_every = 60;

   while ((opt = getopt_long (argc, argv, "vfh", options, NULL)) != -1) {
      switch (opt) {
      case 'v':
         verbose = true;
         sleep_interval = 1;
         log_every = 4;
         break;
      case 'f':
         foreground = true;
         break;
      case 'h':
         help (EXIT_SUCCESS);
         break;
      default:
         help (EXIT_FAILURE);
      }
   }

   jobs = calloc (argc - optind, sizeof (char *));
   if (jobs == NULL) {
      error ("unable to allocate memory: %s", strerror (errno));
      exit (EXIT_FAILURE);
   }

   for (i = optind, job_count = 0; i < argc; i++) {
      jobs[job_count++] = argv[i];
   }

   if (job_count == 0) {
      info ("no jobs");
      exit (0);
   }

   if (! foreground) {
      if (daemon (0, 0) < 0) {
         error ("failed to daemonize: %s", strerror (errno));
         exit (EXIT_FAILURE);
      }
      openlog ("jobmon", 0, LOG_DAEMON);
      use_syslog = true;
   }

   struct sigaction sa = { 0 };
   sa.sa_handler = term;
   if (sigaction (SIGINT, &sa, NULL) < 0 ||
       sigaction (SIGTERM, &sa, NULL) < 0) {
      warning ("failed to install signal handler: %s", strerror (errno));
   }

   if (unveil ("/usr/sbin/rcctl", "x") < 0 ||
       unveil ("/dev/null", "rw") < 0 ||
       unveil (NULL, NULL)) {
      error ("unveil failed: %s", strerror (errno));
      exit (EXIT_FAILURE);
   }
   if (pledge ("proc exec stdio", NULL) < 0) {
      error ("pledge failed: %s", strerror (errno));
      exit (EXIT_FAILURE);
   }

   while (keep_going) {
      char *ok_jobs[job_count];
      int ok = 0;
      size_t bytes = 0;

      for (int j = 0; j < job_count && keep_going; j++) {
         enum job_status job_status = check_job (jobs[j]);

         if (job_status == job_ok) {
            ok_jobs[ok++] = jobs[j];
            bytes += strlen (jobs[j]) + 1;
         } else if (job_status == job_stopped) {
            info ("restarting %s", jobs[j]);
            start_job (jobs[j]);
         } else {
            info ("invalid job, removing from list %s", jobs[j]);
            if (j < job_count - 1) {
               memmove (&jobs[j], &jobs[j + 1], sizeof (char *) * (job_count - j - 1));
            }
            j--;
            if (--job_count == 0) { keep_going = false; goto out; }
         }
      }

      if ((logged_every++ % log_every) == 0 && ok) {
         /* Reduce logging when all is well */
         char *buf = calloc (bytes, sizeof (char));
         while (--ok >= 0) {
            strlcat (buf, ok_jobs[ok], bytes);
            if (ok) strlcat (buf, ",", bytes);
         }
         info ("%s ok", buf);
         free (buf);
      }

      sleep (sleep_interval);
     out:;
   }

   free (jobs);
   info ("exiting");

   if (use_syslog) {
      closelog ();
   }

   return 0;
}