fork+exec+wait a child process added by aisa on Wed Oct 31 18:06:57 2012

/***
 ***
 *** Copyright (c) 2012 ".alyn.post." 
 ***
 *** Permission to use, copy, modify, and/or distribute this software for any
 *** purpose with or without fee is hereby granted, provided that the above
 *** copyright notice and this permission notice appear in all copies.
 ***
 *** THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 *** WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 *** MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 *** ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 *** WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 *** ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 *** OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 ***
 ***/

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

#include "forkexecwait.h"

extern char *__progname;

/**
 **
 ** global defines
 **
 **/

/*
 * The number of child processes.  Used for EINTR handling, where
 * we'll silently restart NPROC + 1 times before reporting EINTR
 * problems.  This allows SIGCHLD to be delivered for every
 * outstanding child, all of which restart the syscall.  Additional
 * signals could conceivably be delivered at the same time, but
 * that's worth letting the user know about.
 *
 */
#define NPROC 1

/*
 * For transient errors, the number of times to retry a syscall.
 * the retry count is also used as the number of seconds to wait
 * between retries.  A high count here might introduce unacceptable
 * delay.
 */
#define RETRY_SYSCALL 3

/**
 **
 ** global variables
 **
 **/
/*
 * state shared by the logging, signal and self-pipe helpers in
 * this file.  a single instance, |_g|, is reached through |g|.
 */
struct global
{
  /* messages with a level above this one are dropped by _log() */
  int loglevel;
  /* self-pipe: the signal handler writes [1], the poll loop reads [0] */
  int selfpipe[2];
  /* previous signal dispositions, indexed by signal number */
  struct sigaction sig_restore[_NSIG];
  /* jump buffer filled in by sigsetjmp() in forkexecwait() */
  sigjmp_buf env;
};

struct global _g;
struct global *g = &_g;


/*
 * fmt |o| into |s| in radix |r|.
 * if |w| is greater than the number
 * of characters written to |s|,
 * fill the remaining space in |s|
 * with |ch|.
 *
 * return the number of characters
 * written to |s|.
 *
 * if |s| is null, return the number
 * of characters required to fmt |o|.
 */
static size_t
fmtbase(char *s, size_t o, unsigned r, size_t w, int ch)
{
  int n;
  size_t l, d;
  size_t n0=o, r0=r;

  l=(size_t)1U;
  while(n0>=r0) {
    ++l;
    n0/=r0;
  }

  d=(size_t)0U;
  if (l sizeof(_loglevel) / sizeof(_loglevel[0]))
    loglevel = sizeof(_loglevel) / sizeof(_loglevel[0]) - 1;

  pid = getpid();

  v->iov_base = __progname;
  v->iov_len = strlen(v->iov_base);
  ++v;
  v->iov_base = "[";
  v->iov_len = strlen(v->iov_base);
  ++v;
  v->iov_base = &fmt[0];
  v->iov_len = fmtuint(&fmt[0], pid);
  ++v;
  v->iov_base = "]: ";
  v->iov_len = strlen(v->iov_base);
  ++v;

  v->iov_base = (char*)_loglevel[loglevel];
  v->iov_len = strlen(v->iov_base);
  ++v;
  v->iov_base = ": ";
  v->iov_len = strlen(v->iov_base);
  ++v;

  v->iov_base = (char*)syscall;
  v->iov_len = strlen(v->iov_base);
  ++v;
  v->iov_base = ": ";
  v->iov_len = strlen(v->iov_base);
  ++v;

  return v;
}

/*
 * append the tail of a log line to the iovec array at |v|.
 *
 * when |errnum| is non-zero the tail is ": <strerror text> (<number>)",
 * with the number formatted into the caller-supplied buffer |fmt|.
 * a newline entry is always written last.
 *
 * return a pointer one past the last entry written.
 */
static struct iovec*
_log_postamble(struct iovec *v, char *fmt, int errnum)
{
  struct iovec *cur = v;

  if (0 != errnum) {
    cur->iov_base = ": ";
    cur->iov_len = strlen(": ");
    cur++;

    cur->iov_base = strerror(errnum);
    cur->iov_len = strlen(cur->iov_base);
    cur++;

    cur->iov_base = " (";
    cur->iov_len = strlen(" (");
    cur++;

    /* the entry points into |fmt|, so |fmt| must stay alive until written */
    cur->iov_base = fmt;
    cur->iov_len = fmtuint(fmt, errnum);
    cur++;

    cur->iov_base = ")";
    cur->iov_len = strlen(")");
    cur++;
  }

  cur->iov_base = "\n";
  cur->iov_len = strlen("\n");
  cur++;

  return cur;
}

/*
 * write one log line to file descriptor 2 with a single writev().
 *
 * the line is the preamble built by _log_preamble(), then |msg|,
 * then each of |s0|..|s3| that is non-null (in that order), then
 * the postamble built by _log_postamble() for |errnum|.
 *
 * nothing is written when |loglevel| is above g->loglevel.  errno
 * is the same after the writev() as it was just before it.
 */
static void
_log(int loglevel,
     const char *syscall,
     int errnum,
     const char *msg,
     const char *s0,
     const char *s1,
     const char *s2,
     const char *s3)
{
  struct iovec vec[IOV_MAX];
  struct iovec *end;
  const char *extra[4];
  char pidbuf[FMT_UINT];
  char errbuf[FMT_UINT];
  size_t k;
  int keep_errno;

  if (loglevel > g->loglevel) return;

  extra[0] = s0;
  extra[1] = s1;
  extra[2] = s2;
  extra[3] = s3;

  end = _log_preamble(&vec[0], &pidbuf[0], loglevel, syscall);

  end->iov_base = (char*)msg;
  end->iov_len = strlen(msg);
  ++end;

  /* optional trailing strings; null entries are skipped */
  for (k = 0; k < sizeof(extra) / sizeof(extra[0]); ++k) {
    if (!extra[k]) continue;

    end->iov_base = (char*)extra[k];
    end->iov_len = strlen(extra[k]);
    ++end;
  }

  end = _log_postamble(end, &errbuf[0], errnum);

  keep_errno = errno;
  /* XXX: should we write to fd1 if we get a fd-specific error here? */
  writev(2, &vec[0], end - &vec[0]);
  errno = keep_errno;
}

/*
 * log |msg| followed by a byte count and, when |fd| is not -1, the
 * pipe file descriptor involved:
 *
 *   <msg> <bytes> byte(s)[ to pipe file descriptor <fd>]
 *
 * all formatting is done in fixed-size stack buffers.
 *
 * the callers' messages carry no trailing blank, so the blank that
 * separates |msg| from the count is added here, and both the
 * singular and the plural unit carry their own leading blank.
 */
static void
_log_nomem(int loglevel,
           const char *syscall,
           int errnum,
           const char *msg,
           int fd,
           size_t bytes)
{
  /* one byte more than a bare number needs, for the leading blank */
  char fmtbytes[FMT_UINT + 1],
       fmtfd[FMT_UINT];
  const char *unit = 1 == bytes ? " byte" : " bytes";
  size_t n;

  fmtbytes[0] = ' ';
  n = fmtuint(&fmtbytes[1], bytes);
  fmtbytes[1 + n] = '\0';

  if (-1 == fd) {
      _log(loglevel,
           syscall,
           errnum,
           msg,
           &fmtbytes[0],
           unit,
           0,
           0);
  } else {
      fmtfd[fmtuint(&fmtfd[0], fd)] = '\0';

      _log(loglevel,
           syscall,
           errnum,
           msg,
           &fmtbytes[0],
           unit,
           " to pipe file descriptor ",
           &fmtfd[0]);
  }
}

/*
 * log |msg| followed by one optional string |s0|.
 */
static void
_log1(int loglevel,
      const char *syscall,
      int errnum,
      const char *msg,
      const char *s0)
{
  const char *none = (const char*)0;

  _log(loglevel, syscall, errnum, msg, s0, none, none, none);
}

/*
 * log |msg| alone, with no trailing strings.
 */
static void
_log0(int loglevel, const char *syscall, int errnum, const char *msg)
{
  /* what _log1() would do when handed a null |s0| */
  _log(loglevel, syscall, errnum, msg, 0, 0, 0, 0);
}


/**
 **
 ** error handling
 **
 **/
/*
 * terminate the process through exit() with EXIT_FAILURE.
 */
__attribute__((noreturn)) static void
die()
{
  const int code = EXIT_FAILURE;

  exit(code);
}

/*
 * like die(), but terminates through _exit() instead of exit().
 * meant for failures that can happen in the child process.
 */
__attribute__((noreturn)) static void
_die()
{
  const int code = EXIT_FAILURE;

  _exit(code);
}

#if 0 /*XXX*/
/*
 * disabled by the surrounding #if 0: log through _log_nomem() and
 * then terminate with _die().
 */
__attribute__((noreturn)) static void
_die_nomem(int loglevel,
           const char *syscall,
           int errnum,
           const char *msg,
           int fd,
           size_t bytes)
{
  _log_nomem(loglevel, syscall, errnum, msg, fd, bytes);
  _die();
}
#endif


/*
 * low/no memory error handlers: used in signal handlers and to
 * report memory allocation errors.
 */
/*
 * report a failed allocation of |bytes| bytes at LOG_FATAL and
 * terminate with _die().
 */
__attribute__((noreturn)) static void
_die_malloc(int errnum, size_t bytes)
{
  /* -1: there is no file descriptor to report */
  const int nofd = -1;

  _log_nomem(LOG_FATAL, "malloc", errnum, "out of memory allocating", nofd, bytes);
  _die();
}

/*
 * log a failed read of |bytes| bytes from pipe descriptor |fd|.
 *
 * declared static void like the other _log_* helpers; without a
 * declared type the function was implicitly int with external
 * linkage, which C99 and later do not accept.
 */
static void
_log_readpipe(int loglevel, int errnum, int fd, size_t bytes)
{
  _log_nomem(loglevel, "read", errnum, "cannot read", fd, bytes);
}

/*
 * report a failed pipe read at LOG_FATAL, then exit via die().
 */
__attribute__((noreturn)) static void
die_readpipe(int errnum, int fd, size_t bytes)
{
  _log_readpipe(LOG_FATAL,
                errnum,
                fd,
                bytes);
  die();
}

/*
 * log a failed write of |bytes| bytes to pipe descriptor |fd|.
 *
 * declared static void like the other _log_* helpers; without a
 * declared type the function was implicitly int with external
 * linkage, which C99 and later do not accept.
 */
static void
_log_writepipe(int loglevel, int errnum, int fd, size_t bytes)
{
  _log_nomem(loglevel, "write", errnum, "cannot write", fd, bytes);
}

/*
 * report a failed pipe write at LOG_FATAL, then exit via die().
 */
__attribute__((noreturn)) static void
die_writepipe(int errnum, int fd, size_t bytes)
{
  _log_writepipe(LOG_FATAL,
                 errnum,
                 fd,
                 bytes);
  die();
}

/*
 * report a failed pipe write at LOG_FATAL, then exit via _die()
 * (that is, through _exit()).
 */
__attribute__((noreturn)) static void
_die_writepipe(int errnum, int fd, size_t bytes)
{
  _log_writepipe(LOG_FATAL,
                 errnum,
                 fd,
                 bytes);
  _die();
}

/*
 * report a read that returned zero bytes from |fd|, then exit
 * via die().
 */
__attribute__((noreturn)) static void
die_readpipehang(int fd)
{
  const size_t none = 0;

  _log_nomem(LOG_FATAL, "read", 0, "read", fd, none);
  die();
}

/*
 * report a write that transferred zero bytes to |fd|, then exit
 * via _die().
 */
__attribute__((noreturn)) static void
_die_writepipehang(int fd)
{
  const size_t none = 0;

  _log_nomem(LOG_FATAL, "write", 0, "wrote", fd, none);
  _die();
}

/*
 * report that |bytes| bytes of unexpected data were read from
 * |fd|, then exit via die().
 */
__attribute__((noreturn)) static void
die_readbaddata(int fd, size_t bytes)
{
  /* no errno is associated with this condition */
  const int noerr = 0;

  _log_nomem(LOG_FATAL, "read", noerr, "read unexpected data", fd, bytes);
  die();
}

/*
 * report that a read from |fd| returned |bytes| more bytes than
 * expected, then exit via die().
 */
__attribute__((noreturn)) static void
die_overreadpipe(int fd, size_t bytes)
{
  /* no errno is associated with this condition */
  const int noerr = 0;

  _log_nomem(LOG_FATAL, "read", noerr, "over read", fd, bytes);
  die();
}

/*
 * report that a write to |fd| transferred |bytes| more bytes than
 * expected, then exit via _die().
 */
__attribute__((noreturn)) static void
_die_overwritepipe(int fd, size_t bytes)
{
  /* no errno is associated with this condition */
  const int noerr = 0;

  _log_nomem(LOG_FATAL, "write", noerr, "over write", fd, bytes);
  _die();
}

/*
 * report a failure while polling the self-pipe |fd| for reading,
 * then exit via die().
 */
__attribute__((noreturn)) static void
die_readsignal(int fd)
{
  const size_t none = 0;

  _log_nomem(LOG_FATAL, "read", 0, "polling self-pipe for read", fd, none);
  die();
}


/*
 * regular error handlers.  Uses buffered i/o and provides more
 * flexibility in error reporting.
 */

/*
 * log a sigaction() failure for |signum|, naming the signal with
 * strsignal().
 */
static void
_log_sigaction(int loglevel, int errnum, int signum)
{
  const char *name = strsignal(signum);

  _log1(loglevel, "sigaction", errnum, "cannot set handler for ", name);
}

/*
 * report a sigaction() failure at LOG_FATAL, then exit via die().
 */
__attribute__((noreturn)) static void
die_sigaction(int errnum, int signum)
{
  _log_sigaction(LOG_FATAL,
                 errnum,
                 signum);
  die();
}

/*
 * report a sigaction() failure at LOG_FATAL, then exit via _die().
 * called in the child process, hence _exit.
 */
__attribute__((noreturn)) static void
_die_sigaction(int errnum, int signum)
{
  _log_sigaction(LOG_FATAL,
                 errnum,
                 signum);
  _die();
}

/*
 * log a close() failure for file descriptor |fd|.
 */
static void
_log_close(int loglevel, int errnum, int fd)
{
  char digits[FMT_UINT];
  size_t len;

  /* fmtuint() does not terminate the string; do so here */
  len = fmtuint(digits, fd);
  digits[len] = '\0';

  _log1(loglevel, "close", errnum, "cannot close fd ", digits);
}

/*
 * report a close() failure at LOG_FATAL, then exit via die().
 */
__attribute__((noreturn)) static void
die_close(int errnum, int fd)
{
  _log_close(LOG_FATAL,
             errnum,
             fd);
  die();
}

/*
 * report a close() failure at LOG_FATAL, then exit via _die().
 * called in the child process, hence _exit.
 */
__attribute__((noreturn)) static void
_die_close(int errnum, int fd)
{
  _log_close(LOG_FATAL,
             errnum,
             fd);
  _die();
}

/*
 * log a failure to set the close-on-exec flag on |fd|.
 */
static void
_log_coe(int loglevel, int errnum, int fd)
{
  char digits[FMT_UINT];
  size_t len;

  /* fmtuint() does not terminate the string; do so here */
  len = fmtuint(digits, fd);
  digits[len] = '\0';

  _log1(loglevel, "fcntl", errnum, "cannot set close-on-exec flag for fd ", digits);
}

/*
 * report a close-on-exec failure at LOG_FATAL, then exit via die().
 */
__attribute__((noreturn)) static void
die_coe(int errnum, int fd)
{
  _log_coe(LOG_FATAL,
           errnum,
           fd);
  die();
}

/*
 * log a dup2() failure for file descriptor |fd| at |loglevel|.
 *
 * the level requested by the caller is honoured: _fdmove() reports
 * interrupted calls at LOG_INFO, and those must not be logged as
 * fatal.
 */
static void
_log_dup2(int loglevel, int errnum, int fd)
{
  char fmt[FMT_UINT];
  fmt[fmtuint(&fmt[0], fd)] = '\0';
  _log1(loglevel, "dup2", errnum, "cannot duplicate fd ", &fmt[0]);
}

/*
 * report a dup2() failure at LOG_FATAL, then exit via _die().
 * called in the child process, hence _exit.
 */
__attribute__((noreturn)) static void
_die_dup2(int errnum, int fd)
{
  _log_dup2(LOG_FATAL,
            errnum,
            fd);
  _die();
}

/*
 * log a failure to exec |file|.
 */
static void
_log_execve(int loglevel, int errnum, const char *file)
{
  _log1(loglevel,
        "execve",
        errnum,
        "cannot exec child process ",
        file);
}

/*
 * report an exec failure at LOG_FATAL, then exit via _die().
 * called in the child process, hence _exit.
 */
__attribute__((noreturn)) static void
_die_execve(int errnum, const char *file)
{
  _log_execve(LOG_FATAL,
              errnum,
              file);
  _die();
}

/*
 * log a fork() failure; |file| names the program that was to be run.
 */
static void
_log_fork(int loglevel, int errnum, const char *file)
{
  _log1(loglevel,
        "fork",
        errnum,
        "cannot fork for child process ",
        file);
}

/*
 * report a fork() failure at LOG_FATAL, then exit via die().
 */
__attribute__((noreturn)) static void
die_fork(int errnum, const char *file)
{
  _log_fork(LOG_FATAL,
            errnum,
            file);
  die();
}

/*
 * log a failure to put |fd| into non-blocking mode.
 */
static void
_log_noblock(int loglevel, int errnum, int fd)
{
  char digits[FMT_UINT];
  size_t len;

  /* fmtuint() does not terminate the string; do so here */
  len = fmtuint(digits, fd);
  digits[len] = '\0';

  _log1(loglevel, "fcntl", errnum, "cannot set non-blocking mode for fd ", digits);
}

/*
 * report a non-blocking-mode failure at LOG_FATAL, then exit via
 * die().
 */
__attribute__((noreturn)) static void
die_noblock(int errnum, int fd)
{
  _log_noblock(LOG_FATAL,
               errnum,
               fd);
  die();
}

/*
 * log a pipe() failure.
 */
static void
_log_pipe(int loglevel, int errnum)
{
  /* what _log0() would do: no trailing string */
  _log1(loglevel, "pipe", errnum, "cannot create pipe", 0);
}

/*
 * report a pipe() failure at LOG_FATAL, then exit via die().
 */
__attribute__((noreturn)) static void
die_pipe(int errnum)
{
  const int level = LOG_FATAL;

  _log_pipe(level, errnum);
  die();
}

/*
 * log a poll() failure.
 */
static void
_log_poll(int loglevel, int errnum)
{
  /* what _log0() would do: no trailing string */
  _log1(loglevel, "poll", errnum, "cannot poll", 0);
}

/*
 * report a poll() failure at LOG_FATAL, then exit via die().
 */
__attribute__((noreturn)) static void
die_poll(int errnum)
{
  const int level = LOG_FATAL;

  _log_poll(level, errnum);
  die();
}

/*
 * report that poll() returned zero (timed out) although it was
 * called without a timeout, then exit via die().
 *
 * the message goes through _log0(); handing the log arguments to
 * die() itself only compiled because die() has no prototype, and
 * logged nothing.
 */
__attribute__((noreturn)) static void
die_polltimeout()
{
  _log0(LOG_FATAL, "poll", 0, "unexpected poll timeout");
  die();
}


/*
 * report that file descriptor |fd| cannot be polled, then exit
 * via die().
 */
__attribute__((noreturn)) static void
die_pollfd(int fd)
{
  char digits[FMT_UINT];
  size_t len;

  /* fmtuint() does not terminate the string; do so here */
  len = fmtuint(digits, fd);
  digits[len] = '\0';

  _log1(LOG_FATAL, "poll", 0, "cannot poll fd ", digits);
  die();
}

/*
 * log a waitpid() failure for child |pid|.
 */
static void
_log_waitpid(int loglevel, int errnum, pid_t pid)
{
  char digits[FMT_UINT];
  size_t len;

  /* fmtuint() does not terminate the string; do so here */
  len = fmtuint(digits, pid);
  digits[len] = '\0';

  _log1(loglevel, "waitpid", errnum, "cannot wait child process ", digits);
}

/*
 * report a waitpid() failure at LOG_FATAL, then exit via die().
 */
__attribute__((noreturn)) static void
die_waitpid(int errnum, pid_t pid)
{
  _log_waitpid(LOG_FATAL,
               errnum,
               pid);
  die();
}

/*
 * report that child |pid| could not be reaped (logged with
 * ECHILD), then exit via die().
 */
__attribute__((noreturn)) static void
die_waithang(pid_t pid)
{
  char digits[FMT_UINT];
  size_t len;

  /* fmtuint() does not terminate the string; do so here */
  len = fmtuint(digits, pid);
  digits[len] = '\0';

  _log1(LOG_FATAL, "waitpid", ECHILD, "unwaited child process ", digits);
  die();
}

/*
 * report that child |pid| produced a wait status that is neither
 * a normal exit nor a signal death (logged with EINVAL), then exit
 * via die().
 */
__attribute__((noreturn)) static void
die_waitstatus(pid_t pid)
{
  char digits[FMT_UINT];
  size_t len;

  /* fmtuint() does not terminate the string; do so here */
  len = fmtuint(digits, pid);
  digits[len] = '\0';

  _log1(LOG_FATAL, "waitpid", EINVAL, "unexpected status from child process ", digits);
  die();
}


/**
 **
 ** I/O handling
 **
 **/
/*
 * sleep for |retry| seconds.
 *
 * an interrupted sleep is resumed with the time nanosleep() reports
 * as remaining; any other nanosleep() failure ends the pause early.
 */
static void
pause_syscall(int retry)
{
  struct timespec left;

  left.tv_sec = retry;
  left.tv_nsec = 0;

  /* |left| is both the request and the place the remainder lands */
  while (-1 == nanosleep(&left, &left)) {
    if (EINTR != errno) {
      break; /* something went wrong, continue anyway */
    }
  }
}

/*
 * read up to |n| bytes from |fd| into |b| and return the count.
 *
 * EINTR restarts the read; once more than NPROC + 1 restarts have
 * happened each further one is logged at LOG_INFO.  any other
 * error is fatal (die_readpipe).
 */
static ssize_t
fdreadpipe(int fd, void *b, size_t n)
{
  ssize_t got;
  int interrupts = 0;

  for (;;) {
    got = read(fd, b, n);
    if (-1 != got) {
      return got;
    }

    if (EINTR != errno) {
      die_readpipe(errno, fd, n);
    }

    if (++interrupts > NPROC + 1) {
      _log_readpipe(LOG_INFO, errno, fd, n);
    }
  }
}

/*
 * write up to |n| bytes from |b| to |fd| and return the count.
 *
 * EINTR restarts the write; once more than NPROC + 1 restarts have
 * happened each further one is logged at LOG_INFO.  any other
 * error is fatal (die_writepipe).
 */
static ssize_t
fdwritepipe(int fd, const void *b, size_t n)
{
  ssize_t put;
  int interrupts = 0;

  for (;;) {
    put = write(fd, b, n);
    if (-1 != put) {
      return put;
    }

    if (EINTR != errno) {
      die_writepipe(errno, fd, n);
    }

    if (++interrupts > NPROC + 1) {
      _log_writepipe(LOG_INFO, errno, fd, n);
    }
  }
}

/*
 * same as fdwritepipe(), except that a fatal error terminates
 * through _die_writepipe() (that is, _exit) instead of exit.
 */
static ssize_t
_fdwritepipe(int fd, const void *b, size_t n)
{
  ssize_t put;
  int interrupts = 0;

  for (;;) {
    put = write(fd, b, n);
    if (-1 != put) {
      return put;
    }

    if (EINTR != errno) {
      _die_writepipe(errno, fd, n);
    }

    if (++interrupts > NPROC + 1) {
      _log_writepipe(LOG_INFO, errno, fd, n);
    }
  }
}

/*
 * create a pipe in |fd|.
 *
 * EINTR restarts the call (logged at LOG_INFO after NPROC + 1
 * restarts).  ENFILE is retried up to RETRY_SYSCALL times, logging
 * a warning and sleeping a little longer before each retry.
 * anything else, or ENFILE once the retries are used up, is fatal.
 */
static void
fdpipe(int fd[2])
{
  int interrupts = 0, exhausted = 0;

  for (;;) {
    if (-1 != pipe(fd)) {
      return;
    }

    if (EINTR == errno) {
      if (++interrupts > NPROC + 1) {
        _log_pipe(LOG_INFO, errno);
      }
      continue;
    }

    if (ENFILE == errno && ++exhausted <= RETRY_SYSCALL) {
      _log_pipe(LOG_WARNING, errno);
      pause_syscall(exhausted);
      continue;
    }

    /* not a transient condition, or no retries left */
    die_pipe(errno);
  }
}

/*
 * put |fd| into non-blocking mode, keeping its other status flags.
 *
 * the current flags are fetched first and checked on their own: a
 * failed F_GETFL returns -1, and OR-ing O_NONBLOCK into that would
 * hand an all-ones flag word to F_SETFL.
 *
 * EINTR restarts the whole sequence (logged at LOG_INFO after
 * NPROC + 1 restarts); any other error is fatal (die_noblock).
 */
static void
fdnoblock(int fd)
{
  int flags, r, retry = 0;

restart:
  flags = fcntl(fd, F_GETFL, 0);
  if (-1 == flags) {
    r = -1; /* errno was set by F_GETFL */
  } else {
    r = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
  }

  if (-1 == r) {
    switch(errno) {
    case EINTR:
      if (++retry > NPROC + 1) {
        _log_noblock(LOG_INFO, errno, fd);
      }
      goto restart;
    default:
      die_noblock(errno, fd);
    }
  }
}

static void
fdcoe(int fd)
{
  int r, retry = 0;

restart:
  r = fcntl(fd, F_SETFD, FD_CLOEXEC);
  if (-1 == r) {
    switch(errno) {
    case EINTR:
      if (retry--) {
        _log_coe(LOG_INFO, errno, fd);
      }
      goto restart;
    default:
      die_coe(errno, fd);
    }
  }
}

/*
 * create the self-pipe in |fd|: both ends are made non-blocking
 * and then marked close-on-exec.
 */
static void
fdselfpipe(int fd[2])
{
  int end;

  fdpipe(fd);

  for (end = 0; end < 2; ++end) {
    fdnoblock(fd[end]);
  }
  for (end = 0; end < 2; ++end) {
    fdcoe(fd[end]);
  }
}

/*
 * close |fd|; a value of -1 means "already closed" and is ignored.
 *
 * EINTR restarts the call (logged at LOG_INFO after NPROC + 1
 * restarts).  ENOMEM terminates through _die_close(), every other
 * error through die_close().
 */
static void
fdclose(int fd)
{
  int interrupts = 0;

  if (-1 == fd) return;

  while (-1 == close(fd)) {
    if (ENOMEM == errno) {
      _die_close(errno, fd);
    }
    if (EINTR != errno) {
      die_close(errno, fd);
    }
    if (++interrupts > NPROC + 1) {
      _log_close(LOG_INFO, errno, fd);
    }
  }
}

/*
 * close |fd|, terminating through _die_close() (that is, _exit)
 * on failure.
 *
 * like fdclose(), a value of -1 means "already closed" and is
 * ignored.  _proc_fini() and _fdclosepipe() walk every descriptor
 * slot, including ones that were closed and reset to -1 earlier;
 * without this guard those would fail with EBADF and kill the
 * process during cleanup.
 *
 * EINTR restarts the call (logged at LOG_INFO after NPROC + 1
 * restarts).
 */
static void
_fdclose(int fd)
{
  int r, retry = 0;

  if (-1 == fd) return;

restart:
  r = close(fd);
  if (-1 == r) {
    switch(errno) {
    case EINTR:
      if (++retry > NPROC + 1) {
        _log_close(LOG_INFO, errno, fd);
      }
      goto restart;
    default:
      _die_close(errno, fd);
    }
  }
}

/*
 * close both ends of the pipe in |fd| and mark them closed (-1).
 */
static void
fdclosepipe(int fd[2])
{
  int end;

  for (end = 0; end < 2; ++end) {
    fdclose(fd[end]);
    fd[end] = -1;
  }
}

/*
 * close both ends of the pipe in |fd| with _fdclose() and mark
 * them closed (-1).
 */
static void
_fdclosepipe(int fd[2])
{
  int end;

  for (end = 0; end < 2; ++end) {
    _fdclose(fd[end]);
    fd[end] = -1;
  }
}

/*
 * make |newfd| refer to what |oldfd| refers to, then close |oldfd|.
 *
 * it is possible here for a failure to occur when stderr is closed.
 *
 * used in the child process, so every failure path terminates
 * through _exit: the final close goes through _fdclose(), not
 * fdclose().
 *
 * when |oldfd| already is |newfd| there is nothing to move, and
 * closing |oldfd| would close the very descriptor the caller asked
 * for; return without touching it.
 *
 * EINTR restarts dup2() (logged at LOG_INFO after NPROC + 1
 * restarts); any other dup2() error is fatal (_die_dup2).
 */
static void
_fdmove(int oldfd, int newfd)
{
  int r, retry = 0;

  if (oldfd == newfd) return;

restart:
  r = dup2(oldfd, newfd);
  if (-1 == r) {
    switch(errno) {
    case EINTR:
      if (++retry > NPROC + 1) {
        _log_dup2(LOG_INFO, errno, oldfd);
      }
      goto restart;
    default:
      _die_dup2(errno, oldfd);
    }
  }

  _fdclose(oldfd);
}


/**
 **
 ** string library
 **
 **/

/*
 * a counted byte string.
 *
 * |alloc| tells who owns |buf|: non-zero means |buf| came from the
 * heap and is released by string_fini(); zero means |buf| points
 * at static, possibly read-only, memory and must not be freed.
 */
struct string
{
  size_t size;   /* number of bytes of content in |buf| */
  size_t alloc;  /* requested heap capacity, or 0 when not heap-owned */
  char *buf;     /* the bytes themselves; may be null */
};

/*
 * make |s| the empty string: no content, no buffer, nothing owned.
 */
static void
string_init(struct string *s)
{
  s->buf = (char*)0;
  s->size = s->alloc = 0;
}

/*
 * make |s| refer to the |n| bytes at |b| without copying them.
 * |alloc| is left at zero, so |b| is never freed through |s|.
 */
static void
string_init_static(struct string *s, char *b, size_t n)
{
  s->buf = b;
  s->size = n;
  s->alloc = 0;
}

/*
 * string_init_static() for a nul-terminated |b|.
 */
static void
string_init_static0(struct string *s, char *b)
{
  size_t len = strlen(b);

  string_init_static(s, b, len);
}

/*
 * resize the heap buffer of |s| to hold |alloc| bytes.
 *
 * the request is first rounded up to a multiple of 8; if that
 * fails the exact size is tried before giving up through
 * _die_malloc().  |s->alloc| records |alloc|, not the rounded size.
 */
static void
string_realloc(struct string *s, size_t alloc)
{
  size_t want = (alloc + 7) & -8;
  char *grown = realloc(s->buf, want);

  if (!grown) {
    want = alloc;
    grown = realloc(s->buf, want);
    if (!grown) _die_malloc(errno, want);
  }

  s->alloc = alloc;
  s->buf = grown;
}

/*
 * release the buffer of |s| if |s| owns it (non-null |buf| and
 * non-zero |alloc|).  |size| is deliberately left untouched.
 */
static void
string_fini(struct string *s)
{
  if (!s->buf || !s->alloc) return;

  free(s->buf);
  s->buf = (char*)0;
  s->alloc = 0;
}

/*
 * free the string object |s| itself.  a null |s| is accepted.
 */
static void
string_free(struct string *s)
{
  /* free() ignores a null pointer */
  free(s);
}

/*
 * append the |n| bytes at |b| to |s| and nul-terminate the result.
 *
 * if |s| does not own its buffer yet (|alloc| is zero), the current
 * content is first copied into freshly allocated memory.
 */
static void
string_append(struct string *s, char *b, size_t n)
{
  size_t alloc;
  char *sbuf;

  /*
   * avoid allocating memory for a static buffer if we're
   * not appending data.  Avoid having realloc free our
   * memory if the total allocation size is zero.
   */
  if (!n) return;

  /* add room for the nul terminator */
  alloc = s->size + n + 1;

  /* copy the static buffer into newly allocated memory. */
  if (!s->alloc) {
    sbuf = s->buf;
    s->buf = (char*)0;
    string_realloc(s, alloc);
    /* an empty string may carry a null buffer; don't hand it to memcpy */
    if (s->size) memcpy(s->buf, sbuf, s->size);
  } else if (alloc > s->alloc) {
    string_realloc(s, alloc);
  }

  memcpy(s->buf + s->size, b, n);
  s->size += n;
  s->buf[s->size] = '\0';
}

/*
 * string_append() for a nul-terminated |b|.
 */
static void
string_append0(struct string *s, char *b)
{
  size_t len = strlen(b);

  string_append(s, b, len);
}

/*
 * drop the first |n| bytes of |s|.
 *
 * an owned buffer has its remaining bytes moved to the front; a
 * static buffer is simply advanced.
 */
static void
string_seek(struct string *s, size_t n)
{
  size_t keep = s->size - n;

  if (!s->alloc) {
    s->buf += n;
  } else {
    memmove(s->buf, s->buf + n, keep);
  }

  s->size = keep;
}


/**
 **
 ** signal handling
 **
 **/
static void
signal_handle(int signum, void (*handler)(int))
{
  struct sigaction _sa, *sa = &_sa;
  int r, retry = 0;

  sa->sa_handler = handler;
  sa->sa_flags = SA_RESTART;

  sigemptyset(&sa->sa_mask);

restart:
  r = sigaction(signum, sa, &g->sig_restore[signum]);
  if (-1 == r) {
    switch(errno) {
    case ENOMEM:
      _die_sigaction(errno, signum);

    case EINTR:
      if (++retry > NPROC + 1) {
        _log_sigaction(LOG_INFO, errno, signum);
      }
      goto restart;
    default:
      die_sigaction(errno, signum);
    }
  }
}

/*
 * signal handler (installed for SIGCHLD by sigchld_trampoline).
 *
 * forwards the signal number as a single byte down the write end
 * of the self-pipe.  failures terminate through the _die_*
 * helpers, which call _exit rather than exit.  errno is restored
 * before returning.
 */
static void
_trampoline(int signum)
{
  const int errno_before = errno;
  const int wfd = g->selfpipe[1];
  char byte = (char)(unsigned char)(unsigned)signum;
  ssize_t wrote;

  wrote = _fdwritepipe(wfd, &byte, 1);

  if (0 == wrote) {
    _die_writepipehang(wfd);
  }
  if (1 != wrote) {
    _die_overwritepipe(wfd, wrote - 1);
  }

  /* exactly one byte went out: success */
  errno = errno_before;
}

/*
 * route SIGCHLD through _trampoline().
 */
static void
sigchld_trampoline()
{
  void (*handler)(int) = _trampoline;

  signal_handle(SIGCHLD, handler);
}

/*
 * ignore SIGPIPE.
 */
static void
sigpipe_ignore()
{
  void (*handler)(int) = SIG_IGN;

  signal_handle(SIGPIPE, handler);
}

/*
 * restore the disposition of |signum| that signal_handle() saved
 * in g->sig_restore[signum].
 *
 * failures are reported against |signum| itself; this routine is
 * also used for SIGPIPE, so naming SIGCHLD unconditionally would
 * misreport which signal could not be restored.
 *
 * EINTR restarts the call (logged at LOG_INFO after NPROC + 1
 * restarts).  ENOMEM terminates through _die_sigaction(), every
 * other error through die_sigaction().
 */
static void
signal_restore(int signum)
{
  int r, retry = 0;

restart:
  r = sigaction(signum, &g->sig_restore[signum], 0);
  if (-1 == r) {
    switch(errno) {
    case ENOMEM:
      _die_sigaction(errno, signum);

    case EINTR:
      if (++retry > NPROC + 1) {
        _log_sigaction(LOG_INFO, errno, signum);
      }
      goto restart;
    default:
      die_sigaction(errno, signum);
    }
  }
}

/*
 * restore the disposition of |signum| that signal_handle() saved
 * in g->sig_restore[signum].
 *
 * on failure, call _exit rather than exit.
 *
 * failures are reported against |signum| itself; this routine is
 * also used for SIGPIPE, so naming SIGCHLD unconditionally would
 * misreport which signal could not be restored.
 *
 * EINTR restarts the call (logged at LOG_INFO after NPROC + 1
 * restarts).
 */
static void
_signal_restore(int signum)
{
  int r, retry = 0;

restart:
  r = sigaction(signum, &g->sig_restore[signum], 0);
  if (-1 == r) {
    switch(errno) {
    case EINTR:
      if (++retry > NPROC + 1) {
        _log_sigaction(LOG_INFO, errno, signum);
      }
      goto restart;
    default:
      _die_sigaction(errno, signum);
    }
  }
}


/**
 **
 ** process/child handling library
 **
 **/
/*
 * one child process and the plumbing around it.
 *
 * index 0, 1 and 2 of |fd| and |stdio| correspond to the child's
 * standard input, output and error.
 */
struct proc
{
  /* one pipe per stream; [0] is the read end, [1] the write end */
  int fd[3][2];
  /* WCOREDUMP() of the child when it was killed by a signal, else 0 */
  int core;
  /* exit status, or 0x80 + signal number when killed by a signal */
  int status;
  /* child's process id; -1 before fork and after it has been reaped */
  pid_t pid;
  /* [0]: data to feed the child; [1], [2]: data collected from it */
  struct string stdio[3];
};

/*
 * allocate an uninitialized proc object.  allocation failure is
 * fatal (_die_malloc).
 */
static struct proc*
proc_new()
{
  struct proc *fresh = malloc(sizeof *fresh);

  if (!fresh) {
    _die_malloc(errno, sizeof *fresh);
  }

  return fresh;
}

/*
 * initialize |p|: create the three stdio pipes, clear the exit
 * information and start with three empty strings.
 */
static void
proc_init(struct proc *p)
{
  size_t k;

  for (k = 0; k < 3; ++k) {
    fdpipe(p->fd[k]);
  }

  p->pid = -1;
  p->status = 0;
  p->core = 0;

  for (k = 0; k < 3; ++k) {
    string_init(&p->stdio[k]);
  }
}

/*
 * initialize |p| like proc_init(), with the |n| bytes at |b| as
 * the data to feed to the child's standard input.  |b| is
 * referenced, not copied.
 */
static void
proc_init_stdin(struct proc *p, char *b, size_t n)
{
  size_t k;

  for (k = 0; k < 3; ++k) {
    fdpipe(p->fd[k]);
  }

  p->pid = -1;
  p->status = 0;
  p->core = 0;

  string_init_static(&p->stdio[0], b, n);
  for (k = 1; k < 3; ++k) {
    string_init(&p->stdio[k]);
  }
}

/*
 * proc_init_stdin() for a nul-terminated |b|.
 */
static void
proc_init_stdin0(struct proc *p, char *b)
{
  size_t len = strlen(b);

  proc_init_stdin(p, b, len);
}

/*
 * internal routine: doesn't accept a proc object.
 *
 * do a single read of at most PIPE_BUF bytes from |fd| and append
 * whatever arrived to |s|.
 */
static void
proc_readpipe(int fd, struct string *s)
{
  char chunk[PIPE_BUF];
  ssize_t got;

  got = fdreadpipe(fd, chunk, sizeof chunk);
  string_append(s, chunk, got);
}

/*
 * internal routine: doesn't accept a proc object.
 *
 * do a single write of the content of |s| to |fd|, drop the bytes
 * that were accepted from the front of |s| and return how many
 * bytes are still waiting to be written.
 */
static size_t
proc_writepipe(int fd, struct string *s)
{
  ssize_t sent = fdwritepipe(fd, s->buf, s->size);

  string_seek(s, sent);

  return s->size;
}

static void
proc_wait(struct proc *p)
{
  int r, retry = 0, wstatus, status, core;

restart:
  r = waitpid(p->pid, &wstatus, WNOHANG);
  switch(r) {
  case -1:
    switch(errno) {
    case EINTR:
      if (retry--) {
        _log_waitpid(LOG_INFO, errno, p->pid);
      }
      goto restart;
    default:
      die_waitpid(errno, p->pid);
    }

  case 0:
    die_waithang(p->pid);

  default:
    if (WIFEXITED(wstatus)) {
      core = 0;
      status = WEXITSTATUS(wstatus);
    } else if (WIFSIGNALED(wstatus)) {
      core = WCOREDUMP(wstatus);
      status = 0x80 + WTERMSIG(wstatus);
    } else {
      die_waitstatus(p->pid);
    }

    p->core = core;
    p->status = status;
    p->pid = -1;
  }
}

/*
 * consume one notification from the self-pipe |fd| and act on it.
 *
 * exactly one byte is expected and it must be SIGCHLD, in which
 * case the child of |p| is reaped.  an empty read, more than one
 * byte, or any other signal number is fatal.
 */
static void
proc_readsignal(struct proc *p, int fd)
{
  char chunk[PIPE_BUF];
  ssize_t got;
  int signum;

  got = fdreadpipe(fd, chunk, sizeof chunk);

  if (0 == got) {
    die_readpipehang(fd);
  }
  if (1 != got) {
    die_overreadpipe(fd, got - 1);
  }

  signum = (int)(char)(unsigned char)chunk[0];
  if (SIGCHLD != signum) {
    die_readbaddata(fd, got);
  }

  proc_wait(p);
}

/*
 * shuttle data between the parent and the child of |p| until all
 * three stdio pipes have been closed.
 *
 * polls four descriptors: the write end of the child's stdin, the
 * read ends of its stdout and stderr, and the read end of the
 * self-pipe |selfpipe|, on which signal notifications arrive.
 *
 *  - stdin is fed from p->stdio[0] and closed once everything has
 *    been written or the child hung up.
 *  - stdout/stderr are appended to p->stdio[1]/[2] and closed only
 *    once a read makes no progress (end of file).  closing on the
 *    first POLLHUP would throw away whatever is still buffered in
 *    the pipe beyond one PIPE_BUF-sized read.
 *  - a readable self-pipe is handed to proc_readsignal().
 *
 * NOTE(review): the loop ends as soon as the three stdio pipes are
 * closed, whether or not a SIGCHLD notification has been processed
 * by then -- confirm that callers can rely on p->status.
 */
static void
proc_poll(struct proc *p, int selfpipe)
{
  struct pollfd fds[4];
  /* nfds counts the stdio pipes still open; the self-pipe is not counted */
  int r, retry = 0, nfds = sizeof(fds) / sizeof(fds[0]) - 1;
  size_t i, remain, before;

  fds[0].fd = p->fd[0][1];
  fds[1].fd = p->fd[1][0];
  fds[2].fd = p->fd[2][0];
  fds[3].fd = selfpipe;

  fds[0].events = POLLOUT;
  fds[1].events = POLLIN;
  fds[2].events = POLLIN;
  fds[3].events = POLLIN;

  /* nothing to send: give the child end-of-file on stdin right away */
  if (!p->stdio[0].size) {
    fdclose(p->fd[0][1]); p->fd[0][1] = -1;
    fds[0].fd = -1;
    --nfds;
  }

restart:
  r = poll(&fds[0], sizeof(fds) / sizeof(fds[0]), INFTIM);
  switch(r) {
  case -1:
    switch(errno) {
    case EINTR:
      if (++retry > NPROC + 1) {
        _log_poll(LOG_INFO, errno);
      }
      goto restart;
    default:
      die_poll(errno);
    }

  case 0:
    die_polltimeout();

  default:
    for(i = 0; i < sizeof(fds) / sizeof(fds[0]); ++i) {
      /* already closed; poll ignores negative descriptors */
      if (-1 == fds[i].fd) {
        continue;
      }

      if (fds[i].revents & (POLLERR | POLLNVAL)) {
        die_pollfd(fds[i].fd);
      }

      switch(i) {
      case 0: /* stdin */
        /*
         * start from what is still queued, so that a wakeup without
         * POLLOUT does not look like "everything written".
         */
        remain = p->stdio[i].size;
        if (fds[i].revents & POLLOUT) {
          remain = proc_writepipe(fds[i].fd, &p->stdio[i]);
        }

        /*
         * if there is nothing else to write, or we got a hangup on
         * the other end, close our pipe.
         */
        if (!remain || (fds[i].revents & POLLHUP)) {
          fdclose(p->fd[i][1]); p->fd[i][1] = -1;
          fds[i].fd = -1;
          --nfds;
        }
        break;

      case 1: /* stdout */
      case 2: /* stderr */
        /*
         * read on data or hangup; after a hangup the read returns
         * immediately, with the buffered data or with end of file.
         */
        if (fds[i].revents & (POLLIN | POLLHUP)) {
          before = p->stdio[i].size;
          proc_readpipe(fds[i].fd, &p->stdio[i]);

          /* no progress means end of file: the pipe is drained */
          if (before == p->stdio[i].size) {
            fdclose(p->fd[i][0]); p->fd[i][0] = -1;
            fds[i].fd = -1;
            --nfds;
          }
        }
        break;

      case 3: /* self pipe */
        if (fds[i].revents & POLLIN) {
          proc_readsignal(p, fds[i].fd);
        }

        if (fds[i].revents & POLLHUP) {
          die_readsignal(fds[i].fd);
        }
        break;
      }
    }
  }
  if (nfds) goto restart;
}

/*
 * in the child: close the pipe ends that belong to the parent
 * (write end of stdin, read ends of stdout and stderr) and mark
 * them closed.
 */
static void
_proc_fini_child(struct proc *p)
{
  int *ends[3];
  size_t k;

  ends[0] = &p->fd[0][1];
  ends[1] = &p->fd[1][0];
  ends[2] = &p->fd[2][0];

  for (k = 0; k < sizeof(ends) / sizeof(ends[0]); ++k) {
    _fdclose(*ends[k]);
    *ends[k] = -1;
  }
}

/*
 * in the parent: close the pipe ends that belong to the child
 * (read end of stdin, write ends of stdout and stderr) and mark
 * them closed.
 */
static void
proc_fini_parent(struct proc *p)
{
  int *ends[3];
  size_t k;

  ends[0] = &p->fd[0][0];
  ends[1] = &p->fd[1][1];
  ends[2] = &p->fd[2][1];

  for (k = 0; k < sizeof(ends) / sizeof(ends[0]); ++k) {
    fdclose(*ends[k]);
    *ends[k] = -1;
  }
}

/*
 * tear down |p|: close every pipe end, mark each one closed, and
 * release the three stdio strings.
 */
static void
proc_fini(struct proc *p)
{
  size_t stream, end;

  /* XXX: do I ever get here when these aren't already closed? */
  for (stream = 0; stream < 3; ++stream) {
    for (end = 0; end < 2; ++end) {
      fdclose(p->fd[stream][end]);
      p->fd[stream][end] = -1;
    }
  }

  for (stream = 0; stream < 3; ++stream) {
    string_fini(&p->stdio[stream]);
  }
}

/*
 * tear down |p| like proc_fini(), but close the descriptors with
 * _fdclose().
 */
static void
_proc_fini(struct proc *p)
{
  size_t stream, end;

  /* XXX: do I ever get here when these aren't already closed? */
  for (stream = 0; stream < 3; ++stream) {
    for (end = 0; end < 2; ++end) {
      _fdclose(p->fd[stream][end]);
      p->fd[stream][end] = -1;
    }
  }

  for (stream = 0; stream < 3; ++stream) {
    string_fini(&p->stdio[stream]);
  }
}

/*
 * free the proc object |p| itself.  a null |p| is accepted.
 */
static void
proc_free(struct proc *p)
{
  /* free() ignores a null pointer */
  free(p);
}

/*
 * child side of the fork: wire the pipes of |p| to descriptors
 * 0, 1 and 2, restore the saved SIGCHLD and SIGPIPE dispositions
 * and exec |file| with |argv|.  never returns.
 *
 * EINTR from the exec is retried (logged at LOG_INFO after
 * NPROC + 1 retries).  ETXTBSY and ENFILE are retried up to
 * RETRY_SYSCALL times with a warning and a growing pause.
 * everything else is fatal.
 */
__attribute__((noreturn)) static void
proc_child(struct proc *p, const char *file, char *const argv[])
{
  int interrupted = 0, busy = 0;

  /*
   * XXX: we cannot longjmp from here.  die and exit instead,
   *      we'll pick the error up from our pipe.
   */

  /*
   * move standard error first, so that errors are reported through
   * our pipe.  descriptor 2 is not closed beforehand; the dup2() in
   * _fdmove() replaces it, so we are never without an error
   * descriptor.
   */
  _fdmove(p->fd[2][1], 2);
  p->fd[2][1] = -1;

  _fdclose(1);
  _fdmove(p->fd[1][1], 1);
  p->fd[1][1] = -1;

  _fdclose(0);
  _fdmove(p->fd[0][0], 0);
  p->fd[0][0] = -1;

  /* close unneeded file descriptors */
  _proc_fini_child(p);

  _signal_restore(SIGCHLD);
  _signal_restore(SIGPIPE);

  for (;;) {
    /* only comes back on failure */
    execvp(file, argv);

    if (EINTR == errno) {
      if (++interrupted > NPROC + 1) {
        _log_execve(LOG_INFO, errno, file);
      }
      continue;
    }

    if ((ETXTBSY == errno || ENFILE == errno) && ++busy <= RETRY_SYSCALL) {
      _log_execve(LOG_WARNING, errno, file);
      pause_syscall(busy);
      continue;
    }

    /* not a transient condition, or no retries left */
    _die_execve(errno, file);
  }
}

/*
 * parent side of the fork: drop the child's pipe ends, then run
 * the poll loop with the read end of the self-pipe.
 */
static void
proc_parent(struct proc *p)
{
  const int notify = g->selfpipe[0];

  proc_fini_parent(p);
  proc_poll(p, notify);
}

/*
 * fork; the child execs |file| with |argv| (proc_child, which does
 * not return) while the parent records the child's pid and runs
 * proc_parent().
 *
 * EINTR from fork() is retried (logged at LOG_INFO after NPROC + 1
 * retries).  EAGAIN is retried up to RETRY_SYSCALL times with a
 * log entry and a growing pause.  everything else is fatal.
 */
static void
proc_forkwait(struct proc *p, const char *file, char *const argv[])
{
  int interrupted = 0, again = 0;
  pid_t pid;

  for (;;) {
    pid = fork();

    if (0 == pid) {
      proc_child(p, file, argv);
    }
    if (-1 != pid) {
      break;
    }

    if (EINTR == errno) {
      if (++interrupted > NPROC + 1) {
        _log_fork(LOG_INFO, errno, file);
      }
      continue;
    }

    if (EAGAIN == errno && ++again <= RETRY_SYSCALL) {
      _log_fork(LOG_INFO, errno, file);
      pause_syscall(again);
      continue;
    }

    /* not a transient condition, or no retries left */
    die_fork(errno, file);
  }

  p->pid = pid;
  proc_parent(p);
}


/**
 **
 ** input/output marshalling
 **
 */

/* copy data from internal structures to output marshall. */
static void
output_marshall(struct output_marshall *o,
                struct proc *p)
{
  o->error = 0;
  o->msg = (char*)0;
  o->msgsize = 0;

  o->core = p->core;
  o->status = p->status;

  o->stdio[0] = p->stdio[1].buf;
  o->stdio[1] = p->stdio[2].buf;

  o->size[0] = p->stdio[1].size;
  o->size[1] = p->stdio[2].size;

  /*
   * caller is responsible for freeing memory
   */
  p->stdio[1].alloc = 0;
  p->stdio[2].alloc = 0;
}

/*
 * public entry point: run i->file with i->argv as a child process,
 * feed it i->size bytes from i->stdio on standard input, and fill
 * |o| with its exit information and collected stdout/stderr.
 *
 * for the duration of the call SIGCHLD is routed through the
 * self-pipe and SIGPIPE is ignored; both are restored afterwards.
 *
 * the sigsetjmp() below returns 0 on the direct call.  the other
 * cases are only reached if something jumps back to g->env.
 * NOTE(review): no siglongjmp() call appears in the visible part
 * of this file -- confirm whether cases 1 and 2 are reachable.
 */
void
forkexecwait(struct input_marshall *i, struct output_marshall *o)
{
  struct proc _p, *p = &_p;

  /* set globals */
  g->loglevel = i->loglevel;

  switch(sigsetjmp(g->env, 1)) {
  case 0:
    /* not responsible for freeing this memory */
    proc_init_stdin(p, i->stdio, i->size);

    fdselfpipe(g->selfpipe);
    sigchld_trampoline();
    sigpipe_ignore();

    /* returns once the parent-side poll loop has finished */
    proc_forkwait(p, i->file, i->argv);

    /* hands the stdout/stderr buffers over to |o| before proc_fini() */
    output_marshall(o, p);

    signal_restore(SIGPIPE);
    signal_restore(SIGCHLD);
    fdclosepipe(g->selfpipe);

    proc_fini(p);
    break;

  case 1: /* memory error */
    output_marshall(o, p);

    /*
     * XXX: on error, we can no longer longjmp from here.
     *      we may need fire-and-forget versions of these.
     *      perhaps a global variable for error discipline.
     */
    _signal_restore(SIGPIPE);
    _signal_restore(SIGCHLD);
    _fdclosepipe(g->selfpipe);

    _proc_fini(p);
    _exit(EXIT_FAILURE);

  case 2: /* exception */
  default:
    output_marshall(o, p);

    /*
     * XXX: on error, we can no longer longjmp from here.
     *      we may need fire-and-forget versions of these.
     *      perhaps a global variable for error discipline.
     */
    signal_restore(SIGPIPE);
    signal_restore(SIGCHLD);
    fdclosepipe(g->selfpipe);

    proc_fini(p);
    exit(EXIT_FAILURE);
  }
}