/* 
logpp (Log PreProcessor) 0.15 - input.c
Copyright (C) 2006-2008 Risto Vaarandi

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
*/

#include "common.h"
#include "logpp.h"

/* open_src_file() opens the input source 'file' and prepares it for
   reading. A source named "-" is standard input: it is duplicated and
   treated like a FIFO, and it cannot be opened when running as a daemon.
   A regular file is opened read-only and its file offset is moved to the
   end of the file if 'pos' is -1, or to the absolute offset 'pos'
   otherwise. A FIFO is opened read-write. Any other file type is rejected.
   On success the 'status', 'fd' and buffer fields of 'file' are filled in
   (also 'dev' and 'ino' for regular files and FIFOs, and 'pos' for regular
   files) and the new file descriptor is returned. On failure the 'fd'
   field is set to -1 and -1 is returned; 'status' is then SDSTAT_UNKN,
   or SDSTAT_NONE if the file does not exist */

int
open_src_file(struct src *file, off_t pos)
{
  struct stat st;
  int from_stdin, fd, rc, k;
  off_t offset;

  /* nothing is known about the source until it has been examined */
  file->status = SDSTAT_UNKN;

  /* if REOPENINT is set, record the current time in the 'tloa'
     (time of last open) field */
  if (REOPENINT) file->tloa = get_time();

  /* the name "-" denotes standard input */
  from_stdin = (file->name[0] == '-' && file->name[1] == '\0');

  /* for a named file, fetch its status info first, retrying stat() when
     it is interrupted by a signal; a missing file is reported through the
     'status' field only, every other failure is logged */
  if (!from_stdin) {
    do {
      rc = stat(file->name, &st);
    } while (rc == -1 && errno == EINTR);

    if (rc == -1) {
      if (errno == ENOENT)
        file->status = SDSTAT_NONE;
      else
        log_msg(LOG_ERR, "stat(%s) error (%s)", file->name, strerror(errno));
      goto fail;
    }
  }

  if (from_stdin) {
    /* standard input is not available to a daemon */
    if (DAEMON) {
      log_msg(LOG_ERR, "Can't open standard input when running as a daemon");
      goto fail;
    }

    /* duplicate descriptor 0, retrying when interrupted by a signal */
    do {
      fd = dup(0);
    } while (fd == -1 && errno == EINTR);

    if (fd == -1) {
      log_msg(LOG_ERR, "dup() error (%s)", strerror(errno));
      goto fail;
    }

    /* from now on standard input is handled as a FIFO-like source */
    file->fd = fd;
    file->status = SDSTAT_FIFO;

  } else if (S_ISREG(st.st_mode)) {
    /* regular file: open it read-only, retrying when interrupted */
    do {
      fd = open(file->name, O_RDONLY);
    } while (fd == -1 && errno == EINTR);

    if (fd == -1) {
      log_msg(LOG_ERR, "open(%s) error (%s)", file->name, strerror(errno));
      goto fail;
    }

    /* position the file offset as requested by 'pos' */
    offset = (pos == -1) ? lseek(fd, 0, SEEK_END) : lseek(fd, pos, SEEK_SET);

    if (offset == (off_t) -1) {
      /* seeking failed: log it and give the descriptor back */
      log_msg(LOG_ERR, "lseek(%s) error (%s)", file->name, strerror(errno));
      while (close(fd) == -1 && errno == EINTR);
      goto fail;
    }

    /* record the descriptor, the identity of the file (device and inode
       numbers) and the current file offset */
    file->fd = fd;
    file->status = SDSTAT_REG;
    file->dev = st.st_dev;
    file->ino = st.st_ino;
    file->pos = offset;

  } else if (S_ISFIFO(st.st_mode)) {
    /* FIFO: open it with O_RDWR, since a plain O_RDONLY open blocks while
       the FIFO has no writers; being a writer ourselves avoids that */
    do {
      fd = open(file->name, O_RDWR);
    } while (fd == -1 && errno == EINTR);

    if (fd == -1) {
      log_msg(LOG_ERR, "open(%s) error (%s)", file->name, strerror(errno));
      goto fail;
    }

    /* record the descriptor and the identity of the FIFO */
    file->fd = fd;
    file->status = SDSTAT_FIFO;
    file->dev = st.st_dev;
    file->ino = st.st_ino;

  } else {
    /* neither a regular file nor a FIFO */
    log_msg(LOG_ERR, "%s is of unsupported type", file->name);
    goto fail;
  }

  /* allocate the IO buffer of 3*BLOCKSIZE-2 bytes (before each read(2)
     the buffer holds at most BLOCKSIZE-1 bytes and the data offset
     (file->iobuf_pos - file->iobuf) is at most BLOCKSIZE-1) */
  file->iobuf = (char *) my_malloc(sizeof(char) * (3*BLOCKSIZE - 2));
  file->iobuf_len = 0;
  file->iobuf_pos = file->iobuf;

  /* allocate the input buffer that keeps the IBUFSIZE most recently read
     lines; each line gets BLOCKSIZE+1 bytes, the extra byte being for the
     terminating 0 that some output methods need */
  file->lbuf = (struct line *) my_malloc(sizeof(struct line) * IBUFSIZE);

  k = 0;
  while (k < IBUFSIZE) {
    file->lbuf[k].len = 0;
    file->lbuf[k].bytes = (char *) my_malloc(sizeof(char) * (BLOCKSIZE + 1));
    ++k;
  }

  file->lbuf_pos = IBUFSIZE - 1;

  /* success: hand back the file descriptor */
  return fd;

fail:
  /* common failure exit: mark the source as not open */
  file->fd = -1;
  return -1;
}

/* close_src_file() closes the descriptor stored in the 'fd' field of
   'file', sets that field to -1 and releases the IO buffer and the input
   buffer of 'file'. An interrupted close() is retried. The function
   returns 1 if close() succeeded and 0 if it failed (the failure is
   logged); the buffers are released in both cases */

int
close_src_file(struct src *file)
{
  int ok, rc, k;

  ok = 1;

  /* call close() again for as long as it is interrupted by a signal */
  do {
    rc = close(file->fd);
  } while (rc == -1 && errno == EINTR);

  if (rc == -1) {
    log_msg(LOG_ERR, "close(%s) error (%s)", file->name, strerror(errno));
    ok = 0;
  }

  /* the source no longer has an open descriptor */
  file->fd = -1;

  /* release every line of the input buffer, then the buffers themselves */
  k = 0;
  while (k < IBUFSIZE) {
    my_free(file->lbuf[k].bytes);
    ++k;
  }
  my_free(file->lbuf);
  my_free(file->iobuf);

  return ok;
}

/* open_inputs() opens all input source files */

void
open_inputs(void)
{
  struct input *input;
  int i;

  for (input = INPUTLIST; input; input = input->next)
    for (i = 0; i < input->srcl_size; ++i) 
      if (open_src_file(input->srclist + i, -1) == -1)
        log_msg(LOG_ERR, "Failed to open input file %s", 
                         input->srclist[i].name);
}

/* close_inputs() closes all input source files */

void
close_inputs(void)
{
  struct input *input;
  int i;

  for (input = INPUTLIST; input; input = input->next)
    for (i = 0; i < input->srcl_size; ++i) 
      if (input->srclist[i].fd != -1 &&
          !close_src_file(input->srclist + i))
        log_msg(LOG_ERR, "Failed to close input file %s", 
                         input->srclist[i].name);
}

/* read_line_from_iobuf() reads a line (a byte sequence terminated by 
   a newline) from the IO buffer of 'file' and places it to the input 
   buffer of 'file', returning 1 if there was a line in the IO buffer 
   and 0 otherwise. 
   During buffer updates, the function keeps the data offset within the 
   first block of the buffer */

int
read_line_from_iobuf(struct src *file)
{
  char *pos, *ptr;
  size_t i, n;

  /* search for a newline in the IO buffer - since the length of a line
     is limited to BLOCKSIZE, search at most BLOCKSIZE first bytes in
     the IO buffer */
  if (file->iobuf_len < BLOCKSIZE) 
    pos = (char *) memchr(file->iobuf_pos, NEWLINE, file->iobuf_len);
  else
    pos = (char *) memchr(file->iobuf_pos, NEWLINE, BLOCKSIZE);

  if (pos) {
    /* if a newline was found, move the offset of the input buffer forward */
    file->lbuf_pos = (file->lbuf_pos + 1) % IBUFSIZE;
    n = pos - file->iobuf_pos;
    i = 0;
    /* copy bytes that precede newline to the input buffer, replacing 
       zeros with REPLACENULL characters */
    for (;;) {
      ptr = (char *) memccpy(file->lbuf[file->lbuf_pos].bytes + i,
                             file->iobuf_pos + i, 0, n - i);
      if (!ptr) break;
      *(ptr - 1) = REPLACENULL;
      i = ptr - file->lbuf[file->lbuf_pos].bytes;
    }
    /* set the terminating 0 and length for the line in input buffer */
    file->lbuf[file->lbuf_pos].bytes[n] = 0;
    file->lbuf[file->lbuf_pos].len = n;
    /* update the offset and byte counter for the IO buffer */
    file->iobuf_pos = pos + 1;
    file->iobuf_len -= n + 1;
    /* if the data offset of the IO buffer is not within the first block,
       move all bytes in the buffer to the beginning of the buffer (note
       that at this point the buffer contains less than BLOCKSIZE bytes) */
    if (file->iobuf_pos >= file->iobuf + BLOCKSIZE) {
      memcpy(file->iobuf, file->iobuf_pos, file->iobuf_len);
      file->iobuf_pos = file->iobuf; 
    }
    /* increment the counter of read lines */
    ++file->counter;
    return 1;
  }

  return 0;
}

/* read_block_from_iobuf() reads a block (BLOCKSIZE bytes) from the IO 
   buffer of 'file' and places it to the input buffer of 'file', returning 
   1 if there were enough bytes in the IO buffer and 0 otherwise.
   During buffer updates, the function keeps the data offset within the 
   first block of the buffer */

int
read_block_from_iobuf(struct src *file)
{
  char *ptr;
  size_t i;

  /* check if there are at least BLOCKSIZE bytes in the IO buffer */
  if (file->iobuf_len >= BLOCKSIZE) {
    /* if there is a block, move the offset of the input buffer forward */
    file->lbuf_pos = (file->lbuf_pos + 1) % IBUFSIZE;
    i = 0;
    /* copy BLOCKSIZE bytes to the input buffer, replacing zeros with 
       REPLACENULL characters */
    for (;;) {
      ptr = (char *) memccpy(file->lbuf[file->lbuf_pos].bytes + i,
                             file->iobuf_pos + i, 0, BLOCKSIZE - i);
      if (!ptr) break;
      *(ptr - 1) = REPLACENULL;
      i = ptr - file->lbuf[file->lbuf_pos].bytes;
    }
    /* set the terminating 0 and length for the line in input buffer */
    file->lbuf[file->lbuf_pos].bytes[BLOCKSIZE] = 0;
    file->lbuf[file->lbuf_pos].len = BLOCKSIZE;
    /* update the offset and byte counter for the IO buffer */
    file->iobuf_pos += BLOCKSIZE;
    file->iobuf_len -= BLOCKSIZE;
    /* since the data offset of the IO buffer is not within the first block,
       move all bytes in the buffer to the beginning of the buffer (note
       that at this point the buffer contains less than BLOCKSIZE bytes) */
    memcpy(file->iobuf, file->iobuf_pos, file->iobuf_len);
    file->iobuf_pos = file->iobuf; 
    /* increment the counter of read lines */
    ++file->counter;
    return 1;
  }

  return 0;
}

/* read_line() reads a line from input source 'file', returning 1 if the
   read was successful, 0 if there was no data, and -1 for IO error
   (note that after each invocation of this function there are always
   less than BLOCKSIZE bytes in the IO buffer of 'file') */

int
read_line(struct src *file)
{
  ssize_t nbytes;
  fd_set rfds;
  struct timeval timeout;
  int retval;
  off_t p;

  /* if there is a complete line in the IO buffer, read it and return
     (note that before the first invocation the IO buffer is empty, while
     before following invocations it contains less than BLOCKSIZE bytes) */
  if (read_line_from_iobuf(file)) return 1;

  /* the input source is a regular file: read(2) returns -1 for IO error
     and 0 for EOF */
  if (file->status == SDSTAT_REG) {
    nbytes = read(file->fd, file->iobuf_pos + file->iobuf_len, BLOCKSIZE); 

    if (nbytes == -1) {
      /* if an IO error occurred, file offset could have moved (note that
         according to posix this can also happen if errno == EINTR);
         learn the current position of the offset and store it */
      p = lseek(file->fd, 0, SEEK_CUR);
      if (p == (off_t) -1) {
        log_msg(LOG_ERR, "lseek(%s) error (%s)", file->name, strerror(errno));
        return -1;
      }
      file->pos = p;
      /* if read(2) was interrupted by a signal, return 0, otherwise -1 */
      if (errno == EINTR) return 0;
      log_msg(LOG_ERR, "read(%s) error (%s)", file->name, strerror(errno));
      return -1;
    } 
    else if (nbytes == 0) return 0; /* read(2) returned EOF */

    file->pos += nbytes;
    file->iobuf_len += nbytes;
  } 
  /* the input source is a FIFO: use select(2) first for checking if new
     data are available (-1 means IO error and 0 no new data), then use
     read(2) for fetching the new data (-1 means IO error and 0 that
     there are no writers on FIFO, but since we have opened the FIFO in
     RW-mode, this must be an IO error) */
  else if (file->status == SDSTAT_FIFO) {
    FD_ZERO(&rfds);
    FD_SET(file->fd, &rfds);

    timeout.tv_sec = 0;
    timeout.tv_usec = 0;

    retval = select(file->fd + 1, &rfds, 0, 0, &timeout);

    if (retval == -1) {
      if (errno == EINTR) return 0;
      log_msg(LOG_ERR, "select(%s) error (%s)", file->name, strerror(errno));
      return -1;
    }
    else if (retval == 0) return 0;

    nbytes = read(file->fd, file->iobuf_pos + file->iobuf_len, BLOCKSIZE); 

    if (nbytes == -1) {
      if (errno == EINTR) return 0;
      log_msg(LOG_ERR, "read(%s) error (%s)", file->name, strerror(errno));
      return -1;
    } 
    else if (nbytes == 0) {
      log_msg(LOG_ERR, "No writers for %s", file->name);
      return -1;
    } 

    file->iobuf_len += nbytes;
  } 

  /* Note that at this point the IO buffer contains M+N bytes, with M and
     N bytes being there before and after read(2), respectively; also note
     that M < BLOCKSIZE, N <= BLOCKSIZE, and M bytes do not contain a newline
     (otherwise read_line_from_iobuf() in the beginning of this function 
     would have been successful). This means that if read_line_from_iobuf() 
     succeeds, the IO buffer will contain less than N bytes (which is also 
     less than BLOCKSIZE); if read_block_from_iobuf() succeeds, there will 
     be less than BLOCKSIZE bytes in the IO buffer (since M+N < 2*BLOCKSIZE); 
     if both functions fail, there was less than BLOCKSIZE bytes in the IO 
     buffer initially without a newline among them. */
  if (read_line_from_iobuf(file)) return 1;
  if (read_block_from_iobuf(file)) return 1;

  return 0;
}

/* src_file_shuffled() tells whether 'file' has to be reopened: either
   because its name now refers to a different device/i-node pair than the
   one recorded in 'file' (the file has been recreated), or because it is
   a regular file whose size is now less than the recorded file offset
   (the file has been truncated). The function returns 1 if a reopen is
   needed and 0 otherwise */

int
src_file_shuffled(struct src *file)
{
  struct stat st;
  int reopen;

  /* standard input (the name "-") never needs a reopen */
  if (file->name[0] == '-' && file->name[1] == '\0') return 0;

  /* if stat() by file name fails, the directory entry has been removed
     or the new inode could not be queried because of a system error;
     in both cases it is best to keep the old inode open */
  if (stat(file->name, &st) == -1) return 0;

  reopen = 0;

  if (!(file->dev == st.st_dev && file->ino == st.st_ino)) {
    /* the name refers to a new inode */
    reopen = 1;
  } else if (file->status == SDSTAT_REG && st.st_size < file->pos) {
    /* a regular file has become shorter than the current file offset */
    reopen = 1;
  }

  return reopen;
}
