/* $Id: filestore_input.c 736 2006-06-13 18:45:39Z jim $
   teebu - An archiving tool
   Copyright (C) 2006 Jim Farrand

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program; if not, write to the Free Software Foundation, Inc., 51
   Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <regex.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include <utime.h>

#include "baseio.h"
#include "datapairstore.h"
#include "hashtable.h"
#include "logging.h"
#include "md5sumio.h"
#include "nullio.h"
#include "sha1sumio.h"
#include "terminatedint.h"
#include "uidcache.h"
#include "unixio.h"

#include "filestore_input.h"
#include "filestore_common.h"

/* What an input pass does with the entries it reads from the archive. */
typedef enum
{
  IN_MODE_VERIFY,     /* compare archived entries against the filesystem */
  IN_MODE_CHECK,      /* read file data through to a null output */
  IN_MODE_EXTRACT,    /* write archived entries out to the filesystem */
  IN_MODE_IDENTIFY    /* stop once the chunk filter tag has been handled */
} input_mode_t;

/* Per-entry state shared by the tag handlers while an input file is read.
 * reset_context() clears the per-file members (type, path, mode, owner,
 * group, size, times, checksum flags); the remaining members are left
 * untouched by it. */
typedef struct
{
  filestore_in_t      context_fsi;            /* store being read */
  filestore_id_t      context_id;
  filestore_stats_t * context_fsstats;        /* counters updated via STAT_INCR */
  input_mode_t        context_input_mode;
  char                context_filetype;       /* first byte of the filetype tag */
  path_t              context_path;           /* context_pfix plus the archived path */
  path_t              context_pfix;
  mode_t              context_file_mode;
  uid_t               context_file_owner;     /* -1 after reset_context() */
  gid_t               context_file_group;     /* -1 after reset_context() */
  size_t              context_file_size;
  time_t              context_creation_time;      /* 0 after reset_context() */
  time_t              context_modification_time;  /* 0 after reset_context() */
  bool                context_need_md5;
  bool                context_need_sha1;
} input_context_t;

/* State behind a filestore_in_t handle. */
struct filestore_in
{
  dps_in_t in_dps;                     /* data pair store the archive is read from */
  iostat_t *in_stats;                  /* caller's I/O statistics; may be NULL */
  char *in_md5sum;                     /* MD5_SUM_LEN bytes, or NULL when md5 is off */
  char *in_sha1sum;                    /* SHA1_SUM_LEN bytes, or NULL when sha1 is off */
  uidcache_t in_uidcache;              /* used for uid/gid -> name lookups */
  const char *in_gpg_passphrase_path;  /* pointer taken from the open parameters;
                                          not freed by release_filestore_in */
  list_t in_include_filters;           /* copy of the caller's regex list, or NULL */
};

/* Fill *params with defaults: the given input stream, no statistics sinks,
 * no passphrase, no include filters, and every feature switch off. */
void
init_filestore_in_params (filestore_in_params_t *params, in_stream_t in_stream)
{
  /* The only value supplied by the caller. */
  params->fip_in_stream = in_stream;

  /* Optional pointers start out absent. */
  params->fip_filestore_stats = NULL;
  params->fip_iostats = NULL;
  params->fip_gpg_passphrase = NULL;
  params->fip_regexs = NULL;

  /* All switches default to off. */
  params->fip_gzip = false;
  params->fip_bzip2 = false;
  params->fip_sha1sum = false;
  params->fip_md5sum = false;
  params->fip_ignore_header = false;
}

/* Free a filestore_in_t together with everything it owns: the data pair
 * store, the include-filter list, the checksum buffers and the uid cache.
 * The gpg passphrase path is not freed here.
 *
 * Members that are NULL are skipped, so an object whose unopened members
 * were set to NULL can be released safely.  fsi itself must not be NULL. */
void
release_filestore_in (filestore_in_t fsi)
{
  assert (fsi);

  // How the release routines treat NULL is not visible from this file, so
  // never hand them a NULL handle.
  if (fsi->in_dps)
    release_dps_in (fsi->in_dps);
  if (fsi->in_include_filters)
    release_list (fsi->in_include_filters);
  if (fsi->in_uidcache)
    release_uidcache (fsi->in_uidcache);

  // free (NULL) is a no-op, so the checksum buffers need no guard
  free (fsi->in_md5sum);
  free (fsi->in_sha1sum);
  free (fsi);
}

/* Free a filestore_in whose data pair store has not been opened.  Only the
 * members set up before open_dps_in() are touched, so this is safe to call
 * while in_dps is still NULL. */
static void
discard_unopened_filestore_in (filestore_in_t fsi)
{
  if (fsi->in_include_filters)
    release_list (fsi->in_include_filters);
  if (fsi->in_uidcache)
    release_uidcache (fsi->in_uidcache);
  free (fsi->in_md5sum);
  free (fsi->in_sha1sum);
  free (fsi);
}

/* Create a filestore reader from params.
 *
 * Copies the include filters, allocates the requested checksum buffers,
 * creates the uid cache and opens the data pair store on
 * params->fip_in_stream.  When fip_ignore_header is set, the first chunk is
 * skipped and the filters named in params are enabled directly; otherwise
 * the first pair must carry the teebu filestore tag.
 *
 * Returns the new handle, or NULL on any failure (everything allocated so
 * far is released first).  params->fip_in_stream must not be NULL. */
filestore_in_t
open_filestore_in (filestore_in_params_t *params)
{
  assert (params->fip_in_stream);

  filestore_in_t fsi = malloc (sizeof (struct filestore_in));
  if (!fsi)
    return NULL;                // out of memory!

  // Start from a fully defined state so that cleanup never reads an
  // indeterminate member.
  fsi->in_dps = NULL;
  fsi->in_include_filters = NULL;
  fsi->in_md5sum = NULL;
  fsi->in_sha1sum = NULL;
  fsi->in_uidcache = NULL;
  fsi->in_stats = params->fip_iostats;
  fsi->in_gpg_passphrase_path = params->fip_gpg_passphrase;

  if (params->fip_regexs)
    {
      fsi->in_include_filters = copy_list (params->fip_regexs);
      if (!fsi->in_include_filters)
        {
          discard_unopened_filestore_in (fsi);
          return NULL;
        }
    }

  if (params->fip_md5sum)
    {
      // Allocate the buffer that we store md5 checksum in
      fsi->in_md5sum = malloc (MD5_SUM_LEN);
      if (!fsi->in_md5sum)
        {
          discard_unopened_filestore_in (fsi);
          return NULL;
        }
    }

  if (params->fip_sha1sum)
    {
      // Allocate the buffer that we store sha1 checksum in
      fsi->in_sha1sum = malloc (SHA1_SUM_LEN);
      if (!fsi->in_sha1sum)
        {
          discard_unopened_filestore_in (fsi);
          return NULL;
        }
    }

  fsi->in_uidcache = create_uidcache ();
  if (!fsi->in_uidcache)
    {
      discard_unopened_filestore_in (fsi);
      LOG (ERROR, "Couldn't create uidcache");
      return NULL;              // couldn't create uidcache!
    }

  fsi->in_dps = open_dps_in (params->fip_in_stream, MAX_TAG_LEN, fsi->in_stats);
  if (!fsi->in_dps)
    {
      // in_dps is NULL here, so it must not reach release_dps_in()
      discard_unopened_filestore_in (fsi);
      LOG (ERROR, "Couldn't open data pair store");
      return NULL;              // couldn't open datapairstore!
    }

  // From here on every member is valid and release_filestore_in() is safe.

  if (params->fip_ignore_header)
    {
      dps_skip_chunk (fsi->in_dps);
      if (!enable_input_filters (fsi->in_dps, params->fip_gzip, params->fip_bzip2,
                                 params->fip_gpg_passphrase != NULL,
                                 params->fip_gpg_passphrase))
        {
          release_filestore_in (fsi);
          LOG (ERROR, "Couldn't enable filters");
          return NULL;
        }

      return fsi;
    }

  bool start_ok = true;
  input_err_t err = INPUT_OK;
  if (!dps_input_pair (fsi->in_dps, &err))
    {
      LOGF (ERROR, "Couldn't find start of archive: %s",
            INPUT_ERR_NAME (err));
      STAT_INCR (params->fip_filestore_stats, STAT_ARCHIVE_INPUT_ERROR);
      start_ok = false;
    }
  else if (0 != strcmp (TAG_TEXT (TEEBU_FILESTORE_TAG),
                        dps_input_pair_name (fsi->in_dps)))
    {
      LOG (ERROR, "Couldn't find start of archive (wrong data)");
      STAT_INCR (params->fip_filestore_stats, STAT_ARCHIVE_INPUT_ERROR);
      start_ok = false;
    }
  else
    {
      const char *archive_version = dps_input_small_data (fsi->in_dps);
      if (!archive_version)
        {
          LOG (ERROR, "Couldn't find start of archive (skip failed)");
          STAT_INCR (params->fip_filestore_stats, STAT_ARCHIVE_INPUT_ERROR);
          start_ok = false;
        }
      else
        {
          LOGF (VERBOSE, "Archive created by: %s", archive_version);
        }
    }

  if (!start_ok)
    {
      release_filestore_in (fsi);
      return NULL;
    }

  return fsi;
}

/* Close the data pair store behind fsi and report the result of that close.
 * The handle itself is not freed here. */
bool
close_filestore_in (filestore_in_t fsi)
{
  assert (fsi);

  bool closed = close_dps_in (fsi->in_dps);
  return closed;
}

/* Verify file permissions, times and ownership.
 *
 * lstat()s context->context_path and compares the result with the metadata
 * held in context, logging each outcome and bumping the matching counter in
 * context->context_fsstats.  Permissions are only compared when
 * verify_permissions is true; a saved time of 0 means "not recorded" and is
 * skipped.
 *
 * Always returns true: a failed lstat() and every mismatch are reported but
 * are not fatal to the input pass. */
static bool
verify_file_info (input_context_t *context, bool verify_permissions)
{
  // Grab file stats for verification
  struct stat input_stat;
  if (-1 == lstat (path_str (context->context_path), &input_stat))
    {
      LOGF (ERROR, "Error: Couldn't stat file for verification: %s",
            path_str (context->context_path));
      STAT_INCR (context->context_fsstats, STAT_INPUT_ERROR_STATING_FILE);
      return true; // Not fatal
    }

  // Verify file permissions
  if (verify_permissions)
    {
      if ((input_stat.st_mode & SAVE_PERMS) != context->context_file_mode)
        {
          LOGF (WARNING, "File permissions differ: %s (Saved = %o Actual = %o)",
                path_str (context->context_path), context->context_file_mode,
                input_stat.st_mode & SAVE_PERMS);
          STAT_INCR (context->context_fsstats, STAT_VERIFY_PERMISSIONS_FAILED);
        }
      else
        {
          LOGF (VERBOSE, "Verified file permissions: %o",
                context->context_file_mode);
          STAT_INCR (context->context_fsstats, STAT_VERIFY_PERMISSIONS_OK);
        }
    }

  // Verify file times
  if (0 != context->context_creation_time)
    {
      if (context->context_creation_time != input_stat.st_ctime)
        {
          // Count the mismatch whether or not verbose logging is on
          STAT_INCR (context->context_fsstats, STAT_VERIFY_CREATION_TIME_FAILED);

          // Only pay for formatting the times when the message will be shown
          if (IS_VERBOSE_ENABLED ())
            {
              char saved_buffer[PRETTY_FORMATTED_TIME_BUFFER_SIZE],
                   actual_buffer[PRETTY_FORMATTED_TIME_BUFFER_SIZE];
              format_time (context->context_creation_time, saved_buffer, true);
              format_time (input_stat.st_ctime, actual_buffer, true);

              LOGF (VERBOSE,
                    "File creation times differ: %s (Saved = %s Actual = %s)",
                    path_str (context->context_path),
                    saved_buffer, actual_buffer);
            }
        }
      else
        {
          STAT_INCR (context->context_fsstats, STAT_VERIFY_CREATION_TIME_OK);
          if (IS_VERBOSE_ENABLED ())
            {
              char saved_buffer[PRETTY_FORMATTED_TIME_BUFFER_SIZE];
              format_time (context->context_creation_time, saved_buffer, true);
              LOGF (VERBOSE, "Verified creation time: %s", saved_buffer);
            }
        }
    }

  if (0 != context->context_modification_time)
    {
      if (context->context_modification_time != input_stat.st_mtime)
        {
          char saved_buffer[PRETTY_FORMATTED_TIME_BUFFER_SIZE],
               actual_buffer[PRETTY_FORMATTED_TIME_BUFFER_SIZE];
          format_time (context->context_modification_time, saved_buffer, true);
          format_time (input_stat.st_mtime, actual_buffer, true);

          LOGF (WARNING,
                "File modification times differ: %s (Saved = %s Actual = %s)",
                path_str (context->context_path),
                saved_buffer, actual_buffer);
          STAT_INCR (context->context_fsstats, STAT_VERIFY_MODIFICATION_TIME_FAILED);
        }
      else
        {
          if (IS_VERBOSE_ENABLED ())
            {
              char saved_buffer[PRETTY_FORMATTED_TIME_BUFFER_SIZE];
              format_time (context->context_modification_time, saved_buffer, true);
              LOGF (VERBOSE, "Verified modification time: %s", saved_buffer);
            }
          STAT_INCR (context->context_fsstats, STAT_VERIFY_MODIFICATION_TIME_OK);
        }
    }

  // Verify file owner
  // NOTE(review): format_uid() treats a NULL lookup result as possible; the
  // lookups below are passed straight to %s -- confirm that is acceptable.
  if (context->context_file_owner != input_stat.st_uid)
    {
      LOGF (VERBOSE,
            "File owners differ: %s "
            "(Saved = %d: %s Actual = %d: %s)",
            path_str (context->context_path),
            context->context_file_owner,
            lookup_username_from_uid (context->context_fsi->in_uidcache,
                                      context->context_file_owner),
            input_stat.st_uid,
            lookup_username_from_uid (context->context_fsi->in_uidcache,
                                      input_stat.st_uid));
      STAT_INCR (context->context_fsstats, STAT_VERIFY_USER_FAILED);
    }
  else
    {
      LOGF (VERBOSE, "Verified file owner: %d: %s", input_stat.st_uid,
            lookup_username_from_uid (context->context_fsi->in_uidcache,
                                      input_stat.st_uid));
      STAT_INCR (context->context_fsstats, STAT_VERIFY_USER_OK);
    }

  // Verify file group
  if (context->context_file_group != input_stat.st_gid)
    {
      LOGF (VERBOSE,
            "File groups differ: %s "
            "(Saved = %d: %s Actual = %d: %s)",
            path_str (context->context_path),
            context->context_file_group,   // saved gid, not the saved uid
            lookup_groupname_from_gid (context->context_fsi->in_uidcache,
                                       context->context_file_group),
            input_stat.st_gid,
            lookup_groupname_from_gid (context->context_fsi->in_uidcache,
                                       input_stat.st_gid));
      STAT_INCR (context->context_fsstats, STAT_VERIFY_GROUP_FAILED);
    }
  else
    {
      LOGF (VERBOSE, "Verified file group: %d: %s", input_stat.st_gid,
            lookup_groupname_from_gid (context->context_fsi->in_uidcache,
                                       input_stat.st_gid));
      STAT_INCR (context->context_fsstats, STAT_VERIFY_GROUP_OK);
    }

  return true;
}

/* Verify one regular file against the filesystem.
 *
 * First checks the metadata with verify_file_info(), then opens the file at
 * context->context_path and compares it with data_in (the file's data as
 * read from the archive).  The number of bytes compared is stored in
 * context->context_file_size.
 *
 * Returns false if verify_file_info() fails or the on-disk file cannot be
 * closed; a file that cannot be opened or whose contents differ is reported
 * but still returns true. */
static bool
verify_file_from (input_context_t *context, in_stream_t data_in)
{
  if (!verify_file_info (context, true))
    return false;

  in_stream_t disk_in = baseio_open_in (path_str (context->context_path), "r");
  if (!disk_in)
    {
      LOGF (WARNING, "Could not open file for verification: %s",
            path_str (context->context_path));
      STAT_INCR (context->context_fsstats, STAT_INPUT_ERROR_OPENING_FILE);
      return true;
    }

  size_t compared;
  bool identical = compare_stream (4096, &compared, data_in, disk_in);
  if (!identical)
    {
      LOGF (WARNING, "Warning: Streams differ at: %zu in %s", compared,
            path_str (context->context_path));
      STAT_INCR (context->context_fsstats, STAT_VERIFY_FILE_DATA_FAILED);
    }
  else
    {
      LOGF (VERBOSE, "Verified:\t%s", path_str (context->context_path));
      if (context->context_fsi->in_stats)
        context->context_fsi->in_stats->stats_bytes_written += compared;
      STAT_INCR (context->context_fsstats, STAT_VERIFY_FILE_DATA_OK);
    }

  context->context_file_size = compared;

  if (INPUT_OK == close_and_release_in (disk_in))
    return true;

  LOGF (ERROR, "Failed to close verification file: %s",
        path_str (context->context_path));
  STAT_INCR (context->context_fsstats, STAT_INPUT_ERROR);
  return false;
}

/* Copy data_in (the fixed-length stream carved out of the archive) to
 * data_out (the output file, or a nullio handle when the archive is only
 * being tested).  *count is handed to copy_stream() to receive the amount
 * copied; path is used for logging only.
 *
 * Returns false only when the archive side of the copy failed.  A failure
 * on the output side is logged and counted but still returns true. */
static bool
output_file_from_to (filestore_stats_t *stats, size_t *count, path_t path,
                     in_stream_t data_in, out_stream_t data_out)
{
  input_err_t read_err = INPUT_OK;
  output_err_t write_err = OUTPUT_OK;

  if (copy_stream (4096, count, &read_err, &write_err, data_in, data_out))
    {
      LOGF (DEBUG, "Output:\t%s", path_str (path));
      return true;
    }

  // A read error on the archive is fatal for the caller
  if (INPUT_OK != read_err)
    {
      LOGF (ERROR, "%s reading from archive", INPUT_ERR_NAME (read_err));
      STAT_INCR (stats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }

  if (OUTPUT_OK != write_err)
    {
      // NOTE(review): this formats the *input* error (which is INPUT_OK on
      // this path), not write_err -- confirm and switch to the output error
      // name once the matching macro is known.
      LOGF (WARNING, "%s writing output", INPUT_ERR_NAME (read_err));
      STAT_INCR (stats, STAT_OUTPUT_ERROR_WRITING_FILE);
    }

  return true;
}

/* Apply the saved modification time, owner and group to
 * context->context_path.  (Permission bits are not touched here.)
 *
 * A saved modification time of 0, or an owner/group of -1, means "not
 * recorded" and that step is skipped.  Each failure is counted and logged;
 * the function always returns true. */
static bool
set_file_info (input_context_t *context)
{
  if (context->context_modification_time != 0)
    {
      // Access time becomes "now"; only the modification time is restored
      struct utimbuf times;
      times.actime = time (NULL);
      times.modtime = context->context_modification_time;

      if (utime (path_str (context->context_path), &times) == -1)
        {
          STAT_INCR (context->context_fsstats, STAT_OUTPUT_ERROR_SETTING_FILE_TIME);
          LOGF (WARNING, "Failed to set file modification time for file: %s",
                path_str(context->context_path));
        }
    }

  // Owner and group are changed in separate calls so that each failure is
  // counted on its own; passing -1 leaves the other id unchanged.
  if (context->context_file_owner != -1)
    {
      int chown_rc = lchown (path_str (context->context_path),
                             context->context_file_owner, -1);
      if (chown_rc == -1)
        {
          STAT_INCR (context->context_fsstats, STAT_OUTPUT_ERROR_SETTING_FILE_OWNER);
          LOGF (DEBUG, "Failed to set owner %d on: %s",
                context->context_file_owner,
                path_str (context->context_path));
        }
    }

  if (context->context_file_group != -1)
    {
      int chgrp_rc = lchown (path_str (context->context_path),
                             -1, context->context_file_group);
      if (chgrp_rc == -1)
        {
          // Not a warning, users often can't set group
          STAT_INCR (context->context_fsstats, STAT_OUTPUT_ERROR_SETTING_FILE_GROUP);
          LOGF (DEBUG, "Failed to set group %d on: %s",
                context->context_file_group,
                path_str (context->context_path));
        }
    }

  return true;
}

/* Extract one regular file: copy data_in (the file's data from the archive)
 * to context->context_path, creating parent directories as needed, then
 * apply the saved time and ownership with set_file_info().
 *
 * When the path is rejected by the include filters the data is still read,
 * but written to a null output, so the archive stream stays in step.
 *
 * Returns false when the parent directories or the output cannot be
 * created, when the copy reports an archive read failure, or when the
 * output cannot be closed; true otherwise. */
static bool
extract_file_from (input_context_t *context, in_stream_t data_in)
{
  bool include = include_file (context->context_path,
                               context->context_fsi->in_include_filters, NULL);

  out_stream_t data_out;
  if (include)
    {
      path_t parent = copy_path (context->context_path);
      remove_last_path_part (parent);
      bool parents_ok = create_directories (parent, 0755);
      release_path (parent);
      if (!parents_ok)
        {
          LOGF (ERROR, "Couldn't create directory to store: %s", path_str (context->context_path));
          return false;
        }

      data_out = unixio_open_out (path_str (context->context_path),
                                  O_WRONLY | O_CREAT | O_TRUNC,
                                  context->context_file_mode);
    }
  else
    {
      data_out = nullio_open_out ();
    }

  if (!data_out)
    {
      LOGF (ERROR, "Could not open file for output: %s",
            path_str (context->context_path));
      return false;
    }

  if (context->context_fsi->in_stats)
    {
      out_stream_t stat_out =
        statio_open_out (data_out, context->context_fsi->in_stats, false, true);
      if (!stat_out)
        {
          LOG (ERROR, "Couldn't open statio out channel");
          // Nothing else owns data_out yet; close it rather than leak it
          (void) close_and_release_out (data_out);
          return false;
        }
      data_out = stat_out;
    }

  bool ok = true;
  if (!output_file_from_to (context->context_fsstats,
                            &context->context_file_size, context->context_path,
                            data_in, data_out))
    {
      LOGF (ERROR, "Failed to output: %s", path_str (context->context_path));
      ok = false;
    }

  if (INPUT_OK != close_and_release_out (data_out))
    {
      LOG (ERROR, "Failed to close output file!");
      ok = false;
    }

  if (!include)
    {
      LOGF (VERBOSE, "Skipped file: %s", path_str (context->context_path));
      return ok;
    }

  // Propagate copy/close failures instead of masking them with the result
  // of set_file_info()
  if (!ok)
    return false;

  return set_file_info (context);
}

/* Do a check of a file: read its data from the archive (data_in) and
 * discard it into a null output, routed through a statistics stream when
 * iostats is non-NULL.  *count is passed on to the copy to receive the
 * amount read; path is used for logging only.
 *
 * Returns false when an output stream cannot be opened, the copy reports an
 * archive read failure, or the output cannot be closed. */
static bool
check_file_from (filestore_stats_t *stats, size_t *count, iostat_t * iostats, path_t path,
                 in_stream_t data_in)
{
  out_stream_t data_out = nullio_open_out ();
  if (!data_out)
    {
      LOG (ERROR, "Failed to open null output!");
      return false;
    }

  if (iostats)
    {
      out_stream_t stat_out =
        statio_open_out (data_out, iostats, false, true);
      if (!stat_out)
        {
          LOG (ERROR, "Couldn't open statio out channel");
          // Nothing else owns data_out yet; close it rather than leak it
          (void) close_and_release_out (data_out);
          return false;
        }
      data_out = stat_out;
    }

  bool ok = true;

  if (!output_file_from_to (stats, count, path, data_in, data_out))
    {
      LOGF (ERROR, "Failed to output: %s", path_str (path));
      ok = false;
    }

  if (INPUT_OK != close_and_release_out (data_out))
    {
      LOG (ERROR, "Failed to close output file!");
      ok = false;
    }

  return ok;
}

/* Verify the given link against the filesystem.
 *
 * Checks the link's own metadata (permissions excluded) with
 * verify_file_info(), then reads the link at context->context_path and
 * compares its target with link1, the target saved in the archive.
 *
 * Returns false when verify_file_info() fails or the link cannot be read.
 * A target mismatch is counted and logged but is not fatal. */
static bool
verify_link (input_context_t *context, const char *link1)
{
  // Propagate a failure, as the other verify_file_info() callers do
  if (!verify_file_info (context, false))
    return false;

  char *link2 = read_link (context->context_path);
  if (!link2)
    {
      LOGF (WARNING, "Couldn't read link:\t%s",
            path_str (context->context_path));
      return false;
    }

  if (0 != strcmp (link1, link2))
    {
      STAT_INCR (context->context_fsstats, STAT_VERIFY_LINK_TARGET_FAILED);
      LOGF (WARNING, "Link wrong:\t%s: Expected %s but got %s",
            path_str (context->context_path), link1, link2);
      free (link2);
      return true; // Not fatal
    }

  LOGF (VERBOSE, "Verified link:\t%s\t-> %s",
        path_str (context->context_path), link1);

  free (link2);
  STAT_INCR (context->context_fsstats, STAT_VERIFY_LINK_TARGET_OK);
  return true;
}

/* Extract the given link: create a symlink at context->context_path that
 * points at `link`, creating parent directories first, then apply the saved
 * time and ownership with set_file_info().
 *
 * A path rejected by the include filters is counted as excluded and
 * returns true.  Returns false when the parent directories or the symlink
 * cannot be created. */
static bool
extract_link (input_context_t *context, const char *link)
{
  if (!include_file (context->context_path,
                     context->context_fsi->in_include_filters, NULL))
    {
      STAT_INCR (context->context_fsstats, STAT_EXTRACT_LINK_EXCLUDED);
      LOGF (VERBOSE, "Skipped link: %s", path_str (context->context_path));
      return true;
    }

  // Make sure the directory that will hold the link exists
  path_t parent_dir = copy_path (context->context_path);
  remove_last_path_part (parent_dir);
  bool parents_ok = create_directories (parent_dir, 0755);
  release_path (parent_dir);

  if (!parents_ok)
    {
      LOGF (ERROR, "Couldn't create directory to store link: %s", path_str (context->context_path));
      return false;
    }

  if (symlink (link, path_str (context->context_path)) == -1)
    {
      LOGF (WARNING, "Failed to create link: %s -> %s",
            path_str (context->context_path), link);
      return false;
    }

  STAT_INCR (context->context_fsstats, STAT_EXTRACT_LINK_OK);
  return set_file_info (context);
}

/* Helper for do_input: handle a chunk filter tag.
 *
 * The tag's data is a string of one-character flags naming the filters
 * (gzip, bzip2, gpg) to enable on the rest of the input; unknown flags are
 * warned about and ignored.
 *
 * Returns false when the tag data cannot be read or the filters cannot be
 * enabled.  Also returns false in IN_MODE_IDENTIFY, where processing stops
 * once the filters are known. */
static bool
handle_chunk_filter_tag (input_context_t *context)
{
  char *chunk_filter_data = dps_input_small_data (context->context_fsi->in_dps);
  if (!chunk_filter_data)
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }

  bool want_gzip = false;
  bool want_bzip2 = false;
  bool want_gpg = false;

  // Walk the flag string one character at a time
  for (char *flag = chunk_filter_data; *flag != '\0'; flag++)
    {
      switch (*flag)
        {
        case GZIP_FILTER_FLAG:
          want_gzip = true;
          break;
        case BZIP2_FILTER_FLAG:
          want_bzip2 = true;
          break;
        case GPG_FILTER_FLAG:
          want_gpg = true;
          break;
        default:
          LOGF (WARNING, "Unknown chunk filter type: %c", *flag);
          break;
        }
    }

  if (!enable_input_filters (context->context_fsi->in_dps, want_gzip, want_bzip2,
                             want_gpg, context->context_fsi->in_gpg_passphrase_path))
    {
      LOG (ERROR, "Couldn't enable filters");
      return false;
    }

  // If we are in identify mode, stop here
  if (context->context_input_mode == IN_MODE_IDENTIFY)
    return false;

  return true;
}

/* Reset the per-file members of the context, ready for the next file.
 * The store handle, id, stats pointer, input mode and path prefix are left
 * as they are.
 *
 * NOTE(review): context_path is overwritten with NULL without being
 * released here -- presumably it has already been released by the time
 * this runs; confirm against the callers. */
static void
reset_context (input_context_t *context)
{
  // What and where
  context->context_filetype          = 0;
  context->context_path              = NULL;

  // Saved metadata: -1 and 0 act as "not recorded" markers
  context->context_file_mode         = 0;
  context->context_file_owner        = -1;
  context->context_file_group        = -1;
  context->context_file_size         = 0;
  context->context_creation_time     = 0;
  context->context_modification_time = 0;

  // Checksum bookkeeping
  context->context_need_md5          = false;
  context->context_need_sha1         = false;
}

/* Helper for do_input: handle a filetype tag, the first tag of a file.
 *
 * Resets the per-file context, records the first byte of the tag data as
 * the file type, and marks each checksum as needed when the store has a
 * buffer for it.  Returns false when the tag data cannot be read. */
static bool
handle_filetype_tag (input_context_t *context)
{
  reset_context (context);

  filestore_in_t fsi = context->context_fsi;

  char *filetype_data = dps_input_small_data (fsi->in_dps);
  if (!filetype_data)
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }
  context->context_filetype = filetype_data[0];

  // A checksum is needed exactly when its buffer was allocated at open time
  context->context_need_md5 = (fsi->in_md5sum != NULL);
  context->context_need_sha1 = (fsi->in_sha1sum != NULL);

  return true;
}

/* Render file_mode as an ls-style string such as "drwxr-xr-x".
 * perms_buffer[0] receives `type`, the next nine characters are the
 * user/group/other rwx flags ('-' when a bit is clear), and the string is
 * NUL-terminated.  The buffer must hold at least 11 characters. */
static void
format_perms (mode_t file_mode, char type, char *perms_buffer)
{
  /* Permission bits in display order, each with the letter shown when set. */
  static const struct
  {
    mode_t bit;
    char   letter;
  } perm_flags[9] = {
    { S_IRUSR, 'r' }, { S_IWUSR, 'w' }, { S_IXUSR, 'x' },
    { S_IRGRP, 'r' }, { S_IWGRP, 'w' }, { S_IXGRP, 'x' },
    { S_IROTH, 'r' }, { S_IWOTH, 'w' }, { S_IXOTH, 'x' }
  };

  char *out = perms_buffer;
  *out++ = type;
  for (int i = 0; i < 9; i++)
    {
      if (file_mode & perm_flags[i].bit)
        *out++ = perm_flags[i].letter;
      else
        *out++ = '-';
    }
  *out = '\0';
}

/* Format the context's file owner into buf (capacity `size` bytes).  The
 * user name is used when the uid can be looked up; otherwise the uid is
 * formatted as an int.  The result is always NUL-terminated and is
 * truncated to fit when it is longer than size - 1 characters. */
static void
format_uid (input_context_t *context, char *buf, size_t size)
{
  const char *user = lookup_username_from_uid (context->context_fsi->in_uidcache,
                                               context->context_file_owner);
  if (user)
    // snprintf rather than strncpy: strncpy leaves buf unterminated when
    // the name is size characters or longer
    snprintf (buf, size, "%s", user);
  else
    snprintf (buf, size, "%d", context->context_file_owner);
}

/* Format the context's file group into buf (capacity `size` bytes).  The
 * group name is used when the gid can be looked up; otherwise the gid is
 * formatted as an int.  The result is always NUL-terminated and is
 * truncated to fit when it is longer than size - 1 characters. */
static void
format_gid (input_context_t *context, char *buf, size_t size)
{
  const char *name = lookup_groupname_from_gid (context->context_fsi->in_uidcache,
                                                context->context_file_group);
  if (name)
    // snprintf rather than strncpy: strncpy leaves buf unterminated when
    // the name is size characters or longer
    snprintf (buf, size, "%s", name);
  else
    snprintf (buf, size, "%d", context->context_file_group);
}

/* Log a one-line, ls-style summary of a processed symlink at VERBOSE
 * level: fixed "lrwxrwxrwx" permissions, owner/group, size, the saved
 * creation time, the link's path and its target (`path`). */
static void
print_link_done (input_context_t *context, const char *path)
{
  char owner_text[8];
  char group_text[8];
  char time_text[PRETTY_FORMATTED_TIME_BUFFER_SIZE];

  format_uid (context, owner_text, sizeof owner_text);
  format_gid (context, group_text, sizeof group_text);
  format_time (context->context_creation_time, time_text, true);

  LOGF(VERBOSE, "lrwxrwxrwx %s/%s%10zu %s %s -> %s", owner_text, group_text,
       context->context_file_size, time_text,
       path_str (context->context_path), path);
}

/* Log a one-line, ls-style summary of a processed file or directory at
 * VERBOSE level, for example:
 *
 *   drwxr-xr-x root/root         0 2006-03-25 02:37:24 usr/share/doc/check/
 *
 * `dir` selects the leading type character: 'd' when true, '-' otherwise. */
static void
print_file_done (input_context_t *context, bool dir)
{
  char perms_text[11];
  char owner_text[8];
  char group_text[8];
  char time_text[PRETTY_FORMATTED_TIME_BUFFER_SIZE];

  char type_char = dir ? 'd' : '-';
  format_perms (context->context_file_mode, type_char, perms_text);
  format_uid (context, owner_text, sizeof owner_text);
  format_gid (context, group_text, sizeof group_text);
  format_time (context->context_creation_time, time_text, true);

  LOGF(VERBOSE, "%s %s/%s%10zu %s %s", perms_text, owner_text, group_text,
       context->context_file_size, time_text,
       path_str (context->context_path));
}


/* Helper for do_input: handle a path tag.
 *
 * Rebuilds context->context_path as the path prefix plus the path read from
 * the archive, and merges it into the I/O statistics when they are enabled.
 * For a directory entry that passes the include filters, the directory is
 * then processed on the spot according to the input mode (verified,
 * ignored for a check, or created) and a summary line is printed when
 * verbose logging is on.
 *
 * Returns false when the tag data cannot be read, or when
 * verify_file_info() fails in verify mode; true otherwise. */
static bool
handle_path_tag (input_context_t *context)
{
  filestore_in_t fsi = context->context_fsi;

  char *path_buf = dps_input_small_data (fsi->in_dps);
  if (!path_buf)
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }

  // Replace any previous path with prefix + archived path
  if (context->context_path)
    release_path (context->context_path);
  context->context_path = copy_path (context->context_pfix);
  path_add (context->context_path, path_buf);

  if (fsi->in_stats)
    merge_stats_path (fsi->in_stats, context->context_path);

  // Only directories are dealt with at path-tag time
  if (dirfiletype != context->context_filetype)
    return true;

  DEBUGF ("Processing directory %s", path_str (context->context_path));

  if (!include_file (context->context_path, fsi->in_include_filters, NULL))
    {
      LOGF (VERBOSE, "Skipped directory: %s", path_str (context->context_path));
      return true;
    }

  switch (context->context_input_mode)
    {
    case IN_MODE_VERIFY:
      {
        DIR *dir = opendir (path_str (context->context_path));
        if (dir)
          {
            LOGF (VERBOSE, "Verified directory: %s",
                  path_str (context->context_path));
            STAT_INCR (context->context_fsstats, STAT_VERIFY_DIR_OK);
            closedir (dir);
          }
        else
          {
            LOGF (WARNING, "Couldn't find directory: %s",
                  path_str (context->context_path));
            STAT_INCR (context->context_fsstats, STAT_VERIFY_DIR_FAILED_NOT_FOUND);

            // NOTE(review): the metadata check runs only when the directory
            // could NOT be opened -- confirm this was not meant for the
            // branch above.
            if (!verify_file_info (context, true))
              return false;
          }
      }
      break;

    case IN_MODE_CHECK:
      // Nothing to do for a directory when only checking the archive
      break;

    case IN_MODE_EXTRACT:
      if (create_directories (context->context_path,
                              context->context_file_mode))
        {
          STAT_INCR (context->context_fsstats, STAT_EXTRACT_DIR_OK);
          set_file_info(context);
        }
      else
        {
          LOGF (ERROR, "Couldn't create directory: %s",
                path_str (context->context_path));
          STAT_INCR (context->context_fsstats, STAT_EXTRACT_DIR_FAILED);
        }
      break;

    default:
      FATAL ("Fatal: Unknown input mode");
      break;
    }

  if (VERBOSE >= logging_log_lev)
    print_file_done (context, true);

  return true;
}

/* Helper for do_input: handle a unix permissions tag.
 *
 * Reads the tag data, decodes it as a uint32 and stores the result in
 * context->context_file_mode.  Returns false (and counts an archive input
 * error) when the data cannot be read or decoded. */
static bool
handle_unix_perm_tag (input_context_t *context)
{
  dps_in_t dps = context->context_fsi->in_dps;

  // The size is queried before the data is fetched, as in the original order
  size_t encoded_len = dps_input_data_size (dps);
  char *encoded = dps_input_small_data (dps);

  uint32_t decoded_mode;
  if (!encoded || !decode_uint32 (encoded, encoded_len, &decoded_mode))
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }

  context->context_file_mode = (mode_t) decoded_mode;

  LOGF (DEBUG, "Found file perms: 0%o", context->context_file_mode);

  return true;
}
/* Helper for do_input.  Handles the tag which carries the contents of a
 * regular file.
 *
 * The big-data stream of the current pair is opened and, when the filestore
 * has an MD5 and/or SHA1 buffer configured, wrapped in the matching
 * checksumming streams.  The resulting stream is then handed to the verify,
 * check or extract routine selected by the input mode.  Afterwards the
 * wrappers are closed, the big input is finished and the per-mode OK/FAILED
 * statistic is incremented.
 *
 * Returns true if the file was processed and the archive stream was finished
 * cleanly, false otherwise (do_input treats false as fatal). */
static bool
handle_filedata_tag (input_context_t *context)
{
  assert (regfiletype == context->context_filetype);

  bool fatal = false;
  in_stream_t data_in = dps_input_big_data (context->context_fsi->in_dps);
  if (!data_in)
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }

  // NOTE(review): the early returns below leave the big input unfinished and,
  // in the SHA1 case, an already opened MD5 wrapper unreleased - confirm
  // whether the caller's fatal path makes that acceptable.
  if (context->context_fsi->in_md5sum)
    {
      in_stream_t md5sum_in =
        md5sumio_open_in (data_in, false, context->context_fsi->in_md5sum);
      if (!md5sum_in)
        {
          STAT_INCR (context->context_fsstats, STAT_INTERNAL_ERROR);
          return false;
        }
      data_in = md5sum_in;
    }

  if (context->context_fsi->in_sha1sum)
    {
      // The second argument is non-zero only when an MD5 wrapper sits below;
      // presumably it asks the SHA1 stream to close that wrapper with itself.
      in_stream_t sha1sum_in =
        sha1sumio_open_in (data_in, context->context_fsi->in_md5sum,
                           context->context_fsi->in_sha1sum);
      if (!sha1sum_in)
        {
          STAT_INCR (context->context_fsstats, STAT_INTERNAL_ERROR);
          return false;
        }
      data_in = sha1sum_in;
    }

  DEBUGF ("Processing input data for %s", path_str (context->context_path));
  switch (context->context_input_mode)
    {
    case IN_MODE_VERIFY:
      fatal = !verify_file_from (context, data_in);
      break;

    case IN_MODE_CHECK:
      fatal = !check_file_from (context->context_fsstats,
                                &context->context_file_size,
                                context->context_fsi->in_stats,
                                context->context_path, data_in);
      break;

    case IN_MODE_EXTRACT:
      fatal = !extract_file_from (context, data_in);
      break;

    default:
      FATAL ("Fatal: Unknown input mode");
      break;
    }

  if (VERBOSE >= logging_log_lev)
    print_file_done (context, false);

  // Only the checksum wrappers are closed here; the raw big-data stream is
  // handed back through dps_finish_big_input below.
  if (context->context_fsi->in_md5sum || context->context_fsi->in_sha1sum)
    {
      close_and_release_in (data_in);
    }

  if (!dps_finish_big_input (context->context_fsi->in_dps))
    {
      LOGF (ERROR, "Error: Failed to close archive input stream: %s",
            path_str (context->context_path));
      // A failure to finish the input is an error in its own right and must
      // never hide a failure already recorded above.
      fatal = true;
    }

  // Pick the statistic matching both the mode and the outcome.  -1 means no
  // statistic applies (unknown mode).
  status_t outcome = -1;
  switch (context->context_input_mode)
    {
    case IN_MODE_VERIFY:
      outcome = fatal ? STAT_VERIFY_FILE_FAILED : STAT_VERIFY_FILE_OK;
      break;
    case IN_MODE_CHECK:
      outcome = fatal ? STAT_CHECK_FILE_FAILED : STAT_CHECK_FILE_OK;
      break;
    case IN_MODE_EXTRACT:
      outcome = fatal ? STAT_EXTRACT_FILE_FAILED : STAT_EXTRACT_FILE_OK;
      break;
    default:
      FATAL ("Fatal: Unknown input mode");
      break;
    }

  if (-1 != outcome)
    STAT_INCR (context->context_fsstats, outcome);

  return !fatal;
}

/* Helper for do_input.  Reads the MD5 checksum stored in the archive and, if
 * the filestore has an MD5 buffer (in_md5sum), compares the first MD5_SUM_LEN
 * characters of both.  A match clears context_need_md5; a mismatch is logged
 * and counted but is not fatal.  Returns false only when the stored checksum
 * cannot be read. */
static bool
handle_md5sum_tag (input_context_t *context)
{
  char *stored = dps_input_small_data (context->context_fsi->in_dps);
  if (!stored)
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }

  // Without a computed checksum there is nothing to compare against.
  if (!context->context_fsi->in_md5sum)
    return true;

  if (0 == strncmp (stored, context->context_fsi->in_md5sum, MD5_SUM_LEN))
    {
      LOGF (VERBOSE, "Verified MD5:\t%s", stored);
      context->context_need_md5 = false;
      STAT_INCR (context->context_fsstats, STAT_VERIFY_FILE_CHECKSUM_MD5_OK);
    }
  else
    {
      LOGF (ERROR, "Mismatching MD5: %s (Stored = %s Computed = %s)",
            path_str (context->context_path), stored,
            context->context_fsi->in_md5sum);
      STAT_INCR (context->context_fsstats,
                 STAT_VERIFY_FILE_CHECKSUM_MD5_FAILED);
    }

  return true;
}

/* Helper for do_input.  Reads the SHA1 checksum stored in the archive and, if
 * the filestore has a SHA1 buffer (in_sha1sum), compares the first
 * SHA1_SUM_LEN characters of both.  A match clears context_need_sha1; a
 * mismatch is logged and counted but is not fatal.  Returns false only when
 * the stored checksum cannot be read. */
static bool
handle_sha1sum_tag (input_context_t *context)
{
  char *stored = dps_input_small_data (context->context_fsi->in_dps);
  if (!stored)
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }

  // Without a computed checksum there is nothing to compare against.
  if (!context->context_fsi->in_sha1sum)
    return true;

  if (0 == strncmp (stored, context->context_fsi->in_sha1sum, SHA1_SUM_LEN))
    {
      LOGF (VERBOSE, "Verified SHA1:\t%s", stored);
      context->context_need_sha1 = false;
      STAT_INCR (context->context_fsstats, STAT_VERIFY_FILE_CHECKSUM_SHA1_OK);
    }
  else
    {
      LOGF (ERROR, "Mismatching SHA1: %s (Stored = %s Computed = %s)",
            path_str (context->context_path), stored,
            context->context_fsi->in_sha1sum);
      STAT_INCR (context->context_fsstats,
                 STAT_VERIFY_FILE_CHECKSUM_SHA1_FAILED);
    }

  return true;
}

/* Helper for do_input.  Reads the owner name stored in the archive and
 * resolves it to a uid through the uid cache, storing the result in
 * context_file_owner.  If the name is unknown the user "nobody" is tried
 * instead, and if that fails too the uid is set to 0.  Returns false only
 * when the owner name cannot be read. */
static bool
handle_file_owner_tag (input_context_t *context)
{
  char *owner_name = dps_input_small_data (context->context_fsi->in_dps);
  if (!owner_name)
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }

  // Try the recorded name first, then fall back step by step.
  bool resolved =
    lookup_uid_from_username (context->context_fsi->in_uidcache, owner_name,
                              &context->context_file_owner);
  if (!resolved)
    {
      resolved =
        lookup_uid_from_username (context->context_fsi->in_uidcache, "nobody",
                                  &context->context_file_owner);
    }
  if (!resolved)
    {
      context->context_file_owner = 0;
    }

  LOGF (DEBUG, "Read file owner: %lu (%s)",
        (unsigned long)context->context_file_owner,
        owner_name) ;

  return true;
}

/* Helper for do_input.  Reads the group name stored in the archive and
 * resolves it to a gid through the uid cache, storing the result in
 * context_file_group.  If the name is unknown the gid falls back to 0.
 * Returns false only when the group name cannot be read. */
static bool
handle_file_group_tag (input_context_t *context)
{
  char *group_buf;
  if (!(group_buf = dps_input_small_data (context->context_fsi->in_dps)))
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }

  if (!lookup_gid_from_groupname (context->context_fsi->in_uidcache, group_buf,
                                  &context->context_file_group))
    {
      // Fall back on the *group* field: the owner belongs to the owner tag
      // and must not be touched when a group lookup fails.
      context->context_file_group = 0;
    }

  LOGF (DEBUG, "Read file group: %lu (%s)",
        (unsigned long)context->context_file_group,
        group_buf) ;

  return true;
}

/* Helper for do_input.  Handles the tag which stores the target of a
 * softlink: the target is read from the archive and then verified or
 * extracted according to the input mode (check mode does nothing with it).
 * Returns false when the target cannot be read or when the verify/extract
 * routine reports failure. */
static bool
handle_link_tag (input_context_t *context)
{
  assert (softlinkfiletype == context->context_filetype);

  char *target = dps_input_small_data (context->context_fsi->in_dps);
  if (!target)
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }

  DEBUGF ("Processing link data for %s", path_str (context->context_path));

  bool ok = true;
  if (IN_MODE_VERIFY == context->context_input_mode)
    {
      ok = verify_link (context, target);
    }
  else if (IN_MODE_EXTRACT == context->context_input_mode)
    {
      ok = extract_link (context, target);
    }
  else if (IN_MODE_CHECK != context->context_input_mode)
    {
      // Anything other than the three modes above is a programming error.
      FATAL ("Fatal: Unknown input mode");
    }

  if (VERBOSE >= logging_log_lev)
    print_link_done (context, target);

  return ok;
}

/* Helper for handle_creation_time_tag and handle_modification_time_tag.
 * Parses a file timestamp from the archive and stores it in the context: in
 * context_creation_time when creation_time is true, otherwise in
 * context_modification_time.  Returns false when the timestamp cannot be
 * read or parsed. */
static bool
handle_file_time_tag (input_context_t *context, bool creation_time)
{
  char *raw = dps_input_small_data (context->context_fsi->in_dps);
  if (!raw)
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }

  time_t parsed;
  if (!parse_time (raw, &parsed))
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR_BAD_TIME);
      LOGF(ERROR, "Bad time in archive: %s", raw);
      return false;
    }

  // Select the destination field, logging which kind of time this is.
  time_t *destination;
  if (creation_time)
    {
      LOGF (DEBUG, "Creation time:\t%s", raw);
      destination = &context->context_creation_time;
    }
  else
    {
      LOGF (DEBUG, "Modification time:\t%s", raw);
      destination = &context->context_modification_time;
    }
  *destination = parsed;

  return true;
}

/* Helper for do_input.  Parse and store a file creation time. */
static bool
handle_creation_time_tag (input_context_t *context)
{
  const bool is_creation_time = true;

  return handle_file_time_tag (context, is_creation_time);
}

/* Helper for do_input.  Parse and store a file modification time. */
static bool
handle_modification_time_tag (input_context_t *context)
{
  const bool is_creation_time = false;

  return handle_file_time_tag (context, is_creation_time);
}

/* Helper for do_input.  Reads and parses the archive timestamp and, when
 * NORMAL level logging is enabled, logs it in human readable form.  Returns
 * false when the timestamp cannot be read or parsed; a failure to format an
 * otherwise valid timestamp is logged and counted but is not fatal. */
static bool
handle_archive_timestamp_tag (input_context_t *context)
{
  char *raw = dps_input_small_data (context->context_fsi->in_dps);
  if (!raw)
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }

  time_t stamp = 0;
  if (!parse_time (raw, &stamp))
    {
      LOGF(ERROR, "Bad timestamp in archive: %s", raw);
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR_BAD_TIME);
      return false;
    }

  // Formatting is only worth doing if the result is going to be shown.
  if (!IS_NORMAL_ENABLED ())
    return true;

  char pretty[26];
  if (!ctime_r (&stamp, pretty))
    {
      LOG (ERROR, "Couldn't format timestamp");
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR_BAD_TIME);
      return true;
    }

  // Cut the string at the first newline so that the log line is not broken.
  for (char *p = pretty; p < pretty + sizeof pretty; p++)
    {
      if ('\n' == *p)
        {
          *p = '\0';
          break;
        }
    }
  LOGF (NORMAL, "Archive created: %s", pretty);

  return true;
}

/* Helper for do_input.  Reads the archive label and logs it at NORMAL level.
 * Returns false, after counting an archive input error, when the label
 * cannot be read. */
static bool
handle_archive_label_tag (input_context_t *context)
{
  char *label = dps_input_small_data (context->context_fsi->in_dps);

  if (label)
    {
      LOGF (NORMAL, "Archive label: %s", label);
      return true;
    }

  STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
  return false;
}

/* Turn a buffer of bytes into a filestore id.  Exactly ID_LEN bytes are read
 * from buffer; the first byte read ends up in the most significant position
 * of the result. */
static filestore_id_t
parse_filestore_id (char *buffer)
{
  filestore_id_t id = 0;

  for (const char *p = buffer; p < buffer + ID_LEN; p++)
    {
      // Going through unsigned char keeps bytes >= 0x80 from sign extending
      // into the upper bits of the id.
      id = (id << 8) | (unsigned char) *p;
    }

  return id;
}

/* Helper for do_input.  Reads the archive id.  The first id seen (while
 * context_id is still 0) is logged at NORMAL level; a later id which differs
 * from the remembered one is logged as an error.  In both cases the new id
 * is remembered in the context.  Returns false only when the id cannot be
 * read. */
static bool
handle_archive_id_tag (input_context_t *context)
{
  char *raw_id = dps_input_small_data (context->context_fsi->in_dps);
  if (!raw_id)
    {
      STAT_INCR (context->context_fsstats, STAT_ARCHIVE_INPUT_ERROR);
      return false;
    }

  // NOTE(review): the payload length is not checked here, although
  // parse_filestore_id reads ID_LEN bytes from it - confirm the data pair
  // store guarantees a buffer of at least that size.
  filestore_id_t id = parse_filestore_id (raw_id);

  if (0 == context->context_id)
    {
      LOGF (NORMAL, "Archive ID: %" PRIx64, id);
    }
  else if (id != context->context_id)
    {
      LOGF (ERROR, "Archive ID differs from previous ID.  This archive may been tampered with:  New ID: %" PRIx64, id);
      // FIXME: Should be a stat for this
    }

  // When the id is unchanged this assignment stores the value already held.
  context->context_id = id;

  return true;
}

/* Handle an error in the archive input: log it, count a recovery attempt in
 * stats and ask the data pair store to resynchronise.  Returns true if
 * dps_input_recovery succeeded, false otherwise. */
static bool
do_error (filestore_in_t fsi, filestore_stats_t * stats)
{
  LOG (ERROR, "Encountered error, trying to recover");
  STAT_INCR (stats, STAT_ARCHIVE_INPUT_ERROR_RECOVERY);

  if (!dps_input_recovery (fsi->in_dps))
    {
      LOG (ERROR, "Couldn't recover from error");
      return false;
    }

  LOG (NORMAL, "Resynced after error");
  return true;
}

/* Type of a function which deals with input.  A handler receives the shared
 * input context and returns a bool; do_input treats a false return as fatal
 * and stops reading. */
typedef bool (*input_handler_f) (input_context_t *);

/* Create the "input switch", which is a hashtable mapping tags to the
 * functions which deal with that tag.  Every handler registered here has the
 * input_handler_f signature.
 *
 * Returns the new hashtable, or NULL if it could not be created.  The caller
 * releases it with hashtable_release. */
static hashtable_t
create_input_switch (void)
{
  hashtable_t ht = create_string_hashtable (1, false);

  if (!ht)
    return NULL;

  hashtable_add (ht, TAG_TEXT (CHUNK_FILTER_TAG),       handle_chunk_filter_tag);
  hashtable_add (ht, TAG_TEXT (FILETYPE_TAG),           handle_filetype_tag);
  hashtable_add (ht, TAG_TEXT (PATH_TAG),               handle_path_tag);
  hashtable_add (ht, TAG_TEXT (FILEDATA_TAG),           handle_filedata_tag);
  hashtable_add (ht, TAG_TEXT (UNIX_PERM_TAG),          handle_unix_perm_tag);
  hashtable_add (ht, TAG_TEXT (MD5SUM_TAG),             handle_md5sum_tag);
  hashtable_add (ht, TAG_TEXT (SHA1SUM_TAG),            handle_sha1sum_tag);
  hashtable_add (ht, TAG_TEXT (FILE_OWNER_TAG),         handle_file_owner_tag);
  hashtable_add (ht, TAG_TEXT (FILE_GROUP_TAG),         handle_file_group_tag);
  hashtable_add (ht, TAG_TEXT (LINK_TAG),               handle_link_tag);
  hashtable_add (ht, TAG_TEXT (CREATION_TIME_TAG),      handle_creation_time_tag);
  hashtable_add (ht, TAG_TEXT (MODIFICATION_TIME_TAG),  handle_modification_time_tag);
  hashtable_add (ht, TAG_TEXT (ARCHIVE_TIMESTAMP_TAG),  handle_archive_timestamp_tag);
  hashtable_add (ht, TAG_TEXT (ARCHIVE_LABEL_TAG),      handle_archive_label_tag);
  hashtable_add (ht, TAG_TEXT (ARCHIVE_ID_TAG),         handle_archive_id_tag);

  return ht;
}

/* Input the data from a filestore, and do something - determined by mode -
 * with the contents.
 *
 * Pairs are read from fsi one at a time and dispatched, by tag name, to the
 * handlers registered in the input switch.  Unknown tags are skipped with a
 * warning.  Read errors go through do_error, which tries to resynchronise.
 * When a chunk mark is passed successfully, file_callback (if not NULL) is
 * called with callback_data and the current path of the context.
 *
 * Returns true unless a fatal error was encountered. */
static bool
do_input (filestore_in_t fsi, filestore_stats_t * stats, path_t pfix,
          input_mode_t mode, void (*file_callback) (void *, path_t),
          void *callback_data)
{
  hashtable_t handlers = create_input_switch ();
  if (!handlers)
    {
      LOG (ERROR, "Unable to create input switch");
      return false;
    }

  input_context_t context;
  context.context_fsi = fsi;
  context.context_fsstats = stats;
  context.context_input_mode = mode;
  context.context_pfix = pfix;
  context.context_id = 0;
  reset_context (&context);

  bool fatal = false;
  input_err_t err = INPUT_OK;
  while (!fatal && INPUT_EOF != err)
    {
      if (dps_input_pair (fsi->in_dps, &err))
        {
          // A pair was read: dispatch on its tag name.
          // TODO: This code is probably assuming that the pairs in the store
          // are coming in the correct order.  We should probably maintain
          // some state that would allow validation
          const char *tag = dps_input_pair_name (fsi->in_dps);
          input_handler_f handler = hashtable_find (handlers, tag);
          if (handler)
            {
              fatal = !handler (&context);
              continue;
            }

          LOGF (WARNING, "Warning: Unrecognized tag: %s", tag);
          STAT_INCR (stats, STAT_ARCHIVE_WARNING_UNRECOGNISED_TAG);
          if (!dps_skip_data_in (fsi->in_dps))
            {
              LOG (ERROR, "Error handling unrecognised tag");
              fatal = !do_error (fsi, stats);
            }
          continue;
        }

      // No pair was read; err says why.
      // TODO: Did we stop because of EOF, or due to some recoverable error?
      if (INPUT_ERR_DATALOSS == err)
        {
          fatal = !do_error (fsi, stats);
        }
      else if (INPUT_OK == err)
        {
          // This means the FILE itself is not reporting any IO errors, but
          // the data itself is bad
          LOG (ERROR, "Couldn't read pair");
          fatal = !do_error (fsi, stats);
        }
      else if (INPUT_MARK == err)
        {
          if (!dps_input_mark (fsi->in_dps))
            {
              fatal = true;
              LOG (ERROR, "Error at end of chunk");
            }
          else
            {
              LOG (DEBUG, "End of chunk");
              if (file_callback)
                (*file_callback) (callback_data, context.context_path);
            }
        }
      else if (INPUT_EOF != err)
        {
          LOGF (ERROR, "Unrecoverable error in underlying stream: %s",
                INPUT_ERR_NAME (err));
          fatal = true;
        }
      // INPUT_EOF: nothing to do, the loop condition ends the loop.
    } // end while

  // Final action depends on reason for stopping
  if (INPUT_EOF == err)
    {
      LOG (VERBOSE, "End of file");
    }
  else if (INPUT_OK == err)
    {
      // Stopping early is not reported in identify mode.
      if (IN_MODE_IDENTIFY != mode)
        {
          LOG (WARNING, "Warning: Input stopped early");
          STAT_INCR (stats, STAT_ARCHIVE_INPUT_ERROR_PROCESSING_STOPPED_EARLY);
        }
    }
  else
    {
      LOGF (ERROR, "Error: Unexpected end of data: %s", INPUT_ERR_NAME (err));
      STAT_INCR (stats, STAT_ARCHIVE_INPUT_ERROR_DATA_STOPPED_EARLY);
      fatal = true;
    }

  if (context.context_path)
    release_path (context.context_path);

  DEBUG ("Input done");

  hashtable_release (handlers);

  return !fatal;
}

/* Read the filestore fsi in verify mode (IN_MODE_VERIFY).  pfix,
 * file_callback and callback_data are passed on to do_input unchanged.
 * Returns the result of do_input: true unless a fatal error occurred. */
bool
filestore_verify_in (filestore_in_t fsi, filestore_stats_t * stats,
                     path_t pfix, void (*file_callback) (void *, path_t),
                     void *callback_data)
{
  const bool ok =
    do_input (fsi, stats, pfix, IN_MODE_VERIFY, file_callback, callback_data);

  return ok;
}

/* Read the filestore fsi in check mode (IN_MODE_CHECK).  pfix,
 * file_callback and callback_data are passed on to do_input unchanged.
 * Returns the result of do_input: true unless a fatal error occurred. */
bool
filestore_check_in (filestore_in_t fsi, filestore_stats_t * stats,
                    path_t pfix, void (*file_callback) (void *, path_t),
                    void *callback_data)
{
  const bool ok =
    do_input (fsi, stats, pfix, IN_MODE_CHECK, file_callback, callback_data);

  return ok;
}

/* Read the filestore fsi in extract mode (IN_MODE_EXTRACT).  pfix,
 * file_callback and callback_data are passed on to do_input unchanged.
 * Returns the result of do_input: true unless a fatal error occurred. */
bool
filestore_extract_in (filestore_in_t fsi, filestore_stats_t * stats,
                      path_t pfix, void (*file_callback) (void *, path_t),
                      void *callback_data)
{
  const bool ok =
    do_input (fsi, stats, pfix, IN_MODE_EXTRACT, file_callback, callback_data);

  return ok;
}

/* Read the filestore fsi in identify mode (IN_MODE_IDENTIFY), with no path
 * prefix and no per-chunk callback.  Returns the result of do_input: true
 * unless a fatal error occurred. */
bool
filestore_identify_in (filestore_in_t fsi, filestore_stats_t * stats)
{
  const bool ok = do_input (fsi, stats, NULL, IN_MODE_IDENTIFY, NULL, NULL);

  return ok;
}


