/*  Tarlz - Archiver with multimember lzip compression
    Copyright (C) 2013-2018 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#define _FILE_OFFSET_BITS 64

#include <algorithm>
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <stdint.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#if defined(__GNU_LIBRARY__)
#include <sys/sysmacros.h>		// for major, minor
#endif
#include <ftw.h>
#include <grp.h>
#include <pwd.h>
#include <lzlib.h>

#include "arg_parser.h"
#include "lzip.h"
#include "tarlz.h"


// Instance of CRC32C; write_extended calls its windowed_crc member to
// compute the value stored in the 'GNU.crc32' extended record.
const CRC32C crc32c;

// A value >= 0 in cl_owner or cl_group replaces the uid or gid obtained
// from lstat when add_member builds a header.
int cl_owner = -1;		// global vars needed by add_member
int cl_group = -1;
// Selects the extra points at which the encoder is flushed: 0 after every
// member, 1 after every file name argument, 2 just before the
// end-of-archive blocks, 3 nowhere. In all cases encode flushes once more
// after writing the end-of-archive blocks.
int cl_solid = 0;		// 1 = dsolid, 2 = asolid, 3 = solid

namespace {

// Compressor used by archive_write; a null pointer means that the data are
// written to outfd uncompressed.
LZ_Encoder * encoder = 0;	// local vars needed by add_member
// File descriptor of the archive that archive_write writes to.
int outfd = -1;
// Status recorded by add_member and encode for errors that do not stop the
// creation of the archive; encode returns it if nothing worse happened.
int gretval = 0;

// Position 'fd' at the absolute offset 'pos' and read up to 'size' bytes
// into 'buf'. Returns the result of readblock, or 0 if the seek fails.
int seek_read( const int fd, uint8_t * const buf, const int size,
               const long long pos )
  {
  const bool positioned = ( lseek( fd, pos, SEEK_SET ) == pos );
  return positioned ? readblock( fd, buf, size ) : 0;
  }

// infd and outfd can refer to the same file if copying to a lower file
// position or if source and destination blocks don't overlap.
// max_size < 0 means no size limit.
bool copy_file( const int infd, const int outfd, const long long max_size = -1 )
  {
  const int buffer_size = 65536;
  // remaining number of bytes to copy
  long long rest = ( ( max_size >= 0 ) ? max_size : buffer_size );
  long long copied_size = 0;
  uint8_t * const buffer = new uint8_t[buffer_size];
  bool error = false;

  while( rest > 0 )
    {
    const int size = std::min( (long long)buffer_size, rest );
    if( max_size >= 0 ) rest -= size;
    const int rd = readblock( infd, buffer, size );
    if( rd != size && errno )
      { show_error( "Error reading input file", errno ); error = true; break; }
    if( rd > 0 )
      {
      const int wr = writeblock( outfd, buffer, rd );
      if( wr != rd )
        { show_error( "Error writing output file", errno );
          error = true; break; }
      copied_size += rd;
      }
    if( rd < size ) break;				// EOF
    }
  delete[] buffer;
  return ( !error && ( max_size < 0 || copied_size == max_size ) );
  }


/* Check archive type. If success, leave fd file pos at 0.
   If remove_eof, remove EOF blocks, and leave fd file pos at EOF. */
bool check_appendable( const int fd, const bool remove_eof )
  {
  struct stat st;
  if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return false;
  if( lseek( fd, 0, SEEK_SET ) != 0 ) return false;
  enum { bufsize = header_size + ( header_size / 8 ) };
  uint8_t buf[bufsize];
  int rd = readblock( fd, buf, bufsize );
  if( rd == 0 && errno == 0 ) return true;	// append to empty archive
  if( rd < min_member_size || ( rd != bufsize && errno ) ) return false;
  const Lzip_header * const p = (const Lzip_header *)buf;	// shut up gcc
  if( !p->verify_magic() ) return false;
  LZ_Decoder * decoder = LZ_decompress_open();	// decompress first header
  if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ||
      LZ_decompress_write( decoder, buf, rd ) != rd ||
      ( rd = LZ_decompress_read( decoder, buf, header_size ) ) != header_size )
    { LZ_decompress_close( decoder ); return false; }
  LZ_decompress_close( decoder );
  const bool maybe_eof = ( buf[0] == 0 );
  if( !verify_ustar_chksum( buf ) && !maybe_eof ) return false;
  const long long end = lseek( fd, 0, SEEK_END );
  if( end < min_member_size ) return false;

  Lzip_trailer trailer;
  if( seek_read( fd, trailer.data, Lzip_trailer::size,
                 end - Lzip_trailer::size ) != Lzip_trailer::size )
    return false;
  const long long member_size = trailer.member_size();
  if( member_size < min_member_size || member_size > end ||
      ( maybe_eof && member_size != end ) ) return false;

  Lzip_header header;
  if( seek_read( fd, header.data, Lzip_header::size,
                 end - member_size ) != Lzip_header::size ) return false;
  if( !header.verify_magic() || !isvalid_ds( header.dictionary_size() ) )
    return false;

  const unsigned long long data_size = trailer.data_size();
  if( data_size < header_size || data_size > 32256 ) return false;
  const unsigned data_crc = trailer.data_crc();
  const CRC32 crc32;
  uint32_t crc = 0xFFFFFFFFU;
  for( unsigned i = 0; i < data_size; ++i ) crc32.update_byte( crc, 0 );
  crc ^= 0xFFFFFFFFU;
  if( crc != data_crc ) return false;

  if( remove_eof )
    { if( lseek( fd, end - member_size, SEEK_SET ) != end - member_size ||
        ftruncate( fd, end - member_size ) != 0 ) return false; }
  else
    if( lseek( fd, 0, SEEK_SET ) != 0 ) return false;
  return true;
  }


/* Write 'size' bytes from 'buf' to the archive (outfd).
   If there is no encoder the data are written verbatim. Otherwise they are
   fed to the encoder and any compressed data it produces are written to
   outfd.
   A 'size' <= 0 is a flush request: the current lzip member is finished,
   all pending compressed data are written, and the encoder is restarted so
   that the next call begins a new member. 'buf' is not used in this case.
   Returns false if writing to outfd fails. Library errors are reported
   through internal_error. */
bool archive_write( const uint8_t * const buf, const int size )
  {
  const bool flush = ( size <= 0 );
  if( !encoder )					// uncompressed
    return ( flush || writeblock( outfd, buf, size ) == size );
  enum { obuf_size = 65536 };
  uint8_t obuf[obuf_size];
  int sz = 0;				// bytes of buf accepted by the encoder
  if( flush ) LZ_compress_finish( encoder );	// flush encoder
  while( sz < size || flush )
    {
    if( !flush )
      {
      const int wr = LZ_compress_write( encoder, buf + sz, size - sz );
      if( wr < 0 ) internal_error( "library error (LZ_compress_write)." );
      sz += wr;
      }
    const int rd = LZ_compress_read( encoder, obuf, obuf_size );
    if( rd < 0 ) internal_error( "library error (LZ_compress_read)." );
    // '>=' (not '==') so that a negative size also ends the flush loop
    if( rd == 0 && sz >= size ) break;
    if( writeblock( outfd, obuf, rd ) != rd ) return false;
    }
  // a finished member means a flush was completed; start the next member
  if( LZ_compress_finished( encoder ) == 1 &&
      LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 )
    internal_error( "library error (LZ_compress_restart_member)." );
  return true;
  }


// Return the upper case hexadecimal digit for 'value' (0 to 15), or 0 if
// 'value' is out of range.
unsigned char xdigit( const unsigned value )
  {
  static const char digits[] = "0123456789ABCDEF";
  if( value > 15 ) return 0;
  return (unsigned char)digits[value];
  }

// Store the 'size' least significant hexadecimal digits of 'num' in 'buf',
// most significant digit first and padded with zeros on the left.
// No terminating null character is written.
void print_hex( char * const buf, int size, unsigned long long num )
  {
  for( int i = size - 1; i >= 0; --i, num >>= 4 )
    buf[i] = xdigit( num & 0x0F );
  }

// Store the 'size' least significant octal digits of 'num' in 'buf',
// most significant digit first and padded with zeros on the left.
// No terminating null character is written.
void print_octal( char * const buf, int size, unsigned long long num )
  {
  for( int i = size - 1; i >= 0; --i, num >>= 3 )
    buf[i] = '0' + (int)( num & 7 );
  }

// Return the number of decimal digits needed to print 'value' (1 for 0).
unsigned decimal_digits( unsigned long long value )
  {
  unsigned count = 0;
  do { ++count; value /= 10; } while( value > 0 );
  return count;
  }

// Return the total length of the extended record "LEN KEYWORD=VALUE\n",
// where LEN is the decimal length of the whole record including LEN itself.
unsigned long long record_size( const unsigned keyword_size,
                                const unsigned long long value_size )
  {
  // everything but the length field: ' ' + keyword + '=' + value + '\n'
  const unsigned fixed = keyword_size + 2;
  const unsigned long long payload = fixed + value_size + 1;
  // adding the length digits may itself add one more digit
  const unsigned long long first_guess = payload + decimal_digits( payload );
  return payload + decimal_digits( first_guess );
  }

/* Write to the archive an extended header followed by the extended records
   ('path', 'linkpath' and 'size', each only if present in 'extended', plus
   a final 'GNU.crc32' record) padded with zeros to the size returned by
   round_up. The extended header is a copy of 'uheader' with the typeflag,
   size and chksum fields replaced.
   Returns false if a record can't be formatted or a write fails. */
bool write_extended( const Extended & extended, const Tar_header uheader )
  {
  int path_rec = 0, lpath_rec = 0, size_rec = 0;	// record lengths
  if( extended.path.size() )
    path_rec = record_size( 4, extended.path.size() );
  if( extended.linkpath.size() )
    lpath_rec = record_size( 8, extended.linkpath.size() );
  if( extended.size > 0 )
    size_rec = record_size( 4, decimal_digits( extended.size ) );
  const unsigned long long edsize = path_rec + lpath_rec + size_rec + 22;
  const unsigned long long bufsize = round_up( edsize );
  if( bufsize == 0 ) return edsize == 0;	// overflow or no extended data

  std::vector< char > records( bufsize + 1 );	// extended records buffer
  char * const buf = &records[0];
  unsigned long long pos = 0;			// end of the records stored
  if( path_rec )
    {
    if( snprintf( buf, path_rec + 1, "%d path=%s\n",
                  path_rec, extended.path.c_str() ) != path_rec )
      return false;
    pos += path_rec;
    }
  if( lpath_rec )
    {
    if( snprintf( buf + pos, lpath_rec + 1, "%d linkpath=%s\n",
                  lpath_rec, extended.linkpath.c_str() ) != lpath_rec )
      return false;
    pos += lpath_rec;
    }
  if( size_rec )
    {
    if( snprintf( buf + pos, size_rec + 1, "%d size=%llu\n",
                  size_rec, extended.size ) != size_rec )
      return false;
    pos += size_rec;
    }
  if( snprintf( buf + pos, 23, "22 GNU.crc32=00000000\n" ) != 22 )
    return false;
  pos += 22;
  if( pos != edsize ) return false;
  // overwrite the 8 placeholder zeros of the crc record with the real value
  print_hex( buf + edsize - 9, 8,
             crc32c.windowed_crc( (const uint8_t *)buf, edsize - 9, edsize ) );
  std::memset( buf + edsize, 0, bufsize - edsize );	// wipe padding

  Tar_header header;				// extended header
  std::memcpy( header, uheader, header_size );	// start from ustar header
  header[typeflag_o] = tf_extended;
  print_octal( header + size_o, size_l - 1, edsize );
  print_octal( header + chksum_o, chksum_l - 1,
               ustar_chksum( (const uint8_t *)header ) );
  if( !archive_write( (const uint8_t *)header, header_size ) ) return false;

  // write the extended records in pieces of at most 1 MiB
  const unsigned long long max_chunk = 1ULL << 20;
  for( unsigned long long done = 0; done < bufsize; )
    {
    const unsigned long long left = bufsize - done;
    const int chunk = ( left < max_chunk ) ? left : max_chunk;
    if( !archive_write( (const uint8_t *)buf + done, chunk ) ) return false;
    done += chunk;
    }
  return true;
  }


/* Return a pointer to the part of 'filename' that follows the last ".."
   component and any leading '/' or "./" after it, or "." if nothing is
   left. When a prefix is removed, a diagnostic is shown unless the same
   prefix was the last one reported. */
const char * remove_leading_dotdot( const char * const filename )
  {
  static std::string prefix;		// last prefix reported
  const char * tail = filename;

  // move 'tail' just past the last ".." path component
  for( const char * s = filename; *s; ++s )
    {
    const bool at_start = ( s == filename || s[-1] == '/' );
    if( at_start && s[0] == '.' && s[1] == '.' &&
        ( s[2] == 0 || s[2] == '/' ) ) tail = s + 2;
    }
  // then skip slashes and "./" sequences
  for( ;; )
    {
    if( *tail == '/' ) ++tail;
    else if( tail[0] == '.' && tail[1] == '/' ) tail += 2;
    else break;
    }
  if( tail != filename )
    {
    const std::string removed( filename, tail - filename );
    if( removed != prefix )
      {
      prefix = removed;
      const std::string msg =
        "Removing leading '" + prefix + "' from member names.";
      show_error( msg.c_str() );
      }
    }
  return ( *tail != 0 ) ? tail : ".";
  }


/* Store the name of 'filename' (after remove_leading_dotdot) in 'header'.
   The name goes in the name field if it fits, else it is split at a '/'
   between the prefix and name fields if possible (shortest prefix first).
   Otherwise the full name is stored in extended.path and a truncated copy
   in the name field.
   Returns true if the name fits in the ustar header. */
bool store_name( const char * const filename, Extended & extended,
                 Tar_header header )
  {
  const char * const member = remove_leading_dotdot( filename );
  const int member_len = std::strlen( member );

  if( member_len <= name_l )			// fits in name
    { std::memcpy( header + name_o, member, member_len ); return true; }

  if( member_len <= prefix_l + 1 + name_l )	// prefix + '/' + name
    {
    // the slash must leave at most name_l bytes after it and at most
    // prefix_l bytes before it
    const int first = member_len - name_l - 1;
    const int last = std::min( member_len - 1, (int)prefix_l );
    for( int cut = first; cut <= last; ++cut )
      {
      if( member[cut] != '/' ) continue;
      std::memcpy( header + prefix_o, member, cut );
      std::memcpy( header + name_o, member + cut + 1, member_len - cut - 1 );
      return true;
      }
    }

  // store full name in extended record, truncated name in ustar header
  extended.path = member;
  std::strncpy( header + name_o, member, name_l );
  return false;
  }


/* Callback for nftw. Append to the archive the member corresponding to
   'filename': an extended header if some metadata do not fit in the ustar
   header, the ustar header, and for non-empty regular files the contents
   padded with zeros to a multiple of header_size. The encoder is flushed
   after the member when cl_solid is 0.
   The stat data passed by nftw are ignored; lstat is called again.
   'flag' is only used to detect unreadable directories (FTW_DNR).
   Returns 0 to continue the tree walk; in this case a file that could not
   be archived is skipped and recorded in gretval. Returns 1 to stop the
   walk after an error writing the archive or reading the file contents. */
int add_member( const char * const filename, const struct stat *,
                const int flag, struct FTW * )
  {
  struct stat st;
  if( lstat( filename, &st ) != 0 )
    { show_file_error( filename, "Can't stat input file", errno );
      gretval = 1; return 0; }
  Extended extended;		// metadata for extended records
  Tar_header header;
  std::memset( header, 0, header_size );
  store_name( filename, extended, header );

  const mode_t mode = st.st_mode;
  print_octal( header + mode_o, mode_l - 1,
               mode & ( S_ISUID | S_ISGID | S_ISVTX |
                        S_IRWXU | S_IRWXG | S_IRWXO ) );
  // owner and group given in the command line override those of the file
  const uid_t uid = ( cl_owner >= 0 ) ? (uid_t)cl_owner : st.st_uid;
  const gid_t gid = ( cl_group >= 0 ) ? (gid_t)cl_group : st.st_gid;
  if( uid >= 2 << 20 || gid >= 2 << 20 )
    { show_file_error( filename, "uid or gid is larger than 2_097_151." );
      gretval = 1; return 0; }
  print_octal( header + uid_o, uid_l - 1, uid );
  print_octal( header + gid_o, gid_l - 1, gid );
  const long long mtime = st.st_mtime;			// shut up gcc
  if( mtime < 0 || mtime >= 1LL << 33 )
    { show_file_error( filename, "mtime is out of ustar range [0, 8_589_934_591]." );
      gretval = 1; return 0; }
  print_octal( header + mtime_o, mtime_l - 1, mtime );
  unsigned long long file_size = 0;	// only regular files carry data
  Typeflag typeflag;
  if( S_ISREG( mode ) ) { typeflag = tf_regular; file_size = st.st_size; }
  else if( S_ISDIR( mode ) )
    {
    typeflag = tf_directory;
    if( flag == FTW_DNR )
      { show_file_error( filename, "Can't open directory", errno );
        gretval = 1; return 0; }
    }
  else if( S_ISLNK( mode ) )
    {
    typeflag = tf_symlink;
    // try the linkname field first; fall back to an extended record
    if( st.st_size > linkname_l ||
        readlink( filename, header + linkname_o, linkname_l ) != st.st_size )
      {
      char * const buf = new char[st.st_size+1];
      const long len = readlink( filename, buf, st.st_size );
      if( len == st.st_size ) { buf[len] = 0; extended.linkpath = buf; }
      delete[] buf;
      if( len != st.st_size )
      { show_file_error( filename, "Error reading link", (len < 0) ? errno : 0 );
        gretval = 1; return 0; }
      }
    }
  else if( S_ISCHR( mode ) || S_ISBLK( mode ) )
    {
    typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev;
    // st_rdev identifies the device that the special file represents;
    // st_dev is the device containing the file itself.
    if( major( st.st_rdev ) >= 2 << 20 || minor( st.st_rdev ) >= 2 << 20 )
      { show_file_error( filename, "devmajor or devminor is larger than 2_097_151." );
        gretval = 1; return 0; }
    print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_rdev ) );
    print_octal( header + devminor_o, devminor_l - 1, minor( st.st_rdev ) );
    }
  else if( S_ISFIFO( mode ) ) typeflag = tf_fifo;
  else { show_file_error( filename, "Unknown file type." );
         gretval = 2; return 0; }
  header[typeflag_o] = typeflag;
  std::memcpy( header + magic_o, ustar_magic, magic_l - 1 );
  header[version_o] = header[version_o+1] = '0';
  const struct passwd * const pw = getpwuid( uid );
  if( pw && pw->pw_name )
    std::strncpy( header + uname_o, pw->pw_name, uname_l - 1 );
  const struct group * const gr = getgrgid( gid );
  if( gr && gr->gr_name )
    std::strncpy( header + gname_o, gr->gr_name, gname_l - 1 );
  // sizes that do not fit in the size field go in an extended record
  if( file_size >= 1ULL << 33 ) extended.size = file_size;
  else print_octal( header + size_o, size_l - 1, file_size );
  // the checksum must be computed after all other fields are set
  print_octal( header + chksum_o, chksum_l - 1,
               ustar_chksum( (const uint8_t *)header ) );

  // open the file before writing anything, so that an unreadable file is
  // skipped without leaving a header in the archive
  const int infd = file_size ? open_instream( filename ) : -1;
  if( file_size && infd < 0 ) { gretval = 1; return 0; }
  if( !extended.empty() && !write_extended( extended, header ) )
    { show_error( "Error writing extended header", errno );
      if( infd >= 0 ) close( infd );
      return 1; }
  if( !archive_write( (const uint8_t *)header, header_size ) )
    { show_error( "Error writing ustar header", errno );
      if( infd >= 0 ) close( infd );
      return 1; }
  if( file_size )
    {
    enum { bufsize = 32 * header_size };
    uint8_t buf[bufsize];
    unsigned long long rest = file_size;
    while( rest > 0 )
      {
      int size = std::min( rest, (unsigned long long)bufsize );
      const int rd = readblock( infd, buf, size );
      rest -= rd;
      if( rd != size )
        {
        if( verbosity >= 0 )
          std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n",
                        filename, file_size - rest );
        close( infd ); return 1;
        }
      if( rest == 0 )				// last read
        {
        // pad the data with zeros to a multiple of header_size
        const int rem = file_size % header_size;
        if( rem > 0 )
          { const int padding = header_size - rem;
            std::memset( buf + size, 0, padding ); size += padding; }
        }
      if( !archive_write( buf, size ) )
        { show_error( "Error writing archive", errno ); close( infd );
          return 1; }
      }
    if( close( infd ) != 0 )
      { show_file_error( filename, "Error closing file", errno ); return 1; }
    }
  if( encoder && cl_solid == 0 && !archive_write( 0, 0 ) )	// flush encoder
    { show_error( "Error flushing encoder", errno ); return 1; }
  if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
  return 0;
  }

} // end namespace


unsigned ustar_chksum( const uint8_t * const buf )
  {
  unsigned chksum = chksum_l * 0x20;	// treat chksum field as spaces
  for( int i = 0; i < chksum_o; ++i ) chksum += buf[i];
  for( int i = chksum_o + chksum_l; i < header_size; ++i ) chksum += buf[i];
  return chksum;
  }


// Return true if the header at 'buf' passes verify_ustar_magic and the
// octal number in its chksum field matches the checksum of the header.
bool verify_ustar_chksum( const uint8_t * const buf )
  {
  if( !verify_ustar_magic( buf ) ) return false;
  const unsigned long stored = strtoul( (const char *)buf + chksum_o, 0, 8 );
  return ( ustar_chksum( buf ) == stored );
  }


/* Append to the archive 'archive_name' the archives named by the non-option
   arguments of 'parser', in order. Before each copy both the input and the
   destination are checked with check_appendable, which also removes the
   end-of-archive member from the destination.
   'filenames' is the number of file name arguments; if it is 0 there is
   nothing to do. An empty 'archive_name' (standard output) is rejected.
   Returns 0 on success, 1 on I/O or usage errors, 2 if some archive is not
   appendable. Processing stops at the first error. */
int concatenate( const std::string & archive_name, const Arg_parser & parser,
                 const int filenames )
  {
  if( !filenames )
    { if( verbosity >= 1 ) show_error( "Nothing to concatenate." ); return 0; }
  if( archive_name.empty() )
    { show_error( "'--concatenate' is incompatible with '-f -'.", 0, true );
      return 1; }
  if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1;

  int retval = 0;
  for( int i = 0; i < parser.arguments(); ++i )		// copy archives
    {
    if( parser.code( i ) ) continue;			// skip options
    const char * const filename = parser.argument( i ).c_str();
    const int infd = open_instream( filename );
    if( infd < 0 )
      { show_file_error( filename, "Can't open input file", errno );
        retval = 1; break; }
    if( !check_appendable( infd, false ) )
      { show_file_error( filename, "Not an appendable tar.lz archive." );
        close( infd ); retval = 2; break; }
    if( !check_appendable( outfd, true ) )
      { show_error( "This does not look like an appendable tar.lz archive." );
        close( infd ); retval = 2; break; }
    // report before closing so that errno still describes the copy error,
    // and close infd also when the copy fails
    if( !copy_file( infd, outfd ) )
      { show_file_error( filename, "Error copying archive", errno );
        close( infd ); retval = 1; break; }
    if( close( infd ) != 0 )
      { show_file_error( filename, "Error copying archive", errno );
        retval = 1; break; }
    if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
    }

  if( close( outfd ) != 0 && !retval )
    { show_error( "Error closing archive", errno ); retval = 1; }
  return retval;
  }


int encode( const std::string & archive_name, const Arg_parser & parser,
            const int filenames, const int level, const bool append )
  {
  struct Lzma_options
    {
    int dictionary_size;		// 4 KiB .. 512 MiB
    int match_len_limit;		// 5 .. 273
    };
  const Lzma_options option_mapping[] =
    {
    {   65535,  16 },		// -0
    { 1 << 20,   5 },		// -1
    { 3 << 19,   6 },		// -2
    { 1 << 21,   8 },		// -3
    { 3 << 20,  12 },		// -4
    { 1 << 22,  20 },		// -5
    { 1 << 23,  36 },		// -6
    { 1 << 24,  68 },		// -7
    { 3 << 23, 132 },		// -8
    { 1 << 25, 273 } };		// -9
  const bool compressed = ( level >= 0 && level <= 9 );

  if( !append )
    {
    if( !filenames )
      { show_error( "Cowardly refusing to create an empty archive.", 0, true );
        return 1; }
    if( archive_name.empty() ) outfd = STDOUT_FILENO;
    else if( ( outfd = open_outstream( archive_name ) ) < 0 ) return 1;
    }
  else
    {
    if( !filenames )
      { if( verbosity >= 1 ) show_error( "Nothing to append." ); return 0; }
    if( archive_name.empty() )
      { show_error( "'--append' is incompatible with '-f -'.", 0, true );
        return 1; }
    if( !compressed )
      { show_error( "'--append' is incompatible with '--uncompressed'.", 0, true );
        return 1; }
    if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
    if( !check_appendable( outfd, true ) )
      { show_error( "This does not look like an appendable tar.lz archive." );
        return 2; }
    }

  if( compressed )
    {
    encoder = LZ_compress_open( option_mapping[level].dictionary_size,
                option_mapping[level].match_len_limit, LLONG_MAX );
    if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
      {
      if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
        show_error( "Not enough memory. Try a lower compression level." );
      else
        internal_error( "invalid argument to encoder." );
      return 1;
      }
    }

  int retval = 0;
  for( int i = 0; i < parser.arguments(); ++i )		// write members
    {
    const int code = parser.code( i );
    const std::string & arg = parser.argument( i );
    const char * filename = arg.c_str();
    if( code == 'C' && chdir( filename ) != 0 )
      { show_file_error( filename, "Error changing working directory", errno );
        retval = 1; break; }
    if( code ) continue;				// skip options
    std::string deslashed;		// arg without trailing slashes
    unsigned len = arg.size();
    while( len > 1 && arg[len-1] == '/' ) --len;
    if( len < arg.size() )
      { deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); }
    struct stat st;
    if( lstat( filename, &st ) != 0 )
      { show_file_error( filename, "Can't stat input file", errno );
        if( gretval < 1 ) gretval = 1; }
    else if( ( retval = nftw( filename, add_member, 16, FTW_PHYS ) ) != 0 )
      break;			// write error
    else if( encoder && cl_solid == 1 && !archive_write( 0, 0 ) )	// flush encoder
      { show_error( "Error flushing encoder", errno ); retval = 1; }
    }

  if( !retval )			// write End-Of-Archive records
    {
    uint8_t buf[header_size];
    std::memset( buf, 0, header_size );
    if( encoder && cl_solid == 2 && !archive_write( 0, 0 ) )	// flush encoder
      { show_error( "Error flushing encoder", errno ); retval = 1; }
    else if( !archive_write( buf, header_size ) ||
             !archive_write( buf, header_size ) ||
             ( encoder && !archive_write( 0, 0 ) ) )	// flush encoder
      { show_error( "Error writing end-of-archive blocks", errno );
        retval = 1; }
    }
  if( close( outfd ) != 0 && !retval )
    { show_error( "Error closing archive", errno ); retval = 1; }
  if( retval && archive_name.size() && !append )
    std::remove( archive_name.c_str() );
  if( !retval && gretval )
    { show_error( "Exiting with failure status due to previous errors." );
      retval = gretval; }
  return retval;
  }
