/*  Lziprecover - Data recovery tool for lzipped files
    Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#define _FILE_OFFSET_BITS 64

#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <stdint.h>
#include <unistd.h>
#include <sys/stat.h>

#include "lzip.h"
#include "decoder.h"


namespace {

/* Returns true if byte 'i' of any buffer other than the first differs
   from byte 'i' of the first buffer. */
bool differs_at( const std::vector< std::vector< uint8_t > > & buffers,
                 const int i )
  {
  for( unsigned int j = 1; j < buffers.size(); ++j )
    if( buffers[0][i] != buffers[j][i] ) return true;
  return false;
  }


/* Copies the first input file to 'outfd' while comparing it, byte by
   byte, with all the other input files. Every run of positions where at
   least one copy differs from the first file is appended to
   'block_vector' as a Block (position, size). A block is closed as soon
   as two consecutive bytes are equal in all the files; the two equal
   bytes are not part of the block. A block still open at end of file is
   closed there.
   A block position of 0 is used as the "no block open" marker, so a
   difference at file position 0 can't start a block.
   Returns false if any read or write fails (the error is reported with
   show_error). Blocks found before the failure remain in 'block_vector'.
*/
bool copy_and_diff_file( const std::vector< int > & infd_vector,
                         const int outfd, std::vector< Block > & block_vector )
  {
  const int buffer_size = 65536;
  // one buffer per input file; the vectors own the memory, so nothing
  // leaks if a later allocation throws
  std::vector< std::vector< uint8_t > >
    buffer_vector( infd_vector.size(), std::vector< uint8_t >( buffer_size ) );
  Block b( 0, 0 );			// block being built; pos 0 == none
  long long partial_pos = 0;		// file position of buffer start
  int equal_bytes = 0;			// equal bytes seen at end of open block
  bool error = false;

  while( true )
    {
    const int rd = readblock( infd_vector[0], &buffer_vector[0][0], buffer_size );
    if( rd != buffer_size && errno )
      { show_error( "Error reading input file", errno ); error = true; break; }
    if( rd > 0 )
      {
      // the other files must provide exactly as many bytes as the first
      for( unsigned int i = 1; i < infd_vector.size(); ++i )
        if( readblock( infd_vector[i], &buffer_vector[i][0], rd ) != rd )
          { show_error( "Error reading input file", errno );
            error = true; break; }
      if( error ) break;
      const int wr = writeblock( outfd, &buffer_vector[0][0], rd );
      if( wr != rd )
        { show_error( "Error writing output file", errno );
          error = true; break; }
      // Every byte is examined exactly once. (Advancing the index both in
      // an inner scan and in the enclosing loop would skip the byte that
      // follows a just-closed block.)
      for( int i = 0; i < rd; ++i )
        {
        const bool differ = differs_at( buffer_vector, i );
        if( b.pos() == 0 )			// no block open
          { if( differ ) b.pos( partial_pos + i ); }	// begin block
        else
          {
          if( differ ) equal_bytes = 0; else ++equal_bytes;
          if( equal_bytes >= 2 )		// end block
            {
            b.size( partial_pos + i - ( equal_bytes - 1 ) - b.pos() );
            block_vector.push_back( b );
            b.pos( 0 );
            equal_bytes = 0;
            }
          }
        }
      partial_pos += rd;
      }
    if( rd < buffer_size ) break;			// EOF
    }
  if( b.pos() > 0 )					// finish last block
    {
    b.size( partial_pos - b.pos() );
    block_vector.push_back( b );
    }
  return !error;
  }


/* Returns 'base' raised to 'exponent', saturated at INT_MAX when the
   exact result does not fit in an int.
   Anything to the power 0 is 1 (including 0^0). A base of 0 or 1 is
   answered directly: this avoids dividing by zero in the overflow test
   below and avoids looping 'exponent' times for a result that can't
   change.
*/
int ipow( const unsigned int base, const unsigned int exponent ) throw()
  {
  if( exponent == 0 ) return 1;
  if( base <= 1 ) return base;
  int result = 1;
  for( unsigned int i = 0; i < exponent; ++i )
    {
    // multiply only if result * base is guaranteed not to exceed INT_MAX
    if( INT_MAX / base >= static_cast< unsigned int >( result ) )
      result *= base;
    else { result = INT_MAX; break; }
    }
  return result;
  }


/* Opens all the input files, storing their descriptors in 'infd_vector'
   (which must already have one element per file name) and the common
   file size in 'isize'.
   Returns:
     -1  all the files were opened, have the same size, pass
         verify_single_member, and none of them decompresses cleanly;
         the descriptors are left positioned at offset 0.
      0  one of the files has no errors, so recovery is not needed.
      1  a file can't be opened or is not seekable, two of the names
         refer to the same file as the first one, sizes differ, or a
         seek failed.
      2  the first file is shorter than min_member_size or a file fails
         verify_single_member.
   Descriptors already opened are not closed on the error paths.
*/
int open_input_files( const std::vector< std::string > & filenames,
                      std::vector< int > & infd_vector, long long & isize )
  {
  // first a cheap check: same name as the first file
  bool identical = false;
  for( unsigned int i = 1; i < filenames.size(); ++i )
    if( filenames[0] == filenames[i] )
      { identical = true; break; }
  if( !identical )
    {
    // Inode and device of the first file. They are declared outside the
    // loop so that the values stored at i == 0 are still there when the
    // other files are compared against them.
    ino_t st_ino0 = 0;
    dev_t st_dev0 = 0;
    for( unsigned int i = 0; i < filenames.size(); ++i )
      {
      struct stat in_stats;
      infd_vector[i] = open_instream( filenames[i], &in_stats, true, true );
      if( infd_vector[i] < 0 ) return 1;
      if( i == 0 ) { st_ino0 = in_stats.st_ino; st_dev0 = in_stats.st_dev; }
      else if( st_ino0 == in_stats.st_ino && st_dev0 == in_stats.st_dev )
        { identical = true; break; }	// same file under another name
      }
    }
  if( identical ) { show_error( "Two input files are the same." ); return 1; }

  // all the files must be seekable and have the same size
  isize = 0;
  for( unsigned int i = 0; i < filenames.size(); ++i )
    {
    const long long tmp = lseek( infd_vector[i], 0, SEEK_END );
    if( tmp < 0 )
      {
      if( verbosity >= 0 )
        std::fprintf( stderr, "File `%s' is not seekable.\n", filenames[i].c_str() );
      return 1;
      }
    if( i == 0 )
      {
      isize = tmp;
      if( isize < min_member_size ) { show_error( "Input file is too short." ); return 2; }
      }
    else if( isize != tmp )
      { show_error( "Sizes of input files are different." ); return 1; }
    }

  for( unsigned int i = 0; i < filenames.size(); ++i )
    if( !verify_single_member( infd_vector[i], isize ) )
      return 2;

  // if any copy decompresses cleanly there is nothing to recover
  for( unsigned int i = 0; i < filenames.size(); ++i )
    {
    if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 )
      { show_error( "Seek error in input file", errno ); return 1; }
    if( try_decompress( infd_vector[i], isize ) )
      {
      if( verbosity >= 1 )
        std::printf( "File `%s' has no errors. Recovery is not needed.\n",
                     filenames[i].c_str() );
      return 0;
      }
    // rewind for the caller
    if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 )
      { show_error( "Seek error in input file", errno ); return 1; }
    }
  return -1;
  }

} // end namespace


/* Closes 'outfd' (if it is a valid descriptor), removes the output file
   and terminates the program with exit status 'retval'.
   A warning is shown if the removal fails for any reason other than the
   file not existing. This function does not return.
*/
void cleanup_and_fail( const std::string & output_filename,
                       const int outfd, const int retval ) throw()
  {
  if( outfd >= 0 ) close( outfd );
  const bool removed = ( std::remove( output_filename.c_str() ) == 0 );
  if( !removed )
    {
    if( errno != ENOENT )
      show_error( "WARNING: deletion of output file (apparently) failed." );
    }
  std::exit( retval );
  }


bool copy_file( const int infd, const int outfd, const long long size )
  {
  long long rest = size;
  const int buffer_size = 65536;
  uint8_t * const buffer = new uint8_t[buffer_size];
  bool error = false;

  while( true )
    {
    const int block_size = std::min( (long long)buffer_size, rest );
    if( block_size <= 0 ) break;
    const int rd = readblock( infd, buffer, block_size );
    if( rd != block_size && errno )
      { show_error( "Error reading input file", errno ); error = true; break; }
    if( rd > 0 )
      {
      const int wr = writeblock( outfd, buffer, rd );
      if( wr != rd )
        { show_error( "Error writing output file", errno );
          error = true; break; }
      rest -= rd;
      }
    if( rd < block_size ) break;			// EOF
    }
  delete[] buffer;
  return !error;
  }


bool try_decompress( const int fd, const long long file_size,
                     long long * failure_posp )
  {
  try {
    Range_decoder rdec( fd );
    File_header header;
    rdec.reset_member_position();
    for( int i = 0; i < File_header::size; ++i )
      header.data[i] = rdec.get_byte();
    if( !rdec.finished() &&			// End Of File
        header.verify_magic() &&
        header.version() == 1 &&
        header.dictionary_size() >= min_dictionary_size &&
        header.dictionary_size() <= max_dictionary_size )
      {
      LZ_decoder decoder( header, rdec, -1 );
      Pretty_print dummy( "", -1 );

      if( decoder.decode_member( dummy ) == 0 &&
          rdec.member_position() == file_size ) return true;
      if( failure_posp ) *failure_posp = rdec.member_position();
      }
    }
  catch( std::bad_alloc )
    {
    show_error( "Not enough memory. Find a machine with more memory." );
    std::exit( 1 );
    }
  catch( Error e ) {}
  return false;
  }


/* Checks that 'header' has the lzip magic number and member format
   version 1. Returns true if so; otherwise reports the reason (bad
   magic, unrecoverable version 0, or unsupported version) and returns
   false.
*/
bool verify_header( const File_header & header )
  {
  if( header.verify_magic() )
    {
    switch( header.version() )
      {
      case 1:
        return true;
      case 0:
        show_error( "Version 0 member format can't be recovered." );
        return false;
      default:
        if( verbosity >= 0 )
          std::fprintf( stderr, "Version %d member format not supported.\n",
                        header.version() );
        return false;
      }
    }
  show_error( "Bad magic number (file not in lzip format)." );
  return false;
  }


/* Checks that the file open on 'fd', of size 'file_size', looks like a
   single lzip member: the header at offset 0 passes verify_header and
   the member size stored in the trailer at the end of the file equals
   'file_size'.
   When the sizes disagree, a header is looked for at the place where a
   last member of the stored size would begin, in order to tell a
   multi-member file apart from a corrupt trailer.
   Returns true if the file passes; otherwise reports why and returns
   false. The offset of 'fd' is changed.
*/
bool verify_single_member( const int fd, const long long file_size )
  {
  File_header header;
  const bool header_read =
    lseek( fd, 0, SEEK_SET ) >= 0 &&
    readblock( fd, header.data, File_header::size ) == File_header::size;
  if( !header_read )
    { show_error( "Error reading member header", errno ); return false; }
  if( !verify_header( header ) ) return false;

  File_trailer trailer;
  const bool trailer_read =
    lseek( fd, -File_trailer::size(), SEEK_END ) >= 0 &&
    readblock( fd, trailer.data, File_trailer::size() ) == File_trailer::size();
  if( !trailer_read )
    { show_error( "Error reading member trailer", errno ); return false; }

  const long long member_size = trailer.member_size();
  if( member_size == file_size ) return true;

  // Sizes differ. See if a valid header begins 'member_size' bytes before
  // the end of the file (and not at offset 0).
  bool multimember = false;
  if( member_size < file_size && lseek( fd, -member_size, SEEK_END ) > 0 )
    {
    if( readblock( fd, header.data, File_header::size ) == File_header::size )
      multimember = verify_header( header );
    }
  if( multimember )
    show_error( "Input file has more than 1 member. Split it first." );
  else
    show_error( "Member size in input file trailer is corrupt." );
  return false;
  }


/* Tries to produce a correct file in 'output_filename' by combining the
   damaged copies named in 'filenames'.
   The first copy is written to the output file while the positions where
   the copies differ are collected as blocks. Then every combination of
   "which copy provides which block" is written over the output file and
   test-decompressed until one decompresses correctly.
   Returns 0 on success, or the non-negative value returned by
   open_input_files (which includes 0 when one of the inputs already has
   no errors), or 1 if the output file can't be opened. All other
   failures remove the output file and exit the program through
   cleanup_and_fail with status 1 (I/O error) or 2 (recovery not
   possible).
   'force' is passed on to open_outstream_rw.
*/
int merge_files( const std::vector< std::string > & filenames,
                 const std::string & output_filename, const bool force )
  {
  std::vector< int > infd_vector( filenames.size() );
  long long isize = 0;
  // a negative value means "inputs are ready, go ahead with the merge"
  const int retval = open_input_files( filenames, infd_vector, isize );
  if( retval >= 0 ) return retval;

  const int outfd = open_outstream_rw( output_filename, force );
  if( outfd < 0 ) return 1;

  // vector of data blocks differing among the copies of the input file.
  std::vector< Block > block_vector;
  if( !copy_and_diff_file( infd_vector, outfd, block_vector ) )
    cleanup_and_fail( output_filename, outfd, 1 );

  if( block_vector.size() == 0 )
    { show_error( "Input files are identical. Recovery is not possible." );
      cleanup_and_fail( output_filename, outfd, 2 ); }

  const bool single_block = ( block_vector.size() == 1 );
  if( single_block && block_vector[0].size() < 2 )
    { show_error( "Input files have the same byte damaged."
                  " Try repairing one of them." );
      cleanup_and_fail( output_filename, outfd, 2 ); }

  // ipow saturates at INT_MAX, so these tests reject cases where the
  // number of variations computed below would not fit in an int.
  if( ipow( filenames.size(), block_vector.size() ) >= INT_MAX ||
      ( single_block &&
        ipow( filenames.size(), 2 ) >= INT_MAX / block_vector[0].size() ) )
    { show_error( "Input files are too damaged. Recovery is not possible." );
      cleanup_and_fail( output_filename, outfd, 2 ); }

  // A single block is split in two (its first byte, and the rest) so that
  // there are two blocks to combine. 'shifts' is the number of split
  // points tried; see the call to shift() at the end of the loop below.
  const int shifts = ( single_block ? block_vector[0].size() - 1 : 1 );
  if( single_block )
    {
    Block b( block_vector[0].pos() + 1, block_vector[0].size() - 1 );
    block_vector[0].size( 1 );
    block_vector.push_back( b );
    }

  // base_variations = (number of files) ^ (number of blocks)
  // NOTE(review): the "- 2" presumably discards combinations that just
  // reproduce one of the input files unchanged -- confirm.
  const int base_variations = ipow( filenames.size(), block_vector.size() );
  const int variations = ( base_variations * shifts ) - 2;
  bool done = false;
  for( int var = 1; var <= variations; ++var )
    {
    if( verbosity >= 1 )
      {
      std::printf( "Trying variation %d of %d \r", var, variations );
      std::fflush( stdout );
      }
    // 'var' is read as a number in base filenames.size(); digit i (least
    // significant first) selects the input file that provides block i.
    int tmp = var;
    for( unsigned int i = 0; i < block_vector.size(); ++i )
      {
      const int infd = infd_vector[tmp % filenames.size()];
      tmp /= filenames.size();
      // overwrite block i of the output file with the selected copy's data
      if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 ||
          lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 ||
          !copy_file( infd, outfd, block_vector[i].size() ) )
        { show_error( "Error reading output file", errno );
          cleanup_and_fail( output_filename, outfd, 1 ); }
      }
    if( lseek( outfd, 0, SEEK_SET ) < 0 )
      { show_error( "Seek error in output file", errno );
        cleanup_and_fail( output_filename, outfd, 1 ); }
    if( try_decompress( outfd, isize ) )
      { done = true; break; }
    // After each full round of combinations, adjust the two blocks.
    // NOTE(review): shift() presumably moves the boundary between the two
    // blocks by one byte; Block is declared elsewhere -- confirm. With
    // more than one original block 'var' never reaches base_variations,
    // so this only happens in the single block case.
    if( var % base_variations == 0 ) block_vector[0].shift( block_vector[1] );
    }
  if( verbosity >= 1 ) std::printf( "\n" );

  // outfd is closed from here on, so -1 is passed to cleanup_and_fail
  if( close( outfd ) != 0 )
    {
    show_error( "Error closing output file", errno );
    cleanup_and_fail( output_filename, -1, 1 );
    }
  if( !done )
    {
    show_error( "Some error areas overlap. Can't recover input file." );
    cleanup_and_fail( output_filename, -1, 2 );
    }
  if( verbosity >= 1 )
    std::printf( "Input files merged successfully.\n" );
  return 0;
  }
