/*
	Copyright (C) 2005 Brian Gunlogson

	This program is free software; you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation; either version 2 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/

/*FIXME: The quality of the error return values is lacking. Make use of all the error return values.
    -> TODO: Either fix only the buggy error return values or implement error handling differently. There are many overlapping errors, which are hard to handle. Use a C++ error class? An enum? Exceptions?
IMPROVEMENT: Add the archiver-specific metadata helper functions to this class and maybe later break them off into another class
*/
#include <errno.h>
#include <string.h>
#include <stdio.h>

#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

#include <map>
#include <new>
#include <string>

#include "Archiver.h"
#include "flatcheck.h"
#include "sha1sum.h"

/*
	Constructor
		
	Arguments:
		read_callback - Read callback
    seek - Seek callback
*/
Archiver::Archiver(TArchiverReadCallback read_callback, TArchiverSeek seek)
{
  /* Remember the I/O callbacks supplied by the caller */
  m_seek = seek;
  m_read = read_callback;

  /* No index object exists until OpenArchive() is asked to create one */
  m_index = NULL;

  /* Zero means "unset": SeekFileNumber treats a zero current file number as
     an unknown position, and GetFileMetadata ignores a zero cached number */
  m_cached_metadata_number = 0;
  m_current_file_offset = 0;
  m_current_file_number = 0;
}

Archiver::~Archiver()
{
	/* Deleting a null pointer is a no-op, so no guard is required */
	delete m_index;
}

/*
	Function:
		OpenArchive
		
	Arguments:
		use_index - When true, an Index object is created for the archive
	
	Returns:
    2 - Other error
		1 - Filesystem error
		0 - Success
    -1 - Unsupported archive version
    -2 - Archive integrity error or endianness check failed
    
	Remarks:
		Reads in archive header.
*/
int Archiver::OpenArchive(bool use_index)
{
  /* Drop any index left over from a previous open (deleting NULL is harmless) */
  delete m_index;
  m_index = NULL;

  if(use_index)
  {
    /* Plain new reports failure by throwing std::bad_alloc and never returns
       NULL, which made the "return 2" path unreachable. The nothrow form
       returns NULL on allocation failure so the error code can be delivered. */
    m_index = new (std::nothrow) Index(m_read, m_seek);
    if(!m_index)
      return 2;
    /* REMARK: Was thinking about reading in index here so the user does not have to switch slices.
               The problem is that nothing is known about the archive at this point. */
  }

  /* Read the fixed-size archive header; no seek is issued before this read */
  archive_header hdr;
  if(!m_read((char *)&hdr, sizeof(hdr)))
    return 1;

  /* Archives written by a newer format than this code knows are rejected */
  if(hdr.format_version > FORMAT_VERSION)
    return -1;

  /* Integrity/endianness marker: any value other than 255 is rejected */
  if(hdr.endian_arch != 255)
    return -2;

  return 0;
}

/*
	Function:
		CloseArchive
		
	Arguments:
		None
	
	Returns:
		true - success
		false - failure
		
	Remarks:
		Closes archive
*/
bool Archiver::CloseArchive()
{
	/* Release the index, if one was created; deleting NULL is harmless */
	delete m_index;
	m_index = NULL;

	/* Nothing above can fail */
	return true;
}

/*
  Function:
    SeekFileNumber

  Arguments:
    file_number - Number of file to seek to
    force_index - Force use of index to speed up non-sequential seeking
    file_offset - pointer to store the file offset in

  Returns:
    1 - END OF ARCHIVE
    0 - Success, a successful seek to a place where the file number is assumed to be. No other verifications can be performed.
    -1 - User canceled seek or filesystem seek error.
    -2 - Unrecoverable error while seeking to point (common if a part of the archive is corrupt and the index is corrupt/missing too)
    -3 - Unknown error
    
  Remarks:
    Seeking backwards is extremely inefficient without force_index. Seeking forwards by small amounts is reasonable.
    Seeking by number does not work when the archive is corrupt and the index is corrupt/missing as well.
    May be called externally for optimization (such as enabling the force_index).
    Also provides more detailed error messages.
*/
int Archiver::SeekFileNumber(u_int64_t file_number, bool force_index, off_t *file_offset)
{
  /* Zero is not a valid file number (numbering restarts at 1 on rewind) */
  if(file_number == 0)
    return -3;

  m_current_file_name.erase();

  /* Only move when the requested entry is not the one already recorded */
  if(m_current_file_number != file_number)
  {
    if(force_index)
    {
      /* Index lookup: impossible without an index object */
      if(!m_index)
        return -2;

      m_current_file_number = file_number;
      if(m_index->GetEntryOffsetByNumber(file_number, &m_current_file_offset))
      {
        /* Lookup failed: mark the current position as unknown */
        m_current_file_number = 0;
        return -2;
      }
    }
    else
    {
      /* Sequential walk. If the position is unknown or already past the
         target, restart from the first entry. */
      const bool position_unknown = (m_current_file_number == 0);
      if(position_unknown || (m_current_file_number > file_number))
      {
        m_current_file_number = 1;
        m_current_file_offset = 0;
      }

      /* Step over one entry at a time (checksums are not verified) */
      while(m_current_file_number < file_number)
      {
        std::string entry_metadata;
        off_t entry_bytes;

        /* Read the metadata of the entry we are standing on */
        const int metadata_status = GetFileMetadata(&entry_metadata);
        if(metadata_status == 1)
          return 1; /* End of archive */
        if(metadata_status != 0)
        {
          /* Could not read the entry: retry through the index */
          m_current_file_number = 0;
          return SeekFileNumber(file_number, true, file_offset);
        }

        const int size_status = HelperGetEntrySize(entry_metadata, &entry_bytes);
        if(size_status != 0)
        {
          /* Any failure invalidates the recorded position */
          m_current_file_number = 0;
          if(size_status == -1)
            return 1; /* End of archive */
          if(size_status == -2)
            return SeekFileNumber(file_number, true, file_offset); /* Retry through the index */
          return -3; /* Unknown error */
        }

        /* Advance past this entry */
        m_current_file_offset += entry_bytes;
        m_current_file_number++;
      }
    }
  }

  /* The reported offset includes the archive header */
  if(file_offset)
    *file_offset = m_current_file_offset+sizeof(archive_header);

  return 0;
}

/*
  Function:
    Index_GetEntry

  Arguments:
    file_number - Number of file to get entry in index
    entry_offset - offset in index

  Returns:
    1 - No Index found
    0 - Success
    -1 - Index read error
    -2 - Corrupt index
    -3 - Other error (also returned when no index object exists)
    
  Remarks:
*/
int Archiver::Index_GetEntry(u_int64_t file_number, off_t *entry_offset)
{
  /* With an index object the lookup is delegated to it */
  if(m_index)
    return m_index->GetEntryOffsetByNumber(file_number, entry_offset);

  /* No index object to consult */
  return -3;
}

/*
  Function:
    FileMetadataByNumber

  Arguments:
    file_number - Number of file to seek to
    metadata - std::string object in which to return the metadata

  Returns:
    1 - EOA
    0 - Success
    -1 - Filename too long
    -2 - Filesystem read error
    -10 - SeekFileNumber failure
*/
int Archiver::FileMetadataByNumber(u_int64_t file_number, std::string *metadata)
{
  /* Position on the requested entry. SeekFileNumber reports 1 when the end of
     the archive is reached before file_number; pass that on as EOA instead of
     folding it into the generic seek failure code. */
  switch(SeekFileNumber(file_number))
  {
    case 0:
      break;
    case 1:
      return 1;
    default:
      return -10;
  }

  /* Read (or fetch from cache) the metadata of the entry we are now on */
  return GetFileMetadata(metadata);
}

/* TODO: Move most of the code from FileDataByNumber to another function named something like GetFileData. Make this function like FileMetadataByNumber. */
/*
  Function:
    FileDataByNumber

  Arguments:
    file_number - Number of file to seek to
    write_callback - callback where buffers of data will be sent 
    callback_data - void pointer to be passed to callback

  Returns:
    2 - End of archive
    1 - Write callback returned false
    0 - Success, not checksummed though
    -1 - FileMetadataByNumber returned failure
    -2 - Read error reread the file with FileRawByNumber
    -3 - Corrupt data, decompression error
    -4 - Zstream error
    -5 - Other error
    
  Remarks:
    First calls FileMetadataByNumber to read the metadata (which will also seek to where we want to be).
    Then just starts reading until a filesystem read error or until the complete file has been submitted to the callback
*/
int Archiver::FileDataByNumber(u_int64_t file_number, TArchiverFileDataCallback write_callback, void *callback_data)
{
  std::string metadata;
  off_t file_size;

  /* Seek to the entry and fetch its metadata. An end-of-archive result is
     reported with the end-of-archive code (2) rather than as a generic failure. */
  switch(FileMetadataByNumber(file_number, &metadata))
  {
    case 0:
      break;
    case 1:
      return 2;
    default:
      return -1;
  }

  switch(HelperGetFileSize(metadata, &file_size))
  {
    case 0:
      break;
    case -1:
      return 2;
    default:
      return -1;
  }

  /* The file data follows the metadata and a 20-byte metadata checksum */
  if(!m_seek(sizeof(archive_header)+m_current_file_offset+metadata.length()+20, SEEK_SET))
    return -1;

  /* Bit 0 of the flag byte selects the decompressing filter. The nothrow form
     of new is used so that an allocation failure reaches the -5 return below
     (plain new would throw instead of returning NULL). */
  FilterInBase *in_filter;
  if(*((const unsigned char *)metadata.data()) & 1)
    in_filter = (FilterInBase *)new (std::nothrow) ZLibInFilter; //compressed
  else
    in_filter = (FilterInBase *)new (std::nothrow) NullInFilter; //not compressed
  if(!in_filter)
    return -5;

  /* Stream the stored bytes through the filter in fixed-size chunks. Every
     exit path falls through to the single cleanup at the bottom. */
  int result = 0;
  char buffer[1024];
  for(off_t file_position = 0; file_position < file_size; file_position += sizeof(buffer))
  {
    std::string filtered_buf;
    const off_t remaining = file_size-file_position;
    const size_t buf_size = (remaining < (off_t)sizeof(buffer)) ? (size_t)remaining : sizeof(buffer);

    if(!m_read(buffer, buf_size))
    {
      result = -2; /* Read error */
      break;
    }

    const int put_status = in_filter->PutData(buffer, buf_size);
    if(put_status == 2)
      result = -4; /* Zstream error */
    else if(put_status == 1)
      result = -3; /* Corrupt data */
      //TODO: Offer to save to a file with a gz extension for restoration with gzip recovery tools
    else if(put_status != 0)
      result = -5; /* -1 or any unexpected status */
    else if(!in_filter->RemainingData(filtered_buf))
      result = -4;
    else if(!write_callback(filtered_buf.data(), filtered_buf.length(), callback_data))
      result = 1; /* Write callback asked to stop */

    if(result != 0)
      break;
  }

  in_filter->Reset();
  delete in_filter;
  return result;
}

/*
  Function:
    MetaChecksumByNumber

  Arguments:
    file_number - Number of file to seek to
    checksum - unsigned char in which to store the checksum, must be at least 20 bytes

  Returns:
    0 - Success, not checksummed though
    -1 - SeekFileNumber returned failure
    -2 - Filesystem read error
    -3 - Other error
*/
int Archiver::MetaChecksumByNumber(u_int64_t file_number, unsigned char *checksum)
{
  /* Position on the requested entry first */
  const int seek_status = SeekFileNumber(file_number);
  if(seek_status != 0)
    return -1;

  /* Every GetMetaChecksum failure is folded into -3 */
  const int read_status = GetMetaChecksum(checksum);
  return (read_status != 0) ? -3 : 0;
}

/*
  Function:
    GetMetaChecksum

  Arguments:
    checksum - unsigned char in which to store the checksum, must be at least 20 bytes

  Returns:
    0 - Success
    -1 - GetFileMetadata returned failure
    -2 - (not returned here; this function does not call HelperGetFileSize)
    -3 - Filesystem seek/read error
*/
int Archiver::GetMetaChecksum(unsigned char *checksum)
{
  /* The metadata length is needed to locate the checksum */
  std::string entry_metadata;
  if(GetFileMetadata(&entry_metadata) != 0)
    return -1;

  /* Seek to just past the metadata, then read the 20 checksum bytes.
     Seek and read failures share the same return code. */
  if(!m_seek(sizeof(archive_header)+m_current_file_offset+entry_metadata.length(), SEEK_SET)
     || !m_read((char *)checksum, 20))
    return -3;

  return 0;
}

/*
  Function:
    FileChecksumByNumber

  Arguments:
    file_number - Number of file to seek to
    checksum - unsigned char in which to store the checksum, must be at least 20 bytes

  Returns:
    0 - Success, not checksummed though
    -1 - SeekFileNumber returned failure
    -2 - Filesystem read error
    -3 - Other error
*/
int Archiver::FileChecksumByNumber(u_int64_t file_number, unsigned char *checksum)
{
  /* Position on the requested entry first */
  const int seek_status = SeekFileNumber(file_number);
  if(seek_status != 0)
    return -1;

  /* Every GetFileChecksum failure is folded into -3 */
  const int read_status = GetFileChecksum(checksum);
  return (read_status != 0) ? -3 : 0;
}

/*
  Function:
    GetFileChecksum

  Arguments:
    checksum - unsigned char in which to store the checksum, must be at least 20 bytes

  Returns:
    0 - Success
    -1 - GetFileMetadata returned failure
    -2 - HelperGetFileSize returned failure
    -3 - Filesystem seek/read error
*/
int Archiver::GetFileChecksum(unsigned char *checksum)
{
  /* Metadata length and stored file size are both needed to locate the checksum */
  std::string entry_metadata;
  if(GetFileMetadata(&entry_metadata) != 0)
    return -1;

  off_t stored_size;
  if(HelperGetFileSize(entry_metadata, &stored_size) != 0)
    return -2;

  /* Layout used here: metadata, 20-byte metadata checksum, stored file data,
     then the 20-byte file checksum.
     NOTE(review): HelperGetEntrySize sizes entries whose stored size is 0 as
     metadata+20 only, so for such an entry (empty file or hardlink) this read
     presumably lands beyond the entry - confirm callers never request the
     file checksum of a zero-length entry. */
  if(!m_seek(sizeof(archive_header)+m_current_file_offset+entry_metadata.length()+20+stored_size, SEEK_SET)
     || !m_read((char *)checksum, 20))
    return -3;

  return 0;
}

/*
  Function:
    GetFileMetadata
  
  Returns:
    1 - EOA
    0 - Success
    -1 - Filename too long
    -2 - Filesystem read error

  Remarks:
    Internal function used by FileMetadataByNumber and other functions such as SeekFileNumber
    (m_current_file_number or m_current_file_name) and m_current_file_offset must be set
*/
int Archiver::GetFileMetadata(std::string *metadata)
{
  /* FIXME: This function is inefficient */
  /* Size of the fixed-width fields that follow the name(s) */
  const size_t fixed_fields_len = sizeof(mode_t)+sizeof(uid_t)+sizeof(gid_t)+sizeof(dev_t)+sizeof(off_t)+sizeof(time_t)+sizeof(time_t)+sizeof(time_t);
  /* Give up on a name when no NUL has been seen by this index (4MB) */
  const unsigned int name_limit = 1024*1024*4;
  char scratch[fixed_fields_len];

  /* Serve from the cache when it holds the current entry, matched either by
     file number or by file name */
  const bool cached_by_number = (m_cached_metadata_number != 0) && (m_cached_metadata_number == m_current_file_number);
  const bool cached_by_name = (!m_cached_metadata_filename.empty()) && (m_cached_metadata_filename == m_current_file_name);
  if(cached_by_number || cached_by_name)
  {
    *metadata = m_cached_metadata;
    return 0;
  }

  /* Go to the start of the current entry */
  if(!m_seek(sizeof(archive_header)+m_current_file_offset, SEEK_SET))
    return -2;

  /* The entry starts with a one-byte flag */
  if(!m_read(scratch, sizeof(u_int8_t)))
    return -2;
  metadata->assign(scratch, sizeof(u_int8_t));
  const u_int8_t flag = *((u_int8_t*)scratch);

  /* Bit 7 marks the end of the archive; nothing else is read or cached */
  if(flag & 128)
    return 1;

  /* A regular entry carries one NUL-terminated name; a hardlink entry (bit 1)
     carries a second one. Names are read one byte at a time until the NUL.
     To prevent this, make available a callback function m_read_nonull that
     will read a buffer but stop at the first null and also not go past
     archive boundaries unless no null has been found so far. */
  const int string_count = (flag & 2) ? 2 : 1;
  for(int string_index = 0; string_index < string_count; string_index++)
  {
    unsigned int bytes_seen = 0;
    for(;;)
    {
      if(!m_read(scratch, 1))
        return -2;
      /* The terminating NUL is stored in the metadata too */
      metadata->append(scratch, 1);
      if(scratch[0] == '\0')
        break;
      if(bytes_seen == name_limit)
        return -1;
      bytes_seen++;
    }
  }

  /* The fixed-width fields are read in a single call and appended */
  if(!m_read(scratch, fixed_fields_len))
    return -2;
  metadata->append(scratch, fixed_fields_len);

  /* Remember the result, keyed by file number */
  m_cached_metadata_filename.erase();
  m_cached_metadata_number = m_current_file_number;
  m_cached_metadata = *metadata;

  return 0;
}

/*
  Extracts the file size from the metadata. hardlink file_size is reported as 0
  Returns:
    0 - Success
    -1 - End of archive
    -2 - Other known failure
  NOTE: THIS RETURNS THE SIZE OF THE FILE AS STORED IN THE ARCHIVE WHICH CAN BE DIFFERENT FROM THE ACTUAL FILE SIZE. THIS IS ESPECIALLY TRUE WHEN COMPRESSION IS ENABLED.
*/
int Archiver::HelperGetFileSize(std::string &metadata, off_t *file_size)
{
  *file_size = 0;

  /* The flag byte must be present */
  if(metadata.length() < sizeof(u_int8_t))
    return -2;

  const u_int8_t flags = *((const u_int8_t *)metadata.data());

  if(flags&128)
  {
    //End of archive
    return -1;
  }
  else if(flags&2)
  {
    //Hardlink: reported with a size of 0
    return 0;
  }

  /* Bytes available after the flag byte */
  const size_t after_flag = metadata.length()-sizeof(u_int8_t);

  /* Length of the file name including its terminating NUL. strnlen never
     looks past the buffer, so a missing NUL yields after_flag+1 here and is
     rejected by the check below. */
  const size_t filename_len = strnlen(metadata.c_str()+sizeof(u_int8_t), after_flag)+1;
  if(filename_len >= after_flag)
    return -2;

  /* The stored size follows the mode, uid, gid and dev fields */
  const size_t size_field_offset = sizeof(mode_t)+sizeof(uid_t)+sizeof(gid_t)+sizeof(dev_t);

  //Make sure there is still enough data remaining
  if((after_flag-filename_len) < size_field_offset+sizeof(off_t))
    return -2;

  /* The field sits at an arbitrary byte offset inside the string, so it is
     copied out with memcpy. Dereferencing a cast off_t pointer there is a
     misaligned access (undefined behaviour, faults on strict-alignment CPUs). */
  memcpy(file_size, metadata.data()+sizeof(u_int8_t)+filename_len+size_field_offset, sizeof(off_t));

  return 0;
}

/*
  Determines the size of an archive entry
  Return values are the same as for HelperGetFileSize
*/
int Archiver::HelperGetEntrySize(std::string &metadata, off_t *entry_size)
{
  /* Start from the stored file size (left at 0 when the lookup fails) */
  const int status = HelperGetFileSize(metadata, entry_size);

  /* Each checksum is 20 bytes. Zero length files do not have file checksums,
     so they carry one checksum; everything else carries two. */
  const int checksum_bytes = (*entry_size == 0) ? 20 : 40;
  *entry_size += metadata.length()+checksum_bytes;

  /* The size is filled in even on failure; the status tells the caller */
  return status;
}

/*
  Extracts the file name from the metadata.
  Returns:
    0 - Success
    -1 - End of archive
    -2 - Other known failure
*/
int Archiver::HelperGetFileName(std::string &metadata, std::string *file_name)
{
  /* The output is empty on every failure path */
  file_name->erase();

  /* The flag byte must be present */
  if(metadata.length() < sizeof(u_int8_t))
    return -2;

  const u_int8_t flags = *((u_int8_t *)metadata.data());
  if(flags&128)
    return -1; //End of archive

  /* The name starts right after the flag byte */
  const char *name_start = metadata.c_str()+sizeof(u_int8_t);
  const size_t after_flag = metadata.length()-sizeof(u_int8_t);

  /* Length of the name including its terminating NUL. A name that runs to
     the end of the metadata, or has no NUL at all, is rejected. */
  const size_t filename_len = strnlen(name_start, after_flag)+1;
  if(filename_len >= after_flag)
    return -2;

  /* A NUL was found inside the buffer, so copy exactly the bytes before it */
  file_name->assign(name_start, filename_len-1);

  return 0;
}

/*
  Extracts the file mode, compression state, and hardlink state from the metadata.
  Returns:
    0 - Success
    -1 - End of archive
    -2 - Other known failure
*/
int Archiver::HelperGetMode(std::string &metadata, bool *is_hardlink, bool *is_compressed, mode_t *file_mode)
{
  /* Defaults reported on every failure path */
  *is_hardlink = false;
  *is_compressed = false;
  *file_mode = 0;

  /* The flag byte must be present */
  if(metadata.length() < sizeof(u_int8_t))
    return -2;

  const u_int8_t flags = *((const u_int8_t *)metadata.data());

  if(flags&128)
    return -1; //End of archive

  if(flags&1)
    *is_compressed = true; //Compressed

  if(flags&2)
  {
    /* Hardlink: the mode is left at 0 and no further parsing is done */
    *is_hardlink = true;
    return 0;
  }

  /* Bytes available after the flag byte */
  const size_t after_flag = metadata.length()-sizeof(u_int8_t);

  /* Length of the file name including its terminating NUL. strnlen never
     looks past the buffer, so a missing NUL is rejected by the check below. */
  const size_t filename_len = strnlen(metadata.c_str()+sizeof(u_int8_t), after_flag)+1;
  if(filename_len >= after_flag)
    return -2;

  //Make sure there is still enough data remaining
  if((after_flag-filename_len) < sizeof(mode_t))
    return -2;

  /* The mode is the first field after the name and sits at an arbitrary byte
     offset, so it is copied out with memcpy. Dereferencing a cast mode_t
     pointer there is a misaligned access (undefined behaviour). */
  memcpy(file_mode, metadata.data()+sizeof(u_int8_t)+filename_len, sizeof(mode_t));

  return 0;
}

/*
	Function:
		PrintSettings
		
	Arguments:
		None
	
	Returns:
		Nothing
		
	Remarks:
		Prints the important class variables that can be set externally.
*/
void Archiver::PrintSettings()
{
	/* Currently a no-op: the body is empty, so nothing is printed. */
}

