/*
Copyright 2013 Cameron Palmer

This file is a part of Genezip.

Genezip is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Genezip is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Genezip.  If not, see <http://www.gnu.org/licenses/>
*/


/*!
  \file gzfile_compress.h
  \brief "gzip" compress a vector of unsigned ints (on [0,1000] for the moment)
 */
#ifndef __GENEZIP__GZFILE_COMPRESS_H__
#define __GENEZIP__GZFILE_COMPRESS_H__

#include <cstddef>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>
#include "genezip/uncompressed_buffer.h"
#include "genezip/custom_hash_function.h"
#include "genezip/helper_functions.h"
#include "genezip/huffman_code.h"
#include "genezip/huffman_encoded_stream_generator.h"
#include "genezip/vectorlist.h"
#include "genezip/suffix_tree.h"
namespace genezip_utils {
  //! \class compression_state
  //! state object describing a compressed dataset
  class compression_state {
  public:
    //! constructor: set number of bits required per literal for
    //! suffix tree
    //! @param bits_per_element number of bits required to represent
    //! max literal handled by suffix tree
  compression_state(unsigned bits_per_element) 
    : skip_counter(0),
      tree_vec_counter(0),
      stree(NULL),
      previous_literal(0),
      waiting(false),
      pending_match(false),
      buffer(65536),
      _bits_per_element(bits_per_element) {
	stree = new suffix_tree(static_cast<unsigned>
				(pow(2, _bits_per_element) + 0.5));
	stree->set_raw_data_buffer(&buffer);
      }
    //! destructor: clear out internally allocated suffix tree
    ~compression_state() throw() {clear(false);}

    unsigned skip_counter; //!< remaining number of positions to skip for MATCH
    unsigned tree_vec_counter; //!< current search start position
    suffix_tree *stree; //!< current allocated suffix tree
    vectorlist_search_result previous_res; //!< previous search result
    unsigned previous_literal; //!< literal at previous search
    bool waiting; //!< verbose compression output (not recommended)
    bool pending_match; //!< whether a match was at the last search
    suffix_tree_buffer buffer; //!< circular buffer for rev. reference search

    //! reset the compression state to null values
    //! @param reset if true, allocate a new suffix tree instead of nullifying
    void clear(bool reset = true) {
      skip_counter = tree_vec_counter = previous_literal = 0;
      waiting = pending_match = false;
      previous_res.nullify();

      if (stree) {
	delete stree;
	stree = NULL;
      }
      if (reset) {
	stree = new suffix_tree(static_cast<unsigned>
				(pow(2, _bits_per_element) + 0.5));
	stree->set_raw_data_buffer(&buffer);
      }
      buffer.clear();
    }
  private:
    //! copy constructor not permitted
    //! @param other existing file compression state
    //! \warning DO NOT USE
    compression_state(const compression_state &other) 
      : buffer(1) {
      throw std::domain_error("compression_state: copy "
			      "constructor invalid");
    }
    //! assignment operator not permitted
    //! @param other existing file compression state
    //! \warning DO NOT USE
    compression_state &operator=(const compression_state &other) {
      throw std::domain_error("compression_state: assignment "
			      "constructor invalid");
    }
    //! default constructor not permitted
    //! \warning DO NOT USE
    compression_state()
      : buffer(1) {
      throw std::domain_error("compression_state: "
			      "default constructor "
			      "not permitted");
    }
    unsigned _bits_per_element; //! number of bits per literal in suffix tree
  };
  /*!
    \brief compress a vector of unsigned ints (on [0,1000] for the moment)
  
    Uses gzip/DEFLATE-style compression (reverse references and huffman codes),
    but with a modified definition of a "literal" (gzip definition: ASCII
    unsigned char on [0,255];
    new definition: unsigned int on [0,[1,2]000])
  */
  class gzfile_compress {
  public:
    //! constructor: set number of bits used to store max literal
    //! @param bits_per_element number of bits used to store max literal
    gzfile_compress(unsigned bits_per_element)
      : _cstate(bits_per_element) {}
    /*!
      \brief destructor
    */
    ~gzfile_compress() throw() {}
    /*!
      \brief compress a vector of literals (unsigned) into gzip format
      @tparam vector_type type of vector from which to read data
      @param input_vector vector of uncompressed literals
      @param intermediate_data_holder target vector for resulting binary stream
      @param litlen_code huffman code for literals and match lengths;
      will be generated by this method
      @param distance_code huffman code for match offsets; will be generated
      by this method
      \warning flagged for possible removal in later versions
    */
    template <class vector_type>
      inline void compress_from_vector
      (vector_type &input_vector,
       std::vector<bool> &intermediate_data_holder,
       huffman_code &litlen_code,
       huffman_code &distance_code) {
      uncompressed_buffer buffer(input_vector);
      compress_from_vector(buffer, intermediate_data_holder,
			   litlen_code, distance_code);
    }
    //! from current state, compress a literal
    //! @param value the literal to compress
    //! @param intermediate_data_holder destination for compressed bits
    //! @param litlen_code where generated literal/length huffman code
    //!        should be stored
    //! @param distance_code where generated distance huffman code
    //!        should be stored
    void compress_current_value(unsigned value,
				std::vector<bool> &intermediate_data_holder,
				huffman_code &litlen_code,
				huffman_code &distance_code);
    //! trigger the compressed object to flush remaining bits for end of stream
    //! @param intermediate_data_holder destination for compressed bits
    //! @param litlen_code where generated literal/length huffman code
    //!        should be stored
    //! @param distance_code where generated distance huffman code
    //!        should be stored
    void finish_value_stream(std::vector<bool> &intermediate_data_holder,
			     huffman_code & litlen_code,
			     huffman_code & distance_code);
    //! clear internal compression state
    void clear() {
      _generator.clear();
      _cstate.clear();
    }
  private:
    /*!
      \brief constructor
    */
    gzfile_compress() 
      : _cstate(1) {
      throw std::domain_error("gzfile_compress: default constructor invalid");
    }
    /*!
      \brief compress a buffer wrapper of data into gzip format
      @param buffer wrapper on vector of uncompressed literals
      @param intermediate_data_holder target vector for resulting binary stream
      @param litlen_code huffman code for literals and match lengths;
      will be generated by this method
      @param distance_code huffman code for match offsets; will be generated
      by this method
      \warning flagged for possible removal in later versions
    */
    void compress_from_vector(uncompressed_buffer &buffer,
			      std::vector<bool> &intermediate_data_holder,
			      huffman_code &litlen_code,
			      huffman_code &distance_code);
    /*!
      \brief take a token and hand it off to the stream generator; 
      possibly print the token in legible format to cout
      @param res a token generated by compress_from_vector
      @param show_value whether the token should be furthermore verbosely 
      printed to cout
    */
    void emit_result(const vectorlist_search_result &res, bool show_value);
    /*!
      \var _generator
      \brief the stream generator, which converts tokens from 
      compress_from_vector to binary values using two huffman codes
    */
    huffman_encoded_stream_generator _generator;
    compression_state _cstate; //!< current compression state
  };
}
#endif // __GENEZIP__GZFILE_COMPRESS_H__
