/*
Copyright 2013 Cameron Palmer

This file is a part of Genezip.

Genezip is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Genezip is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Genezip.  If not, see <http://www.gnu.org/licenses/>
*/

/*!
  \file huffman_encoded_stream_generator.h
  \brief collect some gzip tokens, and encode them using huffman codes
 */
#ifndef __GENEZIP__HUFFMAN_ENCODED_STREAM_GENERATOR_H__
#define __GENEZIP__HUFFMAN_ENCODED_STREAM_GENERATOR_H__

#include <string>
#include <vector>
#include <iostream>
#include <fstream>
#include <sstream>
#include <map>
#include <utility>
#include <stdexcept>
#include "genezip/huffman_code.h"
#include "genezip/vectorlist.h"
#include "genezip/helper_functions.h"
namespace genezip_utils {
  /*!
    \class huffman_token
    \brief a token representing a reverse reference or a literal
  */
  class huffman_token {
  public:
    /*!
      \enum HUFFMAN_TOKEN_TYPE
      \brief flag telling how the value stored in a token is interpreted
    */
    enum HUFFMAN_TOKEN_TYPE {
      LENGTH, DISTANCE, LITERAL, NONE
    };
    /*!
      \brief default constructor

      Produces a token of type NONE with no additional bits.  The stored
      value starts at GENEZIP_LITERAL_VALUE_UPPER_BOUND + 30 (presumably an
      out-of-range placeholder -- TODO confirm against the encoder's code
      tables).
    */
    huffman_token()
      : _value_to_huffman(GENEZIP_LITERAL_VALUE_UPPER_BOUND + 30),
      _type(NONE),
      _additional_bits(0),
      _nbits(0) {}

    // Copy construction, copy assignment and destruction are intentionally
    // left to the compiler.  Every member is a plain value, so the implicit
    // memberwise versions do exactly what a hand-written copy constructor
    // and empty destructor would do, without needing a `throw()` exception
    // specification (deprecated, and rejected by C++20 compilers).

    /*!
      \brief set the type flag of the token
      @param type new type flag for this token
    */
    void set_type(HUFFMAN_TOKEN_TYPE type) {_type = type;}
    /*!
      \brief get the type flag of the token
      \return the type flag of the token
    */
    HUFFMAN_TOKEN_TYPE get_type() const {return _type;}
    /*!
      \brief set the lit/length/distance code associated with this token
      @param value new code for this token
    */
    void set_value(unsigned value) {_value_to_huffman = value;}
    /*!
      \brief get the lit/length/distance code associated with this token
      \return current code for this token
    */
    unsigned get_value() const {return _value_to_huffman;}
    /*!
      \brief set additional bits associated with length or distance code
      @param additional_data pair of (additional bits, number of bits) to be
      associated with this token
    */
    void set_additional(const std::pair<unsigned, unsigned> &additional_data) {
      _additional_bits = additional_data.first;
      _nbits = additional_data.second;
    }
    /*!
      \brief get additional bits associated with length or distance code
      \return additional bits associated with this token
    */
    unsigned get_additional_content() const {return _additional_bits;}
    /*!
      \brief get the number of additional bits associated with this token
      \return the number of bits associated with this token
    */
    unsigned get_number_additional_bits() const {return _nbits;}
    /*!
      \brief clear internal data

      Resets the value, the additional bits and the additional bit count to
      0 and the type to NONE.  Note that the value becomes 0 here, which is
      not the value a default-constructed token starts with.
    */
    void clear() {
      _value_to_huffman = 0;
      _additional_bits = 0;
      _nbits = 0;
      _type = NONE;
    }
  private:
    unsigned _value_to_huffman; //!< input to huffman coder
    HUFFMAN_TOKEN_TYPE _type; //!< interpretation of input value
    unsigned _additional_bits; //!< input literal bits to be added to code
    unsigned _nbits; //!< number of literal bits to be added
  };
  /*!
    \class huffman_encoded_stream_generator
    \brief collect a vector of tokens, encode them using a huffman code based on the frequency of observed tokens
  */
  class huffman_encoded_stream_generator {
  public:
    /*!
      \brief constructor

      Starts with no tokens.  The length/literal count table gets
      GENEZIP_LITERAL_VALUE_UPPER_BOUND + 30 entries and the distance count
      table gets GENEZIP_MAX_OFFSET_POINTER + 1 entries, all set to zero.
    */
    huffman_encoded_stream_generator()
      : _tokens(),
      _lengthlit_counts(GENEZIP_LITERAL_VALUE_UPPER_BOUND + 30, 0),
      _distance_counts(GENEZIP_MAX_OFFSET_POINTER + 1, 0) {}

    // Copy construction, copy assignment and destruction are intentionally
    // left to the compiler.  The members are standard vectors that already
    // copy and release themselves, so the implicit versions are equivalent
    // to a hand-written memberwise copy constructor and an empty
    // destructor, without needing a `throw()` exception specification
    // (deprecated, and rejected by C++20 compilers).

    /*!
      \brief add a token to this object; eventually will be compressed
      @param token token to be added to this object
    */
    void add_token(const vectorlist_search_result &token);
    /*!
      \brief compress the current vector of tokens and write it to target
      vector
      @param target destination of compressed data
      @param litlen_code destination of generated literal/length huffman code
      (in other words, the code is created here and stored for later use)
      @param distance_code destination of generated match offset huffman code
      (in other words, the code is created here and stored for later use)
    */
    void flush_data(std::vector<bool> &target,
                    huffman_code      &litlen_code,
                    huffman_code      &distance_code);
    /*!
      \brief clear out internal data state to default values

      Drops every stored token and sets every entry of both count tables
      back to zero.  The tables keep their current sizes.
    */
    void clear() {
      _tokens.clear();
      // assign(size, 0) overwrites each existing slot without resizing
      _lengthlit_counts.assign(_lengthlit_counts.size(), 0u);
      _distance_counts.assign(_distance_counts.size(), 0u);
    }
  private:
    /*!
      \brief add a huffman code and any additional bits to a bit vector
      @param value huffman code to be added
      @param value_length number of bits in the huffman code being added
      @param additional any additional bits to be added to the bit vector
      @param additional_bits number of additional bits to be added (can be 0)
      @param target destination of compressed data
    */
    void add_value_to_vector(unsigned           value,
                             unsigned           value_length,
                             unsigned           additional,
                             unsigned           additional_bits,
                             std::vector<bool> &target);
    /*!
      \brief encode a literal or match length into a gzip code and some
      additional bits
      @param true_length the actual literal/length
      \return (code, (additional_bits, number_of_additional_bits))

      Note that this encoding is rather complicated.  The gzip encoding is
      unusual (http://tools.ietf.org/html/rfc1951); the implementation is,
      as always, my own.
    */
    std::pair<unsigned,
      std::pair<unsigned, unsigned> > encode_length(unsigned true_length)
      const;
    /*!
      \brief encode a distance into a gzip code and some additional bits
      @param true_distance the actual match offset
      \return (code, (additional_bits, number_of_additional_bits))

      Note that this encoding is rather complicated.  The gzip encoding is
      unusual (http://tools.ietf.org/html/rfc1951); the implementation is,
      as always, my own.
    */
    std::pair<unsigned,
      std::pair<unsigned, unsigned> > encode_distance(unsigned true_distance)
      const;
    std::vector<huffman_token> _tokens; //!< sequence of input tokens
    std::vector<unsigned> _lengthlit_counts; //!< counts of length/literal vals
    std::vector<unsigned> _distance_counts; //!< counts of distance vals
  };
}
#endif //__GENEZIP__HUFFMAN_ENCODED_STREAM_GENERATOR_H__
