/*
Copyright 2013 Cameron Palmer

This file is a part of Genezip.

Genezip is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Genezip is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Genezip.  If not, see <http://www.gnu.org/licenses/>
*/

/*!
  \file binary_buffer.h
  \brief interface for decompression of a binary stream of data

  This class streamlines the reading of subsets of bits from a conceptual 
  stream of bits.  Some of this functionality is left over from the original 
  test mode of this library, when it directly read gzipped files and 
  decompressed them.  Such deprecated regions will be noted in comments.
 */
#ifndef __GENEZIP__BINARY_BUFFER_H__
#define __GENEZIP__BINARY_BUFFER_H__

#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
#include "genezip/helper_functions.h"
//! namespace for all backend code for this library; generally,
//! this code should not be accessed directly by users
namespace genezip_utils {
  /*!
    \class binary_buffer
    \brief pull bits from a conceptual stream of compressed bits

    The buffer does not own its data: it stores a pointer to a
    std::vector<bool> supplied at construction and hands out its contents
    one bit at a time, tracking the read position as a
    (byte index, bit-within-byte) pair.  The referenced vector must
    therefore outlive the buffer.
  */
  class binary_buffer {
  public:
    /*!
      \brief default constructor

      Creates a buffer with no attached data.  Calling read_bit() on such
      a buffer throws std::domain_error.
    */
    binary_buffer()
      : input_vector(0),
        _last_read_success(0),
        _current_byte(0),
        _current_bit(0) {}
    /*!
      \brief constructor: specify desired vector of compressed data (bits)
      @param intermediate_data_holder vector containing compressed data
      \throws std::domain_error if the vector holds more bits than an
      unsigned can represent

      Only the address of intermediate_data_holder is stored; the vector
      itself is not copied, so it must stay alive for as long as this
      buffer is read from.

      The stream length and read position are kept as unsigned values.
      That is expected to be sufficient, as only single SNPs are provided
      to a buffer at a time, but an oversized vector is rejected here
      rather than having its size silently truncated.
    */
    binary_buffer(const std::vector<bool> &intermediate_data_holder)
      : input_vector(&intermediate_data_holder),
        _last_read_success(checked_bit_count(intermediate_data_holder)),
        _current_byte(0),
        _current_bit(0) {}
    /*!
      \brief destructor
    */
    ~binary_buffer() throw() {}
    /*!
      \brief read the next bit from the stream, using a slightly modified
      version of the insane gzip format
      \return next bit within the stream (see below)
      \throws std::domain_error if no vector is attached, or if the read
      position is at or beyond the end of the available data

      gzip formats its data strangely.  It stores bits as canonical bytes, and
      assumes that the bits are packed from LSB to MSB within a given byte
      (but that bytes proceed from beginning to end of file).  Within such a
      stream of bits, Huffman codes are packed from MSB to LSB (preserving
      the prefix property within the encountered stream), whereas all other
      data types (including bits to arithmetically add
      to decoded Huffman codes) are stored LSB to MSB within the stream.

      Since this stream consists of bits, I have eliminated the byte LSB->MSB
      progression, such that the overall stream progresses simply from
      beginning to end of the vector.  The Huffman code/everything else
      alignment within the emerging stream is identical to that in gzip.
    */
    inline bool read_bit() {
      if (!input_vector)
        throw std::domain_error("binary_buffer::read_bit: null vector");
      // all eight bits of the current byte consumed: move to the next byte
      if (_current_bit >= 8) {
        ++_current_byte;
        _current_bit = 0;
      }
      // absolute index of the bit about to be read
      const unsigned position = (_current_byte << 3) + _current_bit;
      if (position >= _last_read_success) {
        // report both figures in bits so that they are directly comparable
        throw std::domain_error("binary_buffer::read_bit: read called "
                                "beyond bound of available data: "
                                "bit offset "
                                + to_string<unsigned>(position)
                                + " of "
                                + to_string<unsigned>(_last_read_success)
                                + " bits");
      }
      // advance before the access; at() re-checks the index against the
      // vector's current size
      ++_current_bit;
      return input_vector->at(position);
    }
    /*!
      \brief read some number of bytes, then some number of bits,
      from a binary stream
      @param nbytes the number of bytes to return
      @param nbits the number of bits to return
      @param raw whether the bits encountered should be flipped
      (ABCD -> DCBA) before returning
      \return an unsigned int containing "right justified" (8*nbytes+nbits)
      bits requested
      \warning if 8*nbytes+nbits>8*sizeof(unsigned), extra bits will be
      discarded (starting with the first encountered)
    */
    unsigned read(unsigned nbytes, unsigned nbits, bool raw = false);
  private:
    /*!
      \brief number of bits in a vector, as an unsigned
      @param data vector whose size is wanted
      \return data.size() converted to unsigned
      \throws std::domain_error if data.size() does not fit in an unsigned
    */
    static unsigned checked_bit_count(const std::vector<bool> &data) {
      const unsigned nbits = static_cast<unsigned>(data.size());
      // a round trip that changes the value means the size was truncated
      if (nbits != data.size())
        throw std::domain_error("binary_buffer::binary_buffer: vector "
                                "holds more bits than an unsigned can "
                                "index");
      return nbits;
    }
    /*!
      \var input_vector
      \brief (constant) pointer to binary vector of compressed data;
      currently supplied to constructor.  Null when default constructed.
    */
    const std::vector<bool> *input_vector;
    /*!
      \var _last_read_success
      \brief size (in bits) of stored data.  Remnant of previous build,
      and will be eventually discarded in favor of input_vector->size()
    */
    unsigned _last_read_success;
    /*!
      \var _current_byte
      \brief floor(read_position / 8)
    */
    unsigned _current_byte;
    /*!
      \var _current_bit
      \brief read_position % 8
    */
    unsigned _current_bit;
  };
}
#endif // __GENEZIP__BINARY_BUFFER_H__
