/*
Copyright 2013 Cameron Palmer

This file is a part of Genezip.

Genezip is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Genezip is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Genezip.  If not, see <http://www.gnu.org/licenses/>
*/

#ifndef __SUFFIX_TREE_H__
#define __SUFFIX_TREE_H__

#include <vector>
#include <utility>
#include <queue>
#include <stdexcept>
#include <map>

#include "genezip/defines.h"
#include "genezip/uncompressed_buffer.h"

namespace genezip_utils {
  //! fixed-capacity cyclic buffer, internal to the suffix tree.
  //! storage is allocated once, in the constructor; afterwards values
  //! are appended until the capacity is reached, and every further add
  //! overwrites the oldest stored value.  No resizing ever happens after
  //! construction, so add/clear run in constant time.
  class suffix_tree_buffer {
  public:
    //! constructor: allocate storage for the maximum permissible size
    //! @param max_size maximum number of elements to be stored
    //! \warning may throw std::bad_alloc
    //! \warning a buffer built with max_size == 0 cannot store anything;
    //! add() and at() will throw std::out_of_range on it
    suffix_tree_buffer(unsigned max_size)
      : _min_pos(0),
      _n_contained(0),
      _nclears(0) {
      _data.resize(max_size, 0);
    }
    //! destructor: nothing explicitly allocated or managed
    ~suffix_tree_buffer() throw() {}
    //! add a value to the buffer.  Fills unused slots while below max
    //! size; otherwise, overwrites the oldest value
    //! @param val element to be added to the buffer
    //! \warning throws std::out_of_range if the buffer has zero capacity
    void add(unsigned val) {
      if (_n_contained == _data.size()) {
        // buffer is full: overwrite the oldest slot and advance the
        // cyclic start, counting each complete pass over the storage
        _data.at(_min_pos++) = val;
        if (_min_pos == _data.size()) {
          _min_pos = 0;
          ++_nclears;
        }
      } else {
        _data.at(_n_contained++) = val;
      }
    }
    //! get a value from the buffer, at a particular index
    //! \warning index is calculated mod max size of buffer; the
    //! index should thus be the index of the element in the original
    //! sequence of elements added to the buffer.
    //! @param index location of element to be extracted
    //! \return requested element in buffer
    //! \warning throws std::out_of_range if the buffer has zero capacity
    //! (previously this was a division by zero)
    unsigned at(unsigned index) const {
      if (_data.empty()) {
        throw std::out_of_range("genezip_utils::suffix_tree_buffer::at: "
                                "buffer has zero capacity");
      }
      return _data.at(index % _data.size());
    }
    //! "delete" the elements in the buffer
    //! \warning constant time operation; doesn't actually clear the
    //! storage, only resets the bookkeeping
    void clear() {
      // _nclears must be reset together with the two positions;
      // otherwise size() reports a stale multiple of the capacity as
      // soon as the buffer has been refilled after a clear
      _min_pos = _n_contained = _nclears = 0;
    }
    //! get the number of elements added since construction or since
    //! the last clear().  This is one past the original-sequence index
    //! of the most recently added element.
    //! \return number of elements added so far
    //! \warning the count is an unsigned and wraps on overflow
    unsigned size() const {
      if (_n_contained == _data.size())
        // capacity adds to fill the buffer, plus capacity per complete
        // cycle, plus the adds made within the current cycle
        return (_nclears+1) * _data.size() + _min_pos;
      else
        return _n_contained;
    }
  private:
    //! default constructor
    //! \warning blocked from use
    suffix_tree_buffer() {
      throw std::domain_error("genezip_utils::suffix_tree_buffer: "
                              "default constructor not permitted");
    }
    std::vector<unsigned> _data; //!< buffered data
    unsigned _min_pos; //!< current index of cyclic buffer array start
    unsigned _n_contained; //!< current add index in buffer
    unsigned _nclears; //!< number of times the add index has cycled
  };

  class suffix_tree_node;

  //! abstract interface for the object in which a suffix tree node
  //! keeps track of its children, keyed by hash code.
  //! besides the keyed children handled by the implementations, the
  //! base class itself stores the language size and one special "end"
  //! child that lives outside the keyed storage.
  //! \warning abstract: instantiate one of the derived classes
  class child_set_type {
  public:
    //! default constructor: language size of zero, no "end" node
    child_set_type() : _language_size(0), _end(NULL) {}
    //! virtual destructor; nothing is owned by the base class
    virtual ~child_set_type() throw() {}
    //! set the number of nodes to be tracked.  if the new size is less
    //! than the current one, an implementation MAY stop tracking nodes
    //! beyond the new size bound.
    //! @param size number of nodes to be tracked
    //! \warning may throw std::bad_alloc
    virtual void resize(unsigned size) = 0;
    //! look up a child by hash and hand a pointer to it to the caller.
    //! in this context, the hash is the first sequence value of the
    //! sequence starting at the index stored in the node.
    //! @param code hash corresponding to desired node
    //! @param ptr reference to pointer; on search success, will point
    //! to located node
    //! \return whether a child was found
    virtual bool find_child(unsigned code, suffix_tree_node *&ptr) const = 0;
    //! start tracking a child under a hash.  if a child is already
    //! tracked with the same hash, it is not overwritten.
    //! @param code hash corresponding to this added child
    //! @param ptr pointer to child being added
    //! \return status flag.  NOTE(review): historically documented as
    //! "whether a child was already tracked with the given hash", yet
    //! suffix_tree_node::add_child counts one more child when this
    //! returns true -- confirm the meaning against the implementations
    //! \sa find_child
    virtual bool add_child(unsigned code, suffix_tree_node *ptr) = 0;
    //! start tracking a child under a hash.  if a child is already
    //! tracked with the same hash, it is overwritten.
    //! @param code hash corresponding to this added child
    //! @param ptr pointer to child being added
    //! \return status flag.  NOTE(review): same caveat as add_child;
    //! suffix_tree_node::force_add_child counts one more child when
    //! this returns true -- confirm against the implementations
    //! \sa add_child
    //! \sa find_child
    virtual bool force_add_child(unsigned code, suffix_tree_node *ptr) = 0;
    //! stop tracking the child stored under a given hash
    //! @param code hash corresponding to child to be untracked
    //! \return whether a child was tracked with the given hash
    //! \warning this operation is not guaranteed to be constant time
    //! \sa find_child
    virtual bool remove_child(unsigned code) = 0;
    //! rewind the internal iteration cursor to the first stored entry.
    //! must be called before iterate_children() if the whole set of
    //! tracked children is to be visited
    //! \sa iterate_children
    virtual void iterate_initialize() = 0;
    //! step the internal iteration cursor
    //! \return pointer to next child tracked, or NULL
    //! \warning NULL marks the end of iteration
    //! \sa iterate_initialize
    virtual suffix_tree_node *iterate_children() = 0;
    //! stop tracking all keyed children
    virtual void clear_children() = 0;
    //! try to check whether the node data correspond correctly
    //! to an input dataset.
    //! @param d pointer to input dataset
    //! \warning not guaranteed to find all possible errors.
    //! mainly checks that hash corresponds to element at the
    //! node's index
    virtual void consistency_check(uncompressed_buffer *d) = 0;
    //! get the amount of memory attributed to this base class, in bytes
    //! \return the size of two unsigneds
    virtual unsigned memory_used() const {
      return sizeof(unsigned) * 2;
    }
    //! set the language size of hashes tracked by this class
    //! @param language_size number of possible values for hash used
    //! \warning stored for later error checking
    void set_language_size(unsigned language_size) {
      _language_size = language_size;
    }
    //! get the language size of hashes tracked by this class
    //! \return number of possible values for hash used
    //! \warning stored for later error checking
    unsigned get_language_size() const {
      return _language_size;
    }
    //! get the special "end" node tracked by all child_set_types
    //! \return "end" node for this set of children, or NULL if not present
    //! \warning not covered by iteration!  must be checked separately!
    suffix_tree_node *end() const {
      return _end;
    }
    //! set the special "end" node tracked by all child_set_types
    //! @param ptr new "end" node for this set of children, or NULL to clear
    void end(suffix_tree_node *ptr) {
      _end = ptr;
    }
    //! nullify the current value for the special "end" node
    void clear_end() {
      _end = NULL;
    }

  private:
    unsigned _language_size; //!< number of possible values for hashes
    suffix_tree_node *_end; //!< special "end" node tracked by all sets
  };

  //! child_set_type implementation with vector storage
  //! provides constant time insertion/deletion; memory and iteration
  //! linear in number of possible hash values
  class child_vector : public child_set_type {
  public:
    //! STL format typedef for child node pointer container class
    typedef std::vector<suffix_tree_node *> container_type;
    //! default constructor: invoke base constructor only
  child_vector()
    : child_set_type() {}
    //! destructor: nothing explicitly allocated or managed
    ~child_vector() throw() {}
    //! set the number of nodes to be tracked.  if new size is less
    //! than current, deletes nodes beyond the new size bound.
    //! @param size number of nodes to be tracked
    //! \warning may throw std::bad_alloc
    virtual void resize(unsigned size) {_data.resize(size, NULL);}
    //! find a node based on a hash, hand a pointer to the node
    //! to the caller.  in this context, hash is the first sequence value
    //! at the sequence starting at the index in the node.  Constant time.
    //! @param code hash corresponding to desired node
    //! @param ptr reference to pointer; on search success, will point
    //! to located node
    //! \return whether a child was found
    virtual bool find_child(unsigned code, suffix_tree_node *&ptr) const;
    //! add a child to the tracked child list.  if a child is already
    //! tracked with the same hash, do not overwrite.  Constant time.
    //! @param code hash corresponding to this added child
    //! @param ptr pointer to child being added
    //! \return whether a child was already tracked with the given hash.
    //! \sa find_child
    virtual bool add_child(unsigned code, suffix_tree_node *ptr);
    //! add a child to the tracked child list.  if a child is already
    //! tracked with the same hash, overwrite it.  Constant time.
    //! @param code hash corresponding to this added child
    //! @param ptr pointer to child being added 
    //! \return whether a child was already tracked with the given hash.
    //! \sa add_child
    //! \sa find_child
    virtual bool force_add_child(unsigned code, suffix_tree_node *ptr);
    //! stop tracking a child with a given hash.  Constant time.
    //! @param code hash corresponding to child to be untracked
    //! \return whether a child was already tracked with the given hash
    //! \sa find_child
    virtual bool remove_child(unsigned code);
    //! set current iterator to (stored data)->begin();
    //! required for iterate_children() to actually iterate across
    //! the entire set of tracked children
    //! \sa iterate_children
    virtual void iterate_initialize();
    //! get the next currently-tracked child (or NULL).  Silently
    //! skips over empty entries in storage vector.
    //! \return pointer to next child tracked, or NULL
    //! \warning NULL corresponds to (stored_data)->end() result,
    //! marking the end of iteration
    virtual suffix_tree_node *iterate_children();
    //! try to check whether the node data correspond correctly
    //! to an input dataset.
    //! @param d pointer to input dataset
    //! \warning not guaranteed to find all possible errors.
    //! mainly checks that hash corresponds to element at the
    //! node's index
    virtual void consistency_check(uncompressed_buffer *d);
    //! clear out all existing children
    //! \warning linear in number of possible hashes
    //! \warning DOES NOT AFFECT end()
    virtual void clear_children() {
      for (container_type::iterator iter = _data.begin();
	   iter != _data.end(); ++iter) *iter = NULL;
      _data_current_access = _data.begin();
    }
    //! get the amount of memory used by this class, in bytes
    //! \return class size, plus size of alphabet, plus a single
    //! iterator, in bytes
    virtual unsigned memory_used() const {
      return (_data.size()+2)*sizeof(unsigned) + 
	sizeof(container_type::const_iterator);
    }
  private:
    container_type _data; //!< vector of child pointers
    container_type::const_iterator _data_current_access; //!< current iterator
  };
  //! child_set_type implementation backed by a std::map from hash
  //! code to child pointer.
  //! only hashes that are actually tracked occupy memory, so memory and
  //! iteration are linear in the number of tracked children, while
  //! lookups and insertions cost logarithmic time
  class child_map : public child_set_type {
  public:
    //! STL format typedef for child node pointer container class
    typedef std::map<unsigned, suffix_tree_node *> container_type;
    //! default constructor: invoke base constructor only
    child_map() : child_set_type() {}
    //! destructor: nothing explicitly allocated or managed
    ~child_map() throw() {}
    //! no-op: a map needs no preallocation.
    //! present only to satisfy the child_set_type interface.
    //! @param size ignored
    //! \warning will do absolutely nothing.
    virtual void resize(unsigned size) {
    }
    //! find a node based on a hash, hand a pointer to the node
    //! to the caller.  in this context, hash is the first sequence value
    //! at the sequence starting at the index in the node.  Log time.
    //! @param code hash corresponding to desired node
    //! @param ptr reference to pointer; on search success, will point
    //! to located node
    //! \return whether a child was found
    virtual bool find_child(unsigned code, suffix_tree_node *&ptr) const;
    //! add a child to the tracked child list.  if a child is already
    //! tracked with the same hash, do not overwrite.  Log time.
    //! @param code hash corresponding to this added child
    //! @param ptr pointer to child being added
    //! \return status flag; see child_set_type::add_child
    //! \sa find_child
    virtual bool add_child(unsigned code, suffix_tree_node *ptr);
    //! add a child to the tracked child list.  if a child is already
    //! tracked with the same hash, overwrite it.  Log time.
    //! @param code hash corresponding to this added child
    //! @param ptr pointer to child being added
    //! \return status flag; see child_set_type::force_add_child
    //! \sa add_child
    //! \sa find_child
    virtual bool force_add_child(unsigned code, suffix_tree_node *ptr);
    //! stop tracking a child with a given hash.  Potentially very costly
    //! with map implementations, should be avoided.
    //! @param code hash corresponding to child to be untracked
    //! \return whether a child was tracked with the given hash
    //! \sa find_child
    virtual bool remove_child(unsigned code);
    //! set current iterator to (stored data)->begin();
    //! required for iterate_children() to actually iterate across
    //! the entire set of tracked children
    //! \sa iterate_children
    virtual void iterate_initialize();
    //! get the next currently-tracked child (or NULL)
    //! \return pointer to next child tracked, or NULL
    //! \warning NULL corresponds to (stored_data)->end() result,
    //! marking the end of iteration
    virtual suffix_tree_node *iterate_children();
    //! try to check whether the node data correspond correctly
    //! to an input dataset.
    //! @param d pointer to input dataset
    //! \warning not guaranteed to find all possible errors.
    //! mainly checks that hash corresponds to element at the
    //! node's index
    virtual void consistency_check(uncompressed_buffer *d);
    //! forget every tracked child and rewind the iteration cursor
    //! \warning linear in number of tracked children
    //! \warning DOES NOT AFFECT end() from base class
    virtual void clear_children() {
      _data.clear();
      _data_current_access = _data.begin();
    }
    //! get the amount of memory attributed to this class, in bytes
    //! \return (number of tracked children + 2) unsigneds, plus a
    //! single iterator
    virtual unsigned memory_used() const {
      return sizeof(container_type::const_iterator) +
        (_data.size()+2)*sizeof(unsigned);
    }
  private:
    container_type _data; //!< map of child pointers
    container_type::const_iterator _data_current_access; //!< current iterator
  };
  //! a node in the suffix tree
  class suffix_tree_node {
  public:
    //! default constructor: set null parent pointer, labels, child
    //! tracker, internals
  suffix_tree_node()
    : _parent(NULL),
      _node_label(std::pair<unsigned, unsigned>(0, 0)),
      _children(NULL),
      _is_end(false),
      _number_children(0) {}
    //! destructor: delete child tracker
    ~suffix_tree_node() throw() {if (_children) delete _children;}
    //! launch a (partial) consistency check on the tracked children
    //! d buffer of data used to generate current suffix tree
    //! \warning will throw std::domain_error if error encountered
    void scan_children(uncompressed_buffer *d) {
      _children->consistency_check(d);
    }
    //! update "latest encountered" tracking for this node by
    //! checking if a provided value is greater (more recent) than
    //! the currently stored index; update if needed, to indicate
    //! the latest added child of this node.  Used to calculate length.
    //! @param start index (of child)
    void update_maximal_child(unsigned start) {
      if (start > _node_label.second)
	_node_label.second = start;
    }
    //! set the start position of this node, give it a parent,
    //! and inform the parent of this new child
    //! @param start new starting index for this node
    //! @param parent new parent for this node
    //! \warning this method does not cause the parent to track this child node
    void set_start_and_parent(unsigned start, suffix_tree_node *parent) {
      _parent = parent;
      _node_label.first = start;
      if (_parent) {
	_parent->update_maximal_child(start);
      } else if (_node_label.first) {
	throw std::domain_error("suffix_tree_node parent check: null parent "
				"but nonzero start site");
      }
    }
    //! update the start index of this node; do nothing involving the parent
    //! @param start new starting index for this node
    void set_start_only(unsigned start) {
      _node_label.first = start;
    }
    //! give this node a parent, and inform the parent of this new child
    //! @param parent new parent for this node
    void set_and_update_parent(suffix_tree_node *parent) {
      _parent = parent;
      if (_parent) _parent->update_maximal_child(_node_label.first);
    }
    //! set defaults to this node, such that it looks like a root node
    void root_default() {
      _node_label.first = _node_label.second = 0;
      _parent = NULL;
    }
    //! get the currently stored parent for this node
    //! \return the currently stored parent for this node
    suffix_tree_node *get_parent() const {return _parent;}
    //! get the start index of this node
    //! \return the start index of this node
    unsigned get_start() const {return _node_label.first;}
    //! get the length of this node (distance between node start and latest
    //! child node)
    unsigned get_length() const {
      if (!_node_label.second) return 0;
      if (_node_label.second < _node_label.first) {
	throw std::domain_error("suffix_tree_node::get_length: length "
				"relation error: start=\"" + 
				genezip_utils::to_string<unsigned>
				(_node_label.first) + 
				"\", latest child=\"" + 
				genezip_utils::to_string<unsigned>
				(_node_label.second) + "\"");
      }
      return _node_label.second - _node_label.first;
    }
    //! get the number of children (outdegree) of this node
    //! \return the number of children (outdegree) of this node
    unsigned number_children() const {return _number_children;}
    //! make the child tracker of this node use map storage
    //! \warning WILL DELETE ANY CURRENT TRACKER AND NOT COPY ITS CONTENTS
    void enable_map_handler() {
      if (_children) delete _children;
      _children = new child_map;
    }
    //! make the child tracker of this node use vector storage
    //! @param preallocation_size number of possible hashes to the tracker
    //! \warning WILL DELETE ANY CURRENT TRACKER AND NOT COPY ITS CONTENTS
    void enable_vector_handler(unsigned preallocation_size) {
      if (_children) delete _children;
      _children = new child_vector;
      _children->resize(preallocation_size);
    }
    //! set the language size, i.e. the number of possible hashes for children
    //! @param size the number of possible hashes for children
    void set_language_size(unsigned size) {
      if (_children) _children->set_language_size(size);
    }
    //! get the language size, i.e. the number of possible hashes for children
    //! \return the number of possible hashes for children
    unsigned get_language_size() const {
      return _children ? _children->get_language_size() : 0;
    }
    //! find a node based on a hash, hand a pointer to the node
    //! to the caller.  in this context, hash is the first sequence value
    //! at the sequence starting at the index in the node.  Asymptotic
    //! performance depends on type of child tracking class used.
    //! @param code hash corresponding to desired node
    //! @param ptr reference to pointer; on search success, will point
    //! to located node
    //! \return whether a child was found
    bool find_child(unsigned code, suffix_tree_node *&ptr) {
      if (_children)
	_children->find_child(code, ptr);
      else
	ptr = NULL;
      return ptr;
    }
    //! add a child to the tracked child list.  if a child is already
    //! tracked with the same hash, do not overwrite.  Asymptotic
    //! performance depends on the type of child tracking class used.
    //! @param code hash corresponding to this added child
    //! @param ptr pointer to child being added
    //! \return whether a child was already tracked with the given hash.
    //! \sa find_child
    void add_child(unsigned code, suffix_tree_node *ptr) {
      if (_children &&
	  _children->add_child(code, ptr))
	++_number_children;
    }
    //! add a child to the tracked child list.  if a child is already
    //! tracked with the same hash, overwrite it.  Asymptotic performance
    //! depends on the type of child tracking class used.
    //! @param code hash corresponding to this added child
    //! @param ptr pointer to child being added 
    //! \return whether a child was already tracked with the given hash.
    //! \sa add_child
    //! \sa find_child
    void force_add_child(unsigned code, suffix_tree_node *ptr) {
      if (_children &&
	  _children->force_add_child(code, ptr)) {
	++_number_children;
      }
    }
    //! stop tracking a child with a given hash.  Potentially very costly
    //! with map implementations, should be avoided.
    //! @param code hash corresponding to child to be untracked
    //! \return whether a child was already tracked with the given hash
    //! \sa find_child
    void remove_child(unsigned code) {
      if (_children)
	if (_children->remove_child(code))
	  --_number_children;
    }
    //! Initialize the iteration sequence in the child tracker.
    //! required for iterate_children() to actually iterate across
    //! the entire set of tracked children
    //! \sa iterate_children
    inline void iterate_initialize() {
      if (_children) _children->iterate_initialize();
    }
    //! get the next currently-tracked child (or NULL)
    //! \return pointer to next child tracked, or NULL
    //! \warning NULL corresponds to (stored_data)->end() result,
    //! marking the end of iteration, or no children tracked at all.
    inline suffix_tree_node *iterate_children() {
      if (_children) return _children->iterate_children();
      return NULL;
    }
    //! clear out all existing children, and update child count.
    //! \warning performance depends on type of child tracking class used.
    //! \warning DOES NOT AFFECT end() from base class
    inline void clear_children() {
      if (_children) _children->clear_children();
      _number_children = end() ? 1 : 0;
    }
    //! get the special "end" node tracked by all child_set_types
    //! \return "end" child for this node, or NULL if not present
    //! \warning not covered by iteration!  must be checked separately!
    inline suffix_tree_node *end() const {
      return _children ? _children->end() : NULL;
    }
    //! set the special "end" node tracked by all child_set_types
    //! @param ptr new "end" node for this set of children, or NULL to clear
    inline void end(suffix_tree_node *ptr) {
      if (_children) {
	if (ptr && !end()) ++_number_children;
	if (!ptr && end()) --_number_children;
	if (_children) _children->end(ptr);
      }
    }
    //! clear out the tracked end() node, updating the child count accordingly
    inline void clear_end() {
      if (_children) {
	if (end()) --_number_children;
	_children->clear_end();
      }
    }
    //! is this node an end node for its parent?
    //! \return whether this node is an end node for its parent
    //! \warning this flag is not consistently used/set in the current
    //! implementation
    bool is_end() const {return _is_end;}
    //! set whether this node is an end node for its parent
    //! @param b whether this node should be an end node for its parent
    //! \warning this flag is not consistently used/set in the current
    //! implementation
    void is_end(bool b) {_is_end = b;}
    //! get the number of children (outdegree) of this node.  Note
    //! that this is tracked separately from the child tracking set, since
    //! under the vector implementation this operation would be linear time.
    //! \return the number of children (outdegree) of this node
    unsigned number_of_children() const {return _number_children;}
    //! get the amount of memory used by this class, in bytes
    //! \return five unsigneds (that includes pointers).  Ignore the boolean,
    //! I guess.
    unsigned memory_used() const {
      return 5 * sizeof(unsigned) + (_children ? _children->memory_used() : 0);
    }
  private:
    suffix_tree_node *_parent; //!< pointer to parent node
    std::pair<unsigned, unsigned> _node_label; //!< (start index, latest child)
    child_set_type *_children; //!< pointer to child tracking object
    bool _is_end; //!< flag if this node is an end() node; inconsistently used
    unsigned _number_children; //!< number of children (outdegree) of node
  };
  //! suffix tree with lazy loading.
  //! nodes come from blocks that the tree allocates and owns; the
  //! source sequence is read through one of two externally owned
  //! buffers (an uncompressed_buffer, deprecated, or a cyclic
  //! suffix_tree_buffer).
  //! \warning the tree owns raw node blocks and relies on the
  //! compiler-generated copy operations; it must not be copied
  class suffix_tree {
  public:
    //! constructor: set number of possible child hashes, defaults otherwise
    //! allocates pool of nodes for building the tree
    //! @param alphabet_size number of possible child hashes
    suffix_tree(unsigned alphabet_size)
      : _alphabet_size(alphabet_size),
        _root(NULL),
        _raw_data(NULL),
        _limited_raw_data(NULL) {allocate(GENEZIP_SIMULTANEOUS_ALLOC_SIZE);}
    //! destructor: deletes all allocated blocks of tree nodes
    ~suffix_tree() throw();
    //! allocate nodes for the tree
    //! @param number_to_allocate number of suffix_tree_nodes to allocate
    //! \warning these nodes are managed by this class
    void allocate(unsigned number_to_allocate);
    //! add a value with an index in the data sequence to the tree
    //! @param code the hash for the node being added
    //! @param index position of this node in the input sequence
    void add_node_to_root(unsigned code, unsigned index);
    //! get an unused node from the allocated pool, or allocate more and then
    //! get one from there
    //! \return an unused node, ready for setting/inclusion in the tree
    suffix_tree_node *next_available_node();
    //! give the tree an input data buffer, in the form of an uncompressed
    //! data buffer
    //! @param buf buffer of data provided to this object
    //! \warning deprecated
    //! \warning when set, this buffer takes precedence over the cyclic
    //! buffer in at_in_data() and size_of_data()
    void set_raw_data_buffer(uncompressed_buffer *buf) {_raw_data = buf;}
    //! give the tree an input data buffer, in the form of a cyclic
    //! suffix tree buffer
    //! @param buf buffer of data provided to this object
    void set_raw_data_buffer(suffix_tree_buffer *buf) {
      _limited_raw_data = buf;
    }
    //PRIMARY INTERFACE
    //! execute depth-first search on the tree, returning pointers
    //! to nodes in the order in which they were encountered.
    //! @param target where the pointers should be returned
    //! @param delete_old not currently used
    //! @param delete_old_threshold not currently used
    void dfs(std::queue<suffix_tree_node *> *target, bool delete_old = false,
             unsigned delete_old_threshold = 1);
    //! run a node consistency check from the root node.
    //! note that this just checks that the node hashes and indices
    //! are consistent with the raw data stream provided.  This is not
    //! a guarantee that the tree is error-free.
    //! \warning always hands the (deprecated) uncompressed buffer to the
    //! check, even if only the cyclic buffer has been set
    void root_check_consistency() {
      if (_root) _root->scan_children(_raw_data);
    }
    //! add a node to the tree by lazily deleting the oldest node
    //! @param value the hash of the new node to be added
    //! @param index the index in the data stream of the node to be added
    void add_and_delete(unsigned value, unsigned index);
    //! search the tree for a match with a given sequence.  Runs
    //! theoretically in time linear to the length of the best match.
    //! Will update the tree to handle issues created by lazy deletion
    //! along the path of the best search, and nowhere else.
    //! @param start_index index of the beginning of the sequence to be matched
    //! @param search_forward_bound where the search must stop, even
    //! if it breaks a possible match
    //! @param match_start start index of current best match in tree
    //! @param match_length length of current best match in tree; reset
    //! to 0 before the search starts
    //! \return whether the search successfully found a match
    bool search(unsigned start_index,
                unsigned search_forward_bound,
                unsigned &match_start,
                unsigned &match_length) {
      match_length = 0;
      return search(_root,
                    start_index,
                    start_index,
                    search_forward_bound,
                    match_start,
                    match_length);
    }
    //! get a pointer to the root of this tree
    //! \return a pointer to the root of this tree
    suffix_tree_node *get_root() const {return _root;}
    //! get the amount of memory used by this class, in bytes
    //! \return sum of memory_used() over every node of every allocated
    //! block
    //! \warning assumes each entry of the block list points to a block
    //! of exactly GENEZIP_SIMULTANEOUS_ALLOC_SIZE nodes, as the loop
    //! bound implies -- TODO confirm against allocate()
    unsigned memory_used() const {
      unsigned res = 0;
      for (std::vector<suffix_tree_node *>::const_iterator iter =
             _deleteable.begin(); iter != _deleteable.end(); ++iter) {
        const suffix_tree_node *block = *iter;
        for (unsigned i = 0; i < GENEZIP_SIMULTANEOUS_ALLOC_SIZE; ++i) {
          // index into the block; the previous code queried the first
          // node of the block on every pass
          res += block[i].memory_used();
        }
      }
      return res;
    }
  private:
    //! default constructor: not permitted
    suffix_tree()
      : _alphabet_size(0),
        _root(NULL),
        _raw_data(NULL),
        _limited_raw_data(NULL) {}
    //! add a provided node to the root (or down one level)
    //! @param ptr node to be added to the root
    //! @param value literal corresponding to added node
    //! @param index position in sequence of added node
    void add_node_to_root(suffix_tree_node *ptr,
                          unsigned          value,
                          unsigned          index);
    //! reclaim memory from node and all children
    //! @param ptr node to be reclaimed
    //! @param autopush whether node should be readded to available node queue
    void clearout(suffix_tree_node *ptr, bool autopush = false);
    //! run dfs on node from a given tree root
    //! @param root root of tree to be searched
    //! @param target queue in which found nodes should be reported
    //! @param level flag indicating level of current search; currently ignored
    //! @param delete_old whether "too old" nodes should be deleted (based
    //!        on index in source sequence)
    //! @param delete_old_threshold cutoff in sequence index for old vs new
    void dfs(suffix_tree_node *root,
             std::queue<suffix_tree_node *> *target,
             unsigned level,
             bool delete_old = false,
             unsigned delete_old_threshold = 1);
    //! run a search on a suffix tree
    //! @param node start node for search
    //! @param start_index start position in input sequence for search
    //! @param adj_start_index adjusted start position in input sequence
    //!        for recursive search
    //! @param search_forward_bound sequence index where search must end
    //! @param match_start if match found, reported start
    //!        position of found match
    //! @param match_length if match found, reported length of found match
    //! \return whether the search successfully found a match
    bool search(suffix_tree_node *node,
                unsigned start_index,
                unsigned adj_start_index,
                unsigned search_forward_bound,
                unsigned &match_start,
                unsigned &match_length);
    //! remove next queued leaf from tree
    //! \return pointer to removed leaf
    suffix_tree_node *delete_leaf();
    //! remove a node from the tree
    //! @param leaf node to remove from tree
    //! \return pointer to node that was actually removed
    suffix_tree_node *delete_node(suffix_tree_node *leaf);
    //! merge a node into a possibly broken tree
    //! @param parent desired parent of node
    //! @param child_possibly_exists existing node in insertion slot
    //! @param insertion node to be inserted into tree
    void safely_merge_nodes(suffix_tree_node *parent,
                            suffix_tree_node *child_possibly_exists,
                            suffix_tree_node *insertion);
    //! repair a tree structure at a given position
    //! @param parent parent node to be repaired
    //! @param child child node at position of interest
    //! @param end bad end node, which should actually be at child's position
    void safely_repair_tree_structure(suffix_tree_node *parent,
                                      suffix_tree_node *child,
                                      suffix_tree_node *end);
    //! deal with an aberrant end node in a tree
    //! @param parent parent node containing aberrant end node
    //! @param search_bound current end of suffix tree sequence
    void handle_end_at_site(suffix_tree_node *parent,
                            unsigned search_bound);
    unsigned _alphabet_size; //!< number of literals in alphabet size
    suffix_tree_node *_root; //!< pointer to root of tree
    std::queue<suffix_tree_node *> _available; //!< queue of free nodes
    std::queue<suffix_tree_node *> _leaves; //!< queue of leaves on tree
    std::vector<suffix_tree_node *> _deleteable; //!< allocated node blocks
    //! get the value at a given index in the source sequence.
    //! the uncompressed buffer is consulted first, then the cyclic one.
    //! @param pos index of desired value in source sequence
    //! \return value at requested position
    //! \warning throws std::domain_error if no buffer has been provided
    inline unsigned at_in_data(unsigned pos) {
      if (_raw_data) return _raw_data->at(pos);
      if (_limited_raw_data) return _limited_raw_data->at(pos);
      throw std::domain_error("suffix_tree::at_in_data: no "
                              "data buffer provided");
    }
    //! determine the size of the suffix tree source sequence.
    //! the uncompressed buffer is consulted first, then the cyclic one.
    //! \return size of suffix tree source sequence
    //! \warning throws std::domain_error if no buffer has been provided
    inline unsigned size_of_data() {
      if (_raw_data) return _raw_data->size();
      if (_limited_raw_data) return _limited_raw_data->size();
      throw std::domain_error("suffix_tree::size_of_data: no "
                              "data buffer provided");
    }
    uncompressed_buffer *_raw_data; //!< pointer to raw source data DEPRECATED
    suffix_tree_buffer *_limited_raw_data; //!<circular buffer to raw data
  };
}

#endif //__SUFFIX_TREE_H__
