/*
 * Written by Bastien Chevreux (BaCh)
 *
 * Copyright (C) 1997-2000 by the German Cancer Research Center (Deutsches
 *   Krebsforschungszentrum, DKFZ Heidelberg) and Bastien Chevreux
 * Copyright (C) 2000 and later by Bastien Chevreux
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the 
 * Free Software Foundation, Inc., 
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 * 
 */

// 	$Id$	

#ifndef _mira_read_h_
#define _mira_read_h_


#include <assert.h>

#include <ctype.h>
#include <math.h>

#include <string>

#include "stdinc/stlincludes.H"

#include "stdinc/defines.H"
#include "errorhandling/errorhandling.H"

#include "io/exp.H"
#include "io/scf.H"
#include "util/memusage.H"
#include "util/misc.H"
#include "util/dptools.H"
#include "mira/enums.H"
#include "mira/stringcontainer.H"
#include "mira/multitag.H"



#define BQ_UNDEFINED 255

class Read
{

public:
  // Identifiers for the output formats of a read.
  // Enumerators are numbered consecutively, starting at 0.
  enum read_output_type {
    AS_TEXT=0,
    AS_TEXTSHORT,
    AS_TEXTCLIPS,
    AS_CAF,
    AS_BAF,
    AS_MAF,
    AS_FASTA,
    AS_FASTQ,
    AS_CLIPPEDFASTA,
    AS_MASKEDMASKFASTA,
    AS_SEQVECMASKEDFASTA,
    AS_FASTAQUAL,
    AS_CLIPPEDFASTAQUAL,
    AS_MASKEDMASKFASTAQUAL,
    AS_SEQVECMASKEDFASTAQUAL,
    AS_ACE,
    AS_ACE_COMPLEMENT,
    AS_GAP4DA
  };

  // Sequencing technologies; SEQTYPE_END is the number of real entries.
  // Don't forget to also change the same definition in parameters.H!!!
  enum read_sequencing_type {
    SEQTYPE_SANGER=0,
    SEQTYPE_454GS20,
    SEQTYPE_IONTORRENT,
    SEQTYPE_PACBIO,
    SEQTYPE_SOLEXA,
    SEQTYPE_ABISOLID,
    SEQTYPE_END
  };

// kludge to let the C(++) preprocessor know the number of sequencing types;
//  adapt according to the enum above
#define CPP_READ_SEQTYPE_END 6

  // Bit masks for the flags byte of a bhashstat_t.
  enum bhashstat_bits {
    BHSBITS_NONE                      = 0x00,
    BHSBITS_DEFAULT                   = 0x00,
    BHSBITS_FREQUENCY                 = 0x07,  // three lowest bits: frequency
    BHSBITS_SEENATLOWPOS              = 0x08,
    BHSBITS_CONFIRMED_FWDREV          = 0x10,
    BHSBITS_CONFIRMED_MULTIPLESEQTYPE = 0x20,
    BHSBITS_BITSAREVALID              = 0x40,
    BHSBITS_ALL                       = 0x7f
  };

  /* the three bits for frequency above are used like this:
     0 no info
     1 single hash
     2 below nmc
     3 non-mc
     4 above nmc
     5 mc
     6 crazy


     the routines below rely on bit numbering 76543210 
  */

  /*
    for each position in read, the bposhashstat_t holds a status
    for the hash made of sequence starting at that position and
    ending k bases *downstream!* of hash calculation

    I.e.: pos 20 and 10-mer
    The fwd holds status of hash for sequence from 20-29
    The rev holds status of hash for rev compl. seq from 20 to 11
  */
    
  struct bhashstat_t {
    uint8 flags;

    bhashstat_t(): flags(BHSBITS_DEFAULT) {};

    inline void clear() { flags=BHSBITS_DEFAULT; }
    inline bool isValid() const {
      return flags & BHSBITS_BITSAREVALID;
    }
    inline void setValid() {
      flags=flags|Read::BHSBITS_BITSAREVALID;
    }
    inline uint8 getFrequency() const {
      return static_cast<uint8>(flags & BHSBITS_FREQUENCY);
    }
    inline void setFrequency(uint8 frequency) {
      if(frequency>7) frequency=7;
      flags=static_cast<uint8>((flags&(~BHSBITS_FREQUENCY))|frequency);
    }

    inline void setSeenAtLowPos() {
      flags=flags|Read::BHSBITS_SEENATLOWPOS;
    }
    inline bool hasSeenAtLowPos() const {
      return flags&Read::BHSBITS_SEENATLOWPOS;
    }
    inline void setConfirmedFwdRev() {
      flags=flags|Read::BHSBITS_CONFIRMED_FWDREV;
    }
    inline bool hasConfirmedFwdRev() const {
      return flags&Read::BHSBITS_CONFIRMED_FWDREV;
    }
    inline void setConfirmedMultipleSeqType() {
      flags=flags|Read::BHSBITS_CONFIRMED_MULTIPLESEQTYPE;
    }
    inline bool hasConfirmedMultipleSeqType() const {
      return flags&Read::BHSBITS_CONFIRMED_MULTIPLESEQTYPE;
    }

    friend ostream & operator<<(ostream &ostr, const bhashstat_t & bhs){
      //ostr << hex << static_cast<uint16>(bhs.flags) << dec << ' ';
      if(bhs.isValid()){
	ostr << "valid ";
      }else{
	ostr << "invld ";
      }
      if(bhs.hasSeenAtLowPos()) ostr << "slp ";
      if(bhs.hasConfirmedFwdRev()) ostr << "fr ";
      if(bhs.hasConfirmedMultipleSeqType()) ostr << "mst ";
      ostr << '#' << static_cast<uint16>(bhs.getFrequency()) << ' ';

      return ostr;
    }

  };

  // Hash status for one read position: one bhashstat_t for the forward
  //  hash (fwd) and one for the reverse hash (rev).
  // Every member taking a 'direction' works on fwd if direction>0 and
  //  on rev otherwise (i.e. also for 0 and negative values).
  struct bposhashstat_t {
    bhashstat_t fwd;
    bhashstat_t rev;

    bposhashstat_t() {};

    // returns a copy of the status for the given direction
    inline bhashstat_t getBHashStat(int8 direction) const {
      if(direction>0) return fwd;
      return rev;
    }

    inline bool isValid(int8 direction) const {
      if(direction>0) return fwd.isValid();
      return rev.isValid();
    }

    inline uint8 getFrequency(int8 direction) const {
      if(direction>0) return fwd.getFrequency();
      return rev.getFrequency();
    }
    // frequency values larger than 7 are stored as 7
    inline void setFrequency(int8 direction, uint8 frequency) {
      if(frequency>7) frequency=7;
      if(direction>0) {
	fwd.setFrequency(frequency);
      }else{
	rev.setFrequency(frequency);
      }
    }

    // The three setters below used to lack the 'else' branch, so that a
    //  call for the forward direction also set the flag on 'rev'. They
    //  now touch exactly one direction, like setFrequency() and the getters.
    inline void setSeenAtLowPos(int8 direction) {
      if(direction>0) {
	fwd.setSeenAtLowPos();
      }else{
	rev.setSeenAtLowPos();
      }
    }
    inline bool hasSeenAtLowPos(int8 direction) const {
      if(direction>0) return fwd.hasSeenAtLowPos();
      return rev.hasSeenAtLowPos();
    }
    inline void setConfirmedFwdRev(int8 direction) {
      if(direction>0) {
	fwd.setConfirmedFwdRev();
      }else{
	rev.setConfirmedFwdRev();
      }
    }
    inline bool hasConfirmedFwdRev(int8 direction) const {
      if(direction>0) return fwd.hasConfirmedFwdRev();
      return rev.hasConfirmedFwdRev();
    }
    inline void setConfirmedMultipleSeqType(int8 direction) {
      if(direction>0) {
	fwd.setConfirmedMultipleSeqType();
      }else{
	rev.setConfirmedMultipleSeqType();
      }
    }
    inline bool hasConfirmedMultipleSeqType(int8 direction) const {
      if(direction>0) return fwd.hasConfirmedMultipleSeqType();
      return rev.hasConfirmedMultipleSeqType();
    }

    // Dumps the forward status followed by the reverse status.
    // NOTE(review): "r; " (with semicolon) looks like a typo for "r: ",
    //  it is kept as is so that the emitted text does not change.
    friend ostream & operator<<(ostream &ostr, const bposhashstat_t & bhs){
      ostr << "f: " << bhs.fwd << ' ';
      ostr << "r; " << bhs.rev << ' ';
      return ostr;
    }
  };
  
  
  // Static variables
public:
  static const bposhashstat_t REA_bposhashstat_default;

  static const vector<string> REA_namesofseqtypes;
  static const vector<string> REA_shortnamesofseqtypes;

  // Note: due to possible static initialization fiasco,
  //       the REA_tagentry_* variables are defined/initialised
  //       in "mira/multitag.C" and not in "mira/read.C"
  static const multitag_t::mte_id_t REA_tagentry_idEmpty;

  static const multitag_t::mte_id_t REA_tagentry_idMINF;
  
  static const multitag_t::mte_id_t REA_tagentry_idED_D;
  static const multitag_t::mte_id_t REA_tagentry_idED_C;
  static const multitag_t::mte_id_t REA_tagentry_idED_I;

  static const multitag_t::mte_id_t REA_tagentry_idESDN;
  
  static const multitag_t::mte_id_t REA_tagentry_idSRMr;
  static const multitag_t::mte_id_t REA_tagentry_idCRMr;
  static const multitag_t::mte_id_t REA_tagentry_idWRMr;
  static const multitag_t::mte_id_t REA_tagentry_idSAOr;
  static const multitag_t::mte_id_t REA_tagentry_idSROr;
  static const multitag_t::mte_id_t REA_tagentry_idSIOr;
  static const multitag_t::mte_id_t REA_tagentry_idPSHP;
  static const multitag_t::mte_id_t REA_tagentry_idCJSP;
  static const multitag_t::mte_id_t REA_tagentry_idUNSr;
  static const multitag_t::mte_id_t REA_tagentry_idMNRr;

  static const multitag_t::mte_id_t REA_tagentry_idHAF0;
  static const multitag_t::mte_id_t REA_tagentry_idHAF1;
  static const multitag_t::mte_id_t REA_tagentry_idHAF2;
  static const multitag_t::mte_id_t REA_tagentry_idHAF3;
  static const multitag_t::mte_id_t REA_tagentry_idHAF4;
  static const multitag_t::mte_id_t REA_tagentry_idHAF5;
  static const multitag_t::mte_id_t REA_tagentry_idHAF6;
  static const multitag_t::mte_id_t REA_tagentry_idHAF7;

  static const multitag_t::mte_id_t REA_tagentry_idMFSM;


  static const multitag_t::mte_id_t REA_tagentry_idALUS;
  static const multitag_t::mte_id_t REA_tagentry_idREPT;

  static const multitag_t::mte_id_t REA_tagentry_idSVEC;

  static const multitag_t::mte_id_t REA_tagentry_idFsrc;
  static const multitag_t::mte_id_t REA_tagentry_idFgen;
  static const multitag_t::mte_id_t REA_tagentry_idFCDS;
  static const multitag_t::mte_id_t REA_tagentry_idFexn;
  static const multitag_t::mte_id_t REA_tagentry_idFint;
  static const multitag_t::mte_id_t REA_tagentry_idFpAS;
 
  static const multitag_t::mte_id_t REA_tagentry_idFmRN;
  static const multitag_t::mte_id_t REA_tagentry_idFm_R;
  static const multitag_t::mte_id_t REA_tagentry_idFpRN;
  static const multitag_t::mte_id_t REA_tagentry_idFrRN;
  static const multitag_t::mte_id_t REA_tagentry_idFscR;
  static const multitag_t::mte_id_t REA_tagentry_idFsnR;
  static const multitag_t::mte_id_t REA_tagentry_idFtRN;


  static const multitag_t::mte_co_t REA_tagentry_coEmpty;
  static const multitag_t::mte_co_t REA_tagentry_coUnknown;
  static const multitag_t::mte_co_t REA_tagentry_coSRMr;
  static const multitag_t::mte_co_t REA_tagentry_coCRMr;
  static const multitag_t::mte_co_t REA_tagentry_coWRMr;
  static const multitag_t::mte_co_t REA_tagentry_coSAOr;
  static const multitag_t::mte_co_t REA_tagentry_coSROr;
  static const multitag_t::mte_co_t REA_tagentry_coSIOr;
  static const multitag_t::mte_co_t REA_tagentry_coFpAS;
  static const multitag_t::mte_co_t REA_tagentry_coPSHP;
  static const multitag_t::mte_co_t REA_tagentry_coUNSr;



private:

  // a dummy variable, used in read.C to call the static initialiser
  // before entering main;
  static const bool REA_initialisedstatics;
  static vector<double> REA_bqual2errorrate;


  static StringContainer<uint32> REA_sc_readname;

  // StringContainer for additional info
  static StringContainer<uint8> REA_sc_machine_type;
  static StringContainer<uint8> REA_sc_primer;
  static StringContainer<uint8> REA_sc_strain;
  static StringContainer<uint8> REA_sc_basecaller;
  static StringContainer<uint8> REA_sc_dye;
  static StringContainer<uint8> REA_sc_processstatus;

  static StringContainer<uint16> REA_sc_clonevec_name;
  static StringContainer<uint16> REA_sc_seqvec_name;

  static StringContainer<uint32> REA_sc_asped;

  // string container for pathnames
  static StringContainer<uint8> REA_sc_scf_pathname;
  static StringContainer<uint8> REA_sc_exp_pathname;

  // Library with insert sizes.
  // To save memory, a Read only stores a uint8 id (REA_insize_libid)
  //  which is used as index into REA_insize_staticlib.
  // Downside: only 255 different combinations of mate pairs
  //  possible ... but which assembly project uses even 100???
  struct insizelib_t {
    int32 insize_from;   // value returned by getInsizeFrom()
    int32 insize_to;     // value returned by getInsizeTo()
  };
  static vector<insizelib_t> REA_insize_staticlib;

  // controls the behaviour of cout << Read
  // this is not thread safe! calling functions must use mutexes
  static uint8 REA_outtype;

  // A static array for fast complement conversion 
  // has been moved to a util/misc.C function
  //static char  REA_complement_base[256];

  static const char  REA_zerostring;

  // Here's the logic to get the name of the read and the filenames
  //  out of the given init-name:

  //  char * REA_givenname;      // 0 terminated

  static uint8  REA_cut_givenname;  /* cut n chars from name to get
					a base name
				     */ 
  static string REA_add_4expfn;  /* add this to the basename to
					  get the real exp filename 
				       */
  static string REA_add_4caffn;  /* add this to the basename to
					  get the real caf filename 
				       */
  static string REA_add_4scffn;  /* add this to the basename to
					  get the real scf filename 
				       */

private:
  string REA_template;

  string REA_scf_filename;   // got from exp or caf file; evtl. constructed from fasta
  //string REA_exp_filename;      // the constructed exp name; 
  //string REA_caf_filename;      // the constructed caf name; 

  // The padded sequences
  //  changes allowed afterwards
  // NOTE: IMPORTANT
  //  insertions/deletions made to one sequence are not made directly in
  //   the other. Instead, the dirty flag of the other will be set and the
  //   other sequence will be recomputed when needed.
  vector<char>           REA_padded_sequence;
  vector<char>           REA_padded_complementsequence; 


  // The qualities of the bases called, for each trace one byte
  //  (as suggested for the staden package)
  // This vector is always padded
  vector<base_quality_t> REA_qualities;
  
  // The adjustment vector contains the relations between the bases
  //  in this class and the ones in the SCF file
  // Inserted bases have -1 as relation.
  vector<int32>          REA_adjustments;

  // new in 2.9.41x4
  // each base can have flags set to it, at the moment 8 suffice
  vector<bposhashstat_t>    REA_bposhashstats;

  // the tags set to the sequence
  vector<multitag_t> REA_tags;


  // left cutoffs: 10 meaning bases[0..9] cut , take [10..]

  int32 REA_ql;   // REA_qual_left_clipoff;  
  int32 REA_sl;	  // REA_svec_left_clipoff;  
  int32 REA_cl;	  // REA_clon_left_clipoff;  
  int32 REA_ml;	  // REA_maskbase_left_clipoff;  

  // right cutoffs: 100 meaning take bases [..99], cut [100..]
  int32 REA_qr;	  // REA_qual_right_clipoff; 
  int32 REA_sr;	  // REA_svec_right_clipoff; 
  int32 REA_cr;   // REA_clon_right_clipoff; 
  int32 REA_mr;   // REA_maskbase_right_clipoff; 

  /* ergo: xL and xR make normal C delimiters like in
     for(int i=xL; i<xR; i++) ... */


  //int32 REA_leftclip;      // rightmost leftclip, clone and mask clip NOT! cared of
  //int32 REA_rightclip;     // leftmost rightclip, clone and mask clip NOT! cared of
  //int32 REA_len_clipped;   // rightclip - leftclip (0 if lower than 0)

  //int32  REA_insize_from;
  //int32  REA_insize_to;

  //int32  REA_stadenid;

  int32  REA_templateid;               // ID of template
  int32  REA_templatepartnerid;        // ID of read in same template


  //string REA_stolen;

  //string REA_name;           // got from exp, caf or fasta file, 
  StringContainer<uint32>::Entry REA_nameentry;
  StringContainer<uint32>::Entry REA_asped;
  StringContainer<uint16>::Entry REA_clonevec_name;
  StringContainer<uint16>::Entry REA_seqvec_name;

  StringContainer<uint8>::Entry REA_machine_type;
  StringContainer<uint8>::Entry REA_primer;
  StringContainer<uint8>::Entry REA_strain;
  StringContainer<uint8>::Entry REA_basecaller;
  StringContainer<uint8>::Entry REA_dye;
  StringContainer<uint8>::Entry REA_processstatus;

  StringContainer<uint8>::Entry REA_scf_pathname;
  StringContainer<uint8>::Entry REA_exp_pathname;


  int8  REA_strainid;

  uint8 REA_insize_libid;

  uint8 REA_seqtype;         // what kind of data generated that read?

  uint8 REA_readnaming_scheme;    // how was the read named by which center?

  char REA_template_end;    /* equivalent TRACE_END from TRACEINFO file
			       or to the ...F or ...R from TIGR naming scheme
			       or to p1cx and q1cx from Sanger scheme

			       stores whether the read is from Forward,
			       or Reverse end of template
			       or unknowN
			       'F', 'R', 'N' (N is default) */

  // true: templates must be in same direction in contig
  // false: one of the reads must be reverse
  // (on >2 reads per template: no meaning atm)
  bool REA_templatebuilddirection;

  // the dirty flags for the padded and padded complement sequence
  bool REA_ps_dirty:1;
  bool REA_pcs_dirty:1;

  bool REA_has_quality:1;
  bool REA_has_basehashstats:1;
  bool REA_has_freqavg:1;
  bool REA_has_freqrept:1;

  // has the SCF been found and do the data seem valid?
  bool REA_scf_available:1;
  bool REA_scf_loadattempted:1;

  bool REA_name_scheme_valid:1;       /* was the name scheme valid, i.e.,
				       is it paired-end? */

  // Last not least, has this read valid data?
  bool REA_has_valid_data:1;

  // is it used?
  bool REA_used_in_assembly:1;

  bool REA_isbackbone:1;
  bool REA_israil:1;
  bool REA_iscoverageequivalentread:1;

  // space saving
  //
  // true by default
  // is set to false by disallowAdjustments() and from then on
  //  the read does not use the REA_adjustments vector anymore
  //  until re-initialised by discard()

  bool REA_uses_adjustments:1;


// Functions

private:
  static bool staticInitialiser();

  void foolCompiler();
  void zeroVars();

  void dumpAsBAF(ostream & ostr);
  void dumpAsCAF(ostream & ostr);
  void dumpAsMAF(ostream & ostr);
  void dumpAsFASTQ(ostream & ostr, bool clippedonly, bool maskedseqvec, bool maskedmask);
  void dumpAsFASTA(ostream & ostr, bool clippedonly, bool maskedseqvec, bool maskedmask);
  void dumpAsFASTAQual(ostream & ostr, bool clippedonly, bool maskedseqvec, bool maskedmask);
  void dumpAsACE(ostream & ostr, int32 direction);

  const char * sanityCheck() const;

  void checkQualities();

  void postLoadEXPFASTA();

  //void construct_names(const char * givenname);
  void makeComplement(const vector<char> & source, vector<char> & destination);
  // Does nothing (empty body).
  // NOTE(review): presumably a leftover from the time the combined
  //  clipoffs were cached in member variables; getLeftClipoff() and
  //  getRightClipoff() now compute them on each call -- confirm before
  //  removing.
  inline void updateClipoffs() const {};
  void updateTagBaseInserted(uint32 position);
  void updateTagBaseDeleted(uint32 position);
  void refreshPaddedSequence();
  void refreshPaddedComplementSequence();
  const char * processEXPTag(const char * ptr);

  string getInternalTemplateName_Sanger();
  string getInternalTemplateName_TIGR();
  string getInternalTemplateName_FR();
  string getInternalTemplateName_Solexa();
  string getInternalTemplateName_StLouis();

  // Ordering predicate for tags (used by sortTags()):
  //  first by 'from', then by 'to', finally by the identifier string.
  static bool tag_t_comparator(const multitag_t & t1, const multitag_t & t2)
  {
    if(t1.from != t2.from) {
      return t1.from < t2.from;
    }
    if(t1.to != t2.to) {
      return t1.to < t2.to;
    }
    return t1.getIdentifierStr() < t2.getIdentifierStr();
  }
  
  static double qualityToErrorRate_compute(base_quality_t qual);

  void transferMINFTagsToReadInfo();

  void moderateContainerGrowth();

public:
  Read();
  Read(Read const &other);
  ~Read();

  Read const & operator=(Read const & other);
  friend ostream & operator<<(ostream &ostr, Read const &read);

  size_t estimateMemoryUsage() const;

  // reserve works like STL function for vector etc.
  // reserves memory in data structures so that a read
  //  can have up to 'len' bases without need for internal re-allocation
  void reserve(uint32 len);
  size_t capacity() const;
  size_t getWastage() const;

  void integrityCheck() const;

  static void dumpStringContainerStats(ostream & ostr);
  static void setCoutType(uint8 type);
  void dumpAsGAP4DA(ostream & ostr, string & APline, bool outputTags=true);
  // No CIGAR string is built here: the given string is just emptied.
  void getCIGARString(string & s) {
    s.clear();
  }

  void discard();
  const char * checkRead() const;

  void setFileNamesFromEXPFileName(const string & filename);
  void setFileNamesFromFASTAName(const string & fastaname);
  void setFileNamesFromSCFFileName(const string & filename);
  void setSCFFileName(const string & filename);

  void setDirectories(const string & exp, 
		      const string & scf
		      );
  void setSCFDirectory(const string & scf);

  void setSequenceFromString(const char * sequence);
  inline void setSequenceFromString(const string & sequence) {
    setSequenceFromString(sequence.c_str());
  };

  const vector<char> & getActualSequence() const;
  const vector<char> & getActualComplementSequence() const;

  // Marks the read as not using adjustments anymore. If the adjustment
  //  vector holds any capacity, it is handed to nukeSTLContainer()
  //  (presumably to give the memory back).
  void disallowAdjustments(){
    REA_uses_adjustments=false;
    if(REA_adjustments.capacity()==0) return;
    nukeSTLContainer(REA_adjustments);
  }
  // false once disallowAdjustments() has been called
  bool usesAdjustments() const {
    return REA_uses_adjustments;
  }
  // read-only access to the adjustment vector
  const vector<int32> & getAdjustments() const {
    return REA_adjustments;
  }

  // NOT! 0 terminated. getLenClippedSeq() to get its length;
  const char * getClippedSeqAsChar() const;
  const char * getClippedComplementSeqAsChar() const;
  const char * getSeqAsChar() const;

  vector<char>::const_iterator getSeqIteratorBegin() const;
  vector<char>::const_iterator getSeqIteratorEnd() const;
  vector<char>::const_iterator getComplementSeqIteratorBegin() const;
  vector<char>::const_iterator getComplementSeqIteratorEnd() const;

  vector<char>::const_iterator getClippedSeqIterator() const;
  vector<char>::const_iterator getClippedComplementSeqIterator() const;

  char getBaseInSequence(uint32 pos) const ;
  char getBaseInComplementSequence(uint32 pos) const ;
  base_quality_t getQualityInSequence(uint32 pos) const ;
  base_quality_t getQualityInComplementSequence(uint32 pos) const ;
  char getBaseInClippedSequence(uint32 pos) const;
  char getBaseInClippedComplementSequence(uint32 pos);

  // Every change in the sequence also changes the complement (and
  //  vice versa), but the changes are not made immediately 
  //  (caching mechanism in effect), only when needed.
  // inserts a base in front of the given position
  // deletes a base at the given position
  void insertBaseInSequence(char base,
			    base_quality_t quality,
			    uint32 position,
			    bool extends_clipped_area);
  void deleteBaseFromSequence(uint32 position);
  void insertBaseInComplementSequence(char base,
				      base_quality_t quality,
				      uint32 position,
				      bool extends_clipped_area);
  void deleteBaseFromComplementSequence(uint32 uposition);

  // Every change here is made directly in both vectors (costs almost
  //  no time), _IF_ they're valid. 
  // changes a base at the given position
  void changeBaseInSequence(char base, 
			    base_quality_t quality,
			    uint32 position);
  void changeBaseInComplementSequence(char base,
				      base_quality_t quality,
				      uint32 position);

  void changeAdjustment(uint32 position, int32 newadjustment);

  // The 'Clipped' functions work basically like the ones above,
  //  except the positions has to be given relative to the
  //  clipped sequence
  void insertBaseInClippedSequence(char base,
				   base_quality_t quality,
				   uint32 position,
				   bool extends_clipped_area);
  void deleteBaseFromClippedSequence(uint32 position);
  void insertBaseInClippedComplementSequence(char base,
					     base_quality_t quality,
					     uint32 position,
					     bool extends_clipped_area);
  void deleteBaseFromClippedComplementSequence(uint32 position);

  // Every change here is made directly in both vectors (costs almost
  //  no time), _IF_ they're valid. 
  // changes a base at the given position
  void changeBaseInClippedSequence(char base, 
				   base_quality_t quality,
				   uint32 position);
  void changeBaseInClippedComplementSequence(char base,
					     base_quality_t quality,
					     uint32 position);


  // read-only access to the base qualities
  const vector<base_quality_t> & getQualities() const {
    return REA_qualities;
  }
  void setQualities(const vector<base_quality_t> & quals);
  void setQualities(base_quality_t qual);

  // read-only access to the hash statistics of all positions
  const vector<bposhashstat_t> & getBPosHashStats() const {
    return REA_bposhashstats;
  }
  bposhashstat_t getBPosHashStats(uint32 pos) const;
  // as getBPosHashStats(pos), but 'pos' is counted from the left clipoff
  bposhashstat_t getBPosHashStatsInClippedSequence(uint32 pos) const {
    const uint32 rawpos=pos+getLeftClipoff();
    return getBPosHashStats(rawpos);
  }
  void setBPosHashStats(bposhashstat_t bf, uint32 from, uint32 len);
  // as setBPosHashStats(bf,from,len), but 'from' is counted from the
  //  left clipoff
  void setBPosHashStatsInClippedSequence(bposhashstat_t bf, uint32 from, uint32 len) {
    const uint32 rawfrom=from+getLeftClipoff();
    setBPosHashStats(bf,rawfrom,len);
  }

  bool hasTheseBaseHashStatsSet(bposhashstat_t bf, uint32 pos) const;
  // Sets the hash statistics of every position to the default value
  //  (one entry per quality value) and clears the "has stats" flag.
  void clearAllBPosHashStats() {
    REA_bposhashstats.assign(REA_qualities.size(),REA_bposhashstat_default);
    REA_has_basehashstats=false;
  }
  // plain accessors for the REA_has_basehashstats flag
  void setHasBaseHashStats(bool b) {
    REA_has_basehashstats=b;
  }
  bool hasBaseHashStats() const {
    return REA_has_basehashstats;
  }

  // plain accessors for the REA_has_freqavg flag
  void setHasFreqAvg(bool b) {
    REA_has_freqavg=b;
  }
  bool hasFreqAvg() const {
    return REA_has_freqavg;
  }
  // plain accessors for the REA_has_freqrept flag
  void setHasFreqRept(bool b) {
    REA_has_freqrept=b;
  }
  bool hasFreqRept() const {
    return REA_has_freqrept;
  }

  void setTags(const vector<tag_t> & tags);
  void setTags(const vector<multitag_t> & tags);

  // TODO:: phase out!!!!
  void addTag(const tag_t & tag, bool onlygoodqual=true);
  void addTag(uint32 from, uint32 to,
	      const string & identifier, 
	      const string & comment);
	      //bool onlygoodqual=true);

  // TODO:: multitag_t
  void addTag(const multitag_t & tag, bool onlygoodqual=true);
  void addTag(uint32 from, uint32 to,
	      const multitag_t::mte_id_t identifier, 
	      const multitag_t::mte_co_t comment);

  void addTag(uint32 from, uint32 to,
	      const multitag_t::mte_id_t identifier, 
	      const string & comment);


  //bool hasTag(const string & identifier, int32 pos=-1) const;
  bool hasTag(const multitag_t::mte_id_t identifier, int32 pos=-1) const;
  bool hasTag(const string & identifier, int32 pos) const;
  static multitag_t::mte_id_t getTagID(const string & identifier);


  //uint32 countTags(const string & identifier, int32 pos=-1) const;
  uint32 countTags(const multitag_t::mte_id_t identifier, int32 pos=-1) const;
  const multitag_t & getTag(uint32 tagnumber) const;
  // sorts the tags of the read using tag_t_comparator
  void sortTags() {
    sort(REA_tags.begin(),REA_tags.end(),tag_t_comparator);
  }
  // number of tags currently stored for the read
  uint32 getNumOfTags() const {
    return static_cast<uint32>(REA_tags.size());
  }

  // TODO: multitag
  //uint32 deleteTag(const string & identifier);
  uint32 deleteTag(const multitag_t::mte_id_t identifier);
  uint32 deleteTag(const uint32 from, 
		   const uint32 to,
		   const string & identifier);


  //inline void setName(const string & name) {
  //  REA_nameentry=REA_sc_readname.addEntryNoDoubleCheck(name);
  //};
  void setName(const string & name);
  // name of the read, looked up in the static read name container
  const string & getName() const {
    return REA_sc_readname.getEntry(REA_nameentry);
  }

  const string & getSCFName() const {
    return REA_scf_filename;
  }
  //inline const string & getEXPName() const { return REA_exp_filename;};
  //inline const string & getCAFName() const { return REA_caf_filename;};

  //void getEXPName(string & fn) const { fn="";return;};


  void getSCFFullPathName(string & path) const;
  void getEXPFullPathName(string & path) const;

  void loadDataFromEXP(const string & filename);
  void loadDataFromSCF(const string & scfname);
  void checkSCFAndLoadQual(bool justcheck, bool loadfailnoerror=false);

  // "sequence" has to be in fasta format (valid IUPAC codes,* is gap)
  // Concession to Staden: - is treated as N
  void initialiseRead(bool preserve_originals,  // if true, vectors are
		                                // copied, else swapped (destr.)
		      bool iscomplement,  // true if seq, qual, adj are compl.
		      bool ispadded,
		      vector<char>           & sequence,
		      vector<base_quality_t> & qualities,
		      vector<int32>          & adjustments,
		      vector<tag_t>          & tags,
		      const string & name,
		      const string & SCFname,
		      int32 ql, int32 qr,        // quality clipping left/right
		      int32 sl, int32 sr,        // sequence vector clipping
		      int32 cl, int32 cr         // clone vector clipping
		      );

  void removeGapsFromRead();
  void fixZeroGapQuals();
  void transformGapsToNs();
  void correctNStretch(const int32 from, const int32 to, int32 changeestim);

  void setInsize(int32 from, int32 to);
  //inline void setStadenID(int32 id) {REA_stadenid=id;};
  inline void setTemplate(const string & s) {REA_template=s;};
  inline void setTemplatePartnerID(int32 id) {REA_templatepartnerid=id;}
  inline void setTemplateID(int32 id) {REA_templateid=id;};
  inline void setStrainID(int8 id) {REA_strainid=id;};

  inline void setClonevecName(const string & s) {REA_clonevec_name=REA_sc_clonevec_name.addEntry(s);};
  inline void setSeqvecName(const string & s) {REA_seqvec_name=REA_sc_seqvec_name.addEntry(s);};
  inline void setStrain(const string & s) {REA_strain=REA_sc_strain.addEntry(s);};
  inline void setBasecaller(const string & s) {REA_basecaller=REA_sc_basecaller.addEntry(s);};
  inline void setAsped(const string & s) {REA_asped=REA_sc_asped.addEntry(s);};
  inline void setDye(const string & s) {REA_dye=REA_sc_dye.addEntry(s);};
  inline void setProcessStatus(const string & s) {REA_processstatus=REA_sc_processstatus.addEntry(s);};
  inline void setMachineType(const string & s) {REA_machine_type=REA_sc_machine_type.addEntry(s);};
  inline void setPrimer(const string & s) {REA_primer=REA_sc_primer.addEntry(s);};

  // Insert size range of the read, looked up in the static insert
  //  size library via REA_insize_libid (index is not range checked).
  int32 getInsizeFrom() const {
    const insizelib_t & libentry=REA_insize_staticlib[REA_insize_libid];
    return libentry.insize_from;
  }
  int32 getInsizeTo() const {
    const insizelib_t & libentry=REA_insize_staticlib[REA_insize_libid];
    return libentry.insize_to;
  }

  //inline int32 getStadenID() const {return REA_stadenid;};
  int32 getTemplatePartnerID() const {
    return REA_templatepartnerid;
  }
  const string & getTemplate() const {
    return REA_template;
  }
  // the getters below resolve the entry kept in the read via the
  //  corresponding static string container
  const string & getClonevecName() const {
    return REA_sc_clonevec_name.getEntry(REA_clonevec_name);
  }
  const string & getSeqvecName() const {
    return REA_sc_seqvec_name.getEntry(REA_seqvec_name);
  }
  const string & getStrain() const {
    return REA_sc_strain.getEntry(REA_strain);
  }
  const string & getBasecaller() const {
    return REA_sc_basecaller.getEntry(REA_basecaller);
  }
  const string & getAsped() const {
    return REA_sc_asped.getEntry(REA_asped);
  }
  const string & getDye() const {
    return REA_sc_dye.getEntry(REA_dye);
  }
  const string & getProcessStatus() const {
    return REA_sc_processstatus.getEntry(REA_processstatus);
  }
  const string & getMachineType() const {
    return REA_sc_machine_type.getEntry(REA_machine_type);
  }
  const string & getPrimer() const {
    return REA_sc_primer.getEntry(REA_primer);
  }

  void  setReadNamingScheme(uint8 scheme);
  uint8 getReadNamingScheme() const {
    return REA_readnaming_scheme;
  }
  string getInternalTemplateName();
  int32 getTemplateID() const {
    return REA_templateid;
  }
  int8 getStrainID() const {
    return REA_strainid;
  }
  // 'F', 'R' or 'N', see the description of REA_template_end
  char getTemplateEnd() const {
    return REA_template_end;
  }
  void setTemplateEnd(char e);
  // The build direction is stored as a bool and exposed as a sign:
  //  1 if the flag is set, -1 if not.
  int8 getTemplateBuildDirection() const {
    return REA_templatebuilddirection ? 1 : -1;
  }
  // any value >0 sets the flag, everything else (0 included) clears it
  void setTemplateBuildDirection(int8 dir) {
    REA_templatebuilddirection=(dir>0);
  }

  // quality clips (left, right)
  int32 getLQClipoff() const {
    return REA_ql;
  }
  int32 getRQClipoff() const {
    return REA_qr;
  }
  // sequencing vector clips (left, right)
  int32 getLSClipoff() const {
    return REA_sl;
  }
  int32 getRSClipoff() const {
    return REA_sr;
  }
  // mask clips (left, right)
  int32 getLMClipoff() const {
    return REA_ml;
  }
  int32 getRMClipoff() const {
    return REA_mr;
  }

  //inline int32 getLeftClipoff()  const { return REA_leftclip;}
  //inline int32 getRightClipoff() const { return REA_rightclip;}
  //inline uint32 getLenClippedSeq() const {return REA_len_clipped;};

  // Effective clips, computed from quality and sequencing vector clips
  //  only: the rightmost of the left clips, the leftmost of the right
  //  clips. Clone and mask clips are not looked at.
  int32 getLeftClipoff() const {
    return max(REA_ql, REA_sl);
  }
  int32 getRightClipoff() const {
    return min(REA_qr, REA_sr);
  }
  // length of the clipped sequence; 0 if the left clip lies behind
  //  the right clip
  uint32 getLenClippedSeq() const {
    const int32 lclip=getLeftClipoff();
    const int32 rclip=getRightClipoff();
    if(lclip>rclip) {
      return 0;
    }
    return rclip-lclip;
  }

  uint32 getLenSeq() const;


  int32 getLeftExtend() const;
  int32 getRightExtend() const;

  void setClipoffs(int32 lclip, int32 rclip, bool force);
  void setMinimumLeftClipoff(int32, int32, bool, bool);
  void setMinimumRightClipoff(int32, int32);
  void setLMClipoff(int32 l);
  void setRMClipoff(int32 r);
  void setLSClipoff(int32 l);
  void setRSClipoff(int32 r);
  void setLQClipoff(int32 l);
  void setRQClipoff(int32 r);

  int32 getAdjustmentPosOfReadPos(const uint32 position) const;
  int32 getLowerNonGapAdjustmentPosOfReadPos(uint32 position) const;
  int32 getUpperNonGapAdjustmentPosOfReadPos(uint32 position) const;
  int32 getReadPosOfAdjustmentPos(const int32 position) const;

  int32 getLowerNonGapPosOfReadPos(uint32 position) const;
  int32 getUpperNonGapPosOfReadPos(uint32 position) const;

  uint32 getLowerBoundPosOfBaseRun(uint32 pos, 
				   char base, 
				   const bool alsotakegap) const;
  uint32 getUpperBoundPosOfBaseRun(uint32 pos, 
				   char base, 
				   const bool alsotakegap) const;
  uint32 getLenOfGapRun(uint32 pos) const;

  bool hasSCFData(bool loadfailnoerror=false);
  // plain accessors for the status flags of the read
  bool hasValidNameScheme() const {
    return REA_name_scheme_valid;
  }
  bool hasQuality() const {
    return REA_has_quality;
  }
  void setQualityFlag(bool v) {
    REA_has_quality=v;
  }
  void setValidData(bool v) {
    REA_has_valid_data=v;
  }
  bool hasValidData() const {
    return REA_has_valid_data;
  }
  bool isUsedInAssembly() const {
    return REA_used_in_assembly;
  }
  void setUsedInAssembly(bool used) {
    REA_used_in_assembly=used;
  }
  bool isBackbone() const {
    return REA_isbackbone;
  }
  void setBackbone(bool bb) {
    REA_isbackbone=bb;
  }
  bool isRail() const {
    return REA_israil;
  }
  void setRail(bool rail) {
    REA_israil=rail;
  }
  bool isCoverageEquivalentRead() const {
    return REA_iscoverageequivalentread;
  }
  void setCoverageEquivalentRead(bool v) {
    REA_iscoverageequivalentread=v;
  }

  // calls trash() on the static container holding the read names
  static void trashReadNameContainer() {
    REA_sc_readname.trash();
  }
  // number of sequencing types, i.e. the value of SEQTYPE_END
  static uint8 getNumSequencingTypes() {
    return SEQTYPE_END;
  }
  static const string & getNameOfSequencingType(uint32 st);
  static const string & getShortNameOfSequencingType(uint32 st);
  static uint8 stringToSeqType(const string & value);
  void setSequencingType(uint8 t);
  inline void setSequencingType(const string & t) {setSequencingType(stringToSeqType(t));};
  inline uint8 getSequencingType() const {return REA_seqtype;}
  inline bool isSequencingType(uint8 t) const {return REA_seqtype==t;}


  base_quality_t queryAverageQualInSequence(int32 posl, int32 posr, bool skipNs, bool skipStars);
  base_quality_t queryAverageQualInComplementSequence(int32 posl, int32 posr, bool skipNs, bool skipStars);
  base_quality_t queryAverageQualInClippedSequence(int32 posl, int32 posr, bool skipNs, bool skipStars);
  base_quality_t queryAverageQualInClippedComplementSequence(int32 posl, int32 posr, bool skipNs, bool skipStars);

  int32 calcComplPos(int32 pos) const;
  int32 calcClippedPos2RawPos(int32 pos) const;

  int32 calcRawPos2ClippedPos(int32 pos) const;

  int32 calcClippedComplPos2RawPos(int32 pos) const;

  // Looks up the error rate for a quality value in the static table
  //  REA_bqual2errorrate; qualities above 100 use the entry for 100.
  static double qualityToErrorRate(base_quality_t qual) {
    const base_quality_t idx=(qual>100) ? static_cast<base_quality_t>(100) : qual;
    return REA_bqual2errorrate[idx];
  }

  static base_quality_t errorRateToQuality(double errProb);

  void performQualityClip(uint32 minqual, uint32 winlen);
  void performHardTrim();
  void setClipoffsToMaskedChars(int32 gapsize, int32 frontgs=-1, int32 endgs=-1, bool allowN=false);
  void deleteWeakestBaseInRun(const char base, const uint32 position, const bool insertgap);
  bool getPosOfWeakestBaseInRun(char base, uint32 & position);

  void transferSVTagsToClip(int32 tolerancelen, int32 clipsearchlen) ;

  void exchangeNsWithGaps();
  void blindSeqData(char newbase);
};


// for Read
// Memory statistics for a single Read.
//  total_size  is incremented by one
//  num_size    is set to 1
//  overhead    is set to sizeof(Read)
//  bytes_size  is set to overhead plus c.estimateMemoryUsage()
//  lostbyalign is set to the number of bytes missing to get bytes_size
//              to a multiple of sizeof(void *)
//  iscontainer is set to false
//  total_freecap and total_lostba are not touched
inline void emuoc_statsOnType(const Read & c, size_t & total_size, size_t & total_freecap, size_t & total_lostba, size_t & num_size, size_t & bytes_size, size_t & overhead, size_t & lostbyalign, bool & iscontainer)
{
  CEBUG("emuoc_statsOnType(Read): " << typeid(c).name() << endl);

  // keep the compiler quiet about parameters that may be unused
  (void) c;
  (void) total_freecap;
  (void) total_lostba;

  ++total_size;
  num_size=1;

  overhead=sizeof(c);
  bytes_size=overhead;
  bytes_size+=c.estimateMemoryUsage();

  iscontainer=false;

  lostbyalign=0;
  const size_t misalign=bytes_size%sizeof(void *);
  if(misalign!=0){
    lostbyalign=sizeof(void *)-misalign;
  }
  CEBUG("Lost by align: " <<lostbyalign << endl);
}


#endif
