/*
 * Written by Bastien Chevreux (BaCh)
 *
 * Copyright (C) 1997-2000 by the German Cancer Research Center (Deutsches
 *   Krebsforschungszentrum, DKFZ Heidelberg) and Bastien Chevreux
 * Copyright (C) 2000 and later by Bastien Chevreux
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the 
 * Free Software Foundation, Inc., 
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 * 
 */

// 	$Id$	

#ifndef _assembly_h_
#define _assembly_h_

#include "mira/assembly_info.H"
#include "mira/assembly_output.H"



#include "stdinc/defines.H"
#include "stdinc/stlincludes.H"

#include "mira/parameters.H"
#include "mira/readpool.H"
#include "mira/skim.H"




//#include "mira/dna_sand32.H"
//#include "mira/blocksearch.H"



#ifdef MIRA_HAS_EDIT
#include "EdIt/hypothesen.H"
#include "examine/scf_look.H"
#else
// Placeholder type for builds without the EdIt editor (MIRA_HAS_EDIT not
// defined): it keeps declarations that mention EDITParameters, such as
// Assembly::buildFirstContigs(), compilable.
typedef bool EDITParameters;
#endif



// Identifiers for the individual assembly steps. Numbering starts at 1;
// ASNUMOFSTEPS has to stay the last entry so that it is one larger than
// the highest step identifier.
// NOTE(review): presumably used as indices into Assembly::AS_steps --
// confirm against the implementation.
enum {
  ASREADPOOLOK     = 1,
  ASMATCHTABLEOK   = 2,
  ASADSLISTOK      = 3,
  ASCONTIGSOK      = 4,
  ASUSEDIDSOK      = 5,
  ASVECTORSCLIPPED = 6,
  ASNUMOFSTEPS     = 7
};



// Edge between two reads as stored in Assembly::AS_confirmed_edges.
// Can be written to a stream: one tab separated line starting with "NE:".
struct newedges_t{
  int32  rid1;         // number of read in seqpool
  int32  linked_with;  // number of read in seqpool
  uint32 best_weight;  // score ratio^2*score. Must be >0 (for pathfinder)!
  uint32 adsfindex;    // index of adsfacts elements this refers to
  int16  direction;

  // --- one bit flags -------------------------------------------------

  // if AS_allowquickoverlap for both rids is true
  bool pf_allowquickoverlap:1;

  // temp use by pathfinder: banned overlap
  bool pf_banned:1;

  // hash frequency classification of the overlap
  bool ol_stronggood:1;    // 2*bph-1 pos at 3, thereof bph-1 contiguous
  bool ol_weakgood:1;      // bph-1 positions contiguous at 3
  bool ol_belowavgfreq:1;  // bph-1 positions contiguous at <=3
  bool ol_norept:1;        // nothing >3 (but can contain 1, i.e. single
                           //  hashes == errors)
  bool ol_rept:1;          // bph-1 positions >=5

  // Dumps the edge in text form. Note that pf_allowquickoverlap is not
  // part of the output.
  friend ostream & operator<<(ostream &ostr, const newedges_t & e){
    ostr << "NE:\t" << e.rid1;
    ostr << '\t' << e.linked_with;
    ostr << '\t' << e.best_weight;
    ostr << '\t' << e.adsfindex;
    ostr << "\tdir " << e.direction;
    ostr << "\tban " << e.pf_banned;
    ostr << "\tsg  " << e.ol_stronggood;
    ostr << "\twg  " << e.ol_weakgood;
    ostr << "\tbaf " << e.ol_belowavgfreq;
    ostr << "\tnrp " << e.ol_norept;
    ostr << "\trep " << e.ol_rept;
    ostr << '\n';
    return ostr;
  }
};


// Skim hit between two reads as stored in Assembly::AS_skim_edges and
// handed around by the rsh4_* routines of Assembly.
// Can be written to a stream: one tab separated line starting with "SE:".
struct skimedges_t{
  int32  rid1;         // number of read in seqpool
  int32  linked_with;  // number of read in seqpool
  int32  eoffset;

  uint32 skimweight;   // score ratio^2*score
  size_t skimindex;    // index of line in original skim file

  int8   rid1dir;      // new for reduceSkimHits2
  int8   rid2dir;
  uint8  scoreratio;

  // --- one bit flags: hash frequency classification of the overlap ---
  bool ol_stronggood:1;    // 2*bph-1 pos at 3, thereof bph-1 contiguous
  bool ol_weakgood:1;      // bph-1 positions contiguous at 3
  bool ol_belowavgfreq:1;  // bph-1 positions contiguous at <=3
  bool ol_norept:1;        // nothing >3 (but can contain 1, i.e. single
                           //  hashes == errors)
  bool ol_rept:1;          // bph-1 positions >= 5 freq

  // Dumps all members in declaration order. The 8 bit members are cast
  // to 16 bit integers before streaming (presumably so that they appear
  // as numbers and not as characters -- depends on the int8/uint8
  // typedefs).
  friend ostream & operator<<(ostream &ostr, const skimedges_t & e){
    const char tab='\t';

    ostr << "SE:\t" << e.rid1;
    ostr << tab << e.linked_with;
    ostr << tab << e.eoffset;
    ostr << tab << e.skimweight;
    ostr << tab << e.skimindex;
    ostr << tab << static_cast<int16>(e.rid1dir);
    ostr << tab << static_cast<int16>(e.rid2dir);
    ostr << tab << static_cast<uint16>(e.scoreratio);
    ostr << tab << e.ol_stronggood;
    ostr << tab << e.ol_weakgood;
    ostr << tab << e.ol_belowavgfreq;
    ostr << tab << e.ol_norept;
    ostr << tab << e.ol_rept;
    ostr << '\n';
    return ostr;
  }
};


#include "mira/newpathfinder.H"


class Assembly
{
private:

  // Entry of AS_usedtmpfiles: a base name together with a file name.
  struct usedtmpfiles_t {
    string basename;
    string filename;

    // Returns the sum of what estimateMemoryUsageOfContainer() reports
    // for the two strings (basename first, then filename).
    size_t estimateMemoryUsage() const
    {
      // Only handed through to the helper (presumably output parameters);
      // their values are not looked at here.
      size_t numelem,bytes,freecapacity,lostbyalign;

      size_t total=0;
      total+=estimateMemoryUsageOfContainer(basename,false,
					    numelem,bytes,freecapacity,lostbyalign);
      total+=estimateMemoryUsageOfContainer(filename,false,
					    numelem,bytes,freecapacity,lostbyalign);
      return total;
    }
  };


  AssemblyInfo AS_assemblyinfo;

  // contigs of this assembly; handed out by reference via getContigs()
  list<Contig>   AS_contigs;

  // backbone contigs
  list<Contig>   AS_bbcontigs;

  // callback pointer, set (or reset to NULL) via setContigBuiltCallback()
  // NOTE(review): presumably called each time a contig has been built --
  //  confirm in the implementation
  void (*AS_contigbuilt_callbackfunc)(Contig &, const ReadPool &);


  /***********************************************************************************
   *
   * important for checkpointing
   *
   ***********************************************************************************/

  /** Needs saving *******************************************************************/

  // this holds the reads
  ReadPool       AS_readpool;    // via MAF

  // which read pairs are banned from overlap
  bannedoverlappairs_t AS_permanent_overlap_bans;  // no load/save done yet

  // no dedicated save, but reconstruct from command line save?
  vector<MIRAParameters> AS_miraparams;

  vector<uint32> AS_maxcoveragereached; /* the max coverage each read has
					    attained throughout the whole
					    assembly.  Helps to refine the
					    definition of multicopies.
					 */


  /** May need saving ****************************************************************/
  // this one is slightly different to readpool.size():
  //  backbone are counted, but railreads not
  uint32     AS_num_reads_in_ass;
  uint32     AS_num_reads_valid;
  uint32     AS_num_reads_too_small;


  /***********************************************************************************/
  /***********************************************************************************/

  // these hold the filenames of the "full" skim searches
  string AS_posfmatch_full_filename;
  string AS_poscmatch_full_filename;

  // these hold the filenames of the working set of skim searches
  // these might be a full or a reduced set
  string AS_posfmatch_filename;
  string AS_poscmatch_filename;


  vector<skimedges_t> AS_skim_edges;  // block with x elements
  vector<bool> AS_skimstaken;         // size of total skims in file
  
  vector<bool>   AS_readmaytakeskim;     // size of readpool
  vector<uint32> AS_numskimoverlaps;     // size of readpool
  vector<uint32> AS_numleftextendskims;  // size of readpool
  vector<uint32> AS_numrightextendskims; // size of readpool

  // minimum score ratio hits must have to be taken
  vector<uint8> AS_skimleftextendratio; // size of readpool
  vector<uint8> AS_skimrightextendratio; // size of readpool



  ofstream AS_CUMADSLofstream;
  vector<AlignedDualSeqFacts> AS_adsfacts;
  vector<newedges_t> AS_confirmed_edges;

  // TODO
  //vector<bool> AS_allowquickoverlap;

  uint32 AS_numADSFacts_fromalignments;
  uint32 AS_numADSFacts_fromshreds;

// temporarily used
// emptied at each Skim pass, set when cutBackPossibleChimeras() called
//  queried in ...
  vector<bool> AS_chimeracutflag;
  bool AS_doneskimchimera;

  // Skim will say how many hits it has written to disk per read id
  //  used for reducing skim hits afterwards
  vector<uint32> AS_writtenskimhitsperid;

  // Skim will also give back info what the best overlap criterion levels
  //  are for left and right overlaps
  // also used to reduce skim hits by getting rid of unnecessary matches
  vector<uint8> AS_overlapcritlevell;
  vector<uint8> AS_overlapcritlevelr;

  // if a read is well connected, i.e. has long overlaps (>50% of read)
  // currently initialised with false
  // set to true if AS_overlapcritlevell & -r both < 255
  vector<bool> AS_wellconnected;


  /* temporary use for vector clipping
     holds space for clipped! sequence, more is not needed */
  vector<vector<uint32> > AS_readhitmiss;
  vector<vector<uint32> > AS_readhmcovered;
  vector<uint32>          AS_count_rhm;


  vector<int32>           AS_clipleft;       // holds result of clipping
  vector<int32>           AS_clipright;      //  until can be applied


  vector<int8>   AS_used_ids;         /* IDs which have already been
					  put into a contig are marked
					  with 1
					 IDs which could not be loaded
					  or are too small are marked -1
					 IDs not used yet are marked 0 */

  vector<uint8>  AS_multicopies;   /* reads with more overlaps than 
				      expected on average have 1 here
				      pathfinder will start building
				      elsewhere, and include those
				      last
				   */

  vector<uint8>  AS_hasmcoverlaps;  /* reads that overlap with a read
				       that is categorised as multi-
				       copy get 1 here
				       initialised by pathfinder if
				       vector is empty (==once
				       every pass of MIRA)
				    */



  vector<uint32> AS_coverageperseqtype; /* This stores the median of all
					   average coverages encountered in
					   contigs of the last round (per
					   sequencing type). Used for contig
					   building in the next round. Also
					   used to analyse contig coverage
					   and refine AS_multicopies
					*/
  

  // has info whether a read is a troublemaker
  vector<uint8>  AS_istroublemaker;

  // has info whether a read is not in a contig
  //  (per pass)
  vector<uint8>  AS_isdebris;

  // has info whether a read once was not in contig
  //  (throughout all assembly) and hence all overlaps should be
  //  taken to ensure it has all possibilities to get back into a
  //  contig (no skim edge reduction)
  vector<bool>   AS_needalloverlaps;

  // this vector is for the repeat resolver after the building
  //  of contigs. if true, all overlaps with the read in question will 
  //  be computed
  // it's only temporarily filled
  vector<bool>   AS_readsforrepeatresolve;

  // the following three vectors are for computing troublemakers
  // they are only temporarily filled
  vector<uint32> AS_allrmbsok;
  vector<uint32> AS_probablermbsnotok;
  vector<uint32> AS_weakrmbsnotok; /* three vectors together define a ratio
				      with which a given read has problems
				      with weak rmb mismatches
				      if notok > ok, then the weights
				      of a read in the overlap graph will be
				      reduced */

  vector<int8>   AS_steps;    // 0 not done, 1 done, -1 need reeval

  
  vector<bool>   AS_seqtypespresent;

  list<usedtmpfiles_t> AS_usedtmpfiles;

  // saved data for deferred save in GFF3 format
  // TODO: not functional yet
  list<string> AS_gff3defer_names;           // contig names
  list<string> AS_gff3defer_paddedcons;      // padded contig cons
  list<string> AS_gff3defer_unpadded_cons;   // unpadded contig cons


  int64 AS_systemmemory; // initialised by constructor: how much mem the system has
                          // is 0 if info not available


  bool AS_454dosimpleedit;
  bool AS_needsskimfornastyrepeats;

  // NOTE(review): presumably "delete result files of earlier runs/passes"
  //  -- confirm in the implementation
  bool AS_deleteoldresultfiles;

  // if true, then tries to resume an assembly from checkpoint files
  // (mind the spelling of the member name: "asembly" with a single 's')
  bool AS_resumeasembly;

  bool AS_shouldrun_nfs_check;

  // the six log flags below are all set to the same value by
  //  setExtendedLog()
  bool AS_logflag_dumprailreads;
  bool AS_logflag_dumphashanalysis;
  bool AS_logflag_oclevel;
  bool AS_logflag_swbbcheck;
  bool AS_logflag_adsdump;
  bool AS_logflag_dumpusedids;

  //Functions

/*************************************************************************
 *
 *  assembly_dataprocessing
 *
 *************************************************************************/
private:
  void mergeSSAHA2SMALTVecScreenData(const string & ssahafile,
				     bool issmalt,
				     const string & tmpname,
				     const string & tmpprefix);
  void mergeTemplateInfo(const string & tifile,
			 const string & logname,
			 const string & logprefix);
  void performHashAnalysis(int32 version, 
			   const string prefix="", 
			   const string postfix="", 
			   const string tmpname="");
  uint64 performNewProposedCutbackClips(const string & tmpname,
					const string & tmpprefix);
  static void buntifyHelper(uint8 allowedfreq, 
			    uint8 basesperhash,
			    vector<Read::bposhashstat_t>::const_iterator bhsI,
			    vector<Read::bposhashstat_t>::const_iterator bhsE,
			    vector<uint8>::iterator tfI,
			    vector<uint8>::iterator tfE);
  void cutBackPossibleChimeras(const string & tmpname,
			       const string & tmpprefix,
			       const vector<int32> & chuntleftcut,
			       const vector<int32> & chuntrightcut,
			       vector<bool> & chimeracutflag);
  void clipBadSolexaEnds(const string & tmpname,
			 const string & tmpprefix);
  void performLowerCaseClipping(const string & tmpname, 
				const string & tmpprefix);
  void performQualAndMaskClips(const string & tmpname,
			       const string & tmpprefix);
  void performPool_MinimumQualityThreshold(const string & tmpname,
					   const string & tmpprefix);
  bool performRead_MinimumQualityThreshold(Read & actread,
					   base_quality_t minqual,
					   uint32 minnum);
  void performPool_AdaptorRightClip(const string & tmpname,
				    const string & tmpprefix,
				    const uint8 seqtype);
  void performMinimumLeftClips(bool onsltag, bool onmaskchar,
			       const string & tmpname,
			       const string & tmpprefix);
  void performMinimumRightClips(const string & tmpname,
				const string & tmpprefix);
  void performBadSequenceSearch(const string & tmpname,
				const string & tmpprefix);
  void correctContigs();               // TODO: have a look whether here?
  void calcPossibleSeqVectorClipoffs(int32 version=-1, 
				     const string prefix="", 
				     const string postfix="", 
				     const string tmpname="");
  void performSeqVectorClippings();
  void performSRMRightClippings();

  void extendADS(int32 version=-1, 
		 const string prefix="", 
		 const string postfix="", 
		 const string tmpname="");

  void clipPolyATAtEnds(const string & tmpname,
			const string & tmpprefix);
  void clipPolyBaseAtEnd(const string & tmpname, 
			 const string & tmpprefix);
  bool searchPolyBaseFrom5Prime(Read & actread,
				const char polybase,
				int32 & lpolystart, 
				int32 & rpolyend, 
				const uint32 mincount, 
				const uint32 maxbad,
				int32 grace);
  bool searchPolyBaseFrom3Prime(Read & actread,
				const char polybase,
				int32 & lpolystart, 
				int32 & rpolyend, 
				const uint32 mincount, 
				const uint32 maxbad,
				int32 grace);
  void shredReadsIntoReadPool(ReadPool & sourcepool,
			      uint32 shredlen, 
			      uint32 shredoffset,
			      uint8 shredreadtype,
			      const string & shredstrain);
  void analyseOverlapHashProfile(vector<uint8> & profile,
				 vector<skimedges_t>::const_iterator seI,
				 ADSEstimator & adse);
  //void clipTo200();
  //void performHashEditing();

public:
  static void buntifyReadsByHashFreq(uint8 basesperhash,
				     ReadPool & rp);
 

/*************************************************************************
 *
 *  estassembly
 *
 *************************************************************************/
public:
  vector<string> assembleESTs();


/*************************************************************************
 *
 *  assembly_reduceskimhits
 *
 *************************************************************************/
private:
  void reduceSkimHits4(int32 version=-1,
		       const string prefix="", 
		       const string postfix="", 
		       const string tmpname="");
  void rsh4_countTotalSkimsTaken();
  void rsh4_denormaliseSkimHits(const string & dnsfile,
				list<int64> & idblocks, 
				vector<uint64> & blockpos,
				vector<size_t> & blocklen);
  size_t rsh4_getNextNormalisedSkimBlock(list<int64> & idblocks, 
					 int64 & blockstartid, 
					 int64 & blockendid);
  size_t rsh4_loadNormalisedSkimHitBlock(const string & filename, 
					 size_t skimindex, 
					 int64 blockstartid, int64 blockendid,
					 int8 rid1dir, int8 rid2dir);
  void rsh4_filterSkimHits(const string & oldfilename, 
			   const string & newfilename,
			   size_t & skimindex);
  void rsh4_takeThisSkim(vector<skimedges_t>::const_iterator seI,
			 ADSEstimator & adse,
			 bool calcadse);
  void rsh4_getNextSkimBlock(const string & dnsfile, 
			     uint32 blocki,
			     const vector<uint64> & blockpos,
			     const vector<size_t> & blocklen);
  void rsh4_purgeSkimsOfReadsCutByChimera(string & filename);
  void rsh4_takeRailHits(const string & dnsfile, 
			       const vector<uint64> & blockpos,
			       const vector<size_t> & blocklen);
  void rsh4_flagMulticopyReads(const string & dnsfile, 
			       const vector<uint64> & blockpos,
			       const vector<size_t> & blocklen);
  void rsh4_weedOutBadHits(const string & dnsfile, 
			   const vector<uint64> & blockpos,
			   const vector<size_t> & blocklen);
  void rsh4_take100PCMappingHits(const string & dnsfile, 
				 const vector<uint64> & blockpos, 
				 const vector<size_t> & blocklen);
  void rsh4_takeNonReptHitsThatExtend(uint32 nbest,
				      uint8 minscoreratio,
				      const string & dnsfile,
				      const vector<uint64> & blockpos,
				      const vector<size_t> & blocklen);
  void rsh4_takeReptPEPEHitsThatExtend(uint32 nbest, 
				       uint8 minscoreratio,
				      const string & dnsfile,
				      const vector<uint64> & blockpos,
				      const vector<size_t> & blocklen);
  void rsh4_takeReptNPENPEHitsThatExtend(uint32 nbest, 
					 uint8 minscoreratio,
				      const string & dnsfile,
				      const vector<uint64> & blockpos,
				      const vector<size_t> & blocklen);
  void rsh4_takeReptPENPEHitsThatExtend(uint32 nbest, 
					uint8 minscoreratio,
				      const string & dnsfile,
				      const vector<uint64> & blockpos,
				      const vector<size_t> & blocklen);
  void rsh4_takeNonTakenReadsWithHitsThatExtend(uint32 nbest, 
						uint8 minscoreratio,
						const string & dnsfile,
						const vector<uint64> & blockpos,
						const vector<size_t> & blocklen);
  void rsh4_takeNonTakenSideExtends(uint32 nbest, 
				    uint8 minscoreratio,
				    const string & dnsfile,
				    const vector<uint64> & blockpos,
				    const vector<size_t> & blocklen);
  void rsh4_takeNonTakenReadsWithHits(uint32 nbest, 
				      const string & dnsfile,
				      const vector<uint64> & blockpos,
				      const vector<size_t> & blocklen);
  size_t rsh4_tNTSEhelper(uint32 nbest,
			  ADSEstimator & adse,
			  vector<vector<skimedges_t>::const_iterator> & sev);
  void rsh4_takeAll(const string & dnsfile,
		    const vector<uint64> & blockpos,
		    const vector<size_t> & blocklen);
  void rsh4_takeNeedAllOverlaps_weakgood(const string & dnsfile,
					 const vector<uint64> & blockpos,
					 const vector<size_t> & blocklen);
  void rsh4_takeSolexaByCritLevel(uint32 nbest,
				  const string & dnsfile,
				  const vector<uint64> & blockpos,
				  const vector<size_t> & blocklen);
  void rsh4_takeTemplateOverlaps(const string & dnsfile,
				 const vector<uint64> & blockpos,
				 const vector<size_t> & blocklen);


/*************************************************************************
 *
 *  assembly_io
 *
 *************************************************************************/
private:
  void loadSequenceData_resume();
  void loadSequenceData_new();
  void checkForReadNameLength(uint32 stoplength);
  void loadExternalSCFQualities();

  size_t loadFOFNEXP(const string & fofn,
		     const uint8 seqtype,
		     const uint8 loadaction,
		     uint32 & longestread);
  size_t loadFASTA(const string & fastafile, 
		   const string & fastaqualfile,
		   const uint8 seqtype,
		   const uint8 loadaction,
		   uint32 & longestread);
  size_t loadFASTQ(const string & fastqfile, 
		   const uint8 seqtype,
		   const uint8 loadaction,
		   uint32 & longestread);
  size_t loadPHD(const string & phdfile,
		 const uint8 loadaction,
		 uint32 & longestread);
  size_t loadCAF(const string& caffile,
		 const uint8 seqtype,
		 const uint8 loadaction,
		 vector<uint32> & lrperseqtype);
  // NOTE(review): the first parameter is called "caffile" although this
  //  is the MAF loader (same parameter list as loadCAF()). Parameter
  //  names in a declaration have no effect on callers, so renaming it
  //  to "maffile" would be safe.
  size_t loadMAF(const string& caffile,
		 const uint8 seqtype,
		 const uint8 loadaction,
		 vector<uint32> & lrperseqtype);
 
  void loadBackboneMAF(const string & maffile);
  void loadBackboneCAF(const string & caffile);
  void loadBackboneGFF3(const string & gff3file);
  void loadBackboneGBF(const string & gbffile);
  void loadBackboneFASTA(const string & fastafile, 
			 const string & fastaqualfile);
  void postLoadBackbone();
  size_t addRailsToBackbones(const bool simulateonly);

  void loadStrainData(const string & sdfile);

  void postLoad();
  void dumpSomeStatistics();

  void saveExtTmpContig(Contig & con, string basename);
 


public:
  void dumpContigs();
  void loadSequenceData();
  void dumpRailReads(ofstream & fout);

  string buildDefaultCheckpointFileName(const string & filename);
  string buildDefaultInfoFileName(int32 version, 
				  const string & prefix, 
				  const string & postfix, 
				  const string & basename, 
				  const string & defaultname, 
				  const string & defaultextension,
				  bool removeold=false);
  string buildDefaultResultsFileName(int32 version, 
				     const string & prefix, 
				     const string & postfix, 
				     const string & basename, 
				     const string & defaultname, 
				     const string & defaultextension,
				     bool removeold=false);
  string getContigReadListFilename(int32 version=-1, 
				   const string & prefix="", 
				   const string & postfix="", 
				   const string & crlname="");
  void saveContigReadList(int32 version=-1, 
			  const string & prefix="", 
			  const string & postfix="", 
			  const string & crlname="",
			  bool deleteoldfile=false);
  string getStatisticsFilename(int32 version=-1, 
			       const string & prefix="", 
			       const string & postfix="", 
			       const string & statname="");
  void saveStatistics(int32 version=-1, 
		      const string & prefix="", 
		      const string & postfix="", 
		      const string & statname="",
		      bool deleteoldfile=false);
  string getAssemblyInfoFilename(int32 version=-1, 
			       const string & prefix="", 
			       const string & postfix="", 
			       const string & statname="");
  void saveAssemblyInfo(int32 version=-1, 
			const string & prefix="", 
			const string & postfix="", 
			const string & statname="",
			bool deleteoldfile=false);
  void saveDebrisList(int32 version=-1, 
		      const string & prefix="", 
		      const string & postfix="", 
		      const string & debrisname="");
  string getReadTagListFilename(int32 version=-1,
				const string & prefix="", 
				const string & postfix="", 
				const string & rtlname="");
  void saveReadTagList(int32 version=-1, 
		       const string & prefix="", 
		       const string & postfix="", 
		       const string & rtlname="",
		       bool deleteoldfile=false);
  string getConsensusTagListFilename(int32 version=-1, 
				     const string & prefix="", 
				     const string & postfix="", 
				     const string & ctlname="");
  void saveConsensusTagList(int32 version=-1, 
			    const string & prefix="", 
			    const string & postfix="", 
			    const string & ctlname="",
			    bool deleteoldfile=false);
  void saveSNPList(int32 version=-1, 
		   const string & prefix="", 
		   const string & postfix="", 
		   const string & saname="",
		   bool deleteoldfile=false);
  void saveFeatureAnalysis(int32 version=-1, 
			   const string & prefix="", 
			   const string & postfix="", 
			   const string & faname="",
			   const string & fsname="",
			   const string & fcname="",
			   bool deleteoldfile=false);
  string getFASTAFilename(int32 version=-1,
			  const string & prefix="", 
			  const string & postfix="", 
			  const string & fastaname="");
  string getFASTAPaddedFilename(int32 version=-1,
				const string & prefix="", 
				const string & postfix="", 
				const string & fastaname="");
  void saveAsFASTA(int32 version=-1, 
		   const string & prefix="", 
		   const string & postfix="", 
		   const string & fastaname="",
		   bool deleteoldfile=false);
  void saveStrainsAsFASTAQUAL(int32 version=-1,
			      const string & prefix="",
			      const string & postfix="",
			      const string & fastaname="",
			      bool deleteoldfile=false);
  string getTCSFilename(int32 version=-1, 
			const string & prefix="", 
			const string & postfix="", 
			const string & txtname="");
  void saveAsTCS(int32 version=-1, 
		 const string & prefix="", 
		 const string & postfix="", 
		 const string & tcsname="",
		 bool deleteoldfile=false);
  string getCAFFilename(int32 version=-1, 
			const string & prefix="", 
			const string & postfix="", 
			const string & cafname="");
  void saveAsCAF(int32 version=-1, 
		 const string & prefix="", 
		 const string & postfix="", 
		 const string & cafname="",
		 bool deleteoldfile=false);
  // MAF output: file name getter and the matching save routine.
  // NOTE(review): the last string parameter is called "cafname" in both
  //  declarations although they deal with MAF (same parameter lists as
  //  getCAFFilename()/saveAsCAF()); "mafname" would be the fitting name.
  string getMAFFilename(int32 version=-1, 
			const string & prefix="", 
			const string & postfix="", 
			const string & cafname="");
  void saveAsMAF(int32 version=-1, 
		 const string & prefix="", 
		 const string & postfix="", 
		 const string & cafname="",
		 bool deleteoldfile=false);
  string getTXTFilename(int32 version=-1, 
			const string & prefix="", 
			const string & postfix="", 
			const string & txtname="");
  void saveAsTXT(int32 version=-1, 
		 const string & prefix="", 
		 const string & postfix="", 
		 const string & txtname="",
		 bool deleteoldfile=false);
  string getACEFilename(int32 version=-1, 
			const string & prefix="", 
			const string & postfix="", 
			const string & txtname="");
  void saveAsACE(int32 version=-1, 
		 const string & prefix="", 
		 const string & postfix="", 
		 const string & acename="",
		 bool deleteoldfile=false);
  // Wiggle output: file name getter and the matching save routine.
  // NOTE(review): the last string parameter is called "txtname" in the
  //  getter and "acename" in the save routine (same parameter lists as
  //  the TXT/ACE functions); a common name such as "wigglename" would be
  //  clearer.
  string getWiggleFilename(int32 version=-1, 
			   const string & prefix="", 
			   const string & postfix="", 
			   const string & txtname="");
  void saveAsWiggle(int32 version=-1, 
		    const string & prefix="", 
		    const string & postfix="", 
		    const string & acename="",
		    bool deleteoldfile=false);
  string getGAP4DAFilename(int32 version=-1,
			   const string & prefix="", 
			   const string & postfix="", 
			   const string & dirname="");
  void saveAsGAP4DA(int32 version=-1,
		    const string & prefix="", 
		    const string & postfix="", 
		    const string & dirname="",
		    bool deleteoldfile=false);
  string getHTMLFilename(int32 version=-1, 
			const string & prefix="", 
			const string & postfix="", 
			const string & txtname="");
  void saveAsHTML(int32 version=-1, 
		  const string & prefix="", 
		  const string & postfix="", 
		  const string & htmlname="",
		  bool deleteoldfile=false);


/*************************************************************************
 *
 *  assembly_repeatresolve
 *
 *************************************************************************/
private:
  static bool ma_takeMCandOverlapWMC(Assembly & as, int32 rid1, int32 rid2);
  bool buildRepeatContigs(const int32 passnr);
public:
  void prework();


/*************************************************************************
 *
 *  assembly_swalign
 *
 *************************************************************************/
private:
  void setupAlignCache(vector<Align> & aligncache);
  void makeAlignmentsFromPosMatchFile(
    const string & filename,
    const int32 version,
    const int8 direction,
    const bool trans100percent, 
    bool (* checkfunction)(Assembly&,int32,int32),
    ofstream & matchfout, 
    ofstream & rejectfout);

  void computeSWAlign(list<AlignedDualSeq> & madsl,
		      uint32 rid1,
		      uint32 rid2,
		      int32 eoffset,
		      int8 direction,
		      vector<Align> & chkalign);

  // Predicates on a pair of read ids. Their signature is the one of the
  //  "checkfunction" parameter of makeAlignments() and
  //  makeAlignmentsFromPosMatchFile(), i.e. they can be passed there.
  // NOTE(review): presumably returning true means "compute the alignment
  //  for rid1/rid2" -- confirm in the implementation.
  static bool ma_takeall(Assembly & as, int32 rid1, int32 rid2);
  static bool ma_needRRFlag(Assembly & as, int32 rid1, int32 rid2);
  static bool ma_needRRFlagAndBothCRMr(Assembly & as, int32 rid1, int32 rid2);
  static bool ma_needSRMrOrTwoCRMr(Assembly & as, int32 rid1, int32 rid2);

  void makeAlignments(bool (* checkfunction)(Assembly&,int32,int32),
		      bool takefullskimfilenames,
		      const bool trans100percent, 
		      int32 version=-1, 
		      const string prefix="", 
		      const string postfix="", 
		      const string tmpname="");
  void loadAlignmentsFromFile(int32 version=-1, 
			      const string prefix="", 
			      const string postfix="", 
			      const string tmpname="");
  uint32 getOverlapMalusDivider(int32 id1, int32 id2);

  void clusterUnassembledReads(vector<int32> & clusteridperread,
			       vector<list<int32> > & readinclusterlist,
			       const vector<int8> & usedids);

  void minimiseMADSL(list<AlignedDualSeq> & madsl);
  void cleanupMADSL(list<AlignedDualSeq> & madsl,
		    uint32 i,
		    uint32 j,
		    int8 direction,
		    bool flag_stronggood,
		    bool flag_weakgood, 
		    bool flag_belowavgfreq, 
		    bool flag_norept,
		    bool flag_rept);

  int32 checkADSForRepeatMismatches(AlignedDualSeq & ads);
  int32 checkADSForRepeatMismatches_wrapped(AlignedDualSeq & ads, bool & need2ndpass);
  void transcribeHits(AlignedDualSeq & anads);

  void recalcNonPerfectSkimMappingsBySW(int32 version=-1);
  void rnpskmbs_helper(const string & filename, const int32 version, const int8 direction);


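/*************************************************************************
 *
 *  assembly_repeatresolve
 *
 *  NOTE(review): this heading is a repetition of the one of an earlier
 *   section in this class; the declarations that follow (init(),
 *   zeroVars(), buildFirstContigs(), ...) look like they belong to a
 *   different implementation file -- confirm and rename the heading.
 *
 *************************************************************************/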

private:

  void foolCompiler();
  void init();
  void zeroVars();


  void ensureStandardDirectories(bool purge);

//  void markNastyReapeatsWithSkim(int32 version=-1, 
//			    const string prefix="", 
//			    const string postfix="", 
//			    const string tmpname="");
  void findPossibleOverlaps(int32 version=-1, 
			    const string prefix="", 
			    const string postfix="", 
			    const string tmpname="");
  void flagMulticopyReads(const vector<uint32> & overlapcounter,
			  const string & tmpfilename);



  void performSnapshot(uint32 actpass);

  bool buildFirstContigs(const int32 loopnr, 
			 const EDITParameters & eparams,
			 const bool lastloop);
  uint32 bfc_killIntermediateSinglets();
  void bfc_moveSmallClustersToDebris();
  bool bfc_checkIfContigMeetsRequirements(Contig & con);
  void bfc_markRepReads(Contig & con);
  void bfc_storeContig(Contig & con, 
		       uint32 & numcontigs, 
		       const bool mustmarkrepeats, 
		       const int32 passnr, 
		       const bool lastpass);
  void transferContigReadsToReadpool(const Contig & buildcon,
				     list<Contig::pbdse_t> & pbdsev,
				     int32 passnr);
  void transferContigReadTagsToReadpool(const Contig & con,
					const list<Contig>::const_iterator bbContigI);

  void makeNewReadPoolFromContigs();

  bool checkPossibleMatch(const possible_overlaps_t & poverl, 
			  const uint32 i,
			  const uint32 j) const;

  void search454Overcalls(AlignedDualSeq & anads);
  void dump454OvercallsArrays();
  uint32 perform454OvercallEdits(bool qualonly);

  void huntPossibleContigJoinGrinches(int32 version=-1, 
				      const string prefix="", 
				      const string postfix="", 
				      const string tmpname="");
  void huntSpoilSports(Contig & chkcon);

  bool isReadGrinch(int32 thereadid, 
		    int32 incontigid, 
		    vector<int32> & readstocheck, 
		    vector<int32> & contigidofendreads);
  void transferContigRMBTagsToPermanentOverlapBans(Contig & con);
  void banReadPairGroups(const vector<int32> & g1, const vector<int32> & g2);


  void editSimple454Overcalls(const uint32 pass);
  

  //-----------------------------------------------------------------


  void dmi_dumpALine(ostream & ostr, const char * desc, 
		     size_t numelem, 
		     size_t bytes_size, 
		     size_t freecapacity, 
		     size_t lostbyalign);



  void preassembleTasks(bool usereadextension, bool clipvectorleftovers);

  bool checkTerminationRequest();

public:
  Assembly(vector<MIRAParameters> & params, bool resumeassembly);
  ~Assembly();

  void discard();
  void dumpMemInfo();

  // Stores the given function pointer in AS_contigbuilt_callbackfunc.
  // Calling it without an argument resets the pointer to NULL.
  inline void setContigBuiltCallback(void (*cbfunc)(Contig &, const ReadPool &) = NULL)
  {
    AS_contigbuilt_callbackfunc = cbfunc;
  }
  void assemble();
  void saveResults();

  static bool markRepeats(Contig & con, vector<bool> & readsmarkedsrm);

  static void refreshContigAndReadpoolValuesAfterLoading(
    ReadPool & rp,
    list<Contig> & contigs);

  const Read & getRead(uint32 index);
  // Non-const access to the contig list of this assembly (AS_contigs).
  list<Contig> & getContigs()
  {
    return AS_contigs;
  }
  // Non-const access to the read pool of this assembly (AS_readpool).
  ReadPool & getReadPool()
  {
    return AS_readpool;
  }

  void checkForNFSMountOnTmpDir();

  uint32 cleanupOldFile(const string & basename, const string & filename);
  // NOTE(review): "removeold" defaults to true here, whereas
  //  buildDefaultInfoFileName() and buildDefaultResultsFileName() declare
  //  it with a default of false -- confirm that the difference is wanted
  //  before relying on the default.
  string buildFileName(int32 version=-1, 
		       const string & prefix="", 
		       const string & postfix="", 
		       const string & basename="",
		       const string & suffix="",
		       const string & dirname="",
		       bool removeold=true);


  void test();

  // Sets every AS_logflag_* member to the value of f.
  void setExtendedLog(bool f)
  {
    AS_logflag_dumprailreads
      = AS_logflag_dumphashanalysis
      = AS_logflag_oclevel
      = AS_logflag_swbbcheck
      = AS_logflag_adsdump
      = AS_logflag_dumpusedids
      = f;
  }

};


#endif



