/*
 * Written by Bastien Chevreux (BaCh)
 *
 * Copyright (C) 1997-2000 by the German Cancer Research Center (Deutsches
 *   Krebsforschungszentrum, DKFZ Heidelberg) and Bastien Chevreux
 * Copyright (C) 2000 and later by Bastien Chevreux
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the 
 * Free Software Foundation, Inc., 
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 * 
 */


// Version-control identification string (holds the "$Id$" keyword).
// It is only defined when `lint` is not defined. dumpContigs() references
// it through `(void) vcid`, presumably to silence unused-variable warnings.
#ifndef lint
static char vcid[] = "$Id$";
#endif /* lint */


#include "assembly.H"
#include "maf_parse.H"

#include "caf/caf.H"

#include <boost/regex.hpp>

// Debug-output macros. Both expand to nothing here, so every CEBUG(...) /
// CEBUGF(...) use in this file compiles away. A printing variant of CEBUG
// is kept as a commented-out #define further down in this file.
#define CEBUG(bla)
#define CEBUGF(bla)





/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

void Assembly::dumpContigs()
{
  (void) vcid;

  cout << "The assembled project has " << AS_contigs.size() << " objects.\n";

  Contig::setCoutType(Contig::AS_TEXT);
  list<Contig>::iterator I=AS_contigs.begin();
  while(I!=AS_contigs.end()){
    cout << *I++ << "\n";
  }
}



/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

//#define CEBUG(bla)   {cout << bla; cout.flush(); }

void Assembly::loadSequenceData()
{
  FUNCSTART("void Assembly::loadSequenceData()");

#ifdef TIMERESTRICTED
  if(AS_timesup) {
    cerr << "\n\nThis version of MIRA is old, please get a newer version of the assembler.\n";
    cerr << "\nCanonical download page: http://www.chevreux.org/mira_downloads.html\n";
    exit(0);
  }
#endif

  discard();

  if(AS_miraparams[0].getAssemblyParams().as_dateoutput) dateStamp(cout);
  cout << "\n";

  if(AS_resumeasembly){
    loadSequenceData_resume();
  }else{
    loadSequenceData_new();
  }
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

void Assembly::loadSequenceData_resume()
{
  FUNCSTART("void Assembly::loadSequenceData_resume()");

  assembly_parameters const & as_fixparams= AS_miraparams[0].getAssemblyParams();

  vector<uint32> lrperseqtype(Read::SEQTYPE_END,0);
  
  size_t seqsloaded=loadMAF(buildDefaultCheckpointFileName(as_fixparams.as_infile_chkptMAF),
			    Read::SEQTYPE_SANGER,
			    1,                    // load directly
			    lrperseqtype);

  dumpSomeStatistics();

  FUNCEND();
}

/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

//#define CEBUG(bla)   {cout << bla; cout.flush(); }
// Loads a new (non-resumed) data set into AS_readpool and applies the
// load-time clipping steps.
//
// Order of work as visible below:
//  1. throw if no sequencing type has as_load_sequencedata set
//  2. if requested, load the backbone (CAF, FASTA, GBF, MAF or GFF3) and
//     call postLoadBackbone()
//  3. run the per-seqtype loaders twice: first with loadaction 0, then
//     with loadaction 2; after the first pass, readpool capacity is
//     reserved and backbone rails are added
//  4. check read name lengths, load external SCF qualities, merge XML
//     trace info
//  5. Solexa 'N' prefix hack, strain data, 454 clip workaround
//  6. postLoad(), then the clipping passes enabled in the parameters
//  7. write the pool info list (and, if enabled, the rail reads) into the
//     temporary directory
//
// Fatal input problems are reported through `throw Notify(...)` and
// MIRANOTIFY(...).
void Assembly::loadSequenceData_new()
{
  FUNCSTART("void Assembly::loadSequenceData_new()");

  // parameters of index 0 are used for all settings which are not looked
  //  up per sequencing type in this function
  assembly_parameters const & as_fixparams= AS_miraparams[0].getAssemblyParams();

  // at least one sequencing type must be flagged for loading
  bool foundsomethingtoload=false;
  for(uint32 i=0; i<Read::SEQTYPE_END; i++){
    if(AS_miraparams[i].getAssemblyParams().as_load_sequencedata) foundsomethingtoload=true;
  }
  if(!foundsomethingtoload){
    throw Notify(Notify::FATAL, THISFUNC, "You did not specify any sequence technology as input to be loaded\neither via --job=... or via -LR:lsd.") ;
  }


  CEBUG("AS_readpool.capacity(): " << AS_readpool.capacity() << endl);
  CEBUG("AS_readpool.size(): " << AS_readpool.size() << endl);

  // backbone first: the loader is chosen by the as_infile_backbone_which string
  if(as_fixparams.as_loadbackbone){
    if(as_fixparams.as_infile_backbone_which=="caf"){
      loadBackboneCAF(as_fixparams.as_infile_backbone_CAF);
    }else if(as_fixparams.as_infile_backbone_which=="fasta"){
      loadBackboneFASTA(as_fixparams.as_infile_backbone_FASTA,
			as_fixparams.as_infile_backbone_FASTAQUAL);
    }else if(as_fixparams.as_infile_backbone_which=="gbf"){
      loadBackboneGBF(as_fixparams.as_infile_backbone_GBF);
    }else if(as_fixparams.as_infile_backbone_which=="maf"){
      loadBackboneMAF(as_fixparams.as_infile_backbone_MAF);
    }else if(as_fixparams.as_infile_backbone_which=="gff3"){
      loadBackboneGFF3(as_fixparams.as_infile_backbone_GFF3);
    }else{
      MIRANOTIFY(Notify::FATAL, "wrong type (" << as_fixparams.as_infile_backbone_which << ") of file to load for backbone, sorry") ;
    }

    CEBUG("AS_readpool.capacity(): " << AS_readpool.capacity() << endl);
    CEBUG("AS_readpool.size(): " << AS_readpool.size() << endl);
    postLoadBackbone();
    CEBUG("AS_readpool.capacity(): " << AS_readpool.capacity() << endl);
    CEBUG("AS_readpool.size(): " << AS_readpool.size() << endl);


  }


  // longest read length seen, per sequencing type and overall
  vector<uint32> lrperseqtype(Read::SEQTYPE_END,0);
  uint32 longestread=0;

  // loadaction:
  //  0 = count only
  //  1 = count, adjust readpool capacity and load
  //  2 = load only
  // (implemented by readpool and CAF loading routines)
  //
  // The loop below increments by 2, i.e. it runs with loadaction 0 and
  //  then with loadaction 2; the value 1 is not used here.

  size_t totalseqstoload=0;
  for(uint8 loadaction=0; loadaction < 3; loadaction+=2){
    totalseqstoload=0;
    for(uint32 seqtype=0; seqtype<Read::SEQTYPE_END; seqtype++){
      size_t seqstoload=0;
      // NOTE(review): starts at 1, not 0. If a loader leaves lread
      //  untouched, the "longestread==0" check further down cannot
      //  trigger for this seqtype -- confirm this is intended.
      uint32 lread=1;
      assembly_parameters const & as_rtparams= AS_miraparams[seqtype].getAssemblyParams();
      //cout << "ST: " << seqtype << endl;
      if(as_rtparams.as_load_sequencedata){
	// the loader is chosen by the as_infile_which string of this seqtype
	if(as_rtparams.as_infile_which=="fofnexp"){
	  //if(seqtype!=0){
	  //  MIRANOTIFY(Notify::FATAL, "Cannot load 'fofnexp' with anything else than Sanger at the moment.");
	  //}
	  seqstoload=loadFOFNEXP(as_rtparams.as_infile_FOFNEXP, seqtype, loadaction, lread);
	}else if(as_rtparams.as_infile_which=="fasta"){
	  seqstoload=loadFASTA(as_rtparams.as_infile_FASTA,
				     as_rtparams.as_infile_FASTAQUAL,
				     seqtype,
				     loadaction, 
				lread);
	}else if(as_rtparams.as_infile_which=="fastq"){
	  seqstoload=loadFASTQ(as_rtparams.as_infile_FASTQ,
			       seqtype,
			       loadaction,
			       lread);
	}else if(as_rtparams.as_infile_which=="maf"){
	  MIRANOTIFY(Notify::INTERNAL, "Need to write code for loading MAF.\n");
	}else if(as_rtparams.as_infile_which=="caf"){
	  // the CAF loader reports longest reads per seqtype; fold them
	  //  into the overall table and take the maximum as lread
	  vector<uint32> lrpst(Read::SEQTYPE_END,0);
	  seqstoload=loadCAF(as_rtparams.as_infile_CAF,
			     seqtype,
			     loadaction,
			     lrpst);
	  lread=0;
	  for(uint32 i=0; i<lrpst.size(); i++){
	    lrperseqtype[i]+=lrpst[i];
	    lread=max(lread,lrpst[i]);
	  }
	}else if(as_rtparams.as_infile_which=="phd"){
	  if(seqtype!=0){
	    MIRANOTIFY(Notify::FATAL, "Cannot load 'phd' with anything else than Sanger at the moment.");
	  }
	  seqstoload=loadPHD(as_rtparams.as_infile_PHD,
			     loadaction,
			     lread);
	}else{
	  MIRANOTIFY(Notify::FATAL, "While trying to load sequence data: type '" << as_rtparams.as_infile_which << "' is unknown (is there a typo?)") ;
	}
	if(loadaction==0) {
	  cout << Read::getNameOfSequencingType(seqtype) << " will load " << seqstoload << " reads.\n";
	}else{
	  cout << "Loaded " << seqstoload << " " << Read::getNameOfSequencingType(seqtype) << " reads.\n";
	}
	totalseqstoload+=seqstoload;

	// NOTE(review): for CAF input this assignment overwrites the value
	//  accumulated with += above for index 'seqtype' -- confirm.
	lrperseqtype[seqtype]=lread;
	longestread=max(longestread,lread);

	CEBUG("AS_readpool.capacity(): " << AS_readpool.capacity() << endl);
	CEBUG("AS_readpool.size(): " << AS_readpool.size() << endl);
      }
    }

    // report the longest reads (only in the first pass, as loadaction is
    //  either 0 or 2 here)
    if(loadaction<2){
      for(uint32 i=0; i<Read::SEQTYPE_END; i++){
#ifdef HIDEPACBIO
	if(i==Read::SEQTYPE_PACBIO) continue;
#endif
	cout << "Longest " << Read::getNameOfSequencingType(i) << ": " << lrperseqtype[i] << endl;
      }
      cout << "Longest overall: " << longestread << endl;
    }

    if(loadaction==0) {
      cout << "Total reads to load: " << totalseqstoload << endl;
    }else{
      cout << "Total reads loaded: " << totalseqstoload << endl;
    }

    if(totalseqstoload==0){
      MIRANOTIFY(Notify::FATAL, "No input to load or no sequences in input files? Did you provide data to load?");
    }
    if(longestread==0){
      MIRANOTIFY(Notify::FATAL, "No read with sequence length >0 present? Did you provide data to load?");
    }

    // parameters left at 0 by the user are derived from the read lengths
    //  seen so far (this block is evaluated in both passes)
    if(as_fixparams.as_loadbackbone){
      // if wanted, determine solexahack parameter automatically
      //  (15% of the longest Solexa read)
      if(AS_miraparams[0].getAlignParams().al_solexahack_maxerrors==0
	 && lrperseqtype[Read::SEQTYPE_SOLEXA]){
	cout << "-AL:shme is 0, automatically determining optimal value.\n"; 
	AS_miraparams[0].getNonConstAlignParams().al_solexahack_maxerrors=
	  lrperseqtype[Read::SEQTYPE_SOLEXA]*15/100;
	cout << "set -AL:shme " 
	     << AS_miraparams[0].getAlignParams().al_solexahack_maxerrors
	     << '\n';
      }
      
      // if wanted, determine rail length and overlap automatically
      if(AS_miraparams[0].getAssemblyParams().as_backbone_raillength == 0){
	cout << "-SB:brl is 0, automatically determining optimal value.\n"; 
	
	// add 15% to longest read (to accommodate insertions), then times 2;
	//  the result is capped at 18000
	uint32 newraillength=(longestread*115/100) * 2;
	if(newraillength > 18000){
	  cout << "Optimal rail would be longer than 18k, adjusting down to 18k.\n";
	  newraillength=18000;
	}
	AS_miraparams[0].getNonConstAssemblyParams().as_backbone_raillength=newraillength;
	cout << "brl: " 
	     << AS_miraparams[0].getNonConstAssemblyParams().as_backbone_raillength
	     << '\n';
      }
      // automatic rail overlap: half of the rail length
      if(AS_miraparams[0].getAssemblyParams().as_backbone_railoverlap == 0){
	cout << "-SB:bro is 0, automatically determining optimal value.\n"; 
	AS_miraparams[0].getNonConstAssemblyParams().as_backbone_railoverlap=
	  AS_miraparams[0].getNonConstAssemblyParams().as_backbone_raillength/2;
	cout << "bro: " 
	     << AS_miraparams[0].getNonConstAssemblyParams().as_backbone_railoverlap
	     << '\n';
      }
      // the overlap must stay strictly smaller than the rail length
      if(AS_miraparams[0].getAssemblyParams().as_backbone_railoverlap >=
	 AS_miraparams[0].getAssemblyParams().as_backbone_raillength){
	cout << "-SB:bro is >= -SB:brl ... adjusting -SB:bro to (-SB:brl)-1\n";
	AS_miraparams[0].getNonConstAssemblyParams().as_backbone_railoverlap=
	  AS_miraparams[0].getNonConstAssemblyParams().as_backbone_raillength-1;
      }
    }

    // number of rail reads for which pool space must be reserved;
    //  addRailsToBackbones(true) is presumably a dry run which only counts
    //  (see the debug text below) -- TODO confirm
    uint32 bbrailstoreserve=0;
    if(as_fixparams.as_loadbackbone && loadaction<2){
      CEBUG("AS_readpool.capacity(): " << AS_readpool.capacity() << endl);
      CEBUG("AS_readpool.size(): " << AS_readpool.size() << endl);
      CEBUG("Simulating adding reads...\n" << endl);
      bbrailstoreserve=addRailsToBackbones(true);
      CEBUG("bbrailstoreserve: " << bbrailstoreserve << endl);
      CEBUG("AS_readpool.capacity(): " << AS_readpool.capacity() << endl);
      CEBUG("AS_readpool.size(): " << AS_readpool.size() << endl);
    }

    CEBUG("AS_readpool.capacity(): " << AS_readpool.capacity() << endl);
    CEBUG("AS_readpool.size(): " << AS_readpool.size() << endl);

    // after the first pass: reserve pool space for everything which is
    //  already in the pool, the reads counted above and the rails
    if(loadaction==0){
      // add 10 as safety
      uint32 totalreserve=AS_readpool.size()+totalseqstoload+bbrailstoreserve+10;

      cout << "Reserving space for reads";
      if(totalreserve >=500000){
	cout << " (this may take a while)";
      }
      cout << endl;
      AS_readpool.reserve(totalreserve);
      cout << "Reserved space for " << AS_readpool.capacity() << " reads";
      if(as_fixparams.as_loadbackbone){
	cout << " (including backbone rails)";
      }
      cout << '.' << endl;

      // at the moment, we have to insert rails before the real reads as skim
      //  won't work if they are after (*sigh* too much optimising)
      if(as_fixparams.as_loadbackbone){
	CEBUG("AS_readpool.capacity(): " << AS_readpool.capacity() << endl);
	CEBUG("AS_readpool.size(): " << AS_readpool.size() << endl);
	addRailsToBackbones(false);
	CEBUG("AS_readpool.capacity(): " << AS_readpool.capacity() << endl);
	CEBUG("AS_readpool.size(): " << AS_readpool.size() << endl);
      }
    }

  }

  CEBUG("AS_readpool.capacity(): " << AS_readpool.capacity() << endl);
  CEBUG("AS_readpool.size(): " << AS_readpool.size() << endl);

#if DEVELOPMENTVERSION != 0 
  dumpMemInfo();
#endif

  // throws if read names are longer than the configured limit (0 = no check)
  checkForReadNameLength(AS_miraparams[0].getSpecialParams().mi_stop_readnamelength);

  if(as_fixparams.as_load_sequencedata){
    loadExternalSCFQualities();
  }

  // merge XML trace info for every seqtype which was loaded and asks for it;
  //  errors are passed to Notify::handleError()
  for(uint32 i=0; i<Read::SEQTYPE_END; i++){
    assembly_parameters const & as_rtparams= AS_miraparams[i].getAssemblyParams();
    try{
      if(as_rtparams.as_load_sequencedata && as_rtparams.as_mergexmltraceinfo){
	if(as_fixparams.as_dateoutput) dateStamp(cout);
	cout << "\n";
	AS_readpool.mergeXMLTraceInfo(as_rtparams.as_infile_xmltraceinfo);
	cout << "\n\n";
      }
    }
    catch(Notify n){
      cout << "Error while loading XML data from the traceainfo file.\n\n";
      n.handleError(THISFUNC);
    }
  }


  // TODO:
  // Temporary hack. Put a 'N' in front of SOLEXA reads
  //  set the minimum clipoff and adjust values
  {
    CEBUG("Temp sxa hack: add N" << endl);
    for(uint32 ri=0; ri<AS_readpool.size(); ri++){
      if(AS_readpool[ri].isSequencingType(Read::SEQTYPE_SOLEXA)){
	if(AS_readpool[ri].getLenSeq()){
	  // only reads whose left seqvec clip is still <1 are touched
	  if(AS_readpool[ri].getLSClipoff()<1){
	    if(toupper(AS_readpool[ri].getSeqAsChar()[0])!='N'){
	      AS_readpool[ri].insertBaseInSequence('n',0,0,true);
	    }
	    // do not use LSClipoff(): SEQVEC has influence on quality readings
	    //  during the consensus calculation
	    AS_readpool[ri].setLQClipoff(1);
	  }
	}
      }
    }

    // both Solexa clip parameters are raised by one, whether or not any
    //  Solexa read was found by the loop above
    AS_miraparams[Read::SEQTYPE_SOLEXA].getNonConstAssemblyParams().as_clip_minslrequired++;
    AS_miraparams[Read::SEQTYPE_SOLEXA].getNonConstAssemblyParams().as_clip_minqlsetto++;
    CEBUG("done" << endl);
  }

  if(as_fixparams.as_loadstraindata){
    if(AS_miraparams[0].getAssemblyParams().as_dateoutput) dateStamp(cout);
    cout << "\n";
    loadStrainData(as_fixparams.as_infile_straindata);
    cout << "\n\n";
  }

  // find out what we have in the pool
  //  (backbone and rail reads do not count)
  AS_seqtypespresent.clear();
  AS_seqtypespresent.resize(Read::SEQTYPE_END,false);
  for(uint32 i=0; i< AS_readpool.size(); i++){
    if(AS_readpool.getRead(i).hasValidData()){
      if(!AS_readpool.getRead(i).isBackbone() && !AS_readpool.getRead(i).isRail()){
	AS_seqtypespresent[AS_readpool.getRead(i).getSequencingType()]=true;
      }
    }
  }

  // special handling for 454 data (20.09.2008)
  //  as there's no separate seqvec clip in the XML files (until now)
  //  and MIRA now uses a "clip back, extend later" strategy for 454
  //  reads, the 454 adaptor must be protected from extension as it
  //  happens often enough that two reads start the sequencing adaptor
  //  at the same time ... WHICH WOULD THEN BE UNCOVERED!
  //
  // E.g.
  //  ACCGTCAGTCAGTCAGTGTTGACGTGTCAccctgagacacgcaacaggggatagacaaggca
  //  ACCGTACGTCAG*CAGTGTTGACGTGTCAccctgagacacgcaacaggggatagacaaggca
  // 
  // Two possible workarounds
  // 1) instruct extendADS() not to extend into lower case (bad, relies
  //    on case information)
  // 2) transform the right qual clip into a vec clip if there is no 
  //    vec clip
  // 
  // we'll do number 2 here

#if CPP_READ_SEQTYPE_END != 6
#error "Check if new seqtype needs same workaround."
#endif
  // NOTE(review): the loop below is entered when 454 reads are present,
  //  but it does not test the sequencing type of the individual read --
  //  confirm this is intended.
  if(AS_seqtypespresent[Read::SEQTYPE_454GS20]){
    uint32 changecount=0;
    for(uint32 rnr=0; rnr<AS_readpool.size(); rnr++){
      if(!AS_readpool.getRead(rnr).isBackbone() && !AS_readpool.getRead(rnr).isRail()){
	if(AS_readpool.getRead(rnr).hasValidData()){
	  // if no right seq vec but a right clip 
	  if(AS_readpool.getRead(rnr).getRSClipoff() == static_cast<int32>(AS_readpool.getRead(rnr).getLenSeq())
	     && AS_readpool.getRead(rnr).getRQClipoff() != static_cast<int32>(AS_readpool.getRead(rnr).getLenSeq())){
	    // make right seq vec = right clip
	    AS_readpool.getRead(rnr).setRSClipoff(AS_readpool.getRead(rnr).getRQClipoff());
	    changecount++;
	  }
	}
      }
    }
    if(changecount){
      cout << "Note: " << changecount << " reads with 454 data had quality clips given, but no sequencing vector clip.\n"
	   << "For MIRA to run properly with read extension, those quality clips have been\n"
	   << "changed to sequencing vector clips.\n\n";
    }
  }

  postLoad();
  cout << "Have read pool with " << AS_readpool.size() << " reads.\n";

  dumpSomeStatistics();

  //Read::setCoutType(Read::AS_TEXTCLIPS);
  //cout << AS_readpool.getRead(0);

  {
    // Perform clippings

    // all clipping routines get the same log file name and log prefix
    string logname;
    logname=buildFileName(0,"","", 
			  AS_miraparams[0].getAssemblyParams().as_tmpf_clippings,
			  ".txt");
    string logprefix="load: ";
    
    // one switch per clipping kind; a switch is on as soon as any
    //  sequencing type present in the pool has the parameter set
    bool sc=false;
    bool qc=false;
    bool mc=false;
    bool llc=false;
    bool mlc=false;
    bool mrc=false;
    bool bss=false;
    bool cpae=false;
    bool c3pp=false;
    bool qt=false;
    bool ckar=false;
    bool bse=false;
    for(uint32 st=0; st<Read::SEQTYPE_END; st++){
      if(AS_seqtypespresent[st]){
	sc|=AS_miraparams[st].getAssemblyParams().as_clip_mergessahavectorscreen;
	qc|=AS_miraparams[st].getAssemblyParams().as_clip_quality;
	mc|=AS_miraparams[st].getAssemblyParams().as_clip_maskedbases;
	llc|=AS_miraparams[st].getAssemblyParams().as_clip_lowercase;
	mlc|=AS_miraparams[st].getAssemblyParams().as_clip_ensureminimumleftclipoff;
	mrc|=AS_miraparams[st].getAssemblyParams().as_clip_ensureminimumrightclipoff;
	bss|=AS_miraparams[st].getAssemblyParams().as_clip_badstretchquality;
	cpae|=AS_miraparams[st].getAssemblyParams().as_clip_polyat;
	c3pp|=AS_miraparams[st].getAssemblyParams().as_clip_3ppolybase;
	qt|=AS_miraparams[st].getAssemblyParams().as_clip_quality_minthreshold;
	ckar|=AS_miraparams[st].getAssemblyParams().as_clip_knownadaptorsright;
      }
    }
    // this one is currently not per seqtype, but set as COMMON_SETTINGS
    //  (it is read from parameter index 0)
    bse|=AS_miraparams[0].getAssemblyParams().as_clip_badsolexaends;

    if(bse && AS_seqtypespresent[Read::SEQTYPE_SOLEXA]) clipBadSolexaEnds(logname,logprefix);

    // adaptor clipping is run once per sequencing type present
    if(ckar){
      for(uint8 st=0; st< AS_seqtypespresent.size(); ++st) {
	if(AS_seqtypespresent[st]) performPool_AdaptorRightClip(logname,logprefix,st);
      }
    }

    if(qt) performPool_MinimumQualityThreshold(logname,logprefix);
    if(llc) performLowerCaseClipping(logname,logprefix);
    if(qc||mc) performQualAndMaskClips(logname,logprefix);
    
    if(bss){
      if(mlc){
	// if bad sequence search, the minimum left clip must be done before
	cout << "Clipping: requested bad sequence search clip and a minimum left clip"
	  "\nNeed to perform minimum left clip before bad sequence search clip.\n";
	performMinimumLeftClips(true,false,logname,logprefix);
	// but don't do a second minleftclip later
	mlc=false;
      }
      performBadSequenceSearch(logname,logprefix);
    }
    if(sc){
      if(AS_miraparams[0].getAssemblyParams().as_dateoutput) dateStamp(cout);
      cout << "\n";
      
      // each vector screen file is merged only if it can be opened;
      //  a file which cannot be opened is skipped silently
      bool present;
      {
	ifstream my_file(as_fixparams.as_infile_ssahavectorscreen.c_str());
	present=my_file.good();
      }
      if(present){
	mergeSSAHA2SMALTVecScreenData(as_fixparams.as_infile_ssahavectorscreen,
				      false,
				      logname,
				      logprefix);
      }
      {
	ifstream my_file(as_fixparams.as_infile_smaltvectorscreen.c_str());
	present=my_file.good();
      }
      if(present){
	mergeSSAHA2SMALTVecScreenData(as_fixparams.as_infile_smaltvectorscreen,
				      true,
				      logname,
				      logprefix);
      }
      cout << "\n\n";
    }
    if(c3pp){
      clipPolyBaseAtEnd(logname,logprefix);
    }
    if(cpae){
      clipPolyATAtEnds(logname,logprefix);
    }
    if(mlc){
      performMinimumLeftClips(true,false,logname,logprefix);
    }
    if(mrc){
      performMinimumRightClips(logname,logprefix);
    }
  }

  dumpSomeStatistics();

  // write the pool info to <dir_tmp>/<as_tmpf_poolinfo>.lst
  //  NOTE(review): the stream state is not checked after open()
  {
    ofstream fout;
    fout.open((AS_miraparams[0].getDirectoryParams().dir_tmp+'/'+as_fixparams.as_tmpf_poolinfo+".lst").c_str(), ios::out| ios::trunc);
    
    AS_readpool.dumpPoolInfo(fout);

    fout.close();
  }

  // optional extended log: dump of the rail reads
  if(AS_logflag_dumprailreads){
    ofstream fout;
    fout.open((AS_miraparams[0].getDirectoryParams().dir_tmp+"/elog.load.railreads.fasta").c_str(), ios::out| ios::trunc);
    dumpRailReads(fout);
    fout.close();
  }

  FUNCEND();
}

//#define CEBUG(bla)


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Writes every rail read of AS_readpool to 'fout'.
//
// The Read output type is switched to Read::AS_FASTA before streaming, so
// the reads are written in that format. Reads which are not rails are
// skipped. The stream is neither opened nor closed here; that is up to
// the caller.
void Assembly::dumpRailReads(ofstream & fout)
{
  Read::setCoutType(Read::AS_FASTA);
  for(uint32 i=0; i<AS_readpool.size(); ++i){
    if(AS_readpool[i].isRail()){
      // stream the read itself; streaming the result of isRail() would
      //  only write a boolean for every rail
      fout << AS_readpool[i];
    }
  }
}

/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/
// Stops with a fatal Notify (via MIRANOTIFY) if at least one read in
// AS_readpool has a name longer than 'stoplength' characters. All
// offending names are listed on stdout beforehand.
//
// stoplength: maximum allowed name length; 0 switches the check off.
void Assembly::checkForReadNameLength(uint32 stoplength)
{
  FUNCSTART("void Assembly::checkForReadNameLength(uint32 stoplength)");

  // a limit of 0 means: do not check at all
  if(stoplength==0) return;

  uint32 numtoolong=0;
  for(uint32 readid=0; readid<AS_readpool.size(); ++readid){
    if(!(AS_readpool[readid].getName().size()>stoplength)) continue;

    // the list header is printed once, before the first offender
    if(numtoolong==0) {
      cout << "List of read names which have problems with name length:\n";
    }
    cout << "Name too long: " << AS_readpool[readid].getName() << '\n';
    ++numtoolong;
  }

  if(numtoolong!=0){
    MIRANOTIFY(Notify::FATAL,
               "\nSome read names were detected with more than " << stoplength << " characters.\n"
               "While MIRA and many other programs have no problem with that, some older\n"
               "programs have restrictions concerning the length of the read name.\n"
               "\nExample given: the pipeline\n"
               "     CAF -> caf2gap -> gap2caf\n"
               "will stop working at the gap2caf stage if there are read names having > 40 characters\n"
               "where the names differ only at >40 characters.\n"
               "\nThis is a warning only, but as a couple of people were bitten by this, the default\n"
               "behaviour of MIRA is to stop when it sees that potential problem.\n"
               "\nYou might want to rename your reads to have <= " << stoplength << " characters.\n"
               "\nOn the other hand, you also can ignore this potential problem and force MIRA to\n"
               "continue by using the parameter: '-MI:somrnl=0'\n");
  }

  FUNCEND();
  return;
}

/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

void Assembly::loadExternalSCFQualities()
{
  assembly_parameters const & as_fixparams= AS_miraparams[0].getAssemblyParams();
  if(as_fixparams.as_external_qual==E_QUAL_SCF){
    if(AS_miraparams[0].getAssemblyParams().as_dateoutput) dateStamp(cout);
    cout << "\n";
    string log1=buildFileName(0, "", "", as_fixparams.as_tmpf_scfreadfail, "");
    string log2=buildFileName(0, "", "", as_fixparams.as_tmpf_scfreadfatallywrong, "");
    
    AS_readpool.loadQualitiesFromSCF(as_fixparams.as_external_qual_override, true, log1, log2);
  }
  return;
}

/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Loads reads listed in a file of EXP file names into the read pool.
//
// fofn:        name of the file of file names
// seqtype:     sequencing type, passed on to ReadPool::loadEXPs()
// loadaction:  passed on to the pool loader; external SCF qualities are
//              only loaded when it is not 0
// longestread: in/out, passed on to the pool loader
//
// Returns the number reported by the pool loader. A Notify thrown while
// loading is handed to Notify::handleError(). AS_steps[ASREADPOOLOK] is
// set to 1 afterwards.
size_t Assembly::loadFOFNEXP(const string & fofn, const uint8 seqtype, const uint8 loadaction, uint32 & longestread)
{
  FUNCSTART("size_t Assembly::loadFOFNEXP(const string & fofn, const uint8 loadaction, uint32 & longestread, const uint8 seqtype)");

  cout << "Loading project as file of EXP filenames." << endl;

  size_t numseqsloaded=0;

  //assembly_parameters const & as_fixparams= AS_miraparams[0].getAssemblyParams();
  try{
    numseqsloaded+=AS_readpool.loadEXPs(fofn.c_str(), loadaction, longestread, seqtype);
    if(loadaction!=0) loadExternalSCFQualities();
  }
  catch(Notify & n){
    // caught by reference: no copy of the exception object is made
    n.handleError(THISFUNC);
  }

  AS_steps[ASREADPOOLOK]=1;

  FUNCEND();
  return numseqsloaded;
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Loads reads of one sequencing type from a FASTA file (plus quality file)
// into the read pool.
//
// fastafile / fastaqualfile: input file names
// seqtype:     sequencing type of the reads; an unknown value throws a
//              fatal Notify
// loadaction:  passed on to ReadPool::loadDataFromFASTA()
// longestread: in/out, passed on to the pool loader
//
// Per-type settings visible here: Sanger sets 'generatefilenames', Solexa
// takes 'sxa_mustconvert' from as_loadSOLEXA_solexa_scores_in_qual_files.
// For Sanger, loadExternalSCFQualities() is called after loading.
// NOTE(review): unlike loadFOFNEXP(), that call is not skipped when
//  loadaction is 0 -- confirm whether this is intended.
//
// Returns the number reported by the pool loader. A Notify thrown while
// loading is handed to Notify::handleError(). AS_steps[ASREADPOOLOK] is
// set to 1 afterwards.
size_t Assembly::loadFASTA(const string & fastafile, const string & fastaqualfile, const uint8 seqtype, const uint8 loadaction, uint32 & longestread)
{
  FUNCSTART("Assembly::loadFASTA(const string & fastafile, const string & fastaqualfile, const uint8 seqtype, const uint8 loadaction)");

  bool generatefilenames=false;
  bool sxa_mustconvert=false;

  size_t numseqsloaded=0;

  cout << "Loading data (" << Read::getNameOfSequencingType(seqtype)
       << ") from FASTA files,\n";

#if CPP_READ_SEQTYPE_END != 6
#error "This code is made for 6 sequencing types, adapt!"
#endif
  switch(seqtype){
  case Read::SEQTYPE_SANGER : {
    generatefilenames=true;
    break;
  }
  case Read::SEQTYPE_SOLEXA : {
    sxa_mustconvert=AS_miraparams[0].getAssemblyParams().as_loadSOLEXA_solexa_scores_in_qual_files;
    break;
  }
  // known types which need no special settings
  case Read::SEQTYPE_454GS20 :
  case Read::SEQTYPE_IONTORRENT :
  case Read::SEQTYPE_PACBIO :
  case Read::SEQTYPE_ABISOLID : {
    break;
  }
  default: {
    throw Notify(Notify::FATAL, THISFUNC, "unknown type of FASTA file to load") ;
  }
  }

  try{
    numseqsloaded+=AS_readpool.loadDataFromFASTA(
      fastafile.c_str(),
      loadaction,
      longestread,
      AS_miraparams[seqtype].getAssemblyParams().as_wants_qualityfile,
      fastaqualfile.c_str(),
      generatefilenames,
      seqtype,
      sxa_mustconvert);
    if(seqtype == Read::SEQTYPE_SANGER) {
      loadExternalSCFQualities();
    }
  }
  catch(Notify & n){
    // caught by reference: no copy of the exception object is made
    n.handleError(THISFUNC);
  }


  AS_steps[ASREADPOOLOK]=1;

  FUNCEND();
  return numseqsloaded;
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Loads reads of one sequencing type from a FASTQ file into the read pool.
//
// fastqfile:   input file name
// seqtype:     sequencing type of the reads; an unknown value throws a
//              fatal Notify
// loadaction:  passed on to ReadPool::loadDataFromFASTQ()
// longestread: in/out, passed on to the pool loader
//
// Per-type settings visible here: Sanger sets 'generatefilenames', Solexa
// takes 'sxa_mustconvert' from as_loadSOLEXA_solexa_scores_in_qual_files.
// For Sanger, loadExternalSCFQualities() is called after loading.
// NOTE(review): unlike loadFOFNEXP(), that call is not skipped when
//  loadaction is 0 -- confirm whether this is intended.
//
// Returns the number reported by the pool loader. A Notify thrown while
// loading is handed to Notify::handleError(). AS_steps[ASREADPOOLOK] is
// set to 1 afterwards.
size_t Assembly::loadFASTQ(const string & fastqfile, const uint8 seqtype, const uint8 loadaction, uint32 & longestread)
{
  // signature string matches this function (there is no quality file parameter)
  FUNCSTART("size_t Assembly::loadFASTQ(const string & fastqfile, const uint8 seqtype, const uint8 loadaction, uint32 & longestread)");

  bool generatefilenames=false;
  bool sxa_mustconvert=false;
  size_t numseqsloaded=0;


  cout << "Loading data (" << Read::getNameOfSequencingType(seqtype)
       << ") from FASTQ files,\n";

#if CPP_READ_SEQTYPE_END != 6
#error "This code is made for 6 sequencing types, adapt!"
#endif
  switch(seqtype){
  case Read::SEQTYPE_SANGER : {
    generatefilenames=true;
    break;
  }
  case Read::SEQTYPE_SOLEXA : {
    sxa_mustconvert=AS_miraparams[0].getAssemblyParams().as_loadSOLEXA_solexa_scores_in_qual_files;
    break;
  }
  // known types which need no special settings
  case Read::SEQTYPE_454GS20 :
  case Read::SEQTYPE_IONTORRENT :
  case Read::SEQTYPE_PACBIO :
  case Read::SEQTYPE_ABISOLID : {
    break;
  }
  default: {
    // this is the FASTQ loader, so the message must name FASTQ
    throw Notify(Notify::FATAL, THISFUNC, "unknown type of FASTQ file to load") ;
  }
  }

  try{
    numseqsloaded+=AS_readpool.loadDataFromFASTQ(fastqfile.c_str(),
                                                 loadaction,
                                                 longestread,
                                                 generatefilenames,
                                                 seqtype,
                                                 sxa_mustconvert);
    if(seqtype == Read::SEQTYPE_SANGER) {
      loadExternalSCFQualities();
    }
  }
  catch(Notify & n){
    // caught by reference: no copy of the exception object is made
    n.handleError(THISFUNC);
  }


  AS_steps[ASREADPOOLOK]=1;

  FUNCEND();
  return numseqsloaded;
}




/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Loads reads from a PHD file into the read pool and then calls
// loadExternalSCFQualities().
//
// phdfile:     input file name
// loadaction:  passed on to ReadPool::loadPHD()
// longestread: in/out, passed on to the pool loader
//
// NOTE(review): unlike loadFOFNEXP(), the SCF quality loading is not
//  skipped when loadaction is 0 -- confirm whether this is intended.
//
// Returns the number reported by the pool loader. A Notify thrown while
// loading is handed to Notify::handleError(). AS_steps[ASREADPOOLOK] is
// set to 1 afterwards.
size_t Assembly::loadPHD(const string & phdfile, const uint8 loadaction, uint32 & longestread)
{
  FUNCSTART("void Assembly::loadPHD(const string & phdfile, const uint8 loadaction)");

  cout << "Loading project from PHD file." << endl;
  size_t numseqsloaded=0;

  try{
    numseqsloaded+=AS_readpool.loadPHD(phdfile, loadaction, longestread);
    loadExternalSCFQualities();
  }
  catch(Notify & n){
    // caught by reference: no copy of the exception object is made
    n.handleError(THISFUNC);
  }

  AS_steps[ASREADPOOLOK]=1;

  FUNCEND();
  return numseqsloaded;
}




/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

size_t Assembly::loadCAF(const string & caffile, const uint8 seqtype, const uint8 loadaction, vector<uint32> & lrperseqtype)
{
  FUNCSTART("Assembly::loadCAF(const char * caffile, const uint8 seqtype, const uint8 loadaction, vector<uint32> & lrperseqtype)");

  cout << "Loading project from CAF file: " << caffile << endl;
  size_t numseqsloaded=0;

  CAF tcaf(AS_readpool, AS_contigs, &AS_miraparams);
  numseqsloaded=tcaf.load(caffile.c_str(), seqtype, loadaction, lrperseqtype);

  AS_contigs.clear();

  AS_steps[ASREADPOOLOK]=1;

  FUNCEND();

  return numseqsloaded;
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Loads reads from a MAF file through a temporary MAFParse object which
// works on AS_readpool and AS_contigs.
//
// maffile:      input file name
// seqtype:      NOTE(review): currently not used; Read::SEQTYPE_SANGER is
//               always handed to MAFParse::load() -- confirm whether the
//               parameter should be forwarded instead
// loadaction:   passed on to MAFParse::load()
// lrperseqtype: passed on to MAFParse::load() (by reference)
//
// AS_contigs is cleared after loading, i.e. contigs from the MAF file are
// not kept. AS_steps[ASREADPOOLOK] is set to 1.
// Returns the value returned by MAFParse::load().
size_t Assembly::loadMAF(const string & maffile, const uint8 seqtype, const uint8 loadaction, vector<uint32> & lrperseqtype)
{
  // THISFUNC must name this function so that errors are attributed to
  //  the MAF loader and not to loadCAF()
  FUNCSTART("size_t Assembly::loadMAF(const string & maffile, const uint8 seqtype, const uint8 loadaction, vector<uint32> & lrperseqtype)");

  cout << "Loading project from MAF file: " << maffile << endl;
  size_t numseqsloaded=0;

  MAFParse mafp(AS_readpool, AS_contigs, &AS_miraparams);
  numseqsloaded=mafp.load(maffile, Read::SEQTYPE_SANGER, loadaction, lrperseqtype);

  // only the reads are wanted here, drop whatever contigs were read
  AS_contigs.clear();

  AS_steps[ASREADPOOLOK]=1;

  FUNCEND();

  return numseqsloaded;
}



/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Loads a MAF file as backbone: reads go to AS_readpool, contigs to
// AS_bbcontigs (loadaction 1, sequencing type Read::SEQTYPE_SANGER).
//
// If the file yields no contigs, every read of the pool which has valid
// data becomes a backbone contig of its own. Reads without qualities get
// the value as_backbone_basequals (if that is >0) and have their quality
// flag set to false. If contigs are present, only these serve as backbone.
//
// Finally, deleteStarOnlyColumns() is called over the whole length of
// every backbone contig.
void Assembly::loadBackboneMAF(const string & maffile)
{
  FUNCSTART("Assembly::loadBackboneMAF(const char * maffile)");

  cout << "Loading backbone from MAF file: " << maffile << endl;

  // handed to the loader by reference; its contents are not used here
  vector<uint32> lrperseqtype;

  // the number of loaded sequences returned by load() is not needed here
  MAFParse mafp(AS_readpool, AS_bbcontigs, &AS_miraparams);
  mafp.load(maffile, Read::SEQTYPE_SANGER, 1, lrperseqtype);

  AS_readpool.makeStrainIDs();

  // MAF file may have contigs or have only single reads
  // if only single reads, move all reads to contigs (like backbone from
  //  GBF)
  if(AS_bbcontigs.size()==0) {
    cout << "MAF contains no contigs. Adding single reads as backbones ... ";
    cout.flush();

    // empty contig used as template: a copy is appended for every read
    Contig con(&AS_miraparams, AS_readpool);

    for(uint32 i=0; i<AS_readpool.size(); i++) {
      AS_readpool[i].setReadNamingScheme(SCHEME_NONE);
      if(AS_readpool[i].hasValidData()){
        if(!AS_readpool[i].hasQuality()){
          if(AS_miraparams[0].getAssemblyParams().as_backbone_basequals>0) {
            AS_readpool[i].setQualities(AS_miraparams[0].getAssemblyParams().as_backbone_basequals);
            AS_readpool[i].setQualityFlag(false);

          }
        }

        AS_bbcontigs.push_back(con);
        AS_bbcontigs.back().addFirstRead(i,1);
      }
    }
    cout << "done.\n";
  } else {
    cout << "MAF contained " << AS_bbcontigs.size() << " contigs. Only the contigs will be added as backbone.\n";
  }

  for(list<Contig>::iterator I=AS_bbcontigs.begin(); I!=AS_bbcontigs.end(); ++I){
    I->deleteStarOnlyColumns(0,I->getContigLength());
  }

  cout << "Done.\n";

  FUNCEND();
}

/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

void Assembly::loadBackboneCAF(const string & caffile)
{
  FUNCSTART("Assembly::loadBackboneCAF(const char * caffile)");

  cout << "Loading backbone from CAF file: " << caffile << endl;

  vector<uint32> lrperseqtype;
  CAF tcaf(AS_readpool, AS_bbcontigs, &AS_miraparams);

  tcaf.load(caffile.c_str(),Read::SEQTYPE_SANGER,1,lrperseqtype);

  AS_readpool.makeStrainIDs();

  // CAF file may have contigs or have only single reads
  // if only single reads, move all reads to contigs (like backbone from
  //  GBF)
  if(AS_bbcontigs.size()==0) {
    cout << "CAF contains no contigs. Adding single reads as backbones ... ";
    cout.flush();
    
    Contig con(&AS_miraparams, AS_readpool);

    for(uint32 i=0; i<AS_readpool.size(); i++) {
      AS_readpool[i].setReadNamingScheme(SCHEME_NONE);
      if(AS_readpool[i].hasValidData()){
	if(!AS_readpool[i].hasQuality()){
	  if(AS_miraparams[0].getAssemblyParams().as_backbone_basequals>0) {
	    AS_readpool[i].setQualities(AS_miraparams[0].getAssemblyParams().as_backbone_basequals);
	    AS_readpool[i].setQualityFlag(false);

	  }
	}
	
	AS_bbcontigs.push_back(con);
	AS_bbcontigs.back().addFirstRead(i,1);    
      }
    }
    cout << "done.\n";
  } else {
    cout << "CAF contained " << AS_bbcontigs.size() << " contigs. Only the contigs will be added as backbone.\n";
  }

  list<Contig>::iterator I=AS_bbcontigs.begin();
  for(; I!=AS_bbcontigs.end(); I++){
    I->deleteStarOnlyColumns(0,I->getContigLength());
  }

  cout << "Done.\n";

  FUNCEND();
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

void Assembly::loadBackboneGFF3(const string & gff3file)
{
  FUNCSTART("Assembly::loadBackboneGBF(const char * gbffile)");

  cout << "Loading backbone from GFF3 file: " << gff3file << endl;
  cout.flush();

  AS_readpool.loadDataFromGFF3(gff3file);

  AS_readpool.makeStrainIDs();

  cout << "Done.\nAdding sequences as backbones ... ";
  cout.flush();

  Contig con(&AS_miraparams, AS_readpool);

  for(uint32 i=0; i<AS_readpool.size(); i++) {
    AS_readpool[i].setReadNamingScheme(SCHEME_NONE);
    if(AS_readpool.getRead(i).hasValidData()){
      if(AS_miraparams[0].getAssemblyParams().as_backbone_basequals>0) {
	AS_readpool.getRead(i).setQualities(AS_miraparams[0].getAssemblyParams().as_backbone_basequals);
	AS_readpool[i].setQualityFlag(false);
      }
      
      AS_bbcontigs.push_back(con);
      AS_bbcontigs.back().addFirstRead(i,1);    

      //cout << "Hello " << con.getContigName() << endl;
      //abort();
    }
  }

  cout << " done." << endl;

  FUNCEND();
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

void Assembly::loadBackboneGBF(const string & gbffile)
{
  FUNCSTART("Assembly::loadBackboneGBF(const char * gbffile)");

  cout << "Loading backbone from GBF file: " << gbffile << endl;
  cout.flush();

  AS_readpool.loadDataFromGBF(gbffile);

  AS_readpool.makeStrainIDs();

  cout << "Done.\nAdding sequences as backbones ... ";
  cout.flush();

  Contig con(&AS_miraparams, AS_readpool);

  for(uint32 i=0; i<AS_readpool.size(); i++) {
    AS_readpool[i].setReadNamingScheme(SCHEME_NONE);
    if(AS_readpool.getRead(i).hasValidData()){
      if(AS_miraparams[0].getAssemblyParams().as_backbone_basequals>0) {
	AS_readpool.getRead(i).setQualities(AS_miraparams[0].getAssemblyParams().as_backbone_basequals);
	AS_readpool[i].setQualityFlag(false);
      }
      
      AS_bbcontigs.push_back(con);
      AS_bbcontigs.back().addFirstRead(i,1);    

      //cout << "Hello " << con.getContigName() << endl;
      //abort();
    }
  }

  cout << " done." << endl;

  FUNCEND();
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

void Assembly::loadBackboneFASTA(const string & fastafile, const string & fastaqualfile)
{
  FUNCSTART("Assembly::loadBackboneCAF(const char * fastafile)");

  cout << "Loading backbone from FASTA file: " << fastafile << " (quality: " << fastaqualfile << ")" << endl;

  uint32 longestread=0;
  AS_readpool.loadDataFromFASTA(fastafile, 1, longestread, false, fastaqualfile);

  AS_readpool.makeStrainIDs();

  Contig con(&AS_miraparams, AS_readpool);

  for(uint32 i=0; i<AS_readpool.size(); i++) {
    AS_readpool[i].setReadNamingScheme(SCHEME_NONE);
    if(AS_readpool.getRead(i).hasValidData()){
      if(!AS_readpool[i].hasQuality()){
	if(AS_miraparams[0].getAssemblyParams().as_backbone_basequals>0) {
	  AS_readpool[i].setQualities(AS_miraparams[0].getAssemblyParams().as_backbone_basequals);
	  AS_readpool[i].setQualityFlag(false);
	}
      }
      
      AS_bbcontigs.push_back(con);
      AS_bbcontigs.back().addFirstRead(i,1);    
    }
  }

  FUNCEND();
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Postprocessing of all contigs in AS_bbcontigs after a backbone was loaded:
//  1) every FLTR / FrRN tag of a contig read is added again to that read
//     with the identifier changed to MFSM
//  2) single read contigs are named after their read (plus "_bb"); the
//     read must have a name and the name may not look like "ContigX"
//  3) setupAsBackBoneContig() is called on every contig
// Throws (via MIRANOTIFY) a fatal error after all contigs have been
//  processed if any backbone name was missing or bad.
void Assembly::postLoadBackbone()
{
  FUNCSTART("void Assembly::postLoadBackbone()");

  cout << "Postprocessing backbone(s) ... this may take a while."<< endl;
  // mark all reads loaded in backbone as backbone
  // check that they are not named "ContigX"
  // set the strain to "backbone"
  // Backbones will not be included in Skim, makeAlignments etc.


  static const boost::regex badseqnameexp("^Contig[0-9]+$");

  // set MFSM tags
  {
    vector<multitag_t::mte_id_t> idstoreplace;
    {
      string tmp="FLTR";
      idstoreplace.push_back(Read::getTagID(tmp));
      tmp="FrRN";
      idstoreplace.push_back(Read::getTagID(tmp));
    }
    string mfsm="MFSM";
    multitag_t::mte_id_t mtid=Read::getTagID(mfsm);
    list<Contig>::iterator I=AS_bbcontigs.begin();
    for(; I!=AS_bbcontigs.end(); ++I){
      // getContigReads() is used through a non-const reference here as
      //  tags get added to the reads stored in the contig
      vector<Contig::contigread_t> & conreads=const_cast<vector<Contig::contigread_t>&>(I->getContigReads());
      vector<Contig::contigread_t>::iterator crI=conreads.begin();
      for(; crI != conreads.end(); ++crI){
        // tags are added while looping; the tag is worked on as copy and
        //  the added copies carry the MFSM identifier, which is not one
        //  of the identifiers searched for
        for(uint32 it=0; it<crI->read.getNumOfTags(); ++it){
          multitag_t tmp=crI->read.getTag(it);
          if(find(idstoreplace.begin(),idstoreplace.end(),tmp.identifier)!=idstoreplace.end()){
            tmp.identifier=mtid;
            crI->read.addTag(tmp);
          }
        }
      }
    }
  }


  bool contignamesok=true;
  uint32 bbnum=1;
  {
    cout << AS_bbcontigs.size() << " to process\n";
    list<Contig>::iterator I=AS_bbcontigs.begin();
    for(; I!=AS_bbcontigs.end(); ++I, ++bbnum){
      // first, find a name for that contig

      const vector<Contig::contigread_t> & conreads=I->getContigReads();

      // if it is a single read contig,
      //  set the name for that contig to be the name of the read
      if(conreads.size()==1) {
        if(conreads.front().read.getName().size()){
          // the regular expression is only consulted for names starting
          //  with 'C'
          if(conreads.front().read.getName()[0]=='C'){
            if(regex_match(conreads.front().read.getName(), badseqnameexp)){
              cout << "Bad name for backbone sequence " << bbnum << ": " << conreads.front().read.getName() << '\n';
              cout << "Backbone sequences may NOT be name 'ContigX' with 'X' being any number.\n";
              contignamesok=false;
            }
          }
        }else{
          cout << "There's a backbone sequence (number " << bbnum << ") without a name? Too bad, not allowed.\n";
          contignamesok=false;
        }

        /* BaCh 14.09.2010: why did I add _bb? Let's remove and see whether it's better.
           BaCh 26.10.2010: I remembered. A contig may not be named like a sequence, CAF will
                            dump an error.
                            Solution: ...?
        */
        I->setContigName(conreads.front().read.getName()+"_bb");
      }

      cout << I->getContigName() << "\t" << I->getContigLength() << endl;

      // now let the contig do the rest of the setup
      I->setupAsBackBoneContig();
    }
  }

  // stop only now so that all offending names were logged above
  if(!contignamesok){
    MIRANOTIFY(Notify::FATAL,"Some backbones had either no names or a bad name (see log above). Stopping here, fix your sequence names.\n")
  }

  AS_readpool.dumpStrainIDSummary();

  FUNCEND();
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

size_t Assembly::addRailsToBackbones(const bool simulateonly)
{
  FUNCSTART("size_t Assembly::addRailsToBackbones(const bool simulateonly)");

  if(!simulateonly) {
    cout << "Adding rails to " << AS_bbcontigs.size() << " contigs (this may take a while)." << endl;
  }

  const string strainname=AS_miraparams[0].getAssemblyParams().as_backbone_strainname;

  size_t numrailscreated=0;

  list<Contig>::iterator I=AS_bbcontigs.begin();
  for(; I!=AS_bbcontigs.end(); I++){
    I->recalcTemplateAndStrainIDsPresent();
    
    bool bbvalue=true;
    //if(conreads.size()==1 && I->getContigLength()<4000) {
    //	bbvalue=false;
    //} 
    
    // add the rails
    if(bbvalue) {
      numrailscreated+=I->addRails(
	AS_miraparams[0].getAssemblyParams().as_backbone_raillength,
	AS_miraparams[0].getAssemblyParams().as_backbone_railoverlap,
	strainname,
	AS_miraparams[0].getAssemblyParams().as_backbone_strainname_forceforall,
	AS_miraparams[0].getAssemblyParams().as_backbone_rail_fromstrain,
	simulateonly);
    }
  }

  FUNCEND();
  return numrailscreated;
}

/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Lets the read pool load strain data from the file 'sdfile'.
// A Notify thrown while loading is handed to Notify::handleError()
//  together with the name of this function.
void Assembly::loadStrainData(const string & sdfile)
{
  FUNCSTART("Assembly::loadStrainData(const string & sdfile)");

  cout << "Loading straindata." << endl;

  try{
    AS_readpool.loadStrainData(sdfile.c_str());
  }
  // catch by reference: no copy of the thrown object, no slicing
  catch(Notify & n){
    n.handleError(THISFUNC);
  }

  FUNCEND();
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

void Assembly::postLoad()
{
  FUNCSTART("void Assembly::postLoad()");

  directory_parameters const & dir_params= AS_miraparams[0].getDirectoryParams();
  assembly_parameters const & as_fixparams= AS_miraparams[0].getAssemblyParams();

  // how many sequences in assembly


  // count how many have valid data, clean up stars in reads
  // count how many have SCF data: if none, switch off editing.

  {
    cout << "Checking reads for trace data:\n";

    ProgressIndicator<int32> P (0, AS_readpool.size());

    ofstream fout;
    fout.open((dir_params.dir_tmp+'/'+as_fixparams.as_outfile_stats_reads_invalid).c_str(), ios::out| ios::trunc);

    bool can_edit=false;
    AS_num_reads_valid=0;
    for(uint32 i=0;i<AS_readpool.size();i++){
      P.progress(i);
      if(AS_readpool.getRead(i).hasValidData()){
	AS_num_reads_valid++;
	AS_readpool.getRead(i).removeGapsFromRead();
	if(AS_readpool.getRead(i).hasSCFData(true)){
	  can_edit=true;
	}
      } else {
	if(!AS_readpool.getRead(i).getName().empty()) {
	  fout << AS_readpool.getRead(i).getName() << "\n";
	//} else if (!AS_readpool.getRead(i).getEXPName().empty()) {
	//  fout << AS_readpool.getRead(i).getEXPName() << "\n";
	} else {
	  fout << "Unknown read (loaded as number: " << i << ")\n";
	}
      }
    }
    P.finishAtOnce();
    cout << endl;

    if(!can_edit) {
      cout << "No SCF data present in any read, automatic contig editing for Sanger data is now switched off.\n";
      AS_miraparams[0].setEditAutomaticContigEditing(false);
    }
    fout.close();
  }

  cout << AS_num_reads_valid << " reads with valid data for assembly.\n";

  if(!AS_num_reads_valid){
    throw Notify(Notify::FATAL, THISFUNC, "No valid read in assembly?");
  }


  bool templatesusable=AS_readpool.makeTemplateIDs();
  if(!templatesusable) {
    cout << "No useful template information found, template routines will not be used.\n";
    AS_miraparams[0].setAssemblyUseTemplateInformation(false);
  }


  // assign default strains, if wanted, to all non-backbone, non-rail reads
  //  which do not have strain info yet
  {
    for(uint32 i=0;i<AS_readpool.size();i++){
      Read & actread=AS_readpool.getRead(i);
      if(actread.isBackbone() 
	 || actread.isRail()) continue;
      if(AS_miraparams[actread.getSequencingType()].getAssemblyParams().as_assigndefaultstrain
	 && actread.getStrain().empty()){
	actread.setStrain(AS_miraparams[actread.getSequencingType()].getAssemblyParams().as_default_strainname);
      }
    }
  }  

  // now make and assign strain IDs
  AS_readpool.makeStrainIDs();
  
  //re-adjust bbcontigs template and strain ids in contig reads
  if(as_fixparams.as_loadbackbone){
    list<Contig>::iterator I=AS_bbcontigs.begin();
    for(; I!=AS_bbcontigs.end(); I++){
      I->recalcTemplateAndStrainIDsPresent();
    }
  }
  
  AS_readpool.dumpStrainIDSummary();

  // look for quality values in reads
  {
    bool stopall=false;
    for(uint32 i=0;i<AS_readpool.size();i++){
      Read & actread=AS_readpool.getRead(i);
      if(actread.isBackbone() 
	 || actread.isRail()) continue;
      if(AS_miraparams[actread.getSequencingType()].getAssemblyParams().as_enforce_qualsinreads
	 && actread.hasValidData()
	 && actread.hasQuality()==false){
	cout << "No quality data found: (" << Read::getNameOfSequencingType(actread.getSequencingType()) << ") " << actread.getName() << '\n';
	stopall=true;
      }
    }
#if CPP_READ_SEQTYPE_END != 6
#error "This code is made for 6 sequencing types, adapt!"
#endif
    if(stopall) {
      MIRANOTIFY(Notify::FATAL,"Some reads had no quality values given (see log above),\nplease check your input data.\nIf sure that this is ok for your data, switch off this check with -AS:epoq=no\nfor any sequencing type you wish (Sanger, 454, IonTorrent, PacBio, Solexa, ...)")
    }

  }


  FUNCEND();
  return;
}
  


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

void Assembly::dumpSomeStatistics()
{
  FUNCSTART("void Assembly::dumpSomeStatistics()");
  // initialise the assembly_structure and do some statistics

  directory_parameters const & dir_params= AS_miraparams[0].getDirectoryParams();
  assembly_parameters const & as_fixparams= AS_miraparams[0].getAssemblyParams();

  uint32 backbonereads=0;
  uint32 railreads=0;
  {
    ofstream fout;
    fout.open((dir_params.dir_tmp+'/'+as_fixparams.as_outfile_stats_reads_tooshort).c_str(), ios::out| ios::trunc);

    AS_num_reads_too_small=0;
    for(uint32 i=0;i<AS_readpool.size();i++){
      Read & actread=AS_readpool.getRead(i);

      //AS_ok_for_assembly[i]=1;
      actread.setUsedInAssembly(true);
      
      if(actread.hasValidData()==false){
	cout << actread.getName() << ": unable to load or other reason for invalid data.\n";
	//AS_ok_for_assembly[i]=0;
	actread.setUsedInAssembly(false);
      }else{
	if(actread.isBackbone()){
	  actread.setUsedInAssembly(false);
	  backbonereads++;
	} else if(actread.isRail()) {
	  railreads++;
	}else{
	  // throw out on minumum length if no template partner is present
	  if(actread.getLenClippedSeq() < AS_miraparams[actread.getSequencingType()].getAssemblyParams().as_minimum_readlength){
	    //cout << "Short length: " 
	    //	 << actread.getName() << " (" 
	    //	 << actread.getShortNameOfSequencingType(actread.getSequencingType()) 
	    //	 << "): only " << actread.getLenClippedSeq() 
	    //	 << " good bases, ";
	    fout << actread.getName();
	    if(actread.getTemplatePartnerID() == -1){
	      //cout << "need: " 
	      //	   << AS_miraparams[actread.getSequencingType()].getAssemblyParams().as_minimum_readlength 
	      //	   << ". No paired end partner, rejected.\n";
	      fout << " too small and no paired end\n";
	      AS_num_reads_too_small++;
	      //AS_ok_for_assembly[i]=0;
	      actread.setUsedInAssembly(false);
	    }else{
	      if(actread.getLenClippedSeq() < 20){
		//cout << "really too small, rejected.\n";
		fout << " too small even with paired end\n";
		AS_num_reads_too_small++;
		//AS_ok_for_assembly[i]=0;
		actread.setUsedInAssembly(false);
	      }else{
		fout << " saved by paired-end\n";
		//cout << "accepted as paired-end partner is present.\n";
	      }
	    }
	  }
	}
      }
    }
    fout.close();
  }
  
  if(AS_logflag_dumpusedids){
    ofstream fout;
    fout.open((dir_params.dir_tmp+"/elog.usedids.lst").c_str(), ios::out | ios::trunc);
    for(uint32 i=0; i<AS_used_ids.size(); i++){
      if(AS_readpool[i].isUsedInAssembly()){
	fout << AS_readpool[i].getName()<< '\n';
      }
    }
    fout.close();
  }

  // TODO: also take reads too short into statistics
  //  

  vector<int32> straindatabytype(Read::SEQTYPE_END,0);
  vector<int32> withoutclipsbytype(Read::SEQTYPE_END,0);
  vector<int32> readcountbytype(Read::SEQTYPE_END,0);
  vector<int32> readusedbytype(Read::SEQTYPE_END,0);
  vector<int32> numnoqualbytype(Read::SEQTYPE_END,0);

  vector<uint64_t> meanlengthbytype(Read::SEQTYPE_END,0);
  vector<uint64_t> meantlengthbytype(Read::SEQTYPE_END,0);

  //remove("log.noqualities");
  ofstream fout;
  fout.open((dir_params.dir_tmp+"/miralog.noqualities").c_str(), ios::out| ios::trunc);
  for(uint32 i=0;i<AS_readpool.size();i++){
    Read & actread=AS_readpool.getRead(i);
    if(actread.isBackbone() == false
       && actread.isRail() == false){
      if(actread.hasQuality()==false){
	numnoqualbytype[actread.getSequencingType()]++;
	fout << actread.getName() << endl;
      }
      readcountbytype[actread.getSequencingType()]++;
      meantlengthbytype[actread.getSequencingType()]+=actread.getLenSeq();
      if(actread.isUsedInAssembly()){
	readusedbytype[actread.getSequencingType()]++;
	meanlengthbytype[actread.getSequencingType()]+=actread.getLenClippedSeq();
      }
      if(!actread.getStrain().empty()){
	straindatabytype[actread.getSequencingType()]++;
      }
#if CPP_READ_SEQTYPE_END != 6
#error "This code is made for 6 sequencing types, adapt!"
#endif
      switch(actread.getSequencingType()){
      case Read::SEQTYPE_SANGER : {
	break;
      }
      case Read::SEQTYPE_PACBIO : {
	break;
      }
      case Read::SEQTYPE_IONTORRENT : {
	if(actread.getLQClipoff()<1 ||
	   (actread.getLQClipoff()==1 && actread.getRightClipoff()==static_cast<int32>(actread.getLenSeq()))){
	  withoutclipsbytype[actread.getSequencingType()]++;
	}
	break;
      }
      case Read::SEQTYPE_454GS20 : {
	if(actread.getRightClipoff()==static_cast<int32>(actread.getLenSeq())
	   && (actread.getLQClipoff()<1 ||
	       actread.getLQClipoff()==4)){
	  withoutclipsbytype[actread.getSequencingType()]++;
	}
	break;
      }
      case Read::SEQTYPE_SOLEXA : {
	if(actread.getLQClipoff()<1 ||
	   (actread.getLQClipoff()==1 && actread.getRightClipoff()==static_cast<int32>(actread.getLenSeq()))){
	  withoutclipsbytype[actread.getSequencingType()]++;
	}
	break;
      }
      case Read::SEQTYPE_ABISOLID : {
	throw Notify(Notify::INTERNAL, THISFUNC, "Type ABI SOLiD needs more support 20a.");
	break;
      }
      default : {
	cerr << "Sequencing type " << actread.getSequencingType() << " unknown?\n";
	throw Notify(Notify::FATAL, THISFUNC, "Found unknown sequencing type in read.");
      }
      }
    }
  }  
  fout.close();

  vector<uint64_t> totallengthbytype=meanlengthbytype;
  vector<uint64_t> totaltlengthbytype=meantlengthbytype;

  for(uint32 i=0; i<meantlengthbytype.size(); i++){
    if(readcountbytype[i]) meantlengthbytype[i]/=readcountbytype[i];
    if(readusedbytype[i]) meanlengthbytype[i]/=readusedbytype[i];
  }

#if CPP_READ_SEQTYPE_END != 6
#error "This code is made for 6 sequencing types, adapt!"
#endif
  cout << "\n===========================================================================\n";
  cout << "Pool statistics:\n";
  cout << "Backbones: " << backbonereads << "\tBackbone rails: " << railreads << "\n";

#ifdef HIDEPACBIO
  cout << "\n\t\tSanger\t454\tIonTor\tSolexa\tSOLiD\n";
  cout << "\t\t---------------------------------\n";
  cout << "Total reads\t" << readcountbytype[Read::SEQTYPE_SANGER];
  cout << '\t' << readcountbytype[Read::SEQTYPE_454GS20];
  cout << '\t' << readcountbytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << readcountbytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << readcountbytype[Read::SEQTYPE_ABISOLID] << '\n';
  cout << "Reads wo qual\t" << numnoqualbytype[Read::SEQTYPE_SANGER];
  cout << '\t' << numnoqualbytype[Read::SEQTYPE_454GS20];
  cout << '\t' << numnoqualbytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << numnoqualbytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << numnoqualbytype[Read::SEQTYPE_ABISOLID] << '\n';
  cout << "Used reads\t" << readusedbytype[Read::SEQTYPE_SANGER];
  cout << '\t' << readusedbytype[Read::SEQTYPE_454GS20];
  cout << '\t' << readusedbytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << readusedbytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << readusedbytype[Read::SEQTYPE_ABISOLID] << '\n';
  cout << "Avg tot rlen\t" << meantlengthbytype[Read::SEQTYPE_SANGER];
  cout << '\t' << meantlengthbytype[Read::SEQTYPE_454GS20];
  cout << '\t' << meantlengthbytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << meantlengthbytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << meantlengthbytype[Read::SEQTYPE_ABISOLID] << '\n';
  cout << "Avg rlen used\t" << meanlengthbytype[Read::SEQTYPE_SANGER];
  cout << '\t' << meanlengthbytype[Read::SEQTYPE_454GS20];
  cout << '\t' << meanlengthbytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << meanlengthbytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << meanlengthbytype[Read::SEQTYPE_ABISOLID] << '\n';
  cout << "\nWith strain\t" << straindatabytype[Read::SEQTYPE_SANGER];
  cout << '\t' << straindatabytype[Read::SEQTYPE_454GS20];
  cout << '\t' << straindatabytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << straindatabytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << straindatabytype[Read::SEQTYPE_ABISOLID] << '\n';
  cout << "W/o clips\t" << withoutclipsbytype[Read::SEQTYPE_SANGER];
  cout << '\t' << withoutclipsbytype[Read::SEQTYPE_454GS20];
  cout << '\t' << withoutclipsbytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << withoutclipsbytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << withoutclipsbytype[Read::SEQTYPE_ABISOLID] << '\n';
#else
  cout << "\n\t\tSanger\t454\tIonTor\tPacBio\tSolexa\tSOLiD\n";
  cout << "\t\t----------------------------------------\n";
  cout << "Total reads\t" << readcountbytype[Read::SEQTYPE_SANGER];
  cout << '\t' << readcountbytype[Read::SEQTYPE_454GS20];
  cout << '\t' << readcountbytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << readcountbytype[Read::SEQTYPE_PACBIO];
  cout << '\t' << readcountbytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << readcountbytype[Read::SEQTYPE_ABISOLID] << '\n';
  cout << "Reads wo qual\t" << numnoqualbytype[Read::SEQTYPE_SANGER];
  cout << '\t' << numnoqualbytype[Read::SEQTYPE_454GS20];
  cout << '\t' << numnoqualbytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << numnoqualbytype[Read::SEQTYPE_PACBIO];
  cout << '\t' << numnoqualbytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << numnoqualbytype[Read::SEQTYPE_ABISOLID] << '\n';
  cout << "Used reads\t" << readusedbytype[Read::SEQTYPE_SANGER];
  cout << '\t' << readusedbytype[Read::SEQTYPE_454GS20];
  cout << '\t' << readusedbytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << readusedbytype[Read::SEQTYPE_PACBIO];
  cout << '\t' << readusedbytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << readusedbytype[Read::SEQTYPE_ABISOLID] << '\n';
  cout << "Avg tot rlen\t" << meantlengthbytype[Read::SEQTYPE_SANGER];
  cout << '\t' << meantlengthbytype[Read::SEQTYPE_454GS20];
  cout << '\t' << meantlengthbytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << meantlengthbytype[Read::SEQTYPE_PACBIO];
  cout << '\t' << meantlengthbytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << meantlengthbytype[Read::SEQTYPE_ABISOLID] << '\n';
  cout << "Avg rlen used\t" << meanlengthbytype[Read::SEQTYPE_SANGER];
  cout << '\t' << meanlengthbytype[Read::SEQTYPE_454GS20];
  cout << '\t' << meanlengthbytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << meanlengthbytype[Read::SEQTYPE_PACBIO];
  cout << '\t' << meanlengthbytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << meanlengthbytype[Read::SEQTYPE_ABISOLID] << '\n';
  cout << "\nWith strain\t" << straindatabytype[Read::SEQTYPE_SANGER];
  cout << '\t' << straindatabytype[Read::SEQTYPE_454GS20];
  cout << '\t' << straindatabytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << straindatabytype[Read::SEQTYPE_PACBIO];
  cout << '\t' << straindatabytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << straindatabytype[Read::SEQTYPE_ABISOLID] << '\n';
  cout << "W/o clips\t" << withoutclipsbytype[Read::SEQTYPE_SANGER];
  cout << '\t' << withoutclipsbytype[Read::SEQTYPE_454GS20];
  cout << '\t' << withoutclipsbytype[Read::SEQTYPE_IONTORRENT];
  cout << '\t' << withoutclipsbytype[Read::SEQTYPE_PACBIO];
  cout << '\t' << withoutclipsbytype[Read::SEQTYPE_SOLEXA];
  cout << '\t' << withoutclipsbytype[Read::SEQTYPE_ABISOLID] << '\n';
#endif

  cout << '\n';
  for(uint32 i=0; i<totallengthbytype.size(); i++){
    cout << Read::getNameOfSequencingType(i) << "\ttotal bases:" << totaltlengthbytype[i]
	 << "\tused bases in used reads: " << totallengthbytype[i] << '\n';
  }

  cout << "===========================================================================\n\n";

  
  //for(uint32 i=0; i<meanlengthbytype.size(); i++){
  //  if(readcountbytype[i] >0 
  //     && readcountbytype[i] == numnoqualbytype[i]){
  //    cout << "No qualities for " << Read::getNameOfSequencingType(i)
  //	   << " reads, forcing usage of non-IUPAC consensus base for these reads.\n";
  //    AS_miraparams[i].getNonConstContigParams().con_force_nonIUPACconsensus_perseqtype=true;
  //  }
  //}


  if(AS_readpool.size()-AS_num_reads_too_small-backbonereads-railreads<=0) {
    throw Notify(Notify::FATAL, THISFUNC, "No read can be used for assembly.");
  }

  cout << endl;
  
  FUNCEND();
}





/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Writes the contig 'con' to extra temporary files, one per output format
//  switched on in the assembly parameters (CAF, ACE, FASTA + quality,
//  GAP4 directed assembly). File / directory names are 'basename' plus a
//  format specific extension.
// Contigs with a single read are only written if
//  as_output_exttmp_alsosinglets is set.
// Note: the global output type of Contig (setCoutType()) is changed by
//  every format written.
void Assembly::saveExtTmpContig(Contig & con, string basename)
{
  FUNCSTART("void Assembly::saveExtTmpContig(string prepost)");

  assembly_parameters const & outparams= AS_miraparams[0].getAssemblyParams();

  // nothing to do for singlets unless they were asked for
  if(con.getNumReadsInContig() <= 1
     && !outparams.as_output_exttmp_alsosinglets){
    FUNCEND();
    return;
  }

  if (outparams.as_output_exttmp_caf) {
    const string cafname(basename+".caf");

    cout << "Logging this contig to file: " << cafname << endl;

    ofstream cafout;
    cafout.open(cafname.c_str(), ios::out | ios::trunc);
    Contig::setCoutType(Contig::AS_CAF);
    cafout << con;
    cafout.close();
  }

  if (outparams.as_output_exttmp_ace) {
    const string acename(basename+".ace");

    cout << "Logging this contig to file: " << acename << endl;

    ofstream aceout;
    aceout.open(acename.c_str(), ios::out | ios::trunc);
    Contig::setCoutType(Contig::AS_ACE);
    aceout << con;
    aceout.close();
  }

  if (outparams.as_output_exttmp_fasta) {
    // the quality file is named after the complete FASTA file name
    const string fastaname(basename+".fasta");
    const string qualname(fastaname+".qual");

    cout << "Logging this contig to files: " << fastaname << "  and  " << qualname << endl;

    ofstream fastaout;
    fastaout.open(fastaname.c_str(), ios::out | ios::trunc);
    Contig::setCoutType(Contig::AS_FASTA);
    fastaout << con;
    fastaout.close();

    ofstream qualout;
    qualout.open(qualname.c_str(), ios::out | ios::trunc);
    Contig::setCoutType(Contig::AS_FASTAQUAL);
    qualout << con;
    qualout.close();
  }

  if (outparams.as_output_exttmp_gap4da) {
    string gap4dir(basename+".gap4da");

    cout << "Logging this contig to directory: " << gap4dir << endl;
    // a value evaluating to true from purgeCreateDir() is treated as error
    if(purgeCreateDir(gap4dir,true)){
      MIRANOTIFY(Notify::FATAL, "Cannot make sure the directory exist? Aborting.");
    }

    Contig::setCoutType(Contig::AS_GAP4DA);
    ofstream fofnout;
    fofnout.open((gap4dir+"/fofn").c_str(), ios::out | ios::trunc);
    con.saveAsGAP4DA(gap4dir, fofnout);
    fofnout.close();
  }

  FUNCEND();
}




/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns 'filename' prefixed with the checkpoint directory of the
//  directory parameters and a '/'.
string Assembly::buildDefaultCheckpointFileName(const string & filename)
{
  string cpname(AS_miraparams[0].getDirectoryParams().dir_checkpoint);
  cpname+="/";
  cpname+=filename;
  return cpname;
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Builds the name of an info file via buildFileName().
//
// If 'basename' is not empty, it is used as name and no directory is
//  handed to buildFileName(). Else 'defaultname' is used together with a
//  directory: the tmp directory for version>=0, the info directory for
//  negative versions.
// version, prefix, postfix, defaultextension and removeold are passed on
//  unchanged.
string Assembly::buildDefaultInfoFileName(int32 version, const string & prefix, const string & postfix, const string & basename, const string & defaultname, const string & defaultextension, bool removeold)
{
  // caller supplied name: goes without directory
  if(!basename.empty()){
    return buildFileName(version, prefix, postfix,
                         basename, defaultextension,
                         "",
                         removeold);
  }

  directory_parameters const & dirparams=AS_miraparams[0].getDirectoryParams();
  string targetdir(version>=0 ? dirparams.dir_tmp : dirparams.dir_info);

  return buildFileName(version, prefix, postfix,
                       defaultname, defaultextension,
                       targetdir,
                       removeold);
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Builds the name of a results file via buildFileName().
//
// If 'basename' is not empty, it is used as name and no directory is
//  handed to buildFileName(). Else 'defaultname' is used together with a
//  directory: the tmp directory for version>=0, the results directory for
//  negative versions.
// version, prefix, postfix, defaultextension and removeold are passed on
//  unchanged.
string Assembly::buildDefaultResultsFileName(int32 version, const string & prefix, const string & postfix, const string & basename, const string & defaultname, const string & defaultextension, bool removeold)
{
  // caller supplied name: goes without directory
  if(!basename.empty()){
    return buildFileName(version, prefix, postfix,
                         basename, defaultextension,
                         "",
                         removeold);
  }

  directory_parameters const & dirparams=AS_miraparams[0].getDirectoryParams();
  string targetdir(version>=0 ? dirparams.dir_tmp : dirparams.dir_results);

  return buildFileName(version, prefix, postfix,
                       defaultname, defaultextension,
                       targetdir,
                       removeold);
}

/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the info file name for the contig read list: default name is
//  as_outfile_stats_crlist of the assembly parameters, default extension
//  ".txt".
string Assembly::getContigReadListFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  assembly_parameters const & as_params=AS_miraparams[0].getAssemblyParams();
  return buildDefaultInfoFileName(version, prefix, postfix, basename,
                                  as_params.as_outfile_stats_crlist,
                                  ".txt");
}

// Hands AS_contigs to assout::saveContigReadList(), the target file name
//  comes from getContigReadListFilename().
void Assembly::saveContigReadList(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  string crlistname=getContigReadListFilename(version, prefix, postfix, basename);
  assout::saveContigReadList(AS_contigs, crlistname, deleteoldfile);
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the info file name for the contig statistics: default name is
//  as_outfile_stats_contigstats of the assembly parameters, default
//  extension ".txt".
string Assembly::getStatisticsFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  assembly_parameters const & as_params=AS_miraparams[0].getAssemblyParams();
  return buildDefaultInfoFileName(version, prefix, postfix, basename,
                                  as_params.as_outfile_stats_contigstats,
                                  ".txt");
}

// Hands AS_contigs to assout::saveStatistics(), the target file name
//  comes from getStatisticsFilename().
void Assembly::saveStatistics(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  string statsname=getStatisticsFilename(version, prefix, postfix, basename);
  assout::saveStatistics(AS_contigs, statsname, deleteoldfile);
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the info file name for the assembly info: default name is
//  as_outfile_stats_info of the assembly parameters, default extension
//  ".txt".
string Assembly::getAssemblyInfoFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  assembly_parameters const & as_params=AS_miraparams[0].getAssemblyParams();
  return buildDefaultInfoFileName(version, prefix, postfix, basename,
                                  as_params.as_outfile_stats_info,
                                  ".txt");
}

// Saves the assembly info held in AS_assemblyinfo.
//
// The target name comes from getAssemblyInfoFilename(); the actual writing
// is done by assout::saveAssemblyInfo(), which also receives deleteoldfile
// unchanged.
void Assembly::saveAssemblyInfo(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  const string outname=getAssemblyInfoFilename(version, prefix, postfix, basename);

  assout::saveAssemblyInfo(AS_assemblyinfo, outname, deleteoldfile);
}

/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Writes the names of all reads flagged in AS_isdebris to the debris list
// file, one read name per line.
//
// The file name is obtained from buildDefaultInfoFileName() using the
// configured default name (as_outfile_stats_debrislist of parameter set 0)
// and the extension ".txt". The file is opened with ios::trunc, i.e. any
// previous content is discarded.
//
// If the file cannot be opened, a message is written to cerr and the
// function returns without writing anything.
//
// NOTE(review): AS_isdebris is indexed with read pool indices; this assumes
// it has at least AS_readpool.size() entries -- confirm against the code
// that fills it.
void Assembly::saveDebrisList(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  FUNCSTART("void Assembly::saveDebrisList(int32 version, const string & prefix, const string & postfix, const string & basename)");

  string filename(buildDefaultInfoFileName(
		    version, prefix, postfix, basename,
		    AS_miraparams[0].getAssemblyParams().as_outfile_stats_debrislist,
		    ".txt"));

  cout << "Saving debris list to file: " << filename << endl;
  ofstream fout(filename.c_str(), ios::out | ios::trunc);
  if(!fout){
    // do not silently "write" into a stream that never opened
    cerr << "Could not open file " << filename << " for saving the debris list.\n";
    FUNCEND();
    return;
  }

  for(uint32 i=0; i< AS_readpool.size(); i++){
    if(AS_isdebris[i]) {
      fout << AS_readpool.getRead(i).getName() << '\n';
    }
  }
  fout.close();

  FUNCEND();
}




/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the file name used for the read tag list.
//
// Delegates to buildDefaultInfoFileName(), handing over the caller's
// version, prefix, postfix and basename together with the configured
// default name (as_outfile_stats_readtags of parameter set 0) and the
// extension ".txt".
string Assembly::getReadTagListFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  const string & defaultname=AS_miraparams[0].getAssemblyParams().as_outfile_stats_readtags;

  return buildDefaultInfoFileName(version, prefix, postfix, basename,
                                  defaultname, ".txt");
}

// Saves the read tag list of the contigs in AS_contigs.
//
// The target name comes from getReadTagListFilename(); the actual writing
// is done by assout::saveReadTagList(), which also receives deleteoldfile
// unchanged.
void Assembly::saveReadTagList(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  const string outname=getReadTagListFilename(version, prefix, postfix, basename);

  assout::saveReadTagList(AS_contigs, outname, deleteoldfile);
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the file name used for the consensus tag list.
//
// Delegates to buildDefaultInfoFileName(), handing over the caller's
// version, prefix, postfix and basename together with the configured
// default name (as_outfile_stats_contigtags of parameter set 0) and the
// extension ".txt".
string Assembly::getConsensusTagListFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  const string & defaultname=AS_miraparams[0].getAssemblyParams().as_outfile_stats_contigtags;

  return buildDefaultInfoFileName(version, prefix, postfix, basename,
                                  defaultname, ".txt");
}

// Saves the consensus tag list of the contigs in AS_contigs.
//
// The target name comes from getConsensusTagListFilename(); the actual
// writing is done by assout::saveConsensusTagList(), which also receives
// deleteoldfile unchanged.
void Assembly::saveConsensusTagList(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  const string outname=getConsensusTagListFilename(version, prefix, postfix, basename);

  assout::saveConsensusTagList(AS_contigs, outname, deleteoldfile);
}




/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Saves the SNP list of the contigs in AS_contigs.
//
// The target name is built by buildDefaultInfoFileName() from the caller's
// version, prefix, postfix and basename, the configured default name
// (as_outfile_stats_snpanalysis of parameter set 0) and the extension
// ".txt". Writing is done by assout::saveSNPList(), which also receives
// deleteoldfile unchanged.
void Assembly::saveSNPList(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  const string & defaultname=AS_miraparams[0].getAssemblyParams().as_outfile_stats_snpanalysis;
  const string outname=buildDefaultInfoFileName(version, prefix, postfix, basename,
                                                defaultname, ".txt");

  assout::saveSNPList(AS_contigs, outname, deleteoldfile);
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Saves the feature analysis, feature summary and feature sequences of the
// contigs in AS_contigs.
//
// Three file names are built with buildFileName(), each with the extension
// ".txt":
//   faname -> feature analysis  (default: as_outfile_stats_featureanalysis)
//   fsname -> feature summary   (default: as_outfile_stats_featuresummary)
//   fcname -> feature sequences (default: as_outfile_stats_featuresequences)
//
// A non-empty name given by the caller is passed to buildFileName() as is,
// without a directory argument. When the name is empty, the configured
// default name is used and a directory is passed along: dir_tmp when
// version>=0, dir_info otherwise.
//
// Writing is done by assout::saveFeatureAnalysis(), which receives
// AS_contigs, AS_readpool, the three names and deleteoldfile.
void Assembly::saveFeatureAnalysis(int32 version, const string & prefix, const string & postfix, const string & faname, const string & fsname, const string & fcname, bool deleteoldfile)
{
  FUNCSTART("void Assembly::saveFeatureAnalysis(int32 version, const string & prefix, const string & postfix, const string & faname, const string & fsname, const string & fcname, bool deleteoldfile)");

  // directory used only for the default (not caller supplied) names
  string dirname;
  if(version>=0){
    dirname=AS_miraparams[0].getDirectoryParams().dir_tmp;
  }else{
    dirname=AS_miraparams[0].getDirectoryParams().dir_info;
  }

  string filenamea;
  if(faname.empty()){
    filenamea=buildFileName(version, prefix, postfix,
			    AS_miraparams[0].getAssemblyParams().as_outfile_stats_featureanalysis,
			    ".txt",
			    dirname);
  }else{
    filenamea=buildFileName(version, prefix, postfix, faname, ".txt");
  }

  string filenames;
  if(fsname.empty()){
    filenames=buildFileName(version, prefix, postfix,
			    AS_miraparams[0].getAssemblyParams().as_outfile_stats_featuresummary,
			    ".txt",
			    dirname);
  }else{
    filenames=buildFileName(version, prefix, postfix, fsname, ".txt");
  }

  string filenamec;
  if(fcname.empty()){
    filenamec=buildFileName(version, prefix, postfix,
			    AS_miraparams[0].getAssemblyParams().as_outfile_stats_featuresequences,
			    ".txt",
			    dirname);
  }else{
    filenamec=buildFileName(version, prefix, postfix, fcname, ".txt");
  }

  assout::saveFeatureAnalysis(AS_contigs,AS_readpool,
			      filenamea,filenames,filenamec,
			      deleteoldfile);

  FUNCEND();
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the file name used for the FASTA result file.
//
// Delegates to buildDefaultResultsFileName(), handing over the caller's
// version, prefix, postfix and basename together with the configured
// default name (as_outfile_FASTA of parameter set 0) and the extension
// ".fasta".
string Assembly::getFASTAFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  const string & defaultname=AS_miraparams[0].getAssemblyParams().as_outfile_FASTA;

  return buildDefaultResultsFileName(version, prefix, postfix, basename,
                                     defaultname, ".fasta");
}
// Returns the file name used for the padded FASTA result file.
//
// Delegates to buildDefaultResultsFileName(), handing over the caller's
// version, prefix, postfix and basename together with the configured
// default name (as_outfile_FASTAPADDED of parameter set 0) and the
// extension ".fasta".
string Assembly::getFASTAPaddedFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  const string & defaultname=AS_miraparams[0].getAssemblyParams().as_outfile_FASTAPADDED;

  return buildDefaultResultsFileName(version, prefix, postfix, basename,
                                     defaultname, ".fasta");
}

// Saves the contigs in AS_contigs as FASTA.
//
// Two names are computed from the same arguments: one via
// getFASTAFilename() and one via getFASTAPaddedFilename(). Both are handed
// to assout::saveAsFASTA() together with deleteoldfile.
void Assembly::saveAsFASTA(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  const string plainname=getFASTAFilename(version, prefix, postfix, basename);
  const string paddedname=getFASTAPaddedFilename(version, prefix, postfix, basename);

  assout::saveAsFASTA(AS_contigs, plainname, paddedname, deleteoldfile);
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Saves the strains of the contigs in AS_contigs via
// assout::saveStrainsAsFASTAQ().
//
// The name handed over is built by buildDefaultResultsFileName() from the
// caller's version, prefix, postfix and basename, the configured default
// name as_outfile_FASTAPADDED and an empty extension.
// NOTE(review): the empty extension suggests assout appends its own
// suffixes to this name -- confirm in assout::saveStrainsAsFASTAQ().
//
// The arguments "false,0,0" are passed as fixed values; their meaning is
// defined by assout::saveStrainsAsFASTAQ() and is not visible here.
void Assembly::saveStrainsAsFASTAQUAL(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  FUNCSTART("void Assembly::saveStrainsAsFASTAQUAL(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)");

  string filename(buildDefaultResultsFileName(
		    version, prefix, postfix, basename,
		    AS_miraparams[0].getAssemblyParams().as_outfile_FASTAPADDED,
		    ""));
  assout::saveStrainsAsFASTAQ(AS_contigs,AS_readpool,
			      filename,
			      false,0,0,
			      deleteoldfile);

  FUNCEND();
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the file name used for the TCS result file.
//
// Delegates to buildDefaultResultsFileName(), handing over the caller's
// version, prefix, postfix and basename together with the configured
// default name (as_outfile_TCS of parameter set 0) and the extension
// ".tcs".
string Assembly::getTCSFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  const string & defaultname=AS_miraparams[0].getAssemblyParams().as_outfile_TCS;

  return buildDefaultResultsFileName(version, prefix, postfix, basename,
                                     defaultname, ".tcs");
}
// Saves the contigs in AS_contigs as TCS.
//
// The target name comes from getTCSFilename(); the actual writing is done
// by assout::saveAsTCS(), which also receives deleteoldfile unchanged.
void Assembly::saveAsTCS(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  FUNCSTART("void Assembly::saveAsTCS(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)");

  string filename(getTCSFilename(version, prefix, postfix, basename));
  assout::saveAsTCS(AS_contigs,filename,deleteoldfile);

  FUNCEND();
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the file name used for the CAF result file.
//
// Delegates to buildDefaultResultsFileName(), handing over the caller's
// version, prefix, postfix and basename together with the configured
// default name (as_outfile_CAF of parameter set 0) and the extension
// ".caf".
string Assembly::getCAFFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  const string & defaultname=AS_miraparams[0].getAssemblyParams().as_outfile_CAF;

  return buildDefaultResultsFileName(version, prefix, postfix, basename,
                                     defaultname, ".caf");
}

// Saves the contigs in AS_contigs as CAF.
//
// The target name comes from getCAFFilename(); the actual writing is done
// by assout::saveAsCAF(), which also receives deleteoldfile unchanged.
void Assembly::saveAsCAF(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  const string outname=getCAFFilename(version, prefix, postfix, basename);

  assout::saveAsCAF(AS_contigs, outname, deleteoldfile);
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the file name used for the MAF result file.
//
// Delegates to buildDefaultResultsFileName(), handing over the caller's
// version, prefix, postfix and basename together with the configured
// default name (as_outfile_MAF of parameter set 0) and the extension
// ".maf".
string Assembly::getMAFFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  const string & defaultname=AS_miraparams[0].getAssemblyParams().as_outfile_MAF;

  return buildDefaultResultsFileName(version, prefix, postfix, basename,
                                     defaultname, ".maf");
}

// Saves the contigs in AS_contigs as MAF.
//
// The target name comes from getMAFFilename(); the actual writing is done
// by assout::saveAsMAF(), which also receives deleteoldfile unchanged.
void Assembly::saveAsMAF(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  const string outname=getMAFFilename(version, prefix, postfix, basename);

  assout::saveAsMAF(AS_contigs, outname, deleteoldfile);
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the file name used for the TXT result file.
//
// Delegates to buildDefaultResultsFileName(), handing over the caller's
// version, prefix, postfix and basename together with the configured
// default name (as_outfile_TXT of parameter set 0) and the extension
// ".txt".
string Assembly::getTXTFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  const string & defaultname=AS_miraparams[0].getAssemblyParams().as_outfile_TXT;

  return buildDefaultResultsFileName(version, prefix, postfix, basename,
                                     defaultname, ".txt");
}

// Saves the contigs in AS_contigs as TXT.
//
// The target name comes from getTXTFilename(); the actual writing is done
// by assout::saveAsTXT(), which also receives deleteoldfile unchanged.
void Assembly::saveAsTXT(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  const string outname=getTXTFilename(version, prefix, postfix, basename);

  assout::saveAsTXT(AS_contigs, outname, deleteoldfile);
}



/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the file name used for the ACE result file.
//
// Delegates to buildDefaultResultsFileName(), handing over the caller's
// version, prefix, postfix and basename together with the configured
// default name (as_outfile_ACE of parameter set 0) and the extension
// ".ace".
string Assembly::getACEFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  const string & defaultname=AS_miraparams[0].getAssemblyParams().as_outfile_ACE;

  return buildDefaultResultsFileName(version, prefix, postfix, basename,
                                     defaultname, ".ace");
}
// Saves the contigs in AS_contigs as ACE.
//
// The target name comes from getACEFilename(); the actual writing is done
// by assout::saveAsACE(), which also receives deleteoldfile unchanged.
void Assembly::saveAsACE(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  FUNCSTART("void Assembly::saveAsACE(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)");

  string filename(getACEFilename(version, prefix, postfix, basename));
  assout::saveAsACE(AS_contigs,filename,deleteoldfile);

  FUNCEND();
}



/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the file name used for the wiggle result file.
//
// Delegates to buildDefaultResultsFileName(), handing over the caller's
// version, prefix, postfix and basename together with the configured
// default name (as_outfile_WIGGLE of parameter set 0) and the extension
// ".wig".
string Assembly::getWiggleFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  const string & defaultname=AS_miraparams[0].getAssemblyParams().as_outfile_WIGGLE;

  return buildDefaultResultsFileName(version, prefix, postfix, basename,
                                     defaultname, ".wig");
}
// Saves the contigs in AS_contigs as wiggle file.
//
// The target name comes from getWiggleFilename(); the actual writing is
// done by assout::saveAsWiggle(), which also receives deleteoldfile
// unchanged.
void Assembly::saveAsWiggle(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  FUNCSTART("void Assembly::saveAsWiggle(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)");

  const string outname=getWiggleFilename(version, prefix, postfix, basename);
  assout::saveAsWiggle(AS_contigs, outname, deleteoldfile);

  FUNCEND();
}



/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the name used for the GAP4 direct assembly output.
//
// Delegates to buildDefaultResultsFileName(), handing over the caller's
// version, prefix, postfix and basename together with the configured
// default name (as_outdir_GAP4DA of parameter set 0) and the extension
// ".gap4da". saveAsGAP4DA() treats the result as a sub-directory name.
string Assembly::getGAP4DAFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  const string & defaultname=AS_miraparams[0].getAssemblyParams().as_outdir_GAP4DA;

  return buildDefaultResultsFileName(version, prefix, postfix, basename,
                                     defaultname, ".gap4da");
}

// Saves the contigs in AS_contigs in GAP4 direct assembly format.
//
// The name returned by getGAP4DAFilename() is handed to
// assout::saveAsGAP4DA() as the sub-directory to write to, together with
// deleteoldfile.
void Assembly::saveAsGAP4DA(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  const string targetdir=getGAP4DAFilename(version, prefix, postfix, basename);

  assout::saveAsGAP4DA(AS_contigs, targetdir, deleteoldfile);
}



/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Returns the file name used for the HTML result file.
//
// Delegates to buildDefaultResultsFileName(), handing over the caller's
// version, prefix, postfix and basename together with the configured
// default name (as_outfile_HTML of parameter set 0) and the extension
// ".html".
string Assembly::getHTMLFilename(int32 version, const string & prefix, const string & postfix, const string & basename)
{
  const string & defaultname=AS_miraparams[0].getAssemblyParams().as_outfile_HTML;

  return buildDefaultResultsFileName(version, prefix, postfix, basename,
                                     defaultname, ".html");
}

// Saves the contigs in AS_contigs as HTML.
//
// The target name comes from getHTMLFilename() and is announced on cout.
// Writing is done by assout::dumpContigListAsHTML(), which receives
// AS_contigs, the file name, deleteoldfile and the configured output
// project name (as_projectname_out of parameter set 0).
void Assembly::saveAsHTML(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)
{
  FUNCSTART("void Assembly::saveAsHTML(int32 version, const string & prefix, const string & postfix, const string & basename, bool deleteoldfile)");

  string filename(getHTMLFilename(version, prefix, postfix, basename));

  string projectname(AS_miraparams[0].getAssemblyParams().as_projectname_out);

  cout << "Saving contigs to file: " << filename << endl;

  // the output stream is opened inside assout, not here
  assout::dumpContigListAsHTML(AS_contigs, filename, deleteoldfile, projectname);

  FUNCEND();
}



