/*
 * Written by Bastien Chevreux (BaCh)
 *
 * Copyright (C) 1997-2000 by the German Cancer Research Center (Deutsches
 *   Krebsforschungszentrum, DKFZ Heidelberg) and Bastien Chevreux
 * Copyright (C) 2000 and later by Bastien Chevreux
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the 
 * Free Software Foundation, Inc., 
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 * 
 */

// 	$Id$	

// functions to process reads
// currently in namespace and object assembly


#ifndef lint
// Version-control identification string for this translation unit (holds
// the "$Id$" keyword placeholder). Only compiled in when `lint` is not
// defined; nothing in the visible part of this file reads it.
static char vcid2[] = "$Id$";
#endif /* lint */

#include "boost/unordered_map.hpp" 
#include <boost/regex.hpp> 

#include "assembly.H"
#include <ctype.h>

//#define CEBUG(bla)   {if(CEBUGFLAG) {cout << bla; cout.flush();}}
#define CEBUG(bla)




//#define CEBUG(bla)   {if(id1==2282 && id2==342) {cout << bla; cout.flush();}}
//#define CEBUG(bla)   {cout << bla; cout.flush();}
//#define CEBUG(bla)   {cout << bla;}





/*************************************************************************
 *
 * ugly and slow, but works and is fast enough
 *
 *
 *************************************************************************/
//#define CEBUG(bla)   {cout << bla; cout.flush(); }
// Reads the template info file `tifile` token by token (whitespace
// separated) and resolves every token that does not start with '+' to a
// read of AS_readpool via its name.
//
// NOTE: the per-token handling is still a skeleton. Library markers
// (leading '+') and resolved reads are recognised, but nothing is changed
// in the read pool and nothing is written to the log yet.
//
//  tifile    : path of the template info file
//  logname   : if not empty, this file is opened for appending
//  logprefix : prefix for log lines (not used yet)
//
// A fatal MIRA notification is raised if the log file or the input file
// cannot be opened.
void Assembly::mergeTemplateInfo(const string & tifile, const string & logname, const string & logprefix)
{
  FUNCSTART("void Assembly::mergeTemplateInfo(const string & tifile, const string & logname, const string & logprefix)");

  cout << "Merging template info from " << tifile << ":\n";

  CEBUG("Building hash table ... "); cout.flush();

  // read name -> index in AS_readpool; reads without a name are left out
  typedef boost::unordered_map<std::string, int32> strmap;
  strmap rnmap;

  for(uint32 i=0; i<AS_readpool.size();i++){
    if(!AS_readpool[i].getName().empty()) {
      rnmap[AS_readpool[i].getName()]=i;
    }
  }
  CEBUG("done." << endl);

  ofstream logfout;
  if(!logname.empty()){
    logfout.open(logname.c_str(), ios::out|ios::app);
    if(!logfout){
      MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
    }
  }

  // open at the end to learn the file size for the progress indicator,
  // then rewind
  ifstream tifin;
  tifin.open(tifile.c_str(), ios::in|ios::ate);
  if(!tifin){
    MIRANOTIFY(Notify::FATAL, "File not found: " << tifile);
  }
  streampos tifsize=tifin.tellg();
  tifin.seekg(0, ios::beg);

  ProgressIndicator<streamsize> P (0, tifsize,1000);

  string token;

  // Testing the extraction itself (instead of eof()) makes sure that the
  // last token is also processed when the file does not end in whitespace.
  while(tifin >> token){
    // tellg() only delivers a usable position while the stream is good
    if(tifin.good() && P.delaytrigger()) P.progress(tifin.tellg());

    if(token[0]=='+'){
      // new lib
    }else{
      // existing name
      strmap::iterator rnI=rnmap.find(token);
      if(rnI==rnmap.end()) {
        CEBUG("Not found: " << token << endl);
        continue;
      }
      uint32 foundreadid=rnI->second;

      // work on the pool entry directly, no need for a deep copy
      Read & actread=AS_readpool.getRead(foundreadid);
      if(!actread.hasValidData()) continue;

      // parameters of the sequencing type of this read; not used yet,
      // kept as hook for the merge step that is still to be written
      assembly_parameters const & as_params= AS_miraparams[actread.getSequencingType()].getAssemblyParams();
      (void) as_params;
    }
  }
  P.finishAtOnce();

  tifin.close();

  if(!logname.empty()){
    logfout.close();
  }

  cout << "\nDone." << endl;


  FUNCEND();
  return;
}
//#define CEBUG(bla)

/*************************************************************************
 *
 * ugly and slow, but works and is fast enough
 *
 *
 *************************************************************************/
//#define CEBUG(bla)   {cout << bla; cout.flush(); }
// Merges the vector screening hits of an SSAHA2 or SMALT result file into
// the clipping information of the reads in AS_readpool.
//
// Only lines starting with the alignment keyword ("ALIGNMENT:" for SSAHA2,
// "alignment:" for SMALT) are evaluated, all other lines are skipped.
// For every alignment whose read name is found in the pool (and whose read
// has valid data):
//  - a copy of the read is made and the matched range is overwritten with
//    'X' (restricted by the strict front/end clip parameters if set),
//  - setClipoffsToMaskedChars() is run on that copy,
//  - the resulting masked clips are taken over as LS/RS clips of the pool
//    read whenever they clip more than the existing ones.
//
//  ssahafile : path of the SSAHA2 / SMALT result file
//  issmalt   : true if the file was written by SMALT, false for SSAHA2
//  logname   : if not empty, clip decisions are appended to this file
//  logprefix : prefix for each log line
//
// Raises a fatal MIRA notification if a file cannot be opened, if an
// alignment line cannot be parsed or if read length errors were detected.
void Assembly::mergeSSAHA2SMALTVecScreenData(const string & ssahafile, bool issmalt, const string & logname, const string & logprefix)
{
  FUNCSTART("void Assembly::mergeSSAHA2SMALTVecScreenData(const string & ssahafile, bool issmalt, const string & logname, const string & logprefix)");

  cout << "Merging vector screen data from ";
  if(issmalt){
    cout << "SMALT";
  }else{
    cout << "SSAHA2";
  }
  cout << " results file " << ssahafile << ":\n";

  CEBUG("Building hash table ... "); cout.flush();

  // read name -> index in AS_readpool; reads without a name are left out
  typedef boost::unordered_map<std::string, int32> strmap;
  strmap rnmap;
  strmap::iterator rnI;

  for(uint32 i=0; i<AS_readpool.size();i++){
    if(!AS_readpool[i].getName().empty()) {
      rnmap[AS_readpool[i].getName()]=i;
    }
  }
  CEBUG("done." << endl);

  ofstream logfout;
  if(!logname.empty()){
    logfout.open(logname.c_str(), ios::out|ios::app);
    if(!logfout){
      MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
    }
  }

  // open at the end to learn the file size for the progress indicator,
  // then rewind
  ifstream ssahafin;
  ssahafin.open(ssahafile.c_str(), ios::in|ios::ate);
  if(!ssahafin){
    MIRANOTIFY(Notify::FATAL, "File not found: " << ssahafile);
  }
  streampos sfinsize=ssahafin.tellg();
  ssahafin.seekg(0, ios::beg);

  ProgressIndicator<streamsize> P (0, sfinsize,1000);

  // fields of one alignment line, in the order they appear in the file
  uint32 sd_score=0;
  string sd_readname;
  string sd_vecname;
  uint32 sd_rfrom=0;
  uint32 sd_rto=0;
  uint32 sd_vfrom=0;
  uint32 sd_vto=0;
  string sd_dir;
  uint32 sd_totalmatchsize=0;
  float  sd_percentmatch=0;
  uint32 sd_rlen=0;

  string token;
  string alstring;
  if(issmalt){
    alstring="alignment:";
  }else{
    alstring="ALIGNMENT:";
  }

  bool haserrors=false;

  // Loop on the success of the extraction, not on eof(): eofbit is also
  // set when the last field of the last line is read from a file without
  // trailing newline, and that record must still be processed.
  while(ssahafin >> token){
    // tellg() only delivers a usable position while the stream is good
    if(ssahafin.good() && P.delaytrigger()) P.progress(ssahafin.tellg());

    // not an alignment line: skip the rest of it
    if(token.compare(0,alstring.size(),alstring) != 0) {
      getline(ssahafin,token);
      continue;
    }
    ssahafin >> sd_score >> sd_readname;

    // *sigh* allow for empty names: the vector name is read char by char
    //  up to the next blank or tab, so that two consecutive separators
    //  yield an empty name instead of swallowing the next field
    sd_vecname.clear();
    if(ssahafin.good()){
      char tmp;

      // the single separator right behind the read name
      ssahafin.get(tmp);
      while(ssahafin.get(tmp)){
        if(tmp==' ' || tmp=='\t') break;
        sd_vecname.push_back(tmp);
      }
    }

    ssahafin >> sd_rfrom
             >> sd_rto
             >> sd_vfrom
             >> sd_vto
             >> sd_dir
             >> sd_totalmatchsize
             >> sd_percentmatch
             >> sd_rlen;

    if(ssahafin.fail()){
      // file ended in the middle of a record: nothing more to read
      if(ssahafin.eof()) break;
      // a field could not be converted; going on would never reach eof
      MIRANOTIFY(Notify::FATAL, "Could not parse alignment line for read '" << sd_readname << "' in SSAHA2/SMALT file: " << ssahafile);
    }

    CEBUG(sd_readname << '\t' << sd_rfrom << '\t' << sd_rto << '\n');

    rnI=rnmap.find(sd_readname);
    if(rnI==rnmap.end()) {
      CEBUG("Not found: " << sd_readname << endl);
      continue;
    }
    uint32 foundreadid=rnI->second;
    if(!AS_readpool[foundreadid].hasValidData()) continue;

    // masking is done on a copy, the pool read only gets new clip values
    Read actread(AS_readpool[foundreadid]);
    assembly_parameters const & as_params= AS_miraparams[actread.getSequencingType()].getAssemblyParams();

    // NOTE(review): as written, a length mismatch is only reported for
    //  Solexa reads (which are allowed to be one base longer than given
    //  in the file); mismatches of other sequencing types pass silently.
    //  Confirm that this is intended.
    if(actread.getLenSeq() != sd_rlen){
      if(actread.isSequencingType(Read::SEQTYPE_SOLEXA) && actread.getLenSeq() != sd_rlen+1) {
        cout << "\nError! The length of read " << actread.getName()
             << " (" << actread.getLenSeq()
             << ") does not match the length given in the SSAHA2/SMALT file ("
             << sd_rlen << ")\nSSAHA2 line:"
             << ' ' << token
             << ' ' << sd_score
             << ' ' << sd_readname
             << ' ' << sd_vecname
             << ' ' << sd_rfrom
             << ' ' << sd_rto
             << ' ' << sd_vfrom
             << ' ' << sd_vto
             << ' ' << sd_dir
             << ' ' << sd_totalmatchsize
             << ' ' << sd_percentmatch
             << ' ' << sd_rlen << endl;
        haserrors=true;
      }
    }

    CEBUG("SSAHA2/SMALT line:"
          << ' ' << token
          << ' ' << sd_score
          << " r: " << sd_readname
          << " v: " << sd_vecname
          << " # " << sd_rfrom
          << ' ' << sd_rto
          << ' ' << sd_vfrom
          << ' ' << sd_vto
          << ' ' << sd_dir
          << ' ' << sd_totalmatchsize
          << ' ' << sd_percentmatch
          << ' ' << sd_rlen << endl);

    // in SSAHA2 output, from rfrom may be > rto for reverse matches
    // swap in these cases
    if(sd_rfrom > sd_rto) swap(sd_rfrom,sd_rto);

    // file positions are 1-based, sequence positions 0-based
    for(uint32 i=sd_rfrom-1; i<sd_rto; i++){
      bool domask=false;
      if(as_params.as_clip_ssahamerge_strictfrontclip >0
         || as_params.as_clip_ssahamerge_strictendclip >0){
        // strict mode: only mask hits inside the configured front / end
        //  window of the read
        if(as_params.as_clip_ssahamerge_strictfrontclip >0
           && static_cast<int32>(i)<as_params.as_clip_ssahamerge_strictfrontclip) domask=true;
        if(as_params.as_clip_ssahamerge_strictendclip>0
           && i>=actread.getLenSeq()-as_params.as_clip_ssahamerge_strictendclip) domask=true;
      }else{
        domask=true;
      }
      if(domask) actread.changeBaseInSequence('X',0,i);
    }

    actread.setClipoffsToMaskedChars(as_params.as_clip_ssahamerge_gapsize,
                                     as_params.as_clip_ssahamerge_maxfrontgap,
                                     as_params.as_clip_ssahamerge_maxendgap,
                                     false);

    // left side: take over the masked clip only if it clips more
    if(actread.getLMClipoff() > AS_readpool[foundreadid].getLSClipoff()){
      AS_readpool[foundreadid].setLSClipoff(actread.getLMClipoff());
      CEBUG("clippyl\n");
      if(!logname.empty()){
        logfout << logprefix << " SSAHA2/SMALT clip left "
                << actread.getName()
                << " to: "
                << AS_readpool[foundreadid].getLSClipoff() << '\n';
      }
    }else{
      if(!logname.empty()){
        logfout << logprefix << "unchanged SSAHA2/SMALT clip left "
                << actread.getName()
                << " stays: "
                << AS_readpool[foundreadid].getLSClipoff() << '\n';
      }
    }
    // right side: same rule, mirrored
    if(actread.getRMClipoff() < AS_readpool[foundreadid].getRSClipoff()){
      AS_readpool[foundreadid].setRSClipoff(actread.getRMClipoff());
      CEBUG("clippyr\n");
      if(!logname.empty()){
        logfout << logprefix << " SSAHA2/SMALT clip right "
                << actread.getName()
                << " to: "
                << AS_readpool[foundreadid].getRSClipoff() << '\n';
      }
    }else{
      if(!logname.empty()){
        logfout << logprefix << "unchanged SSAHA2/SMALT clip right "
                << actread.getName()
                << " stays: "
                << AS_readpool[foundreadid].getRSClipoff() << '\n';
      }
    }
  }
  P.finishAtOnce();

  ssahafin.close();

  if(!logname.empty()){
    logfout.close();
  }

  cout << "\nDone merging SSAHA2 vector screen data." << endl;

  if(haserrors){
    MIRANOTIFY(Notify::FATAL,"There were errors in the SSAHA2 data, most probably the sequences used to screen are different from\nthe ones loaded now (see log above). Sorry, MIRA has to abort, please check your data.");
  }

  FUNCEND();
  return;
}
//#define CEBUG(bla)





/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Runs the Skim hash frequency analysis on AS_readpool, optionally
// "buntifies" the reads (see buntifyReadsByHashFreq()) and optionally
// writes a list of repetitive stretches found in the reads.
//
//  version, prefix, postfix, logname :
//      only used to build the name of the repeat info file, and only if
//      logname is not empty; otherwise the default info file name for
//      as_outfile_stats_readrepeats is used.
//
// The repeat info file is written only if sk_repeatlevel_in_infofile is
// non-zero. The level is clamped to [5,8]; a tag is listed if
//      HAF5 : level == 5
//      HAF6 : level <= 6
//      HAF7 : level <= 7
//      MNRr : level <= 8
// and if it covers at least basesperhash bases. Each line of the file is
//      <read name> TAB <tag identifier> TAB <tagged bases, upper case>
//
// If AS_logflag_dumphashanalysis is set, all reads are additionally dumped
// as text into elog.dp.hashanalysis.lst in the tmp directory.
void Assembly::performHashAnalysis(int32 version, const string prefix, const string postfix, const string logname)
{
  FUNCSTART("void Assembly::performHashAnalysis()");

  assembly_parameters const & as_fixparams= AS_miraparams[0].getAssemblyParams();
  skim_parameters const & skim_params= AS_miraparams[0].getSkimParams();

  uint8 basesperhash=skim_params.sk_basesperhash;
  if(sizeof(uint64) < 8 && basesperhash > 15) basesperhash=15;

  // nasty repeat masking is requested at most once: the flag is reset
  // as soon as it has been honoured
  uint32 nastyrepeatratio=0;
  if(AS_needsskimfornastyrepeats && skim_params.sk_masknastyrepeats){
    AS_needsskimfornastyrepeats=false;
    nastyrepeatratio=skim_params.sk_nastyrepeatratio;
  }

  {
    // own scope so that the Skim object is gone before the info files
    // are written
    Skim s3;

    s3.setHashFrequencyRatios(skim_params.sk_freqest_minnormal,
                              skim_params.sk_freqest_maxnormal,
                              skim_params.sk_freqest_repeat,
                              skim_params.sk_freqest_heavyrepeat,
                              skim_params.sk_freqest_crazyrepeat,
                              skim_params.sk_nastyrepeatratio);

    s3.analyseHashes(AS_miraparams[0].getDirectoryParams().dir_tmp,
                     AS_readpool,
                     true,
                     false,
                     false,
                     true,
                     1,
                     basesperhash,
                     1,
                     nastyrepeatratio>0);

    if(as_fixparams.as_buntify_reads){
      buntifyReadsByHashFreq(basesperhash, AS_readpool);
    }
  }

  if(skim_params.sk_repeatlevel_in_infofile){
    string filename;

    if(logname.size()){
      filename=buildFileName(version, prefix, postfix, logname, "");
    }else{
      filename=buildDefaultInfoFileName(-1, "", "",
                                        "",
                                        as_fixparams.as_outfile_stats_readrepeats,
                                        ".lst",
                                        true);
    }

    cout << "Writing read repeat info to: " << filename << " ... ";
    cout.flush();

    uint32 howmanys=0;   // number of reads with at least one listed stretch
    uint32 howmanyt=0;   // number of listed stretches
    uint32 repanalysislevel=skim_params.sk_repeatlevel_in_infofile;
    if(repanalysislevel<5) repanalysislevel=5;
    if(repanalysislevel>8) repanalysislevel=8;

    ofstream fout;
    fout.open(filename.c_str(), ios::out|ios::trunc);
    if(!fout){
      MIRANOTIFY(Notify::FATAL, "Could not open file for writing: " << filename);
    }

    for(uint32 rpi=0; rpi<AS_readpool.size(); rpi++){
      Read & actread= AS_readpool.getRead(rpi);
      if(!actread.hasValidData()
         || !actread.isUsedInAssembly()) continue;

      // Cheap pre-check: does the read carry any tag type that is listed
      // at this level? Each type is tested on its own so that a tag which
      // is not listed (e.g. HAF5 at level 6) cannot hide a listed one
      // (e.g. HAF6) on the same read.
      bool mustshow=false;
      if(repanalysislevel==5 && actread.hasTag(Read::REA_tagentry_idHAF5,-1)) {
        mustshow=true;
      }else if(repanalysislevel<=6 && actread.hasTag(Read::REA_tagentry_idHAF6,-1)) {
        mustshow=true;
      }else if(repanalysislevel<=7 && actread.hasTag(Read::REA_tagentry_idHAF7,-1)) {
        mustshow=true;
      }else if(repanalysislevel<=8 && actread.hasTag(Read::REA_tagentry_idMNRr,-1)) {
        mustshow=true;
      }
      if(!mustshow) continue;

      bool countedthisseq=false;
      for(uint32 tn=0; tn<actread.getNumOfTags(); tn++){
        const multitag_t & acttag=actread.getTag(tn);
        // stretches shorter than one hash are not listed
        if(acttag.to-acttag.from +1 < basesperhash) continue;

        mustshow=false;
        if(acttag.identifier==Read::REA_tagentry_idHAF5) {
          if(repanalysislevel==5) mustshow=true;
        }else if(acttag.identifier==Read::REA_tagentry_idHAF6) {
          if(repanalysislevel<=6) mustshow=true;
        }else if(acttag.identifier==Read::REA_tagentry_idHAF7) {
          if(repanalysislevel<=7) mustshow=true;
        }else if(acttag.identifier==Read::REA_tagentry_idMNRr) {
          if(repanalysislevel<=8) mustshow=true;
        }
        if(!mustshow) continue;

        // count every read only once, regardless of its number of tags
        if(!countedthisseq){
          countedthisseq=true;
          ++howmanys;
        }
        ++howmanyt;
        fout << actread.getName() << '\t'
             << acttag.getIdentifierStr() << '\t';
        for(uint32 readpos=acttag.from; readpos<=acttag.to; readpos++){
          fout << static_cast<char>(toupper(actread.getBaseInSequence(readpos)));
        }
        fout << '\n';
      }
    }

    cout << howmanys << " sequences with " << howmanyt << " masked stretches." << endl;
  }

  if(AS_logflag_dumphashanalysis){
    string logfilename=AS_miraparams[0].getDirectoryParams().dir_tmp+"/elog.dp.hashanalysis.lst";

    cout << "elog hashan: " << logfilename << endl;
    ofstream logfout;
    logfout.open(logfilename.c_str(), ios::out|ios::trunc);

    for(uint32 rpi=0; rpi<AS_readpool.size(); rpi++){
      Read::setCoutType(Read::AS_TEXT);
      logfout << AS_readpool[rpi];
    }
  }

  FUNCEND();
  return;
}









/*************************************************************************
 *
 * expects reads to have baseflags set  (by performHashAnalysis())
 *
 *
 *************************************************************************/

//#define CEBUG(bla)   {cout << bla; cout.flush();}

// Proposes and applies quality clips ("cutbacks") on both ends of every
// read, based on a fresh hash frequency analysis of AS_readpool.
//
// Left end : starting at the left clip, positions are skipped until one
//            has a forward or reverse hash frequency > 1.
// Right end: starting at the right clip and walking backwards, positions
//            are skipped until one is "confirmed"; what counts as confirmed
//            depends on the sequencing type (see switch below).
// For Solexa reads with as_clip_pec_sxaggcxg set, the right clip is pulled
// back further to the first "ggc.g" motif found in the 15 bases before the
// new right clip.
//
// Backbone and rail reads, reads without valid data and reads without base
// hash statistics are not touched.
//
// Afterwards clean ends are enforced in the align parameters of all
// sequencing types and clipPolyBaseAtEnd() is called.
//
//  logname   : if not empty, clip decisions are appended to this file
//  logprefix : prefix for each log line
//
// Returns the total number of bases clipped away here.
uint64 Assembly::performNewProposedCutbackClips(const string & logname, const string & logprefix)
{
  FUNCSTART("void Assembly::performProposedCutbackClips(const string & logname, const string & logprefix)");

#if CPP_READ_SEQTYPE_END != 6
#error "This code is made for 6 sequencing types, adapt!"
#endif

  cout << "Hash analysis for proposed cutbacks:";

  skim_parameters const & skim_params= AS_miraparams[0].getSkimParams();
  assembly_parameters const & as_fixparams= AS_miraparams[0].getAssemblyParams();

  {
    // the Skim object only lives for the duration of the analysis
    uint8 hashlen=as_fixparams.as_clip_pec_basesperhash;
    if(sizeof(uint64) < 8 && hashlen > 15) hashlen=15;

    Skim hashskim;

    hashskim.setHashFrequencyRatios(skim_params.sk_freqest_minnormal,
                                    skim_params.sk_freqest_maxnormal,
                                    skim_params.sk_freqest_repeat,
                                    skim_params.sk_freqest_heavyrepeat,
                                    skim_params.sk_freqest_crazyrepeat,
                                    skim_params.sk_nastyrepeatratio);

    hashskim.analyseHashes(AS_miraparams[0].getDirectoryParams().dir_tmp,
                           AS_readpool,
                           true,
                           false,
                           false,
                           true,
                           1,
                           hashlen,
                           1,
                           false);
  }

  ofstream logfout;
  if(!logname.empty()){
    logfout.open(logname.c_str(), ios::out|ios::app);
    if(!logfout){
      MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
    }
  }

  if(as_fixparams.as_dateoutput) dateStamp(cout);
  cout << '\n';

  // first three bases of the Solexa "ggc.g" motif searched for below
  const string ggcmotif="ggc";

  cout << "Looking for proposed cutbacks ... "; cout.flush();

  uint32 numleftclips=0;
  uint32 numrightclips=0;
  uint32 numkilled=0;
  uint64 numbasesclipped=0;

  for(uint32 readid=0; readid<AS_readpool.size(); ++readid){
    Read & rd=AS_readpool.getRead(readid);

    if(!rd.hasValidData()) continue;
    if(!rd.hasBaseHashStats()) continue;
    if(rd.isBackbone() || rd.isRail()) continue;

    bool clippedsomething=false;
    const uint32 lenbefore=rd.getLenClippedSeq();

    // ---------------- left end ----------------
    {
      int32 newleft=rd.getLeftClipoff();
      vector<Read::bposhashstat_t>::const_iterator statI=rd.getBPosHashStats().begin();
      advance(statI,newleft);

      switch(rd.getSequencingType()){
      case Read::SEQTYPE_SANGER :
      case Read::SEQTYPE_454GS20 :
      case Read::SEQTYPE_IONTORRENT :
      case Read::SEQTYPE_PACBIO :
      case Read::SEQTYPE_SOLEXA :{
        // at the moment same for all: advance while neither direction
        //  has a hash frequency above 1
        while(newleft<static_cast<int32>(rd.getLenSeq())
              && statI->fwd.getFrequency() <= 1
              && statI->rev.getFrequency() <= 1){
          ++newleft;
          ++statI;
        }
        break;
      }
      default : {
        MIRANOTIFY(Notify::INTERNAL, "Don't know how to handle sequencing type '" << rd.getNameOfSequencingType(rd.getSequencingType()) << "'.\n");
      }
      }

      if(newleft != rd.getLeftClipoff()){
        clippedsomething=true;

        // keep the clip inside the sequence
        if(newleft>0 && newleft>=rd.getLenSeq()) newleft=rd.getLenSeq()-1;
        CEBUG("pcb l: " << rd.getName() << " " << rd.getLeftClipoff()
              << " " << newleft << endl);
        // old value is logged before, new value after the clip is set
        logfout << logprefix << " left "
                << rd.getName() << '\t'
                << rd.getLeftClipoff() << " -> ";
        rd.setLQClipoff(newleft);
        numleftclips++;
        logfout << rd.getLeftClipoff() << '\n';
      }
    }

    // ---------------- right end ----------------
    {
      int32 newright=rd.getRightClipoff();
      vector<Read::bposhashstat_t>::const_iterator statI=rd.getBPosHashStats().begin();
      advance(statI,newright);

      switch(rd.getSequencingType()){
      case Read::SEQTYPE_SANGER :
      case Read::SEQTYPE_IONTORRENT :
      case Read::SEQTYPE_PACBIO :{
        // frequency based; for PacBio this is a placeholder (same as
        //  Sanger) until checked with real data
        while(newright>0){
          --statI;
          if(statI->fwd.getFrequency() > 1
             || statI->rev.getFrequency() > 1) break;
          --newright;
        }
        break;
      }
      case Read::SEQTYPE_454GS20 :
      case Read::SEQTYPE_SOLEXA :{
        // flag based: stop at the first position confirmed in both
        //  directions, by several sequencing types or seen at a low
        //  position
        while(newright>0){
          --statI;
          if(statI->fwd.hasConfirmedFwdRev()
             || statI->rev.hasConfirmedFwdRev()
             || statI->fwd.hasConfirmedMultipleSeqType()
             || statI->rev.hasConfirmedMultipleSeqType()
             || statI->fwd.hasSeenAtLowPos()
             || statI->rev.hasSeenAtLowPos()) break;
          --newright;
        }
        break;
      }
      default : {
        MIRANOTIFY(Notify::INTERNAL, "Don't know how to handle sequencing type '" << rd.getNameOfSequencingType(rd.getSequencingType()) << "'.\n");
      }
      }

      if(newright != rd.getRightClipoff()){
        clippedsomething=true;

        CEBUG("pcb r: " << rd.getName() << " " << rd.getRightClipoff()
              << " " << newright << endl);
        logfout << logprefix << " right "
                << rd.getName() << '\t'
                << rd.getRightClipoff() << " -> ";
        rd.setRQClipoff(newright);
        numrightclips++;
        logfout << rd.getRightClipoff() << '\n';

        // special handling of Solexa GGC.G error
        // from point of right clip, 15 bases backwards:
        //  search for first ggc.g and clip there
        if(rd.getSequencingType()==Read::SEQTYPE_SOLEXA
           && AS_miraparams[0].getAssemblyParams().as_clip_pec_sxaggcxg
           && rd.getLenClippedSeq() >=15){
          string lowseq=rd.getSeqAsChar();
          boost::to_lower(lowseq);
          int64 searchfrom=rd.getRightClipoff()-15;
          if(searchfrom<0) searchfrom=0;

          size_t hit=lowseq.find(ggcmotif,searchfrom);
          while(hit!=string::npos){
            if(hit < rd.getRightClipoff()
               && hit+4<rd.getRightClipoff()
               && lowseq[hit+4]=='g'){
              logfout << logprefix << "possible Solexa GGC.G problem "
                      << rd.getName() << '\t' << rd.getRQClipoff() << " -> ";
              rd.setRQClipoff(static_cast<int32>(hit+4));
              logfout << rd.getRQClipoff() << '\n';
              break;   // only the first motif counts
            }
            searchfrom=hit+1;
            hit=lowseq.find(ggcmotif,searchfrom);
          }
        }
      }
    }

    if(clippedsomething){
      numbasesclipped+=lenbefore-rd.getLenClippedSeq();
      if(lenbefore
         && (rd.getLenClippedSeq() < AS_miraparams[rd.getSequencingType()].getAssemblyParams().as_minimum_readlength )){
        numkilled++;
        logfout << logprefix << " "
                << rd.getName() << " killed, remaining length ("
                << rd.getLenClippedSeq() << ")\n";
      }
    }
  }

  logfout.close();

  cout << "done.\nPerformed clips:"
       << "\n\tNum reads cliped left: " << numleftclips
       << "\n\tNum reads cliped right: " << numrightclips
       << "\n\tNum reads completely killed: " << numkilled
       << "\n\tTotal bases clipped         : " << numbasesclipped
       << "\n\n";

  // now, set the align parameters to enforce clean ends
  for(uint32 st=0; st<Read::SEQTYPE_END; st++){
    align_parameters & alpar=const_cast<align_parameters &>(AS_miraparams[st].getAlignParams());
    alpar.ads_enforce_clean_ends=true;
    alpar.ads_clean_end_distance=AS_miraparams[0].getSkimParams().sk_basesperhash;
  }


  clipPolyBaseAtEnd(logname,logprefix);

  FUNCEND();

  return numbasesclipped;
}
//#define CEBUG(bla)





/*************************************************************************
 *
 * expects reads to have baseflags set
 *
 *
 *************************************************************************/

//#define CEBUG(bla)   {cout << bla; cout.flush();}

// Temporary record used by Assembly::buntifyReadsByHashFreq(): one run of
// consecutive read positions that share the same hash frequency class.
struct tmpbhentry_t{
  uint32 from;   // first position of the run (inclusive)
  uint32 to;     // last position of the run (inclusive)
  uint8 freq;    // frequency class of the run; used as index into the
                 //  table of HAF tag identifiers when the tag is set
};

// Replaces the hash frequency tags (HAF0 ... HAF7) of every read in `rp`
// that has valid data and base hash statistics.
//
// Per read, a frequency class is computed for each position with the help
// of buntifyHelper() (classes 2 to 7, higher classes overwrite lower
// ones), consecutive positions of the same class are merged into runs and
// every run with a class > 0 gets the HAF tag of that class.
//
//  basesperhash : number of bases one hash spans
//  rp           : read pool to work on
void Assembly::buntifyReadsByHashFreq(uint8 basesperhash, ReadPool & rp)
{
  FUNCSTART("void Assembly::buntifyReadsByHashFreq()");

  cout << "Buntifying reads";
  if(rp.size()>500000) cout << " (this may take a while)";
  cout << " ... "; cout.flush();

  // tag identifier for each frequency class, indexed by the class
  vector<multitag_t::mte_id_t> haftagids;
  haftagids.reserve(8);
  haftagids.push_back(Read::REA_tagentry_idHAF0);
  haftagids.push_back(Read::REA_tagentry_idHAF1);
  haftagids.push_back(Read::REA_tagentry_idHAF2);
  haftagids.push_back(Read::REA_tagentry_idHAF3);
  haftagids.push_back(Read::REA_tagentry_idHAF4);
  haftagids.push_back(Read::REA_tagentry_idHAF5);
  haftagids.push_back(Read::REA_tagentry_idHAF6);
  haftagids.push_back(Read::REA_tagentry_idHAF7);

  // frequency class per read position, reused for all reads
  vector<uint8> posfreq;
  posfreq.reserve(10000);

  for(uint32 readid=0; readid<rp.size(); ++readid){
    Read & rd=rp.getRead(readid);

    if(!rd.hasValidData()) continue;
    if(!rd.hasBaseHashStats()) continue;

    // remove old hash frequence tags
    for(uint32 ni=0; ni<haftagids.size(); ++ni){
      rd.deleteTag(haftagids[ni]);
    }

    posfreq.clear();
    posfreq.resize(rd.getLenSeq(),0);

    const vector<Read::bposhashstat_t>::const_iterator statbegin=rd.getBPosHashStats().begin();
    const vector<Read::bposhashstat_t>::const_iterator statend=rd.getBPosHashStats().end();

    // ascending order, so that the highest class wins on overlaps
    for(uint8 freqclass=2; freqclass<=7; ++freqclass){
      buntifyHelper(freqclass, basesperhash,
                    statbegin, statend,
                    posfreq.begin(), posfreq.end());
    }

    // collect runs of equal class; class 0 means "no tag"
    vector<tmpbhentry_t> runs;
    runs.reserve(20);

    const uint32 seqlen=rd.getLenSeq();
    uint32 runstart=0;
    while(runstart<seqlen){
      const uint8 runfreq=posfreq[runstart];
      uint32 runend=runstart+1;   // one behind the last position of the run
      while(runend<seqlen && posfreq[runend]==runfreq) ++runend;

      if(runfreq>0){
        tmpbhentry_t entry;
        entry.from=runstart;
        entry.to=runend-1;
        entry.freq=runfreq;
        runs.push_back(entry);
      }
      runstart=runend;
    }

    // BaCh 03.06.2011: tags are set for all runs, also for first / last
    //  runs shorter than basesperhash. Makes CAF/MAF bigger, but else?
    for(uint32 ri=0; ri<runs.size(); ++ri){
      const tmpbhentry_t & run=runs[ri];
      rd.addTag(run.from, run.to, haftagids[run.freq], "");
    }
  }

  cout << "done." << endl;

  FUNCEND();

}
//#define CEBUG(bla)


// Walks the base hash statistics [bhsI,bhsE) and the frequency array
// starting at tfI in lockstep. Wherever the forward hash frequency equals
// `allowedfreq` (and is not 0), that frequency is written to the next
// `basesperhash` entries of the frequency array, but never beyond tfE.
void Assembly::buntifyHelper(uint8 allowedfreq, uint8 basesperhash, vector<Read::bposhashstat_t>::const_iterator bhsI, vector<Read::bposhashstat_t>::const_iterator bhsE, vector<uint8>::iterator tfI, vector<uint8>::iterator tfE)
{
  while(bhsI!=bhsE){
    const uint8 fwdfreq=bhsI->fwd.getFrequency();
    if(fwdfreq>0 && fwdfreq==allowedfreq){
      // mark the stretch covered by the hash starting here
      vector<uint8>::iterator fillI=tfI;
      uint32 filled=0;
      while(filled<basesperhash && fillI!=tfE){
        *fillI=fwdfreq;
        ++fillI;
        ++filled;
      }
    }
    bhsI++;
    tfI++;
  }
}


/*************************************************************************
 *
 * 
 *
 *************************************************************************/

// Applies the chimera / junk cut proposals in chuntleftcut and
// chuntrightcut to the reads of AS_readpool (backbone and rail reads and
// reads without valid data are left alone).
//
// Per read (index = read id):
//  - if as_clip_skimchimeradetection is set and one of the two values is
//    > 0, the read is treated as possible chimera and its LS/RS clips are
//    set so that only the fragment [left,right] (relative to the old left
//    clip) remains;
//  - otherwise, if one of the two values is < 0, the read is treated as
//    possible junk. This is always logged; the clips are only changed (by
//    the absolute values, on both sides) if as_clip_skimjunkdetection is
//    set.
//
//  logname        : if not empty, decisions are appended to this file
//  logprefix      : prefix for each log line
//  chuntleftcut,
//  chuntrightcut  : cut proposals, one entry per read of AS_readpool
//  chimeracutflag : if passed in non-empty, it is resized to the number
//                   of reads and set to true for every read whose clips
//                   were changed here; an empty vector stays untouched
void Assembly::cutBackPossibleChimeras(const string & logname, const string & logprefix, const vector<int32> & chuntleftcut, const vector<int32> & chuntrightcut, vector<bool> & chimeracutflag)
{
  FUNCSTART("void Assembly::cutBackPossibleChimeras(const string & logname, const string & logprefix, const vector<int32> & chuntleftcut, const vector<int32> & chuntrightcut)");

  // all three containers are indexed by read id below, so a mismatch of
  // any of the sizes is an error (hence ||, not &&)
  BUGIFTHROW(chuntleftcut.size()!=chuntrightcut.size() || chuntleftcut.size() != AS_readpool.size(),"Arrays mismatch? chuntleftcut.size()!=chuntrightcut.size || chuntleftcut.size() != AS_readpool.size()");

  ofstream logfout;
  if(!logname.empty()){
    logfout.open(logname.c_str(), ios::out|ios::app);
    if(!logfout){
      MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
    }
  }

  cout << "Cutting back possible chimeras ... "; cout.flush();

  // a non-empty vector is the caller's way to ask for the flags
  const bool wantflags=!chimeracutflag.empty();
  if(wantflags){
    chimeracutflag.clear();
    chimeracutflag.resize(chuntleftcut.size(),false);
  }

  assembly_parameters const & as_fixparams= AS_miraparams[0].getAssemblyParams();

  for(uint32 actreadid=0;actreadid<chuntleftcut.size();actreadid++){
    Read & actread=AS_readpool.getRead(actreadid);
    if(actread.hasValidData()
       && !(actread.isBackbone()
            || actread.isRail())){
      bool didcut=false;
      if(as_fixparams.as_clip_skimchimeradetection
         && (chuntleftcut[actreadid]>0
             || chuntrightcut[actreadid]>0)){
        logfout << logprefix << " possible chimera: " << actread.getName()
                << "\t["
                << actread.getLeftClipoff()
                << ","
                << actread.getRightClipoff()
                << "[ using cfrag " << chuntleftcut[actreadid] << ":" << chuntrightcut[actreadid]
                << " cut back to ";

        // order matters: the right clip is computed from the left clip
        //  that has just been moved
        actread.setLSClipoff(actread.getLeftClipoff()+chuntleftcut[actreadid]);
        actread.setRSClipoff(actread.getLeftClipoff()+(chuntrightcut[actreadid]-chuntleftcut[actreadid])+1);
        didcut=true;
        if(wantflags){
          chimeracutflag[actreadid]=true;
        }

        logfout << '['
                << actread.getLeftClipoff()
                << ","
                << actread.getRightClipoff()
                << "[\n";
      }

      if(!didcut
         && (chuntleftcut[actreadid]<0
             || chuntrightcut[actreadid]<0)){
        if(as_fixparams.as_clip_skimjunkdetection){
          logfout << logprefix << " removed possible junk: " ;
        }else{
          logfout << logprefix << " untouched possible junk: " ;
        }
        logfout << actread.getName()
                << "\t["
                << -chuntleftcut[actreadid]
                << ","
                << -chuntrightcut[actreadid]
                << '\n';
        if(as_fixparams.as_clip_skimjunkdetection){
          // junk proposals are stored negated: subtracting moves the left
          //  clip to the right, adding moves the right clip to the left
          actread.setLSClipoff(actread.getLeftClipoff()-chuntleftcut[actreadid]);
          actread.setRSClipoff(actread.getRightClipoff()+chuntrightcut[actreadid]);
          if(wantflags){
            chimeracutflag[actreadid]=true;
          }
        }
      }
    }
  }

  cout << "done.\n";

  FUNCEND();
}



/*************************************************************************
 *
 * 
 *
 *************************************************************************/

void Assembly::clipBadSolexaEnds(const string & logname, const string & logprefix)
{
  FUNCSTART("void Assembly::clipBadSolexaEnds(const string & logname, const string & logprefix)");

  ofstream logfout;
  if(!logname.empty()){
    logfout.open(logname.c_str(), ios::out|ios::app);
    if(!logfout){
      MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
    }
  }
  
  // invalidate all Solexa reads that have a stretch of 20 A
  //  or if has stretch >= 12 and non-A bases < 20%
  // N in-between do not reset the counter
  // invalidate by setting left seq vec to length of read
  for(uint32 i=0;i<AS_readpool.size();i++){
    Read & actread=AS_readpool.getRead(i);
    if(actread.hasValidData()
       && actread.isSequencingType(Read::SEQTYPE_SOLEXA)
       && !(actread.isBackbone() 
	    || actread.isRail())){

      int32 runindex=actread.getLeftClipoff();
      char actbase=' ';
      uint32 bcount=0;

      uint32 arun=0;
      uint32 maxarun=0;
      uint32 nona=0;

      uint32 trun=0;
      uint32 maxtrun=0;
      uint32 nont=0;

      for(; runindex<actread.getRightClipoff(); runindex++) {
	actbase=static_cast<char>(toupper(actread.getBaseInSequence(runindex)));
	if(actbase!='N'){
	  bcount++;
	  if(actbase=='A'){
	    arun++;
	    if(arun>maxarun) maxarun=arun;
	    nont++;
	    trun=0;
	  }else if(actbase=='T'){
	    trun++;
	    if(trun>maxtrun) maxtrun=trun;
	    nona++;
	    arun=0;
	  }else{
	    nona++;
	    nont++;
	    arun=0;
	    trun=0;
	  }
	}
      }
      if(maxarun>=20){
	actread.setLSClipoff(actread.getLenSeq());
	logfout << logprefix << " bad solexa end: A hard " 
		<< actread.getName()
		<< '\n';
      }else if(maxarun>=12){
	uint32 ratio= static_cast<uint32>((static_cast<double>(100.0)/bcount)*nona);
	if(ratio<20) {
	  actread.setLSClipoff(actread.getLenSeq());
	  logfout << logprefix << " bad solexa end: A soft " 
		  << actread.getName() 
		  << '\n';
	}
      }

      if(maxtrun>=20){
	actread.setLSClipoff(actread.getLenSeq());
	logfout << logprefix << " bad solexa end: T (hard) " 
		<< actread.getName() 
		<< '\n';
      }else if(maxtrun>=12){
	uint32 ratio= static_cast<uint32>((static_cast<double>(100.0)/bcount)*nont);
	if(ratio<20) {
	  actread.setLSClipoff(actread.getLenSeq());
	  logfout << logprefix << " bad solexa end: T (soft) " 
		  << actread.getName() 
		  << '\n';
	}
      }
    }
  }

  FUNCEND();
}


/*************************************************************************
 *
 * clip all lowercase at the end of reads 
 *
 *************************************************************************/

void Assembly::performLowerCaseClipping(const string & logname, const string & logprefix)
{
  FUNCSTART("void Assembly::performLowerCaseClipping(const string & logname, const string & logprefix)");

  ofstream logfout;
  if(!logname.empty()){
    logfout.open(logname.c_str(), ios::out|ios::app);
    if(!logfout){
      MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
    }
  }

  uint64 totallen=0;
  uint64 lowercaselen=0;
  for(uint32 i=0;i<AS_readpool.size();i++){
    Read & actread=AS_readpool.getRead(i);
    if(actread.hasValidData()
       && AS_miraparams[actread.getSequencingType()].getAssemblyParams().as_clip_lowercase
       && !(actread.isBackbone() 
	    || actread.isRail())){
      totallen+=actread.getLenClippedSeq();
      int32 runindex=actread.getLeftClipoff();
      for(; runindex<actread.getRightClipoff(); ++runindex){
	if(islower(actread.getBaseInSequence(runindex))) lowercaselen++;
      }
    }
  }

  if(totallen==lowercaselen) {
    cout << "Lowercase clip: all sequences to be clipped are lowercase?! Failsafe: no clipping performed.\n";
    return;
  }

  for(uint32 i=0;i<AS_readpool.size();i++){
    Read & actread=AS_readpool.getRead(i);
    if(actread.hasValidData()
       && AS_miraparams[actread.getSequencingType()].getAssemblyParams().as_clip_lowercase
       && !(actread.isBackbone() 
	    || actread.isRail())){

      //Read::setCoutType(Read::AS_CLIPPEDFASTA);
      //cout << actread;

      int32 runindex=actread.getLeftClipoff();
      for(; runindex<actread.getRightClipoff(); ++runindex){
	char ab=actread.getBaseInSequence(runindex);
	if(!islower(ab)
	   && ab != 'N'
	   && ab != 'X') break;
      }
      if(runindex!=actread.getLeftClipoff()) {
	actread.setLSClipoff(runindex);
	logfout << logprefix << " changed left (lowercase) " 
		<< actread.getName() << " to " << actread.getLeftClipoff() << '\n';
      }

      runindex=actread.getRightClipoff()-1;
      for(; runindex>=actread.getLeftClipoff() && islower(actread.getBaseInSequence(runindex)); --runindex) ;
      if(runindex!=actread.getRightClipoff()-1) {
	actread.setRSClipoff(runindex+1);
	logfout << logprefix << " changed right (lowercase) " 
		<< actread.getName() <<  " to " << actread.getRightClipoff() << '\n';
      }
      //cout << actread;
    }
  }

  logfout.close();

  FUNCEND();
}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

void Assembly::performQualAndMaskClips(const string & logname, const string & logprefix)
{
  // The next line is here to fool the compiler to also
  // include the CVS string into the binary
  // line has no effect and will put optimised away anyway
  (void) vcid2;

  FUNCSTART("void Assembly::performClips(const string & logname, const string & logprefix)");

  cout << "Starting clips: ";

  ofstream logfout;
  if(!logname.empty()){
    logfout.open(logname.c_str(), ios::out|ios::app);
    if(!logfout){
      MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
    }
  }

  //if(qualclip) {
  //  cout << "quality";
  //  cout.flush();
  //}
  //if(qualclip && maskcharclip) cout << " and";
  //if(maskcharclip) {
  //  cout << " masked characters";
  //  cout.flush();
  //}

  for(uint32 i=0;i<AS_readpool.size();i++){
    Read & r=AS_readpool.getRead(i);
    if(r.hasValidData()
       && !(r.isBackbone() 
	    || r.isRail())){

      if(AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_quality){
	int32 oldlq=r.getLQClipoff();
	int32 oldrq=r.getRQClipoff();
	r.performQualityClip(
	  AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_quality_minqual,
	  AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_quality_winlen);
	if(!logname.empty()){
	  logfout << logprefix;
	  if(oldlq != r.getLQClipoff()
	     || oldrq != r.getRQClipoff()){
	    logfout << " changed";
	  }else{
	    logfout << " unchanged";
	  }
	  logfout << " qual. " 
		  << r.getName() 
		  << "\tLeft: "
		  << r.getLQClipoff()
		  << "\tRight: "
		  << r.getRQClipoff() << '\n';
	}
      }
      if(AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_maskedbases) {
	int32 oldlm=r.getLMClipoff();
	int32 oldrm=r.getRMClipoff();
	r.setClipoffsToMaskedChars(
	  AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_maskedbase_gapsize,
	  AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_maskedbase_maxfrontgap,
	  AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_maskedbase_maxendgap,
	  false);
	r.setClipoffsToMaskedChars(
	  1,
	  1,
	  1,
	  true);
	if(!logname.empty()){
	  logfout << logprefix;
	  if(oldlm != r.getLMClipoff()
	     || oldrm != r.getRMClipoff()){
	    logfout << " changed";
	  }else{
	    logfout << " unchanged";
	  }
	  logfout << " mask. " 
		  << r.getName() 
		  << "\tLeft: "
		  << r.getLMClipoff()
		  << "\tRight: "
		  << r.getRMClipoff() << '\n';
	}
      }
    }
  }

  if(!logname.empty()){
    logfout.close();
  }

  cout << " done." << endl;

  FUNCEND();
  return;
}

/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Kills reads which do not contain enough bases of a minimum quality.
//
// For every read with valid data which is neither backbone nor rail, and
//  whose sequencing type has as_clip_quality_minthreshold != 0,
//  performRead_MinimumQualityThreshold() is asked whether the read meets
//  the threshold. If not, both quality clips (left and right) are set to
//  the length of the read and the read is counted as killed.
//
// logname:   file to append log lines to, no logging if empty
// logprefix: text written at the start of every log line
void Assembly::performPool_MinimumQualityThreshold(const string & logname, const string & logprefix)
{
  FUNCSTART("void Assembly::performPool_MinimumQualityThreshold(const string & logname, const string & logprefix)");

  cout << "Starting minimum quality threshold clip ... "; cout.flush();

  uint32 numkilled=0;

  ofstream logfout;
  if(!logname.empty()){
    logfout.open(logname.c_str(), ios::out|ios::app);
    if(!logfout){
      MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
    }
  }

  for(uint32 actid=0; actid < AS_readpool.size(); actid++){
    Read & actread = AS_readpool.getRead(actid);
    if(actread.hasValidData()
       && !(actread.isBackbone() || actread.isRail())){

      assembly_parameters const & as_params= AS_miraparams[actread.getSequencingType()].getAssemblyParams();
      // a threshold of 0 switches the check off for this sequencing type
      if(as_params.as_clip_quality_minthreshold==0) continue;

      if(!performRead_MinimumQualityThreshold(actread,as_params.as_clip_quality_minthreshold,as_params.as_clip_quality_numminthreshold)){
        ++numkilled;
        actread.setLQClipoff(actread.getLenSeq());
        actread.setRQClipoff(actread.getLenSeq());
        // name the read, else the log line cannot be attributed to anything
        logfout << logprefix << " min qual threshold not met, killed "
                << actread.getName()
                << '\n';
      }
    }
  }

  if(!logname.empty()){
    logfout.close();
  }

  cout << "done. Killed " << numkilled << " reads.\n";

  FUNCEND();
  return;
}

/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Checks whether a read contains at least minnum bases with a quality
//  of at least minqual.
//
// All quality values of the read are looked at (clips are not taken into
//  account). The scan stops as soon as the required number is reached.
// Note: a match is only ever reported when a qualifying base is seen, so
//  even with minnum==0 a read without a single qualifying base yields false.
//
// returns true if the threshold is met, false otherwise
bool Assembly::performRead_MinimumQualityThreshold(Read & actread, base_quality_t minqual, uint32 minnum)
{
  FUNCSTART("bool Assembly::performRead_MinimumQualityThreshold(Read & actread, base_quality_t minqual, uint32 minnum)");

  const vector<base_quality_t> & quals=actread.getQualities();
  bool thresholdmet=false;
  uint32 count=0;
  for(vector<base_quality_t>::const_iterator qI=quals.begin(); qI != quals.end(); ++qI){
    if(*qI>=minqual
       && ++count>=minnum) {
      thresholdmet=true;
      break;
    }
  }

  // single exit so that FUNCEND() is reached on every path
  FUNCEND();
  return thresholdmet;
}



/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/
// Searches reads of one sequencing type for known adaptor sequences and
//  sets the right seq vector clip to where the adaptor starts.
//
// Two built-in resources (compiled in via xxd generated headers) are used:
//  - a list of regular expressions, one per line (Solexa and Ion Torrent);
//  - a list of adaptor sequences as name line (first character is dropped)
//    followed by a sequence line (Solexa, Ion Torrent and 454).
// If neither resource exists for seqtype, the function returns at once.
//
// The adaptor sequences are searched via Skim::findAdaptorRightClip(),
//  which yields one clip position per read of the pool (negative: nothing
//  found). For reads without such a hit, the regular expressions are tried
//  in order on the uppercased, complete sequence; the first one that
//  matches defines the new right clip.
//
// logname:   file to append log lines to, no logging if empty
// logprefix: text written at the start of every log line
// seqtype:   sequencing type to work on, must be < Read::SEQTYPE_END
void Assembly::performPool_AdaptorRightClip(const string & logname, const string & logprefix, const uint8 seqtype)
{
  FUNCSTART("void Assembly::performPool_AdaptorRightClip(const string & logname, const string & logprefix, const uint8 seqtype)");

  BUGIFTHROW(seqtype>=Read::SEQTYPE_END,"Unknown seqtype " << static_cast<uint16>(seqtype) << " given.");

  // prepare regular expressions
  list<boost::regex> adapres;
  {
    istringstream tmpis;
    if(seqtype==Read::SEQTYPE_SOLEXA){
      static const char regexfile[] = {
#include "adaptorsregex.solexa.xxd.H"
        ,0
      };
      tmpis.str(regexfile);
    }else if(seqtype==Read::SEQTYPE_IONTORRENT){
      static const char regexfile[] = {
#include "adaptorsregex.iontor.xxd.H"
        ,0
      };
      tmpis.str(regexfile);
    }

    string line;
    while(true){
      getline(tmpis,line);
      if(tmpis.eof()) break;
      // A blank line is never a meaningful adaptor expression: an empty
      //  pattern would either be rejected by the regex compiler or match
      //  at the very start of every read.
      if(line.empty()) continue;
      boost::to_upper(line);
      adapres.push_back(boost::regex(line));
    }
  }

  // load the adaptor sequences into a read pool of their own
  ReadPool adappool(&AS_miraparams);
  adappool.reserve(500);
  {
    istringstream tmpis;

    if(seqtype==Read::SEQTYPE_SOLEXA){
      static const char adapfile[] = {
#include "adaptorsforclip.solexa.xxd.H"
        ,0
      };
      tmpis.str(adapfile);
    }else if(seqtype==Read::SEQTYPE_IONTORRENT){
      static const char adapfile[] = {
#include "adaptorsforclip.iontor.xxd.H"
        ,0
      };
      tmpis.str(adapfile);
    }else if(seqtype==Read::SEQTYPE_454GS20){
      static const char adapfile[] = {
#include "adaptorsforclip.454.xxd.H"
        ,0
      };
      tmpis.str(adapfile);
    }

    string line;
    while(true){
      getline(tmpis,line);
      if(tmpis.eof()) break;
      line.erase(0,1);         // get away the ">"
      if(!line.empty()){
        adappool.addNewEmptyRead();
        Read & actread=adappool[adappool.size()-1];
        actread.disallowAdjustments();
        actread.setName(line);
        getline(tmpis,line);
        if(tmpis.eof()) break;
        actread.setSequenceFromString(line);
      }
    }
  }

  //adappool.dumpPoolInfo(cout);

  // Go back if nothing to be searched
  if(adappool.size()==0 && adapres.size()==0) {
    FUNCEND();
    return;
  }

  cout << "Starting " << Read::getNameOfSequencingType(seqtype) << " known adaptor right clip ... "; cout.flush();

  Skim adapskim;
  adapskim.skimStreamPrepare(adappool,7,1);

  cout << "Searching multithread now ... \n"; cout.flush();

  cout << static_cast<int16>(AS_miraparams[0].getSkimParams().sk_numthreads) << endl;

  // one entry per read of AS_readpool, negative if no adaptor was found
  vector<int32> clipres;
  adapskim.findAdaptorRightClip(AS_readpool,clipres,seqtype,9,AS_miraparams[0].getSkimParams().sk_numthreads);

  BUGIFTHROW(clipres.size()!=AS_readpool.size(),"clipres.size()!=AS_readpool.size()???");

  ofstream logfout;
  if(!logname.empty()){
    logfout.open(logname.c_str(), ios::out|ios::app);
    if(!logfout){
      MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
    }
  }

#if CPP_READ_SEQTYPE_END != 6
#error "This code is made for 6 sequencing types, adapt!"
#endif

  uint32 numclipped=0;

  cout << "Searching for " <<  Read::getNameOfSequencingType(seqtype) << " partial end adaptors ... \n"; cout.flush();
  ProgressIndicator<int64> P(0, AS_readpool.size());
  for(uint32 actid=0; actid < AS_readpool.size(); actid++){
    P.progress(actid);
    Read & actread = AS_readpool.getRead(actid);
    if(actread.hasValidData()
       && actread.getSequencingType()==seqtype
       && !(actread.isBackbone() || actread.isRail())){

      if(clipres[actid]>=0){
        // the skim search found a known adaptor
        ++numclipped;
        actread.setRSClipoff(clipres[actid]);
        logfout << logprefix << " "
                << Read::getNameOfSequencingType(seqtype)
                << " adaptor: " << actread.getName()
                << " changed right clip to " << clipres[actid] << "\n";
      }else if(!adapres.empty()){
        // no hit from skim: try the regular expressions, first match wins
        string seq(actread.getSeqAsChar());
        boost::to_upper(seq);

        list<boost::regex>::const_iterator areI=adapres.begin();
        boost::match_results<std::string::const_iterator> what;
        boost::match_flag_type flags = boost::match_default;
        std::string::const_iterator start, end;
        for(; areI != adapres.end(); ++areI){
          start = seq.begin();
          end = seq.end();
          if(regex_search(start, end, what, *areI, flags)) {
            ++numclipped;
            actread.setRSClipoff(what.position());
            logfout << logprefix << " "
                    << Read::getNameOfSequencingType(seqtype)
                    << " partial end adaptor: " << actread.getName()
                    << " changed right clip to " << what.position() << "\n";
            break;
          }
        }
      }
    }
  }

  P.finishAtOnce();

  if(!logname.empty()){
    logfout.close();
  }

  cout << "done. Clipped " << numclipped << " reads.\n";

  FUNCEND();
  return;
}




/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

void Assembly::performMinimumLeftClips(bool onsltag, bool onmaskchar, const string & logname, const string & logprefix)
{
  FUNCSTART("void Assembly::performMinimumLeftClips(bool onsltag, bool onmaskchar)");

  if(onsltag || onmaskchar){
    cout << "Starting minimum left clip ... "; cout.flush();
    
    ofstream logfout;
    if(!logname.empty()){
      logfout.open(logname.c_str(), ios::out|ios::app);
      if(!logfout){
	MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
      }
    }

#if CPP_READ_SEQTYPE_END != 6
#error "This code is made for 6 sequencing types, adapt!"
#endif

    for(uint32 i=0;i<AS_readpool.size();i++){
      Read & r=AS_readpool.getRead(i);
      if(r.hasValidData()
	 && (r.isSequencingType(Read::SEQTYPE_SANGER)
	     || r.isSequencingType(Read::SEQTYPE_454GS20)
	     || r.isSequencingType(Read::SEQTYPE_IONTORRENT)
	     || r.isSequencingType(Read::SEQTYPE_PACBIO)
	     || r.isSequencingType(Read::SEQTYPE_SOLEXA)
	     || r.isSequencingType(Read::SEQTYPE_ABISOLID))
	 && !(r.isBackbone() || r.isRail())){
	

	//cout << "RT: " << (uint16) r.getSequencingType() << endl;
	//cout << "MIN: " << AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_minslrequired << endl;
	//cout << "SET: " << AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_minqlsetto << endl;;
	//
	//r.setCoutType(Read::AS_TEXTCLIPS);
	//cout << "Old:\n" << r;

	if(AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_ensureminimumleftclipoff){
	  int32 oldlq=r.getLQClipoff();
	  r.setMinimumLeftClipoff(
	    AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_minslrequired,
	    AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_minqlsetto,
	    onsltag,
	    onmaskchar
	    );
	  if(!logname.empty()){
	    logfout << logprefix;
	    if(oldlq!=r.getLQClipoff()){
	      logfout << " changed";
	    }else{
	      logfout << " unchanged";
	    }
	    logfout << " minleft. " 
		    << r.getName() 
		    << "\tLeft: "
		    << oldlq
		    << "\t -> "
		    << r.getLQClipoff()
		    << '\n';
	  }
	  //cout << "New:\n" << r;
	}
      }
    }
    
    if(!logname.empty()){
      logfout.close();
    }

    cout << "done." << endl;
  }

  FUNCEND();
  return;
}



/*************************************************************************
 *
 * in comparison to left clip:
 *  
 *  if seqtype is 454, then the values are adapted:
 *    increased by half (x1.5) for reads longer than 180 bases (FLX and XDR)
 *
 *************************************************************************/

void Assembly::performMinimumRightClips(const string & logname, const string & logprefix)
{
  FUNCSTART("void Assembly::performMinimumRightClips(bool onsltag, bool onmaskchar)");

  cout << "Starting minimum right clip ... "; cout.flush();
  
  ofstream logfout;
  if(!logname.empty()){
    logfout.open(logname.c_str(), ios::out|ios::app);
    if(!logfout){
      MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
    }
  }
  
#if CPP_READ_SEQTYPE_END != 6
#error "This code is made for 6 sequencing types, adapt!"
#endif

  for(uint32 i=0;i<AS_readpool.size();i++){
    Read & r=AS_readpool.getRead(i);
    BUGIFTHROW(r.getSequencingType() > Read::SEQTYPE_ABISOLID,
	       r.getName() << ": sequencing type " << r.getSequencingType() << " unknown to this function, must be worked on.\n");
    if(r.hasValidData()
       && (r.isSequencingType(Read::SEQTYPE_SANGER)
	   || r.isSequencingType(Read::SEQTYPE_454GS20)
	   || r.isSequencingType(Read::SEQTYPE_IONTORRENT)
	   || r.isSequencingType(Read::SEQTYPE_PACBIO)
	   || r.isSequencingType(Read::SEQTYPE_SOLEXA)
	   || r.isSequencingType(Read::SEQTYPE_ABISOLID))
       && !(r.isBackbone() || r.isRail())){
      
      
      //cout << "RT: " << (uint16) r.getSequencingType() << endl;
      //cout << "MIN: " << AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_minsrrequired << endl;
      //cout << "SET: " << AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_minqrsetto << endl;;
      //
      //r.setCoutType(Read::AS_TEXTCLIPS);
      //cout << "Old:\n" << r;
      
      if(AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_ensureminimumrightclipoff){
	// special handling of sequencing types
	bool clipit=true;
	switch(r.getSequencingType()) {
	case Read::SEQTYPE_SANGER : break;
	case Read::SEQTYPE_PACBIO : break;
	case Read::SEQTYPE_IONTORRENT : break;
	case Read::SEQTYPE_454GS20 : {
	  // for paired end, do clip only forward sequences
	  // this way, partial reads from paired end also do not
	  //  get clipped
	  if(r.getTemplateEnd() != 'F') clipit=false;

	  break;
	}
	case Read::SEQTYPE_SOLEXA : {
	  clipit=false;
	  break;
	}
	case Read::SEQTYPE_ABISOLID : {
	  clipit=false;
	  break;
	}
	default: {
	}
	}

	if(clipit) {
	  int32 oldrq=r.getRQClipoff();
	  int32 oldrs=r.getRSClipoff();
	  int32 minsr=AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_minsrrequired;
	  int32 setto=AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_minqrsetto;
	  if(r.getSequencingType()==Read::SEQTYPE_454GS20){
	    if(r.getLenSeq()>180){
	      //minsr*=2;
	      //setto*=2;
	      minsr+=minsr/2;
	      setto+=setto/2;
	    }
	  }
	  
	  r.setMinimumRightClipoff(minsr,setto);
	  // if the length of the read falls below the minimum length,
	  //  put back the original clip
	  if(r.getLenClippedSeq() < AS_miraparams[r.getSequencingType()].getAssemblyParams().as_minimum_readlength){
	    r.setRQClipoff(oldrq);
	    r.setRSClipoff(oldrs);
	  }
	  
	  
	  if(!logname.empty()){
	    logfout << logprefix;
	    if(oldrq!=r.getRQClipoff()){
	      logfout << " changed";
	    }else{
	      logfout << " unchanged";
	    }
	    logfout << " minright. " 
		    << r.getName() 
		    << "\tRight: "
		    << oldrq
		    << "\t-> "
		    << r.getRQClipoff()
		    << '\n';
	  }
	  //cout << "New:\n" << r;
	}
      }
    }
  }
  
  if(!logname.empty()){
    logfout.close();
  }
  
  cout << "done." << endl;

  FUNCEND();
  return;
}




/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

void Assembly::performBadSequenceSearch(const string & logname, const string & logprefix)
{
  FUNCSTART("void Assembly::performBadSequenceSearch(uint32 winlen, base_quality_t minqual)");

  cout << "Performing search for bad sequence quality ... "; cout.flush();
  
  ofstream logfout;
  if(!logname.empty()){
    logfout.open(logname.c_str(), ios::out|ios::app);
    if(!logfout){
      MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
    }
  }
  
  for(uint32 i=0;i<AS_readpool.size();i++){
    Read & r=AS_readpool.getRead(i);
    if(AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_badstretchquality
       && r.hasValidData()
       && r.hasQuality()
       && r.isSequencingType(Read::SEQTYPE_SANGER)
       && !(r.isBackbone() || r.isRail())){
      
      uint32 winlen=AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_badstretchquality_winlen;
      base_quality_t minqual=AS_miraparams[r.getSequencingType()].getAssemblyParams().as_clip_badstretchquality_minqual;

      const vector<base_quality_t> & bquals=r.getQualities();
      int32 runi=r.getLeftClipoff();
      int32 endi=r.getRightClipoff();

      uint32 qualsbelow=0;
      bool foundbad=false;
      for(; runi < endi; runi++){
	if(bquals[runi] < minqual){
	  qualsbelow++;
	  if(qualsbelow >= winlen){
	    foundbad=true;
	    break;
	  }
	}else{
	  qualsbelow=0;
	}
      }

      if(foundbad) {
	int32 newrclip=runi-qualsbelow+1;
	int32 shortened=r.getRightClipoff()-newrclip;
	//cout << r.getName() << " has bad stretch, shortening by " << r.getRightClipoff()-newrclip << '\n';
	if(newrclip < r.getLQClipoff()) newrclip=r.getLQClipoff();
	r.setRQClipoff(newrclip);
	if(!logname.empty()){
	  logfout << logprefix << " bad seq. " 
		  << r.getName() 
		  << "\tShortened by " << shortened
		  << "\tNew right: "
		  << r.getRQClipoff()
		  << '\n';
	}
      }
    }
  }
  cout << "done." << endl;

  if(!logname.empty()){
    logfout.close();
  }

  FUNCEND();
  return;
}




/*************************************************************************
 *
 *
 * 
 *
 *************************************************************************/

// Runs the automatic editor over all contigs of the assembly.
//
// The whole body is only compiled when MIRA_HAS_EDIT is defined, else the
//  function does nothing. For each contig in AS_contigs: edit it with
//  editContigBack(), discard the SCF buffer, delete columns consisting of
//  stars only and mark possible repeats. A Notify thrown while working on
//  a contig is handed to its handleError(); the loop then goes on with the
//  next contig unless handleError() itself ends the program.
void Assembly::correctContigs()
{
#ifdef MIRA_HAS_EDIT
  FUNCSTART("void Assembly::correctContigs()");

  if(AS_miraparams[0].getAssemblyParams().as_dateoutput) dateStamp(cout);
  cout << "\nEditing contigs:" << endl;

  EDITParameters eparams;

  //  eparams.setDoEval();
  eparams.setStrictEvaluation(false);
  eparams.setConfirmationThreshold(0.5);
  eparams.setShowProgress(true);
  eparams.setVerbose(0);


  list<Contig>::iterator I = AS_contigs.begin();
  int32 ccounter=0;
  ProgressIndicator<int64> P(0, AS_contigs.size());

  while(I!=AS_contigs.end()){
    P.progress(ccounter);
    try {
      cout << "Editing contig:" << ccounter << endl;
      editContigBack(*I, eparams);
      ScfBuffer::discard();
      cout << "deleting star columns" << ccounter << endl;
      I->deleteStarOnlyColumns(0, I->getContigLength()-1);
      cout << "marking repeats" << ccounter << endl;

      Contig::repeatmarker_stats_t repstats;
      vector<bool> readsmarkedsrm;
      I->newMarkPossibleRepeats(repstats, readsmarkedsrm);
    }
    // by reference: no copy of the exception object, no slicing
    catch(Notify & n){
      n.handleError("Error while examining fault-region");
    }

    ++I;
    ++ccounter;
  }

  P.finishAtOnce();

  cout << endl;

  FUNCEND();
#endif
  return;
}






/*************************************************************************
 *
 * Calculates possible sequence vector leftovers at the left side of a read
 * Reads that get a clip must be of Sanger type
 *
 * Does not clip backbone reads, rail reads, multicopyreads
 *  AND not areas protected by Staden GenBank Feature tags
 *
 * Clipping itself must be done afterwards in the performSeqVectorClippings()
 *  function. This was split in two parts to allow releasing of the
 *  big memory chunks AS_readhmcovered, AS_readhitmiss, etc.
 *
 *************************************************************************/


// Computes, per Sanger read, how many bases at the left end look like
//  sequencing vector leftovers and stores that number in AS_clipleft.
//  Nothing is clipped here.
//
// Needs AS_readhmcovered and AS_readhitmiss; returns at once if either is
//  empty. AS_clipleft and AS_clipright are reset to one entry of -1 per
//  entry of AS_readhmcovered. Skipped are non-Sanger reads, backbone reads,
//  rail reads and reads with AS_multicopies[id]>0.
//
// A position counts as suspicious when it is covered at least 4 times and
//  at least 30% of that coverage are misses. The scan from the left stops
//  once it is more than 5 positions past the last suspicious one. The clip
//  is reduced so that it does not reach into a GenBank feature tag, it is
//  dropped when only one base would be affected, and it is refused when
//  longer than as_clip_vector_maxlenallowed (0 meaning no limit).
//
// version, prefix, postfix: handed to buildFileName() for the log file
// logname: base name of the log file, as_tmpf_vectorclip is used if empty
void Assembly::calcPossibleSeqVectorClipoffs(int32 version, const string prefix, const string postfix, const string logname)
{
  FUNCSTART("void Assembly::calcPossibleSeqVectorClipoffs(int32 version, const string prefix, const string postfix, const string logname)");

  if(AS_readhmcovered.size()==0 || AS_readhitmiss.size()==0) {
    cout << "\nNo vector clipping information available, aborting vector clip.\n";
    FUNCEND();
    return;
  }

  if(AS_miraparams[0].getAssemblyParams().as_dateoutput) dateStamp(cout);
  cout << "\nCalculating possible vector leftovers ... ";
  cout.flush();

  // start from scratch: -1 means "no clip"
  AS_clipleft.clear();
  AS_clipright.clear();
  AS_clipleft.resize(AS_readhmcovered.size(),-1);
  AS_clipright.resize(AS_readhmcovered.size(),-1);

  string filename;
  if(logname.size()){
    filename=buildFileName(version, prefix, postfix, logname, ".txt");
  }else{
    filename=buildFileName(version, prefix, postfix,
                           AS_miraparams[0].getAssemblyParams().as_tmpf_vectorclip,
                           ".txt");
  }

  ofstream logout(filename.c_str(), ios::out | ios::trunc);

  for(uint32 id=0; id<AS_readhmcovered.size(); ++id) {
    Read & actread=AS_readpool.getRead(id);

    if(actread.getSequencingType() != Read::SEQTYPE_SANGER
       || actread.isBackbone()
       || actread.isRail()
       || AS_multicopies[id]>0) continue;

    // find the rightmost suspicious position reachable from the left end
    //  without a gap of more than 5 unsuspicious positions
    uint32 clippos=0;
    bool mustclip=false;
    for(uint32 actpos=0; actpos<AS_readhmcovered[id].size(); ++actpos) {
      if(actpos-clippos > 5) break;
      if(AS_readhmcovered[id][actpos]>=4
         && AS_readhitmiss[id][actpos]
         && 100.0/static_cast<double>(AS_readhmcovered[id][actpos])*static_cast<double>(AS_readhitmiss[id][actpos]) >= 30.0) {
        clippos=actpos;
        mustclip=true;
      }
    }
    // from position to number of bases
    clippos++;

    // check that no GenBank Feature tags protect the area, else clip less
    // FIXME: put all checks for that into read.C (*sigh*)
    for(uint32 tagi=0; tagi<actread.getNumOfTags(); ++tagi){
      const multitag_t & acttag=actread.getTag(tagi);
      if(GBF::checkIfGBFfeature(acttag.getIdentifierStr())) {
        if(acttag.from<clippos) clippos=acttag.from;
        if(acttag.to<=clippos) clippos=0;
      }
    }

    // do without the clip if only 1 base is affected (looks too silly)
    if(!mustclip || clippos<=1) continue;

    uint32 maxcliplenallowed=AS_miraparams[actread.getSequencingType()].getAssemblyParams().as_clip_vector_maxlenallowed;
    if(maxcliplenallowed == 0 || clippos <= maxcliplenallowed) {
      // only the number of bases is stored, performSeqVectorClippings()
      //  adds it to the left clip later on
      AS_clipleft[id]=clippos;

      logout << "Clipped " << clippos << " bases on the left of " << actread.getName() << "\n";
    } else {
      // here a limit is set and clippos is above it
      logout << "Not clipped " << clippos << " bases on the left of " << actread.getName() << " , too long.\n";
    }
  }

  logout.close();

  cout << "done.\n";

  AS_steps[ASVECTORSCLIPPED]=1;
  AS_steps[ASADSLISTOK]=0;

  FUNCEND();
}




/*************************************************************************
 *
 * Reads must be Sanger type
 *
 *
 *************************************************************************/

void Assembly::performSeqVectorClippings()
{
  FUNCSTART("void Assembly::performSeqVectorClippings()");

  cout << "\nPerforming vector clipping ... ";
  cout.flush();

  for(uint32 id=0; id<AS_clipleft.size(); id++) {
    if(AS_clipleft[id]>=0
       && AS_readpool.getRead(id).isSequencingType(Read::SEQTYPE_SANGER)) {
      AS_readpool.getRead(id).setClipoffs(AS_readpool.getRead(id).getLeftClipoff()+AS_clipleft[id],
					  AS_readpool.getRead(id).getRightClipoff(),
					  false);
    }
  }
  FUNCEND();

  AS_clipleft.clear();

  cout << "done." << endl;

  return;
}


/*************************************************************************
 *
 * Short Read Mappings right clip
 * Reads must be Solexa/SOLiD type
 *
 *
 *************************************************************************/

void Assembly::performSRMRightClippings()
{
  FUNCSTART("void Assembly::performSRMRightClippings()");

  cout << "\nPerforming right clipping of unfriendly Solexa reads ... ";
  cout.flush();

  uint32 counter=0;
  uint32 howmuch=0;
  for(uint32 id=0; id<AS_clipright.size(); id++) {
    if(AS_clipright[id]>=0
       && (AS_readpool.getRead(id).isSequencingType(Read::SEQTYPE_SOLEXA)
	   || AS_readpool.getRead(id).isSequencingType(Read::SEQTYPE_ABISOLID))) {
      counter++;
      howmuch+=AS_readpool.getRead(id).getRightClipoff()-AS_clipright[id];
      AS_readpool.getRead(id).setClipoffs(AS_readpool.getRead(id).getLeftClipoff(),
					  AS_clipright[id],
					  false);
    }
  }
  FUNCEND();

  AS_clipright.clear();

  cout << "done.\n";
  cout << "Clipped " << howmuch << " bases in " << counter << " reads." << endl;

  return;
}




/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Small per-read record used by Assembly::extendADS(): that function keeps
//  a vector with one cliplen_t per read of the read pool and initialises
//  every entry to len=0, changed=false.
struct cliplen_t{
  int32 len;      // NOTE(review): presumably the extension length found for the read -- confirm in extendADS()
  bool changed;   // NOTE(review): presumably set once len got a new value -- confirm in extendADS()
};


//#define CEBUGFLAG 1
// Searches for possible extensions of reads into their right hand clipped
//  part and applies them to the reads in AS_readpool.
//
// For every overlap stored in AS_adsfacts where both reads have the same
//  direction, both reads are re-aligned with their right extension
//  (Read::getRightExtend()) appended. If that yields no alignment, a second,
//  less radical try with roughly half the extension is made. The best
//  alignment is handed to AlignedDualSeq::clipper() and the resulting gain
//  in length is remembered per read; the largest gain per read wins.
//
// Overlaps are not looked at if
//  - both sequencing types have as_use_read_extension switched off
//  - the pair is permanently banned
//  - one of the reads has a clipped length of 0
//  - one of the reads is of type PacBio, IonTorrent, Solexa or ABI SOLiD
//
// The gains are then applied via Read::setClipoffs() to all reads which are
//  neither backbone nor rail, carry no CJSP tag and whose sequencing type
//  has as_use_read_extension switched on. Each change is logged as
//  "name <tab> gain" to a text file.
//
// version, prefix, postfix, logname: passed to buildFileName() to build the
//  name of the log file; if logname is empty, as_tmpf_adsextend is used
//  instead.
//
// Side effects: sets AS_steps[ASADSLISTOK] to 0.
// Throws Notify(INTERNAL) if Read::checkRead() reports a problem after the
//  clips of a read were changed.
void Assembly::extendADS(int32 version, const string prefix, const string postfix, const string logname)
{
  FUNCSTART("void Assembly::extendADS(int32 version, const string prefix, const string postfix, const string logname)");

//  if(AS_steps[ASADSLISTOK]==0){
//    makeAlignments();
//  }


#if CPP_READ_SEQTYPE_END != 6
#error "This code is made for 6 sequencing types, adapt!"
#endif

  // TODO: change to use different Aligns / MIRAparams depending 
  //   on Sanger / 454 (/ PacBio ???) reads

  // TODO: what about PacBio? currently not used, but should it?

  MIRAParameters tmpparams = AS_miraparams[0];

  tmpparams.setAlignMinRelScore(5);

  assembly_parameters const & as_params= tmpparams.getAssemblyParams();

  string filename;
  if(logname.size()){
    filename=buildFileName(version, prefix, postfix, logname, ".txt");
  }else{
    filename=buildFileName(version, prefix, postfix, 
                           as_params.as_tmpf_adsextend,
                           ".txt");
  }

  ofstream logout(filename.c_str(), ios::out | ios::trunc);


  // one entry per read: best extension found so far
  vector<cliplen_t> clips(AS_readpool.size());
  for(uint32 i=0; i<clips.size(); i++){
    clips[i].len=0;
    clips[i].changed=false;
  }

  list<AlignedDualSeq> madsl;

  try{
    // true for using memcache
    Align bla(&tmpparams);

    cout << "\n";
    if(as_params.as_dateoutput) dateStamp(cout);
    cout << "\nSearching possible read extensions (for Sanger and/or 454):\n";
    
    ProgressIndicator<int32> P(0, static_cast<int32>(AS_adsfacts.size())-1);
    uint32 pindic=0;
    
    vector<AlignedDualSeqFacts>::const_iterator I = AS_adsfacts.begin();
    for(;I!=AS_adsfacts.end();I++){
      P.progress(pindic++);
      // first try: prolongate to end.
      int32 id1=I->getID1();
      int32 id2=I->getID2();
      
      // no sense to calc read extensions for reads where both seqtypes are said
      //  not to use extensions
      if(AS_miraparams[AS_readpool.getRead(id1).getSequencingType()].getAssemblyParams().as_use_read_extension == false
         && AS_miraparams[AS_readpool.getRead(id2).getSequencingType()].getAssemblyParams().as_use_read_extension == false) continue;

      if(AS_permanent_overlap_bans.checkIfBanned(id1,id2)) {
        CEBUG("PermBan for: " << id1 << " " << id2 <<"\tskipping\n");
        continue;
      }

      CEBUG("\n\nid1: " << id1 << "\t" << AS_readpool.getRead(id1).getName() <<endl);
      CEBUG("id2: " << id2 << "\t" << AS_readpool.getRead(id2).getName() <<endl);

      // normally the sequences should have a length >0
      // but due to some clipping being done after SKIM (chimera etc.), it
      //  may happen they are 0 now. If that's the case, don't bother
      //  looking at.
      if(AS_readpool[id1].getLenClippedSeq() == 0
         || AS_readpool[id2].getLenClippedSeq() == 0) continue;

      // check for sequencing types: skip the overlap as soon as one of
      //  the two reads has a type for which no extension is searched
      if( AS_readpool.getRead(id1).isSequencingType(Read::SEQTYPE_PACBIO)
          || AS_readpool.getRead(id2).isSequencingType(Read::SEQTYPE_PACBIO)) continue;

      // both reads must be tested for the same type here
      if( AS_readpool.getRead(id1).isSequencingType(Read::SEQTYPE_IONTORRENT)
          || AS_readpool.getRead(id2).isSequencingType(Read::SEQTYPE_IONTORRENT)) continue;

      if( AS_readpool.getRead(id1).isSequencingType(Read::SEQTYPE_SOLEXA)
          || AS_readpool.getRead(id2).isSequencingType(Read::SEQTYPE_SOLEXA)) continue;

      if( AS_readpool.getRead(id1).isSequencingType(Read::SEQTYPE_ABISOLID)
          || AS_readpool.getRead(id2).isSequencingType(Read::SEQTYPE_ABISOLID)) continue;

      //if(clips[id1].changed && clips[id2].changed){
      //	CEBUG(id1 << " and " << id2 <<" already changed.\n");
      //	continue;	    
      //}
      
      madsl.clear();

#if CEBUGFLAG > 0
      //Read::setCoutType(Read::AS_TEXT);
      Read::setCoutType(Read::AS_TEXTCLIPS);
      CEBUG(AS_readpool.getRead(id1));
      CEBUG(AS_readpool.getRead(id2));
#endif

      if(I->getSequenceDirection(id1) * I->getSequenceDirection(id2) > 0){
        
        CEBUG("doalign\n");

        // evil hack warning
        // the &(* ...) construction is needed for gcc3 as it cannot convert 
        //  a vector<char> iterator to char *   (*sigh*)
        
        int32 extendlen1=AS_readpool.getRead(id1).getRightExtend();
        int32 extendlen2=AS_readpool.getRead(id2).getRightExtend();

        if(AS_miraparams[AS_readpool.getRead(id1).getSequencingType()].getAssemblyParams().as_use_read_extension == false) {
          extendlen1=0;
        }
        if(AS_miraparams[AS_readpool.getRead(id2).getSequencingType()].getAssemblyParams().as_use_read_extension == false){
          extendlen2=0;
        }

        CEBUG("l1: " <<AS_readpool.getRead(id1).getLenClippedSeq() << endl);
        CEBUG("e1: " <<extendlen1 << endl);
        CEBUG("l2: " <<AS_readpool.getRead(id2).getLenClippedSeq() << endl);
        CEBUG("e2: " <<extendlen2 << endl);

        if(extendlen1 >= 10 || extendlen2 >= 10){
          bla.acquireSequences(
            &(*AS_readpool.getRead(id1).getActualSequence().begin())
            +AS_readpool.getRead(id1).getLeftClipoff(),
            AS_readpool.getRead(id1).getLenClippedSeq()+extendlen1,
            &(*AS_readpool.getRead(id2).getActualSequence().begin())
            +AS_readpool.getRead(id2).getLeftClipoff(),
            AS_readpool.getRead(id2).getLenClippedSeq()+extendlen2,
            id1, id2, 1, 1, true, I->getOffsetInAlignment(id2));
          bla.fullAlign(&madsl,false,false);
          
          if(madsl.size()==0){
            CEBUG("No results, less radical try.\n");

            // second try: use only half of the (not yet gained) extension
            int32 tryseqlen1=0;
            if(AS_miraparams[AS_readpool.getRead(id1).getSequencingType()].getAssemblyParams().as_use_read_extension) {
              if(clips[id1].changed){
                extendlen1-=clips[id1].len;
              }
              extendlen1/=2;
              tryseqlen1=AS_readpool.getRead(id1).getLenClippedSeq()+extendlen1;
              if(clips[id1].changed){
                tryseqlen1+=clips[id1].len;
              }
              // do not run over the end of the read
              if(AS_readpool.getRead(id1).getLeftClipoff()+tryseqlen1 >= static_cast<int32>(AS_readpool.getRead(id1).getLenSeq())) {
                CEBUG("t1o: " <<tryseqlen1 << endl);
                tryseqlen1=AS_readpool.getRead(id1).getLenClippedSeq()+AS_readpool.getRead(id1).getRightExtend();
                CEBUG("t1n: " <<tryseqlen1 << endl);
              }
            }

            int32 tryseqlen2=0;
            if(AS_miraparams[AS_readpool.getRead(id2).getSequencingType()].getAssemblyParams().as_use_read_extension) {
              if(clips[id2].changed){
                extendlen2-=clips[id2].len;
              }
              extendlen2/=2;
              tryseqlen2=AS_readpool.getRead(id2).getLenClippedSeq()+extendlen2;
              if(clips[id2].changed){
                tryseqlen2+=clips[id2].len;
              }
              // do not run over the end of the read
              if(AS_readpool.getRead(id2).getLeftClipoff()+tryseqlen2 >= static_cast<int32>(AS_readpool.getRead(id2).getLenSeq())) {
                CEBUG("t2o: " <<tryseqlen2 << endl);
                tryseqlen2=AS_readpool.getRead(id2).getLenClippedSeq()+AS_readpool.getRead(id2).getRightExtend();
                CEBUG("t2n: " <<tryseqlen2 << endl);
              }
            }

            CEBUG("cc1: " <<clips[id1].changed << endl);
            CEBUG("cl1: " <<clips[id1].len << endl);
            CEBUG("l1: " <<AS_readpool.getRead(id1).getLenClippedSeq() << endl);
            CEBUG("t1: " <<tryseqlen1 << endl);
            CEBUG("cc2: " <<clips[id2].changed << endl);
            CEBUG("cl2: " <<clips[id2].len << endl);
            CEBUG("l2: " <<AS_readpool.getRead(id2).getLenClippedSeq() << endl);
            CEBUG("t2: " <<tryseqlen2 << endl);
            if(extendlen1 < 5 && extendlen2 < 5) {
              CEBUG("skip" << endl);
              continue;
            }
            
            if(tryseqlen1>0 && tryseqlen2>0){
              bla.acquireSequences(
                &(*AS_readpool.getRead(id1).getActualSequence().begin())
                +AS_readpool.getRead(id1).getLeftClipoff(),
                tryseqlen1,
                &(*AS_readpool.getRead(id2).getActualSequence().begin())
                +AS_readpool.getRead(id2).getLeftClipoff(),
                tryseqlen2,
                id1, id2, 1, 1, true, I->getOffsetInAlignment(id2));
              // the newly acquired sequences must also be aligned, else
              //  this second try can never deliver a result
              bla.fullAlign(&madsl,false,false);
            }
          }
        }
      }else{
        // reads overlapping in different directions: nothing is done here
      }
      
      if(madsl.size()==0){
        CEBUG("No results\n");
      }else{
        // throw out invalid alignments, remember the best weight seen
        int32 bestweight=0;
        list<AlignedDualSeq>::iterator J;
        for(J= madsl.begin(); J!=madsl.end(); ){
          if(J->isValid()==false){
            J=madsl.erase(J);
          }else{
            if(J->getWeight()>bestweight) bestweight=J->getWeight();
            J++;
          }
        }
        // take only the best
        for(J= madsl.begin(); J!=madsl.end();){
          if(J->getWeight() != bestweight){
            J=madsl.erase(J);
          } else {
            J++;
          }
        }    
//	  cout << "Ext. 1st success: " << id1 << "\t" << id2 << "\n";
//	  cout << *I;
//	  cout << *(madsl.begin());
        
        int32 lens1=0;
        int32 lens2=0;
        // the two filters above may have emptied the list: do not
        //  dereference begin() in that case
        if(!madsl.empty()
           && madsl.begin()->clipper(as_params.as_readextension_window_len,
                                     as_params.as_readextension_window_maxerrors,
                                     lens1, lens2)){
//	    cout << "Lalala\n";

          // from here on, lens1/2 hold the gain compared to the current
          //  clipped length
          lens1-=AS_readpool.getRead(id1).getLenClippedSeq();
          lens2-=AS_readpool.getRead(id2).getLenClippedSeq();
          CEBUG("o1: " << AS_readpool.getRead(id1).getLenClippedSeq() << "\tn: " << lens1);
          CEBUG("\no2: " << AS_readpool.getRead(id2).getLenClippedSeq() << "\tn: " << lens2<<endl);
          

          if(AS_miraparams[AS_readpool.getRead(id1).getSequencingType()].getAssemblyParams().as_use_read_extension){
            if(lens1>5 && lens1>clips[id1].len){
              clips[id1].len=lens1;
              clips[id1].changed=true;
            }
          }

          if(AS_miraparams[AS_readpool.getRead(id2).getSequencingType()].getAssemblyParams().as_use_read_extension){
            if(lens2>5 && lens2>clips[id2].len){
              clips[id2].len=lens2;
              clips[id2].changed=true;
            }
          }
        }
      }
    }
    P.finishAtOnce();
  }
  catch(Notify n){
    n.handleError(THISFUNC);
  }

  // apply the extensions found
  int32 lenplus=0;
  int32 numchanged=0;
  for(uint32 rid=0; rid<clips.size(); rid++){
    if(AS_readpool.getRead(rid).isBackbone()
       || AS_readpool.getRead(rid).isRail()) continue;
    // contig join spoiler! do not extend back again!
    if(AS_readpool.getRead(rid).hasTag(Read::REA_tagentry_idCJSP)) continue;
    // only reads whose sequencing type allows read extension are changed
    if(!AS_miraparams[AS_readpool.getRead(rid).getSequencingType()].getAssemblyParams().as_use_read_extension) continue;

    if(clips[rid].changed){
      CEBUG("ID: " << rid << "\t" << AS_readpool.getRead(rid).getName() << "\toldlen: " << AS_readpool.getRead(rid).getLenClippedSeq());
      CEBUG("\tgained: " << clips[rid].len << endl);
      numchanged++;
      lenplus+=clips[rid].len;

      logout << AS_readpool.getRead(rid).getName() << "\t" << clips[rid].len << "\n";

      AS_readpool.getRead(rid).setClipoffs(AS_readpool.getRead(rid).getLeftClipoff(),
                                         AS_readpool.getRead(rid).getLeftClipoff()+AS_readpool.getRead(rid).getLenClippedSeq()+clips[rid].len-1,
                                         false);
      
      if(AS_readpool.getRead(rid).checkRead()){
        cout << AS_readpool.getRead(rid);
        throw Notify(Notify::INTERNAL, THISFUNC, AS_readpool.getRead(rid).checkRead()) ;
      }
    }
  }
  
  cout << "\nChanged length of " << numchanged << " sequences."<< endl;
  if(numchanged!=0){
    cout << "Mean length gained in these sequences: " << static_cast<double>(lenplus)/ static_cast<double>(numchanged) << " bases." << endl;
  }

  logout.close();

  AS_steps[ASADSLISTOK]=0;

  FUNCEND();
  return;  
}
//#define CEBUGFLAG 0




/*************************************************************************
 *
 * clip poly-A in forward and poly-T in reverse direction
 * or: clip only after the poly-stretches, and tag the stretches with FpAS
 *
 *************************************************************************/

// Clips (or, when as_clip_polyat_keeppolysignal is set, clips behind and
//  keeps) poly-A stretches at the right end and poly-T stretches at the
//  left end of reads. Every stretch found is tagged with an FpAS tag.
//
// Only reads with valid data which are neither backbone nor rail and whose
//  sequencing type has as_clip_polyat set are looked at.
//
// The poly-A stretch leads to a new right mask clip (setRMClipoff()) and is
//  therefore searched from the 3' (right) end of the read; the poly-T
//  stretch leads to a new left mask clip (setLMClipoff()) and is searched
//  from the 5' (left) end.
//
// logname:   if not empty, every clip is appended as one line to that file
// logprefix: text written at the start of each log line
//
// Throws (via MIRANOTIFY) if the log file cannot be opened.
void Assembly::clipPolyATAtEnds(const string & logname, const string & logprefix)
{
  FUNCSTART("void Assembly::clipPolyATAtEnds()");

  cout << "Clipping or tagging poly A/T stretches at ends of reads ... ";
  cout.flush();

  Read::setCoutType(Read::AS_TEXTSHORT);

  //int32 minlooklen=as_params.as_polybase_start_stretch;
  //int32 minrunlen=as_params.as_polybase_minimum_count;

  ofstream logfout;
  if(!logname.empty()){
    logfout.open(logname.c_str(), ios::out|ios::app);
    if(!logfout){
      MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
    }
  }

  for(uint32 actid=0; actid < AS_readpool.size(); actid++){
    if(AS_readpool.getRead(actid).hasValidData()
       && !(AS_readpool.getRead(actid).isBackbone() || AS_readpool.getRead(actid).isRail())){

      Read & actread = AS_readpool.getRead(actid);
      assembly_parameters const & as_params= AS_miraparams[actread.getSequencingType()].getAssemblyParams();
      if(!as_params.as_clip_polyat) continue;

      CEBUG(actread.getName() << endl);
      CEBUG(actread << endl);

      uint32 mincount=as_params.as_clip_polyat_len;
      uint32 maxbad=as_params.as_clip_polyat_maxerrors;
      int32 grace=static_cast<int32>(as_params.as_clip_polyat_maxgap);
      bool keepsignal=as_params.as_clip_polyat_keeppolysignal;

      //int32 minrunlen=static_cast<int32>(winlen)-static_cast<int32>(maxbad);

      // search poly-a in forward direction: the stretch sits at the right
      //  end of the read, so search from the 3' end and set the right clip
      {
        int32 lpolystart=-1;
        int32 rpolyend=-1;
        if(searchPolyBaseFrom3Prime(actread,'a',lpolystart,rpolyend,mincount,maxbad,grace)){
          if(keepsignal){
            // keep the stretch, clip everything behind it
            actread.setRMClipoff(rpolyend+1);
            CEBUG("setting rm " << rpolyend+1 << endl);
          }else{
            // clip the stretch away, too
            actread.setRMClipoff(lpolystart);
            CEBUG("setting rm " << lpolystart << endl);
          }
          CEBUG("taggingl " << lpolystart << " " << rpolyend << endl);
          
          if(!logname.empty()){
            logfout << logprefix << " poly-A fwd. " 
                    << actread.getName() 
                    << "\tMask right: "
                    << actread.getRMClipoff()
                    << '\n';
          }
          
          actread.addTag(lpolystart,
                         rpolyend,
                         Read::REA_tagentry_idFpAS,
                         Read::REA_tagentry_coFpAS);
        }	
      }

      // search poly-t in reverse direction: the stretch sits at the left
      //  end of the read, so search from the 5' end and set the left clip
      {
        int32 lpolystart=-1;
        int32 rpolyend=-1;
        
        if(searchPolyBaseFrom5Prime(actread,'t',lpolystart,rpolyend,mincount,maxbad,grace)){
          if(keepsignal){
            // keep the stretch, clip everything in front of it
            actread.setLMClipoff(lpolystart);
            CEBUG("setting lm " << lpolystart << endl);
          }else{
            // clip the stretch away, too
            actread.setLMClipoff(rpolyend+1);
            CEBUG("setting lm " << rpolyend+1 << endl);
          }
          CEBUG("taggingl " << lpolystart << " " << rpolyend << endl);

          if(!logname.empty()){
            logfout << logprefix << " poly-T rev. " 
                    << actread.getName() 
                    << "\tMask left: "
                    << actread.getLMClipoff()
                    << '\n';
          }

          actread.addTag(lpolystart,
                         rpolyend,
                         Read::REA_tagentry_idFpAS,
                         Read::REA_tagentry_coFpAS);
        }	
      }
    }
  }
  
  if(!logname.empty()){
    logfout.close();
  }
    
  cout << "done." << endl;

  FUNCEND();
  return;
}




/*************************************************************************
 *
 * Search poly-base (mincount length and maximum maxbad other bases) from left
 *  side of read (with 'grace' length grace if not encountered), 
 *
 *  return: 
 *    - true if found and return left and right coordinates in lpolystart and
 *      rpolyend
 *    - false if not found (lpolystart and rpolyend undefined)
 *
 *************************************************************************/

// Looks for a stretch of 'polybase' starting at the left clip of 'actread'
//  and walking towards the right clip.
//
// A stretch may start at the left clip or at one of the 'grace' positions
//  after it. From a start position the read is scanned to the right until
//  the right clip is reached or more than 'maxbad' bases which are neither
//  the poly base nor 'n' were seen. The stretch is accepted if at least
//  'mincount' poly bases were counted in that scan; non-poly bases at its
//  right end are then trimmed off.
//
// Returns true and the first / last position of the stretch in lpolystart /
//  rpolyend if found. Returns false otherwise; both are -1 in that case.
// Throws (via BUGIFTHROW) if polybase is not a valid ACGT base, grace is
//  negative or maxbad >= mincount.
bool Assembly::searchPolyBaseFrom5Prime(Read & actread, const char polybase, int32 & lpolystart, int32 & rpolyend, const uint32 mincount, const uint32 maxbad, int32 grace)
{
  FUNCSTART("bool Assembly::searchPolyBaseFrom5Prime(Read & actread, const char polybase, int32 & lpolystart, int32 & rpolyend, const uint32 mincount, const uint32 maxbad, const int32 grace)");


  BUGIFTHROW(!dptools::isValidACGTBase(polybase),"Ummm ... " << polybase << " is not ACGT?");
  BUGIFTHROW(grace<0,"grace (" << grace << ") < 0 ?");
  BUGIFTHROW(maxbad>=mincount,"maxbad (" << maxbad << ") >= mincount (" << mincount << ") ?");

  CEBUG(actread.getName() << endl);
  CEBUG(actread << endl);

  lpolystart=-1;
  rpolyend=-1;

  int32 startpos=actread.getLeftClipoff();
  while(grace>=0 && startpos<actread.getRightClipoff()){
    char startbase=actread.getBaseInSequence(startpos);
    CEBUG("###1 : " << grace << " " << startpos << "\t" << startbase << endl);

    if(dptools::areBasesContained(polybase,startbase)){
      lpolystart=startpos;

      // scan to the right, counting poly bases and "real" errors
      uint32 polycount=0;
      uint32 badcount=0;
      int32 scanpos=startpos;
      while(scanpos<actread.getRightClipoff()){
        char scanbase=actread.getBaseInSequence(scanpos);
        if(dptools::areBasesContained(polybase,scanbase)){
          ++polycount;
        }else if(tolower(scanbase)!='n'){
          ++badcount;
          if(badcount>maxbad) break;
        }
        ++scanpos;
      }

      if(polycount>=mincount){
        // get off non-poly characters as far as possible
        if(scanpos==actread.getRightClipoff()) --scanpos;
        while(scanpos>startpos
              && !dptools::areBasesContained(polybase,actread.getBaseInSequence(scanpos))){
          --scanpos;
        }
        rpolyend=scanpos;
        break;
      }
    }

    // no luck at this position, try the next one
    lpolystart=-1;
    ++startpos;
    --grace;
  }

  const bool stretchfound=(rpolyend >=0 && lpolystart != -1);

  FUNCEND();
  return stretchfound;
}



/*************************************************************************
 *
 * Search poly-base (mincount length and maximum maxbad other bases) from right
 *  side of read (with 'grace' length grace if not encountered), 
 *
 *  return: 
 *    - true if found and return left and right coordinates in lpolystart and
 *      rpolyend
 *    - false if not found (lpolystart and rpolyend undefined)
 *
 *************************************************************************/
//#define CEBUG(bla)   {cout << bla; cout.flush();}
// Looks for a stretch of 'polybase' starting at the last base before the
//  right clip of 'actread' and walking towards the left clip.
//
// A stretch may start (= have its right end) at that base or at one of the
//  'grace' positions before it. From a start position the read is scanned
//  to the left until the left clip is passed or more than 'maxbad' bases
//  which are neither the poly base nor 'n' are on the error counter. The
//  stretch is accepted if at least 'mincount' poly bases were counted in
//  that scan; non-poly bases at its left end are then trimmed off.
//
// Returns true and the first / last position of the stretch in lpolystart /
//  rpolyend if found. Returns false otherwise; both are -1 in that case.
// Throws (via BUGIFTHROW) if polybase is not a valid ACGT base, grace is
//  negative or maxbad >= mincount.
bool Assembly::searchPolyBaseFrom3Prime(Read & actread, const char polybase, int32 & lpolystart, int32 & rpolyend, const uint32 mincount, const uint32 maxbad, int32 grace)
{
  FUNCSTART("bool Assembly::searchPolyBaseFrom3Prime(Read & actread, const char polybase, int32 & lpolystart, int32 & rpolyend, const uint32 mincount, const uint32 maxbad, const int32 grace)");


  BUGIFTHROW(!dptools::isValidACGTBase(polybase),"Ummm ... " << polybase << " is not ACGT?");
  BUGIFTHROW(grace<0,"grace (" << grace << ") < 0 ?");
  BUGIFTHROW(maxbad>=mincount,"maxbad (" << maxbad << ") >= mincount (" << mincount << ") ?");

  CEBUG(actread.getName() << endl);
  CEBUG(actread << endl);

  lpolystart=-1;
  rpolyend=-1;

  CEBUG("Reverse " << actread.getName() << '\n');

  int32 startpos=actread.getRightClipoff()-1;
  while(grace>=0 && startpos>=actread.getLeftClipoff()){
    char startbase=static_cast<char>(tolower(actread.getBaseInSequence(startpos)));
    CEBUG("###1 : " << grace << " " << startpos << "\t" << startbase << endl);

    if(dptools::hasNucleicAcidInIUPAC(polybase,startbase)){
      rpolyend=startpos;

      // scan to the left, counting poly bases and "real" errors
      uint32 polycount=0;
      uint32 badcount=0;
      uint32 numscanned=0;
      int32 scanpos=startpos;
      while(scanpos>=actread.getLeftClipoff()){
        char scanbase=actread.getBaseInSequence(scanpos);
        CEBUG("###2 : " << numscanned << " " << scanpos << "\t" << scanbase << " " << polycount << " " << badcount << endl);
        if(dptools::areBasesContained(polybase,scanbase)){
          ++polycount;
          if(badcount>0 && numscanned>=mincount){
            // an error gets forgiven when the base 'mincount' positions to
            //  the right is a poly base
            // NOTE(review): forgiving on a poly base (and not on a non-poly
            //  base) leaving the window looks unusual -- confirm intent
            char windowbase=actread.getBaseInSequence(scanpos+mincount);
            if(dptools::areBasesContained(polybase,windowbase)){
              --badcount;
            }
          }
        }else if(tolower(scanbase)!='n'){
          ++badcount;
          if(badcount>maxbad) break;
        }
        --scanpos;
        ++numscanned;
      }

      if(polycount>=mincount) {
        CEBUG("Found tcount\n");
        // get off non-poly characters as far as possible
        if(scanpos<actread.getLeftClipoff()) ++scanpos;
        while(scanpos<rpolyend
              && !dptools::areBasesContained(polybase,actread.getBaseInSequence(scanpos))){
          ++scanpos;
        }
        lpolystart=scanpos;
        break;
      }
    }

    // no luck at this position, try the next one
    rpolyend=-1;
    --startpos;
    --grace;
  }

  CEBUG("LPOLYSTART: " << lpolystart << "\tRPOLYEND: " << rpolyend << endl);

  const bool stretchfound=(lpolystart >=0 && rpolyend != -1);

  FUNCEND();
  return stretchfound;
}
//#define CEBUG(bla)








/*************************************************************************
 *
 * clip poly-base at right end of read
 *
 *************************************************************************/

//#define CEBUG(bla)   {cout << bla; cout.flush();}
void Assembly::clipPolyBaseAtEnd(const string & logname, const string & logprefix)
{
  FUNCSTART("void Assembly::clipPolyBaseAtEnd(const string & logname, const string & logprefix)");

  cout << "Clipping dubious poly-base stretches at end of reads ... ";
  cout.flush();

  Read::setCoutType(Read::AS_TEXTSHORT);

  ofstream logfout;
  if(!logname.empty()){
    logfout.open(logname.c_str(), ios::out|ios::app);
    if(!logfout){
      MIRANOTIFY(Notify::FATAL, "Could not open log for appending: " << logname);
    }
  }

  vector<uint32> countvec(128,0);

  for(uint32 actid=0; actid < AS_readpool.size(); actid++){
    Read & actread = AS_readpool.getRead(actid);
    if(actread.hasValidData()
       && !(actread.isBackbone() || actread.isRail())){

      assembly_parameters const & as_params= AS_miraparams[actread.getSequencingType()].getAssemblyParams();
      if(!as_params.as_clip_3ppolybase_len) continue;

      CEBUG(actread.getName() << endl);

      uint32 mincount=as_params.as_clip_3ppolybase_len;
      uint32 maxbad=as_params.as_clip_3ppolybase_maxerrors;
      int32 grace=static_cast<int32>(as_params.as_clip_3ppolybase_maxgap);

      if(mincount==0){
	MIRANOTIFY(Notify::FATAL, "-AS:c3ppmsl may not be 0");
      }

      // first guess which base might be a polybase
      //
      // count occurences of bases in last (mincount+grace or mincount?) positions of read
      // the largest count >=30% of real bases (no 'n') wins 

      if(actread.getLenClippedSeq() < mincount) continue;

      Read::setCoutType(Read::AS_FASTA);
      CEBUG(actread << endl);
      Read::setCoutType(Read::AS_TEXTSHORT);
      CEBUG(actread << endl);

      countvec['a']=0;
      countvec['c']=0;
      countvec['g']=0;
      countvec['n']=0;
      countvec['t']=0;
      
      int32 runindex=actread.getRightClipoff()-1;
      for(uint32 ri=0; ri<mincount && runindex>=actread.getLeftClipoff(); --runindex, ++ri) {
	char actbase=static_cast<char>(tolower(actread.getBaseInSequence(runindex)));
	++countvec[actbase];
      }

      CEBUG("CV: " << countvec['a'] << " " << countvec['c'] << " " << countvec['g'] << " " << countvec['t'] << endl);

      uint32 realbases=countvec['a']+countvec['c']+countvec['g']+countvec['t'];
      uint32 maxreal=max(countvec['a'],max(countvec['c'],max(countvec['g'],countvec['t'])));

      CEBUG("RB: " << realbases << "\tMR: " << maxreal << endl);

      char tentativepolybase='?';
      if(realbases>0 && 100*maxreal/realbases >= 30){
	CEBUG("MRThresh\n");
	for(uint32 testi=0; testi<4; ++testi){
	  if(countvec["acgt"[testi]]==maxreal){
	    CEBUG("MRThreshHit\n");
	    tentativepolybase="acgt"[testi];
	    break;
	  }
	}
      }

      // so, if a tentative polybase was found, try to find some clips and clip if found

      if(tentativepolybase!='?') {
	int32 lpolystart=-1;
	int32 rpolyend=-1;
	CEBUG("looking...\n");
	if(searchPolyBaseFrom3Prime(actread,tentativepolybase,lpolystart,rpolyend,mincount,maxbad,grace)){
	  actread.setRMClipoff(lpolystart);
	  CEBUG("setting rm " << lpolystart << endl);
	  
	  if(!logname.empty()){
	    logfout << logprefix << " poly-base " << tentativepolybase << " at end " 
		    << actread.getName() 
		    << "\tMask right: "
		    << actread.getRMClipoff()
		    << '\n';
	  }
	}	
      }
    }
  }

  cout << "done.\n";

  FUNCEND();
}
//#define CEBUG(bla)


/*************************************************************************
 *
 * splits a sequence into overlapping subsequences
 *
 * AND
 *
 * saves pre-computed adsfacts file into log directory for
 *  later reading
 * number of generated adsfacts is put in AS_numADSFacts_fromshreds
 *
 *
 * This saves enormous amount of time, but is not the "real" thing:
 *  matches between shreds that are non-overlapping from the start on are
 *  not made
 *
 *************************************************************************/

/*
void Assembly::shredReadsIntoReadPool(ReadPool & sourcepool, uint32 shredlen, uint32 shredoffsetinc, uint8 shredreadtype, const string & shredstrain)
{
  FUNCSTART("void Assembly::shredReadsIntoReadPool(ReadPool & sourcepool, uint32 shredlen, uint32 shredoffsetinc, uint8 shredreadtype, const string & shredstrain)");

  AS_numADSFacts_fromshreds=0;
  string adsfshredsfilename=AS_miraparams[0].getDirectoryParams().dir_tmp+"/shred.adsfacts";
  ofstream adsfout;
  adsfout.open((adsfshredsfilename+".adsfacts").c_str(), ios::out|ios::trunc);

  deque<uint32> overlapfifo;

  string shredseq;
  shredseq.reserve(shredlen);
  vector<base_quality_t> shredqual;
  shredqual.reserve(shredlen+10);
  string shredname;
 
  for(uint32 actsourceid=0; actsourceid < sourcepool.size(); actsourceid++){
    Read & sourceread = sourcepool.getRead(actsourceid);
    if(!sourceread.hasValidData()) continue;
    if(sourceread.getLenSeq() < shredlen) continue;

    uint32 actoffset=0;
    uint32 shredcounter=0;
    for(bool doloop=true; doloop; actoffset+=shredoffsetinc){
      uint32 fromi=actoffset;
      uint32 toi=actoffset+shredlen;
      if(toi>=sourceread.getLenSeq()) {
	toi=sourceread.getLenSeq();
	doloop=false;
      }
      shredseq.clear();
      shredqual.clear();
      for(; fromi<toi; fromi++){
	shredseq+=sourceread.getBaseInSequence(fromi);
	shredqual.push_back(sourceread.getQualityInSequence(fromi));
      }

      // if wished: lower quals to max as_cap454consensusqual
      if(AS_miraparams[0].getAssemblyParams().as_cap454consensusqual>0){
	vector<base_quality_t>::iterator qI=shredqual.begin();
	base_quality_t maxqual=AS_miraparams[0].getAssemblyParams().as_cap454consensusqual;
	for(;qI != shredqual.end(); qI++){
	  if(*qI>maxqual) *qI=maxqual;
	}
      }

      ostringstream ostr;
      ostr << "shred_" << shredcounter << "_" << sourceread.getName();
      shredname=ostr.str();

      AS_readpool.addNewEmptyRead();
      uint32 newreadid=AS_readpool.size()-1;
      Read & newread=AS_readpool.getRead(newreadid);
      newread.setName(shredname);
      newread.setSequenceFromString(shredseq);
      newread.setQualities(shredqual);
      newread.setStrain(shredstrain.c_str());
      newread.setSequencingType(shredreadtype);

      //cout << "\n----------------------------------------\nAdded " << shredname << '\n';
      // now insert the weights
      {
	overlapfifo.push_front(newreadid);
	deque<uint32>::iterator OFI=overlapfifo.begin();
	OFI++;
	int32 overlaplen=shredlen-shredoffsetinc;
	int32 totalshredoffset=shredoffsetinc;
	uint32 numelements=1;
	while(OFI != overlapfifo.end()) {
	  if(overlaplen<=0) break;

	  AlignedDualSeqFacts tmpadsf;
	  tmpadsf.publicinit(
	    *OFI,
	    newreadid,
	    static_cast<uint16>(totalshredoffset),
	    static_cast<uint16>(totalshredoffset
				-(AS_readpool.getRead(*OFI).getLenSeq()
				  -AS_readpool.getRead(newreadid).getLenSeq())),
	    0,
	    static_cast<uint16>((AS_readpool.getRead(*OFI).getLenSeq()+
				 AS_readpool.getRead(newreadid).getLenSeq()-overlaplen)),
	    1,
	    1,
	    100);
	  
	  // output of the ADSfacts to file
	  // TODO: real ouput
	  // first weight and direction 
	  // TODO: reduce weight to favorise real reads in assembly???
	  adsfout << overlaplen*10000 << "\t1\t";
	  tmpadsf.serialiseOut(adsfout);
	  adsfout << '\n';

	  AS_numADSFacts_fromshreds++;
  
	  OFI++;
	  overlaplen-=shredoffsetinc;
	  totalshredoffset+=shredoffsetinc;
	  numelements++;
	}
	if(overlapfifo.size()>numelements) overlapfifo.resize(numelements);
      }
      shredcounter++;
    }
    cout << "Shredded " << sourceread.getName() << " into " << shredcounter << " pieces.\n";
  }

  adsfout.close();

  FUNCEND();
}
*/


#define CEBUG(bla)   {cout << bla; cout.flush();}

// Determines, for each of the values 0 to 6, the longest run of consecutive
//  equal entries in 'profile'. If the longest run of value 3 is shorter
//  than 5 and the skim edge 'seI' points to is currently marked as taken in
//  AS_skimstaken, the edge is reported on cout, unmarked, and the overlap
//  counters (AS_numskimoverlaps) of both reads of the edge are decreased.
//
// 'adse' is not used.
void Assembly::analyseOverlapHashProfile(vector<uint8> & profile, vector<skimedges_t>::const_iterator seI, ADSEstimator & adse)
{
  const size_t numvalues=7;
  vector<uint32> longeststretch(numvalues,0);

  // walk through the profile run by run
  size_t runstart=0;
  while(runstart<profile.size()){
    const uint8 runvalue=profile[runstart];
    size_t runend=runstart+1;
    while(runend<profile.size() && profile[runend]==runvalue) ++runend;

    // values outside of 0..6 are not tracked
    if(runvalue<numvalues){
      const uint32 runlen=static_cast<uint32>(runend-runstart);
      if(runlen>longeststretch[runvalue]) longeststretch[runvalue]=runlen;
    }
    runstart=runend;
  }

  if(longeststretch[3]>=5) return;

  const bool istaken=(AS_skimstaken[seI->skimindex]==true);
  if(!istaken) return;

  cout << "Remove seI: " << *seI;
  cout << "stretches:\n";
  for(size_t vi=0; vi<numvalues; ++vi){
    cout << vi << ' ' << longeststretch[vi] << endl;
  }

  AS_skimstaken[seI->skimindex]=false;
  AS_numskimoverlaps[seI->rid1]--;
  AS_numskimoverlaps[seI->linked_with]--;
}

#define CEBUG(bla)






/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////        Obsolete         ///////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////



///*************************************************************************
// *
// *
// *
// *
// *************************************************************************/
//
//void Assembly::clipTo200()
//{
//  FUNCSTART("void Assembly::clipTo200()");
//
//  cout << "200 clipper. Test function! Mail the authors if you can read this!\n";
//
//  for(uint32 i=0;i<AS_readpool.size();i++){
//    Read & theread = AS_readpool.getRead(i);
//    if(theread.hasValidData()==true){
//      //     cout << "Len: " << theread.getLenClippedSeq() << endl;
//      if(theread.getLenClippedSeq()>250){
//	theread.setCoutType(Read::AS_TEXT);
//	//	cout << "Clipping ... \n" << theread;
//	theread.setClipoffs(theread.getLeftClipoff(), 
//			    theread.getLeftClipoff()+250, 
//			    false);
//	//	cout << "clipped ... \n" << theread;
//      }
//    }
//  }
//
//  FUNCEND()
//}
//
//
//
///*************************************************************************
// *
// * expects reads to have baseflags set  (by performHashAnalysis())
// *
// * doesn't seem to be a good idea
// *
// *************************************************************************/
//
////#define CEBUG(bla)   {cout << bla; cout.flush();}
//
//void Assembly::performHashEditing()
//{
//  FUNCSTART("void Assembly::performHashEditing()");
//
//  cout << "Hash analysis for editing:";
//
//  skim_parameters const & skim_params= AS_miraparams[0].getSkimParams();
//  assembly_parameters const & as_fixparams= AS_miraparams[0].getAssemblyParams();
//
//  uint32 basesperhash=as_fixparams.as_clip_pec_basesperhash;
//  if(sizeof(uint64) < 8 && basesperhash > 15) basesperhash=15;
//  {
//
//    Skim s3;
//
//    s3.setHashFrequencyRatios(skim_params.sk_freqest_minnormal,
//			      skim_params.sk_freqest_maxnormal,
//			      skim_params.sk_freqest_repeat,
//			      skim_params.sk_freqest_heavyrepeat,
//			      skim_params.sk_freqest_crazyrepeat,
//			      skim_params.sk_nastyrepeatratio);
//
//    s3.analyseHashes(AS_miraparams[0].getDirectoryParams().dir_tmp,
//		     AS_readpool,
//		     true,
//		     false,
//		     false,
//		     true,
//		     1,
//		     basesperhash,
//		     1,
//		     false);
//  }
//
//  if(as_fixparams.as_dateoutput) dateStamp(cout);
//  cout << '\n';
//
//  cout << "Looking for proposed edits ... "; cout.flush();
//
//  vector<uint8> maxhf;
//  maxhf.reserve(10000);
//
//  uint64 numbaseschanged=0;
//  uint64 numreadschanged=0;
//
//  for(uint32 actid=0; actid<AS_readpool.size(); actid++){
//    Read & r=AS_readpool.getRead(actid);
//    
//    if(r.hasValidData()
//       && r.hasBaseHashStats()
//       && !(r.isBackbone() 
//	    || r.isRail())){
//
//      maxhf.clear();
//      maxhf.resize(r.getLenSeq(),0);
//
//      bool wasedited=false;
//
//      {
//	int32 lpos=r.getLeftClipoff();
//	vector<Read::bposhashstat_t>::const_iterator bhsI=r.getBPosHashStats().begin();
//	vector<uint8>::iterator mhfI=maxhf.begin();
//	advance(bhsI,lpos);
//	advance(mhfI,lpos);
//
//	uint32 counter=basesperhash;
//	for(; lpos<static_cast<int32>(r.getLenSeq()); lpos++, bhsI++, mhfI++) {
//	  *mhfI=(bhsI->fwd.getFrequency())>1;
//	  if(*mhfI) counter=basesperhash;
//	  if(counter) {
//	    *mhfI=4;
//	    --counter;
//	  }
//	}
//
//	lpos=r.getLeftClipoff();
//	mhfI=maxhf.begin();
//	advance(mhfI,lpos);
//
//	//for(; lpos<static_cast<int32>(r.getLenSeq()); lpos++) {
//	//  cout << (uint16) maxhf[lpos] << ' ';
//	//}
//	//cout << endl;
//	//lpos=r.getLeftClipoff();
//	//for(; lpos<static_cast<int32>(r.getLenSeq()); lpos++) {
//	//  cout << r.getBaseInSequence(lpos) << ' ';
//	//}
//	//cout << endl;
//	//Read::setCoutType(Read::AS_TEXT);
//	//cout << r;
//
//	lpos=r.getLeftClipoff();
//	for(; lpos<static_cast<int32>(r.getLenSeq()); lpos++, mhfI++) {
//	  if(*mhfI) break;
//	}
//
//	int32 editstart=-1;
//	for(; lpos<static_cast<int32>(r.getLenSeq()); lpos++, mhfI++) {
//	  if(editstart<0){
//	    if(*mhfI==0) {
//	      editstart=lpos;
//	    }
//	  }else{
//	    if(*mhfI) {
//	      for(int32 ii=editstart; ii<lpos; ii++) {
//		//editpositions.push_back(ii);
//		r.changeBaseInSequence('n',0,ii);
//		numbaseschanged++;
//		wasedited=true;
//	      }
//	      editstart=-1;
//	    }
//	  }
//	}
//
//      }
//      if(wasedited) numreadschanged++;
//
//      //if(editpositions.size()){
//      //	cout << r.getName() << ": wants to edit " << editpositions.size() << " positions\n";
//      //}
//    }
//  }
//
//  cout << "changed " << numbaseschanged << " bases to 'n' in " << numreadschanged << " reads.\n";
//
//  FUNCEND();
//
//  return;
//}
////#define CEBUG(bla)


