/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "FindEnzymesTask.h"

#include <core_api/DNAAlphabet.h>
#include <core_api/Log.h>
#include <core_api/Counter.h>
#include <document_format/DNAAlphabetUtils.h>
#include <document_format/GenbankFeatures.h>

namespace GB2 {

/* TRANSLATOR GB2::FindEnzymesTask */

// File-local log category (ULOG_ENZYME_PLUGIN) used by the tasks in this file for info messages.
static LogCategory log(ULOG_ENZYME_PLUGIN);

//////////////////////////////////////////////////////////////////////////
// enzymes -> annotations

/**
 * Creates a task that searches a sequence region for restriction sites and,
 * in report(), stores the hits as annotations.
 *
 * @param aobj          annotation table that receives the hits
 * @param g             name of the annotation group to store hits under
 * @param seq           sequence to search
 * @param region        region of the sequence to search
 * @param _enzymes      enzymes to look for
 * @param maxResults    result cap forwarded to the FindEnzymesTask subtask
 * @param _minHitCount  minimal number of hits an enzyme needs to be stored
 * @param _maxHitCount  maximal number of hits an enzyme may have to be stored
 */
FindEnzymesToAnnotationsTask::FindEnzymesToAnnotationsTask(AnnotationTableObject* aobj, const QString& g,
                                                            const DNASequence& seq, LRegion& region, 
                                                            const QList<SEnzymeData>& _enzymes, int maxResults,
                                                            int _minHitCount, int _maxHitCount)
    : Task(tr("Find and store enzymes"), TaskFlags_NR_FOSCOE),
      enzymes(_enzymes),
      aObj(aobj),
      groupName(g),
      maxHitCount(_maxHitCount),
      minHitCount(_minHitCount)
{
    GCOUNTER( cvar, tvar, "FindEnzymesToAnnotationsTask" );

    // The actual search is delegated to a subtask; it is kept in fTask so that
    // report() can pull the hits out of it later.
    FindEnzymesTask* searchTask = new FindEnzymesTask(seq, region, enzymes, maxResults);
    fTask = searchTask;
    addSubTask(searchTask);
}

/**
 * Converts the hits collected by the search subtask into annotations.
 *
 * Does nothing when the task was canceled or has errors. Sets an error when
 * the target annotation table no longer exists or is state-locked. Otherwise,
 * for every enzyme whose hit count lies within [minHitCount, maxHitCount],
 * the hits are added to the table under `groupName`, or under
 * `groupName/<enzyme id>` when more than one enzyme was requested or
 * `groupName` is empty.
 *
 * @return always ReportResult_Finished
 */
Task::ReportResult FindEnzymesToAnnotationsTask::report() {
    if (isCanceled() || hasErrors()) {
        return ReportResult_Finished;
    }

    if (aObj.isNull()) {
        stateInfo.setError(  tr("Annotation table does not exist") );
        return ReportResult_Finished;
    }
    if (aObj->isStateLocked()) {
        stateInfo.setError(  tr("Annotation table is read-only") );
        return ReportResult_Finished;
    }

    const bool nestPerEnzyme = enzymes.size() > 1 || groupName.isEmpty();
    foreach(const SEnzymeData& enzymeData, enzymes) {
        QList<SharedAnnotationData> hits = fTask->getResultsAsAnnotations(enzymeData->id);

        // Skip enzymes whose number of hits falls outside the requested window.
        if (hits.size() < minHitCount || hits.size() > maxHitCount) {
            continue;
        }

        QString targetGroup;
        if (nestPerEnzyme) {
            targetGroup = groupName + "/" + enzymeData->id;
        } else {
            targetGroup = groupName;
        }

        QList<Annotation*> created;
        foreach(const SharedAnnotationData& hitData, hits) {
            created.append(new Annotation(hitData));
        }
        aObj->addAnnotations(created, targetGroup);
    }

    return ReportResult_Finished;
}


//////////////////////////////////////////////////////////////////////////
// find multiple enzymes task
/**
 * Creates a task that searches `region` of `seq` for all given enzymes.
 *
 * @param seq      sequence to search; asserted to have a nucleic alphabet
 * @param region   region of the sequence to search
 * @param enzymes  enzymes to look for
 * @param mr       maximal number of results to collect (see onResult())
 */
FindEnzymesTask::FindEnzymesTask(const DNASequence& seq, LRegion& region, const QList<SEnzymeData>& enzymes, int mr)
    : Task(tr("Find Enzymes"), TaskFlags_NR_FOSCOE),
      maxResults(mr)
{
    assert(seq.alphabet->isNucleic());

    // One FindSingleEnzymeTask per requested enzyme; this task is passed as the
    // result listener, so every hit is delivered to FindEnzymesTask::onResult().
    for (QList<SEnzymeData>::const_iterator it = enzymes.constBegin(); it != enzymes.constEnd(); ++it) {
        addSubTask(new FindSingleEnzymeTask(seq, region, *it, this));
    }
}

/**
 * Listener callback: records a single hit of `enzyme` at position `pos`.
 *
 * Access to `results` is serialized through `resultsLock`. Once `maxResults`
 * hits are stored, any further hit is dropped; the first such hit also sets
 * an error on the task and cancels it.
 *
 * @param pos     position of the hit as reported by the search algorithm
 * @param enzyme  enzyme that was found
 */
void FindEnzymesTask::onResult(int pos, const SEnzymeData& enzyme) {
    QMutexLocker l(&resultsLock);
    // '>=' rather than '>': with '>' the list could grow to maxResults + 1 entries,
    // and a search with exactly maxResults + 1 hits would silently pass the cap.
    if (results.size() >= maxResults) {
        if (!isCanceled()) {
            stateInfo.setError(  tr("Number of results exceed %1, stopping").arg(maxResults) );
            cancel();
        }
        return;
    }
    results.append(FindEnzymesAlgResult(enzyme, pos));
}

/**
 * Builds annotation data for every collected hit of the enzyme with id `enzymeId`.
 *
 * Each annotation is named after the enzyme id and covers
 * LRegion(hit position, enzyme sequence length). Two optional qualifiers are
 * attached, both computed once from the first matching hit:
 *  - "db_xref": "REBASE:" followed by the enzyme accession (with a leading
 *    "RB" stripped), or by the enzyme id when the accession is empty;
 *  - GBFeatureUtils::QUALIFIER_CUT: the direct cut position, followed by
 *    "/<complement cut>" when the complement cut is known and differs.
 *
 * Note: this method reads `results` without taking `resultsLock`.
 *
 * @param enzymeId  id of the enzyme whose hits are requested
 * @return annotation data for the matching hits; empty when there are none
 */
QList<SharedAnnotationData> FindEnzymesTask::getResultsAsAnnotations(const QString& enzymeId) const {
    QList<SharedAnnotationData> res;
    
    QString cutStr;
    QString dbxrefStr;
    // Must start as false: it is only flipped when a hit for enzymeId is seen.
    // (Starting as true made the early return below unreachable.)
    bool found = false;
    foreach(const FindEnzymesAlgResult& r, results) {
        if (r.enzyme->id != enzymeId) {
            continue;
        }
        found = true;
        if (!r.enzyme->accession.isEmpty()) {
            QString accession = r.enzyme->accession;
            if (accession.startsWith("RB")) {
                accession = accession.mid(2);
            }
            dbxrefStr = "REBASE:"+ accession;
        } else if (!r.enzyme->id.isEmpty()) {
            dbxrefStr = "REBASE:"+ r.enzyme->id;
        }
        if (r.enzyme->cutDirect != ENZYME_CUT_UNKNOWN) {
            cutStr = QString::number(r.enzyme->cutDirect);
            if (r.enzyme->cutComplement != ENZYME_CUT_UNKNOWN  && r.enzyme->cutComplement!=r.enzyme->cutDirect) {
                cutStr+="/"+QString::number(r.enzyme->cutComplement);
            }
        }
        // The qualifiers depend only on the enzyme, so the first matching hit is enough.
        break;
    }
    if (!found) {
        // No hit for this enzyme: skip the second pass entirely.
        return res;
    }

    foreach(const FindEnzymesAlgResult& r, results) {
        if (r.enzyme->id == enzymeId) {
            AnnotationData* ad = new AnnotationData();
            ad->name = r.enzyme->id;
            ad->location.append(LRegion(r.pos, r.enzyme->seq.size()));
            if (!dbxrefStr.isEmpty()) {
                ad->qualifiers.append(Qualifier("db_xref", dbxrefStr));
            }
            if (!cutStr.isEmpty()) {
                ad->qualifiers.append(Qualifier(GBFeatureUtils::QUALIFIER_CUT, cutStr));
            }
            res.append(SharedAnnotationData(ad));
        }
    }
    return res;
}

/**
 * Logs the total number of collected restriction sites, unless the task
 * has errors or was canceled.
 *
 * @return always ReportResult_Finished
 */
Task::ReportResult FindEnzymesTask::report() {
    if (hasErrors() || isCanceled()) {
        return ReportResult_Finished;
    }
    log.info(tr("Found %1 restriction sites").arg(results.count()));
    return ReportResult_Finished;
}

// Discards all hits collected in `results`.
void FindEnzymesTask::cleanup() {
    results.clear();
}

//////////////////////////////////////////////////////////////////////////
// find single enzyme task
/**
 * Creates a task that searches `region` of `_seq` for a single enzyme.
 *
 * @param _seq     sequence to search; asserted to have a nucleic alphabet
 * @param region   region of the sequence to search
 * @param _enzyme  enzyme to look for
 * @param l        listener that receives the hits; when NULL, this task
 *                 collects the hits itself (see onResult())
 * @param mr       result cap applied by this task's own onResult()
 */
FindSingleEnzymeTask::FindSingleEnzymeTask(const DNASequence& _seq, LRegion& region, const SEnzymeData& _enzyme, 
                                           FindEnzymesAlgListener* l, int mr)
    : Task(tr("Find enzyme '%1'").arg(_enzyme->id), TaskFlag_NoRun),
      seq(_seq),
      region(region),
      enzyme(_enzyme),
      maxResults(mr),
      resultListener(l)
{
    assert(seq.alphabet->isNucleic());

    // Describe the searched region to the sequence walker as a single chunk.
    // NOTE(review): only seq/seqSize/chunkSize are set here, while onRegion() reads
    // the config's `range` -- confirm where that field gets its value.
    SequenceWalkerConfig walkerCfg;
    walkerCfg.seq = seq.constData() + region.startPos;
    walkerCfg.seqSize = region.len;
    walkerCfg.chunkSize = region.len;

    // Without an external listener the task stores the hits on its own.
    if (!resultListener) {
        resultListener = this;
    }

    addSubTask(new SequenceWalkerTask(walkerCfg, this, tr("Find enzyme '%1' parallel").arg(enzyme->id)));
}

/**
 * Listener callback: records a single hit of `enzyme` at position `pos`.
 *
 * Access to `results` is serialized through `resultsLock`. Once `maxResults`
 * hits are stored, any further hit is dropped; the first such hit also sets
 * an error on the task and cancels it. The error text is looked up in the
 * FindEnzymesTask translation context.
 *
 * @param pos     position of the hit as reported by the search algorithm
 * @param enzyme  enzyme that was found
 */
void FindSingleEnzymeTask::onResult(int pos, const SEnzymeData& enzyme) {
    QMutexLocker l(&resultsLock);
    // '>=' rather than '>': with '>' the list could grow to maxResults + 1 entries,
    // and a search with exactly maxResults + 1 hits would silently pass the cap.
    if (results.size() >= maxResults) {
        if (!isCanceled()) {
            stateInfo.setError(  FindEnzymesTask::tr("Number of results exceed %1, stopping").arg(maxResults) );
            cancel();
        }
        return;
    }
    results.append(FindEnzymesAlgResult(enzyme, pos));
}

/**
 * Sequence walker callback: runs the enzyme search and forwards hits to
 * `resultListener`.
 *
 * Returns without searching when the enzyme has an empty sequence or a
 * non-nucleic alphabet (the latter is logged). The extended comparator is
 * used when either the enzyme or the sequence uses the extended DNA
 * alphabet; otherwise the exact comparator is used.
 *
 * @param t   walker subtask; its global config supplies the search range
 * @param ti  task state passed through to the search algorithm
 */
void FindSingleEnzymeTask::onRegion(SequenceWalkerSubtask* t, TaskStateInfo& ti) {
    if (enzyme->seq.isEmpty()) {
        return;
    }
    if (!enzyme->alphabet->isNucleic()) {
        log.info(tr("Non-nucleic enzyme alphabet: %1, enzyme: %2, skipping..").arg(enzyme->alphabet->getId()).arg(enzyme->id));
        return;
    }

    const bool needsExtendedMatch = enzyme->alphabet->getId() == BaseDNAAlphabetIds::NUCL_DNA_EXTENDED 
                                    || seq.alphabet->getId() == BaseDNAAlphabetIds::NUCL_DNA_EXTENDED;
    const SequenceWalkerConfig& walkerCfg = t->getGlobalConfig();

    if (!needsExtendedMatch) {
        FindEnzymesAlgorithm<ExactDNAAlphabetComparatorNX> exactSearch;
        exactSearch.run(seq, walkerCfg.range, enzyme, resultListener, ti);
        return;
    }

    FindEnzymesAlgorithm<ExtendedDNAlphabetComparator> extendedSearch;
    extendedSearch.run(seq, walkerCfg.range, enzyme, resultListener, ti);
}

// Discards all hits collected in `results`.
void FindSingleEnzymeTask::cleanup() {
    results.clear();
}


}//namespace
