/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "FastaFormat.h"

#include "DocumentFormatUtils.h"

#include <core_api/Task.h>
#include <core_api/IOAdapter.h>
#include <core_api/DNAAlphabet.h>

#include <gobjects/DNASequenceObject.h>
#include <gobjects/AnnotationTableObject.h>
#include <gobjects/GObjectTypes.h>
#include <util_text/TextUtils.h>
#include <util_gui/GUIUtils.h>

#include <QtCore/QtAlgorithms>

#include <memory>

namespace GB2 {

/* TRANSLATOR GB2::FastaFormat */
/* TRANSLATOR GB2::IOAdapter */
/* TRANSLATOR GB2::Document */

/**
 * Constructs the format object and sets its translated display name.
 *
 * @param p  passed through to the DocumentFormat base constructor
 */
FastaFormat::FastaFormat(QObject* p)
    : DocumentFormat(p)
{
    formatName = FastaFormat::tr("FASTA");
}

/**
 * Returns the file name extensions (without the leading dot) that are
 * associated with this format.
 */
QStringList FastaFormat::getSupportedDocumentFileExtensions() {
    return QStringList()
        << "fa" << "mpfa" << "fna" << "fsa" << "fas"
        << "fasta" << "sef" << "seq" << "seqs";
}

/**
 * Checks whether a raw data sample looks like FASTA.
 *
 * The sample is accepted when its first character after any leading
 * TextUtils::WHITES characters is '>' and the whole sample contains no
 * TextUtils::BINARY characters.
 *
 * @param data  start of the sample
 * @param size  number of bytes available at `data`
 * @return true if the sample is accepted
 */
bool FastaFormat::isDataFormatSupported(const char* data, int size) const {
    const int leadingWhites = TextUtils::skip(TextUtils::WHITES, data, size);

    // Nothing but whites (or nothing at all) in the sample.
    if (size - leadingWhites <= 0) {
        return false;
    }
    // The first meaningful character must open a FASTA header.
    if (data[leadingWhites] != '>') {
        return false;
    }
    // The binary check deliberately covers the complete sample, whites included.
    const bool hasBinary = TextUtils::contains(TextUtils::BINARY, data, size);
    return !hasBinary;
}

/**
 * Reports whether an object operation is supported by this format.
 *
 * Only the object type is inspected: the answer is true exactly for
 * GObjectTypes::DNA_SEQUENCE, whatever the document or the operation.
 */
bool FastaFormat::isObjectOpSupported(const Document* d , DocumentFormat::DocObjectOp op, GObjectType t) const {
    Q_UNUSED(d);
    Q_UNUSED(op);
    const bool isSequenceType = (GObjectTypes::DNA_SEQUENCE == t);
    return isSequenceType;
}


/**
 * Checks whether this format satisfies the given constraints.
 *
 * @param c  constraints to test
 * @return false if any requested object type is not GObjectTypes::DNA_SEQUENCE,
 *         or if raw data checking is requested and the raw data is rejected by
 *         isDataFormatSupported(); true otherwise
 */
bool FastaFormat::checkConstraints(const DocumentFormatConstraints& c) const {
    // Every requested object type has to be a DNA sequence.
    foreach (GObjectType t, c.supportedObjectTypes) {
        if (GObjectTypes::DNA_SEQUENCE == t) {
            continue;
        }
        return false;
    }

    // The raw data sample is only examined when the caller asks for it.
    if (c.checkRawData && !isDataFormatSupported(c.rawData.constData(), c.rawData.size())) {
        return false;
    }

    // c.supportsStreamingRead never causes a rejection here.
    return true;
}

// Number of bytes requested from the IO adapter per read call.
#define READ_BUFF_SIZE  4096
/**
 * Parses FASTA records from an already opened adapter.
 *
 * A record is a header line whose first character is '>' followed by sequence
 * data that runs up to the next '>' or the end of the stream. Characters from
 * TextUtils::WHITES are removed from the sequence data.
 *
 * Parsing stops at the first malformed header (line longer than the read buffer
 * or not starting with '>'); the error is reported through `ti`.
 *
 * @param io               adapter to read from
 * @param docUrl           forwarded to DocumentFormatUtils::addMergedSequenceObject in merge mode
 * @param objects          list the created objects are appended to
 * @param ti               receives errors and progress; its cancelFlag aborts the parse
 * @param gapSize          -1 creates one object per record; any other value merges all
 *                         records into a single sequence, inserting gapSize zero bytes
 *                         between consecutive records
 * @param predictedSize    number of bytes reserved up front for the sequence buffer
 * @param writeLockReason  cleared on entry; set to DocumentFormat::MERGED_SEQ_LOCK when
 *                         more than one record was merged
 * @param onlyOne          when true, stop after the first record
 */
static void load(IOAdapter* io, const QString& docUrl, QList<GObject*>& objects, TaskStateInfo& ti,
                 int gapSize, int predictedSize, QString& writeLockReason, bool onlyOne ) {
    writeLockReason.clear();
    // One extra byte so that a full read can still be zero-terminated below.
    QByteArray readBuff(READ_BUFF_SIZE+1, 0);
    char* buff = readBuff.data();
    qint64 len = 0;
    char fastaHeaderStartChar = '>';
    QBitArray fastaHeaderStart = TextUtils::createBitMap(fastaHeaderStartChar);

    bool merge = gapSize!=-1;
    QByteArray sequence;
    QStringList headers;
    QSet<QString> names;
    QList<LRegion> mergedMapping;
    QByteArray gapSequence((merge ? gapSize : 0), 0);

    sequence.reserve(predictedSize);

    //skip leading whites if present
    bool lineOk = true;
    QBitArray nonWhites = ~TextUtils::WHITES;
    io->readUntil(buff, READ_BUFF_SIZE, nonWhites, IOAdapter::Term_Exclude, &lineOk);

    int sequenceStart = 0;
    while (!ti.cancelFlag) {
        //read header
        len = io->readUntil(buff, READ_BUFF_SIZE, TextUtils::LINE_BREAKS, IOAdapter::Term_Include, &lineOk);
        if (len == 0) { //end of stream
            break;
        }
        // Stop at the first malformed header instead of parsing the rest of
        // the stream into objects that the caller has to discard anyway.
        if (!lineOk) {
            ti.setError(FastaFormat::tr("line_is_too_long"));
            break;
        }
        if (buff[0]!= fastaHeaderStartChar) {
            ti.setError(FastaFormat::tr("first_line_is_not_a_fasta_header"));
            break;
        }
        // Header text without the leading '>' and surrounding whitespace.
        QString headerLine = QString(QByteArray::fromRawData(buff+1, len-1)).trimmed();

        //read sequence
        if (!merge) {
            sequence.clear();
        } else if (sequence.size() > 0) {
            // Separate this record from the previous one and remember where it starts.
            sequence.append(gapSequence);
            sequenceStart = sequence.size();
        }
        do {
            len = io->readUntil(buff, READ_BUFF_SIZE, fastaHeaderStart, IOAdapter::Term_Exclude);
            if (len <= 0) {
                break;
            }
            len = TextUtils::remove(buff, len, TextUtils::WHITES);
            buff[len] = 0;
            sequence.append(buff);
            ti.progress = io->getProgress();
        } while (!ti.cancelFlag);

        if (merge) {
            headers.append(headerLine);
            mergedMapping.append(LRegion(sequenceStart, sequence.size() - sequenceStart));
        } else {
            // Object names are made unique within this load via TextUtils::variate.
            QString objName = TextUtils::variate(headerLine, "_", names);
            names.insert(objName);
            DNASequence seq( headerLine, sequence );
            //TODO parse header
            seq.info.insert(DNAInfo::FASTA_HDR, headerLine);
            seq.info.insert(DNAInfo::ID, headerLine);
            DocumentFormatUtils::addSequenceObject(objects, objName, seq);
        }

        if( onlyOne ) {
            break;
        }
    }

    assert(headers.size() == mergedMapping.size());

    if (!ti.hasErrors() && !ti.cancelFlag && merge && !headers.isEmpty()) {
        DocumentFormatUtils::addMergedSequenceObject(objects, docUrl, headers, sequence, mergedMapping);
    }
    if (!ti.hasErrors() && !ti.cancelFlag && objects.isEmpty()) {
        ti.setError(Document::tr("Document is empty."));
    }

    if (merge && headers.size() > 1) {
        writeLockReason = DocumentFormat::MERGED_SEQ_LOCK;
    }
}

/**
 * Opens `url` through an adapter created by `iof` and loads every FASTA record
 * found there into a new Document.
 *
 * @param iof  factory used to create the IO adapter
 * @param url  location to read from
 * @param ti   receives errors; its cancelFlag aborts the load
 * @param _fs  format settings; MERGE_MULTI_DOC_GAP_SIZE_SETTINGS and
 *             MERGE_MULTI_DOC_SEQUENCE_SIZE_SETTINGS are read from it
 * @return the new Document, or NULL when the file cannot be opened, an error
 *         was reported or the task was cancelled
 */
Document* FastaFormat::loadExistingDocument(IOAdapterFactory* iof, const QString& url, TaskStateInfo& ti, const QVariantMap& _fs) {
    //read file
    std::auto_ptr<IOAdapter> io(iof->createIOAdapter());
    if (!io->open(url, IOAdapterMode_Read)) {
        ti.setError(Translations::errorOpeningFileRead(url));
        return NULL;
    }

    QVariantMap fs = _fs;
    QList<GObject*> objects;

    //get settings
    // A gap size of -1 (the default) means "do not merge"; larger values are capped.
    int gapSize = qBound(-1, DocumentFormatUtils::getIntSettings(fs, MERGE_MULTI_DOC_GAP_SIZE_SETTINGS, -1), 1000*1000);
    int predictedSize = qMax(100*1000,
        DocumentFormatUtils::getIntSettings(fs, MERGE_MULTI_DOC_SEQUENCE_SIZE_SETTINGS, gapSize==-1 ? 0 : io->left()));

    QString lockReason;
    load(io.get(), url, objects, ti, gapSize, predictedSize, lockReason, false );
    io->close();

    if (ti.hasErrors() || ti.cancelFlag) {
        // No Document is created on this path, so the objects collected so far
        // would otherwise be dropped without being released.
        qDeleteAll(objects);
        return NULL;
    }

    DocumentFormatUtils::updateFormatSettings(objects, fs);
    Document* doc = new Document(this, iof, url, objects, fs, lockReason);
    return doc;

}

/**
 * Loads a single FASTA record from an already opened adapter into a new Document.
 *
 * Only one record is read per call (load() is invoked with onlyOne = true), and
 * the adapter is not closed here.
 *
 * @param io   opened adapter to read from; NULL or a closed adapter is reported
 *             as a bad argument
 * @param ti   receives errors; its cancelFlag aborts the load
 * @param _fs  format settings; MERGE_MULTI_DOC_GAP_SIZE_SETTINGS and
 *             MERGE_MULTI_DOC_SEQUENCE_SIZE_SETTINGS are read from it
 * @return the new Document, or NULL when the adapter is unusable, an error was
 *         reported or the task was cancelled
 */
Document* FastaFormat::loadExistingDocument( IOAdapter* io, TaskStateInfo& ti, const QVariantMap& _fs ) {
    if( NULL == io || !io->isOpen() ) {
        ti.setError(Translations::badArgument("IO adapter"));
        return NULL;
    }
    QVariantMap fs = _fs;
    QList<GObject*> objects;

    // A gap size of -1 (the default) means "do not merge"; larger values are capped.
    int gapSize = qBound(-1, DocumentFormatUtils::getIntSettings(fs, MERGE_MULTI_DOC_GAP_SIZE_SETTINGS, -1), 1000*1000);
    int predictedSize = qMax(100*1000,
        DocumentFormatUtils::getIntSettings(fs, MERGE_MULTI_DOC_SEQUENCE_SIZE_SETTINGS, gapSize==-1 ? 0 : io->left()));

    QString lockReason;
    load( io, io->getUrl(), objects, ti, gapSize, predictedSize, lockReason, true );

    if (ti.hasErrors() || ti.cancelFlag) {
        // No Document is created on this path, so the objects collected so far
        // would otherwise be dropped without being released.
        qDeleteAll(objects);
        return NULL;
    }

    DocumentFormatUtils::updateFormatSettings(objects, fs);
    Document* doc = new Document( this, io->getFactory(), io->getUrl(), objects, fs, lockReason );
    return doc;
}

// Maximum number of sequence characters written per output line.
#define LINE_LEN 70
/**
 * Writes one sequence object to `io` as a FASTA record: a '>' header line
 * followed by the sequence split into lines of at most LINE_LEN characters.
 *
 * The header text is the object's DNAInfo::FASTA_HDR info value, or the object
 * name when that value is empty.
 *
 * @param io        adapter to write to
 * @param fastaObj  object to save; must be a DNASequenceObject, otherwise a
 *                  bad-argument error is reported
 * @param tsi       receives the error when the object is not a sequence or when
 *                  any write stores fewer bytes than requested
 */
static void saveOneFasta( IOAdapter* io, GObject* fastaObj, TaskStateInfo& tsi ) {
    DNASequenceObject* seqObj = qobject_cast< DNASequenceObject* >( fastaObj );

    if ( NULL == seqObj ) {
        tsi.setError(Translations::badArgument("NULL sequence" ));
        return;
    }

    //writing header;
    QByteArray block;
    // TODO better header out of info tags
    QString hdr = seqObj->getDNASequence().info.value(DNAInfo::FASTA_HDR).toString();
    if (hdr.isEmpty()) {
        hdr = seqObj->getGObjectName();
    }
    block.append('>').append(hdr).append( '\n' );

    bool written = ( io->writeBlock( block ) == block.length() );
    if (written) {
        const QByteArray& seqData = seqObj->getSequence();
        const char* seq = seqData.constData();
        const int len = seqData.length();
        for (int i = 0; written && i < len; i += LINE_LEN ) {
            const int chunkSize = qMin( LINE_LEN, len - i );
            // Both writes are compared against the expected byte count, so a
            // negative error return from the newline write is detected too.
            written = io->writeBlock( seq + i, chunkSize ) == chunkSize
                   && io->writeBlock( "\n", 1 ) == 1;
        }
    }

    if (!written) {
        QString url = seqObj->getDocument() ? seqObj->getDocument()->getURL() : "";
        tsi.setError(Translations::errorWritingFile(url));
    }
}

/**
 * Writes all objects of `doc` to a file in FASTA format.
 *
 * @param doc        document to store
 * @param ti         receives the error when the target cannot be opened for
 *                   writing or when saving fails
 * @param iof        factory for the IO adapter; when NULL the document's own
 *                   factory is used
 * @param newDocURL  target location; when empty the document's URL is used
 */
void FastaFormat::storeDocument(Document* doc, TaskStateInfo& ti, IOAdapterFactory* iof, const QString& newDocURL) {
    assert(doc->getDocumentModLock(DocumentModLock_FORMAT_AS_INSTANCE) == NULL);

    // Fall back to the factory the document already carries.
    if (NULL == iof) {
        iof = doc->getIOAdapterFactory();
    }
    std::auto_ptr<IOAdapter> adapter(iof->createIOAdapter());

    // An explicit target URL wins over the document's own URL.
    QString targetUrl = newDocURL;
    if (targetUrl.isEmpty()) {
        targetUrl = doc->getURL();
    }

    const bool opened = adapter->open(targetUrl, IOAdapterMode_Write);
    if (!opened) {
        ti.setError(Translations::errorOpeningFileWrite(targetUrl));
        return;
    }

    save(adapter.get(), doc, ti);
    adapter->close();
}

/**
 * Writes only the first object of `doc` to an already opened adapter.
 *
 * @param doc  document to take the first object from; NULL or a document
 *             without objects is reported as a bad argument
 * @param ts   receives errors
 * @param io   opened adapter to write to; NULL or a closed adapter is reported
 *             as a bad argument
 */
void FastaFormat::storeDocument( Document* doc, TaskStateInfo& ts, IOAdapter* io ) {
    const bool adapterUsable = ( io != NULL ) && io->isOpen();
    if( !adapterUsable ) {
        ts.setError(Translations::badArgument("IO adapter"));
        return;
    }

    const bool hasObjects = ( doc != NULL ) && !doc->getObjects().isEmpty();
    if( !hasObjects ) {
        ts.setError(Translations::badArgument("doc"));
        return;
    }

    GObject* firstObject = doc->getObjects().first();
    saveOneFasta( io, firstObject, ts );
}

/**
 * Writes every object of `d` to `io`, one FASTA record after another, in the
 * order the document returns them.
 *
 * @param io  adapter to write to
 * @param d   document whose objects are saved
 * @param ti  receives errors reported while saving individual objects
 */
void FastaFormat::save(IOAdapter* io, Document* d, TaskStateInfo& ti) {
    //TODO: check saved op states!!!
    const QList<GObject*> docObjects = d->getObjects();
    for (QList<GObject*>::const_iterator it = docObjects.constBegin(); it != docObjects.constEnd(); ++it) {
        saveOneFasta( io, *it, ti );
    }
}

}//namespace
