//
// C++ Implementation: arxivsearcher
//
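// Description: Searcher that queries the arXiv API (export.arxiv.org) and
//              converts the returned Atom feed into bibliographic entries.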
//
//
// Author: Thach Nguyen <thach.nguyen@rmit.edu.au>, (C) 2008
//
// Copyright: See COPYING file that comes with this distribution
//
//
#include "arxivsearcher.h"
#include "searchmanager.h"

#include <klocale.h>
#include <kio/job.h>
#include <kstandarddirs.h>
#include <kconfig.h>
#include <kcombobox.h>
#include <klineedit.h>
#include <kaccelmanager.h>
#include <knuminput.h>
#include <qdom.h>
#include <qregexp.h>
#include <qlabel.h>
#include <qfile.h>
#include <qlayout.h>
#include <qwhatsthis.h>

#include <iostream>

// File-local constants describing the arXiv query endpoint.
namespace {
  // Host that serves the arXiv API.
  const char* const ARXIV_BASE_URL = "http://export.arxiv.org";
  // Path of the query interface, relative to ARXIV_BASE_URL.
  const char* const ARXIV_SEARCH_CGI = "api/query";
}


/**
 * Constructs an idle arXiv searcher (step Begin, not started).
 *
 * @param parent forwarded to the searcher base class
 * @param name   forwarded to the searcher base class
 */
ArXivSearcher::ArXivSearcher(QObject *parent, const char *name)
	: searcher(parent, name),m_step(Begin), m_started(false)
{
	// stop() tests m_job and searchResults() tests m_total, and both can run
	// before any request has assigned them; give every piece of request
	// state a defined value up front.
	m_job = 0;
	m_total = 0;
	m_waitingRetrieveRange = false;
}


/** Destructor; the body performs no work of its own. */
ArXivSearcher::~ArXivSearcher() {}
/** Returns the translated default display name of this source. */
QString ArXivSearcher::defaultName()
{
	const QString translatedName = i18n("ArXiv");
	return translatedName;
}

/**
 * Returns the display name of this source: the name stored by setSource(),
 * or defaultName() when no name has been set.
 */
QString ArXivSearcher::source() const
{
	if (m_name.isEmpty())
		return defaultName();
	return m_name;
}

void ArXivSearcher::readConfig(KConfig* config_, const QString& group_) {

}


void ArXivSearcher::saveConfig(KConfig* config){
}

/**
 * Builds an arXiv query expression from up to three search terms and hands
 * it to search(QString).
 *
 * Each non-empty value contributes "<prefix><words joined by AND>", where
 * the prefix is chosen by the matching key (all:, ti:, au:, jr:, cat:, abs:).
 * A term that follows an earlier one is linked to it with the matching
 * operator: 0 = AND, 1 = OR, 2 = ANDNOT. Values that are empty or consist
 * only of whitespace are skipped.
 *
 * If a key or an operator is not one of the supported values, stop() is
 * called and no search is started.
 *
 * @param key1,key2,key3       field to search for value1, value2, value3
 * @param value1,value2,value3 search text for each term
 * @param operator1            links term 2 to what precedes it
 * @param operator2            links term 3 to what precedes it
 */
void ArXivSearcher::search(SearchKey key1, SearchKey key2, SearchKey key3 , const QString& value1, const QString& value2, const QString& value3, int operator1, int operator2) {

	const SearchKey keys[3] = { key1, key2, key3 };
	const QString* const values[3] = { &value1, &value2, &value3 };
	// joiners[i] links term i to the expression built so far; the first
	// slot is never consulted because nothing can precede the first term.
	const int joiners[3] = { 0, operator1, operator2 };

	QString queryString;

	for (int i = 0; i < 3; ++i)
	{
		if (values[i]->isEmpty())
			continue;

		// simplifyWhiteSpace() returns the cleaned-up copy instead of
		// modifying the string, so the result has to be kept.
		QString term = values[i]->simplifyWhiteSpace();
		if (term.isEmpty())
			continue;	// the value was whitespace only
		term.replace(" ", " AND ");

		if (!queryString.isEmpty())
		{
			switch (joiners[i])
			{
			case 0:
				queryString += QString::fromLatin1(" AND ");
				break;
			case 1:
				queryString += QString::fromLatin1(" OR ");
				break;
			case 2:
				queryString += QString::fromLatin1(" ANDNOT ");
				break;
			default:
				stop();
				return;
			}
		}

		switch (keys[i])
		{
		case All:
			queryString += QString::fromLatin1("all:");
			break;
		case Title:
			queryString += QString::fromLatin1("ti:");
			break;
		case Author:
			queryString += QString::fromLatin1("au:");
			break;
		case Journal:
			queryString += QString::fromLatin1("jr:");
			break;
		case Subject:
			// The arXiv API names the subject-category field "cat".
			queryString += QString::fromLatin1("cat:");
			break;
		case Abstract:
			queryString += QString::fromLatin1("abs:");
			break;
		default:
			stop();
			return;
		}

		queryString += term;
	}

	search(queryString);
}

/**
 * Starts the first phase of a search: asks the arXiv API how many entries
 * match queryString.
 *
 * The request is sent with max_results=0; slotComplete() dispatches the
 * reply to searchResults() because the step is set to Search. An empty
 * query ends the search immediately through stop().
 *
 * @param queryString complete arXiv search_query expression
 */
void ArXivSearcher::search(QString queryString){
    // A request that is still in flight would keep appending to m_data
    // through slotData(); discard it before starting over.
    if (m_started && m_job)
    {
        m_job->kill();
        m_job = 0;
    }

    m_started = true;

    m_data.truncate(0);

    m_url = KURL(ARXIV_BASE_URL);
    m_url.addPath(QString::fromLatin1(ARXIV_SEARCH_CGI));

    m_query = queryString;

    if (m_query.isEmpty())
    {
        stop();
        return;
    }
    m_url.addQueryItem(QString::fromLatin1("search_query"), m_query);
    m_url.addQueryItem(QString::fromLatin1("start"), QString::fromLatin1("0"));
    m_url.addQueryItem(QString::fromLatin1("max_results"), QString::fromLatin1("0"));

    m_step = Search;

    m_job = KIO::get(m_url, false, false);
    connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
            SLOT(slotData(KIO::Job*, const QByteArray&)));
    connect(m_job, SIGNAL(result(KIO::Job*)),
            SLOT(slotComplete(KIO::Job*)));
}

/**
 * Ends the current search, if one is running.
 *
 * Kills the pending job when there is one, clears the receive buffer,
 * returns to step Begin and emits signalDone(). Does nothing when no
 * search has been started.
 */
void ArXivSearcher::stop()
{
	if (m_started)
	{
		if (m_job)
		{
			m_job->kill();
			m_job = 0;
		}

		m_started = false;
		m_data.truncate(0);
		m_step = Begin;

		emit signalDone(this);
	}
}

/**
 * Appends a chunk of received bytes to m_data.
 *
 * @param data_ bytes delivered by the job's data() signal
 */
void ArXivSearcher::slotData(KIO::Job*, const QByteArray& data_)
{
	const char* chunk = data_.data();
	const uint chunkSize = data_.size();

	QDataStream appender(m_data, IO_WriteOnly | IO_Append);
	appender.writeRawBytes(chunk, chunkSize);
}

/**
 * Handles the end of a transfer.
 *
 * On a job error the error text is emitted through signalMessage() and the
 * search is stopped; an empty reply also stops the search. Otherwise the
 * reply is passed to searchResults() or fetchResults() depending on the
 * current step.
 *
 * @param job_ the job that has finished
 */
void ArXivSearcher::slotComplete(KIO::Job* job_)
{
	// The transfer is over, so the job pointer is no longer kept.
	m_job = 0;

	if (job_->error())
	{
		emit signalMessage(job_->errorString(), 0);
		stop();
		return;
	}

	if (m_data.isEmpty())
	{
		std::cerr << "ArXivSearcher::slotComplete() - no data\n";
		stop();
		return;
	}

	if (m_step == Search)
	{
		searchResults();
	}
	else if (m_step == Fetch)
	{
		fetchResults();
	}
	else
	{
		std::cerr << "ArXivSearcher::slotComplete() - wrong step = " << m_step << "\n";
	}
}

void ArXivSearcher::searchResults(){
	
	QString str = QString::fromUtf8(m_data, m_data.size());
	
//	std::cerr << str.ascii() << "\n";

	QDomDocument dom;
	if(!dom.setContent(m_data, false)) {
		std::cerr << "ArXivSearcher::searchResults() - server did not return valid XML.\n";
		stop();
		return;
	}

	int count = 0;
	for(QDomNode n = dom.documentElement().firstChild(); !n.isNull(); n = n.nextSibling()) {
		if ( n.nodeName() == QString::fromLatin1 ( "opensearch:totalResults" ) )
		{
			QDomElement e = n.toElement();	
			m_total = e.text().toInt();
			break;
		}
	}

	m_waitingRetrieveRange = true;
	m_step = Wait;
	if (m_total > 0)
		emit signalQueryResult(m_total);	
	else{
		signalMessage(i18n("No reference was found"), 1);
		stop();
	}

	
}


/**
 * Starts the second phase of a search: downloads the entries at positions
 * min..max of the result list.
 *
 * Positions are 1-based and inclusive; they are translated into the
 * request parameters start = min-1 and max_results = max-min+1. A min of 0
 * is treated as 1. The call is ignored unless the searcher is in step Wait.
 * An empty or inverted range (max == 0 or max < min) stops the search.
 *
 * @param min position of the first entry to fetch
 * @param max position of the last entry to fetch
 */
void ArXivSearcher::retrieveRange(unsigned int min, unsigned int max){
	if (m_step != Wait)
		return;
	m_waitingRetrieveRange = false;

	if (max < 1 || max < min)
	{
		stop();
		return;
	}
	// min is unsigned, so "min - 1" below would wrap around for min == 0.
	if (min < 1)
		min = 1;

	m_url = KURL(ARXIV_BASE_URL);
	m_url.addPath(QString::fromLatin1(ARXIV_SEARCH_CGI));

	m_url.addQueryItem(QString::fromLatin1("search_query"), m_query);
	m_url.addQueryItem(QString::fromLatin1("start"), QString::number(min-1));
	m_url.addQueryItem(QString::fromLatin1("max_results"), QString::number(max-min+1));

	m_data.truncate(0);
	m_step = Fetch;

	m_job = KIO::get(m_url, false, false);
	connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
	        SLOT(slotData(KIO::Job*, const QByteArray&)));
	connect(m_job, SIGNAL(result(KIO::Job*)),
	        SLOT(slotComplete(KIO::Job*)));
}


void ArXivSearcher::fetchResults(){

	QDomDocument dom;
	if(!dom.setContent(m_data, false)) {
		std::cerr << "ArXivSearcher::fetchResults() - server did not return valid XML.\n";
		stop();
		return;
	}

	BibEntry *entry;
	
	for(QDomNode m = dom.documentElement().firstChild(); !m.isNull(); m = m.nextSibling()) {
		QString m_nodeName = m.nodeName();
		if ( m_nodeName == QString::fromLatin1 ( "entry" ) )
		{
			entry = new BibEntry(QString::fromLatin1("misc"), QString());

			RefField *field = BibEntryDefTable::self()->getRefField(QString::fromLatin1("author"));
			QString linkSt;
			if (field)
				linkSt = field->connectingString;
			else
				linkSt = QString::fromLatin1(" and ");
			QString author;

			for ( QDomNode n = m.firstChild(); !n.isNull(); n = n.nextSibling() )
			{
				QString n_nodeName = n.nodeName();
				if (n_nodeName == QString::fromLatin1 ("author") ){
					for (QDomNode o = n.firstChild(); !o.isNull(); o = o.nextSibling() ){
						if (o.nodeName() ==  QString::fromLatin1 ( "name" ) ){
							QDomElement e = o.toElement();
							if (!e.isNull()){
								if (!e.text().isEmpty()){
								if (author.isEmpty() )
									author = e.text();
								else	
									author = author + linkSt + e.text();
								}
							}
							break;	
						}
					}
				}
				else{
					QDomElement e = n.toElement();
					if (!e.isNull()){
						QString str = e.text();
						if (!str.isEmpty()){
							if (n_nodeName == QString::fromLatin1 ("title") )
								entry->setField(QString::fromLatin1("title"), str);
							if (n_nodeName == QString::fromLatin1 ("summary") )
								entry->setField(QString::fromLatin1("abstract"), str);
							if (n_nodeName == QString::fromLatin1 ("id") )
								entry->setField(QString::fromLatin1("url"), str);
							if (n_nodeName == QString::fromLatin1 ("arxiv:comment") )
								entry->setField(QString::fromLatin1("comment"), str);
							if (n_nodeName == QString::fromLatin1 ("<arxiv:doi>") )
								entry->setField(QString::fromLatin1("doi"), str);
							if (n_nodeName == QString::fromLatin1 ("arxiv:journal_ref") ){
								entry->setEntryType(QString::fromLatin1 ("article") );
								QRegExp regexp1(QString::fromLatin1("(.+)[ ](\\S+)[ ]\\(([\\d][\\d][\\d][\\d])\\)[ ](.+)") );
								QRegExp regexp2(QString::fromLatin1("(.+)[ ](\\S+),[ ]\\(([\\d][\\d][\\d][\\d])\\),[ ](.+)") );
								QRegExp regexp3(QString::fromLatin1("(.+)[ ](\\S+)[ ]\\(([\\d][\\d][\\d][\\d])\\),[ ](.+)") );

								if (regexp1.exactMatch(str))
        							{
									entry->setField(QString::fromLatin1("journal"), regexp1.cap(1) );
									entry->setField(QString::fromLatin1("volume"), regexp1.cap(2) );
									entry->setField(QString::fromLatin1("year"), regexp1.cap(3) );
									entry->setField(QString::fromLatin1("pages"), regexp1.cap(4) );
								}
								else if (regexp2.exactMatch(str))
        							{
									entry->setField(QString::fromLatin1("journal"), regexp2.cap(1) );
									entry->setField(QString::fromLatin1("volume"), regexp2.cap(2) );
									entry->setField(QString::fromLatin1("year"), regexp2.cap(3) );
									entry->setField(QString::fromLatin1("pages"), regexp2.cap(4) );
								}
								else if (regexp3.exactMatch(str))
        							{
									entry->setField(QString::fromLatin1("journal"), regexp3.cap(1) );
									entry->setField(QString::fromLatin1("volume"), regexp3.cap(2) );
									entry->setField(QString::fromLatin1("year"), regexp3.cap(3) );
									entry->setField(QString::fromLatin1("pages"), regexp3.cap(4) );
								}
								else
									entry->setField(QString::fromLatin1("journal"), str );
								
								
							}
						}
					}
				}	
			}
			if (!author.isEmpty())
				entry->setField(QString::fromLatin1("author"), author);

			emit signalResultFound(new BibEntry(*entry));
			delete entry;
			
		}	
	}	
	stop();

}


/**
 * Sets the display name returned by source().
 *
 * @param s new name; an empty string makes source() fall back to defaultName()
 */
void ArXivSearcher::setSource(const QString s)
{
	m_name = s;
}


/**
 * Returns the labels of the search keys this searcher supports, in the
 * order All, Author, Title, Journal, Subject, Abstract. The labels come
 * from searchManager::searchKeyString().
 */
QStringList ArXivSearcher::searchKey()
{
	QStringList labels;
	labels.append(searchManager::self()->searchKeyString(All));
	labels.append(searchManager::self()->searchKeyString(Author));
	labels.append(searchManager::self()->searchKeyString(Title));
	labels.append(searchManager::self()->searchKeyString(Journal));
	labels.append(searchManager::self()->searchKeyString(Subject));
	labels.append(searchManager::self()->searchKeyString(Abstract));
	return labels;
}


/**
 * Creates the configuration widget for this searcher.
 *
 * @param parent_ parent widget of the new configuration widget
 * @return a newly allocated ArXivConfigWidget bound to this searcher
 */
SearcherConfigWidget* ArXivSearcher::configWidget(QWidget* parent_)
{
    ArXivConfigWidget* widget = new ArXivConfigWidget(parent_, this);
    return widget;
}


/**
 * Builds the configuration page for the arXiv source: a note saying that
 * there are no options, followed by a link to the arXiv web site that is
 * opened in the browser when clicked.
 *
 * @param parent_   parent widget
 * @param searcher_ searcher this widget belongs to; stored in m_searcher
 */
ArXivConfigWidget::ArXivConfigWidget(QWidget* parent_, ArXivSearcher* searcher_ /*=0*/)
        : SearcherConfigWidget(parent_)
{
	m_searcher = searcher_;
	QVBoxLayout* l = new QVBoxLayout ( optionsWidget() );
	l->addWidget ( new QLabel ( i18n ( "This source has no options." ), optionsWidget() ) );
	KURLLabel *urlLab = new KURLLabel( optionsWidget() );
	l->addWidget(urlLab);
	// User-visible text, so it goes through i18n() like the label above.
	urlLab->setText( i18n( "More information about ArXiv" ) );
	urlLab->setURL("http://www.arxiv.org");

	connect(urlLab , SIGNAL( leftClickedURL( const QString& ) ), kapp, SLOT( invokeBrowser( const QString& ) ) );
	l->addStretch();
}



#include "arxivsearcher.moc"
