/* 
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 * 
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 * 
 * The Original Code is the Sablotron XSLT Processor.
 * 
 * The Initial Developer of the Original Code is Ginger Alliance Ltd.
 * Portions created by Ginger Alliance are Copyright (C) 2000 Ginger
 * Alliance Ltd. All Rights Reserved.
 * 
 * Contributor(s):
 * 
 * Alternatively, the contents of this file may be used under the
 * terms of the GNU General Public License Version 2 or later (the
 * "GPL"), in which case the provisions of the GPL are applicable 
 * instead of those above.  If you wish to allow use of your 
 * version of this file only under the terms of the GPL and not to
 * allow others to use your version of this file under the MPL,
 * indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by
 * the GPL.  If you do not delete the provisions above, a recipient
 * may use your version of this file under either the MPL or the
 * GPL.
 */

//  parser.cpp

#include "parser.h"
#include "situa.h"
#include "tree.h"
#include "error.h"
#include "proc.h"

// for the windows-1250 encoding:
#include "enc1250.h"
// (will change to encodings.h)

// 
//
//      TreeConstructer
//
//

// Table of the static tc* callbacks of this class. Its address is handed to
// OutputterObj::setOptionsSAX() (see parseUsingSAX and
// parseUsingSAXForAWhile) together with the constructer instance.
// The initializers are positional, so their order has to follow the member
// order of SAXHandler, which is declared elsewhere.
SAXHandler TreeConstructer::myHandlerRecord =
{
    tcStartDocument,
    tcStartElement,
    tcEndElement,
    tcStartNamespace,
    tcEndNamespace,
    tcComment,
    tcPI,
    tcCharacters,
    tcEndDocument
};

// Builds an idle constructer attached to the given processor.
// The processor pointer is passed through NZ(); the situation object is
// taken over from the processor and everything else starts out empty.
TreeConstructer::TreeConstructer(Processor *theProcessor_)
    : theProcessor( NZ(theProcessor_) )
{
    // errors and warnings go to the processor's situation object
    theSituation = theProcessor_ -> situation;

    // no parse is in progress yet
    theTree = NULL;
    theDataLine = NULL;
    theParser = NULL;
    theOutputter = NULL;
    theLineNumber = 0;
}

// Disposes of the object held in theOutputter (through the cdelete helper).
// NOTE(review): parseUsingSAXForAWhileDone() stores the processor's current
// outputter in theOutputter; confirm that this destructor is really meant to
// dispose of that object too.
TreeConstructer::~TreeConstructer()
{
    cdelete(theOutputter);
}


// Reads the document delivered by dataline 'd' into tree 't' through an
// expat parser supplied by the caller.
// Returns OK when every step succeeded; a failing step leaves early through
// the E() wrapper.
eFlag TreeConstructer::parseDataLineUsingGivenExpat(
    Tree *t, DataLine *d, XML_Parser theParser_)
{
    // the static callbacks and feedDocumentToParser() read these members
    theParser = theParser_;
    theDataLine = d;
    theTree = t;

    E( feedDocumentToParser(this) );
    E( t -> parseFinished() );

    // only trees flagged XSLTree have their root stripped here
    if (!(t -> XSLTree))
        return OK;
    t -> stripped += t -> root.strip();
    return OK;
}


// Parses the document delivered by dataline 'd' into tree 't' with a
// freshly created namespace-aware expat parser.
// All tc* callbacks are registered on the parser, this object becomes the
// parser's user data and the base URI is derived from the tree name.
// Returns OK on success.
// NOTE(review): when the parse fails, E() returns before the parser is
// released here. feedDocumentToParser() releases it on one of its error
// paths only, so the remaining failure paths appear to leak the parser -
// confirm the intended ownership before adding a release on failure.
eFlag TreeConstructer::parseDataLineUsingExpat(
    Tree *t, DataLine *d)
{
    theParser = XML_ParserCreateNS(NULL, THE_NAMESPACE_SEPARATOR);
    M( theSituation, theParser );
    // XML_UseParserAsHandlerArg(parser);
    XML_SetElementHandler(theParser,
        tcStartElement,
        tcEndElement);
    XML_SetCharacterDataHandler(theParser,
        tcCharacters);
    XML_SetNamespaceDeclHandler(theParser,
        tcStartNamespace,
        tcEndNamespace);
    XML_SetCommentHandler(theParser,
        tcComment);
    XML_SetProcessingInstructionHandler(theParser,
        tcPI);
    XML_SetUnknownEncodingHandler(theParser,
        tcUnknownEncoding,
        NULL);
    XML_SetExternalEntityRefHandler(theParser,
        tcExternalEntityRef);
    XML_SetUserData(theParser, this);
    XML_SetBase(theParser, theProcessor -> findBaseURI( t -> name ));

    E( parseDataLineUsingGivenExpat(t, d, theParser) );

    XML_ParserFree(theParser);
    // do not keep a pointer to the released parser: getCurrentLineNumber()
    // would otherwise pass it back to expat
    theParser = NULL;
    return OK;
}

// Prepares the constructer to build tree 't' from SAX events instead of a
// dataline: a new outputter is created, wired to the tc* callbacks of this
// object and told to begin its output.
eFlag TreeConstructer::parseUsingSAX(Tree *t)
{
    // there is no dataline in this mode
    theDataLine = NULL;
    theTree = t;

    theOutputter = new OutputterObj(theProcessor);
    M( theSituation, theOutputter );

    // the outputter reports its events through myHandlerRecord
    E( theOutputter -> setOptionsSAX(&myHandlerRecord, this) );
    E( theOutputter -> eventBeginOutput() );
    return OK;
}

eFlag TreeConstructer::parseUsingSAXForAWhile()
// Temporarily switches a dataline-based constructer (not a SAX one) to
// receiving SAX events: a new outputter is created, wired to the tc*
// callbacks, pushed onto the processor and told to begin its output.
// May be entered recursively, which is why an already existing outputter
// is not treated as an error.
{
    assert(theTree && theDataLine);

    theOutputter = new OutputterObj(theProcessor);
    M( theSituation, theOutputter );

    // the outputter reports its events through myHandlerRecord
    E( theOutputter -> setOptionsSAX(&myHandlerRecord, this) );
    E( theProcessor -> pushOutputter(theOutputter) );
    E( theOutputter -> eventBeginOutput() );
    return OK;
}

// Counterpart of parseUsingSAXForAWhile(): ends the output of the current
// outputter, pops it from the processor and continues with whatever
// outputter the processor reports afterwards.
// Returns OK, or leaves early through E() when a step fails.
eFlag TreeConstructer::parseUsingSAXForAWhileDone()
{
    assert(theOutputter);
    E( theOutputter -> eventEndOutput() );
    E( theProcessor -> popOutputter() ); // deletes the outputter
    // The member used to be reset to NULL here. It now takes the
    // processor's current outputter instead.
    theOutputter = theProcessor -> outputter();
    return OK;
}

// Line number to attach to newly created vertices: taken from the expat
// parser when there is one, from theLineNumber otherwise.
int TreeConstructer::getCurrentLineNumber()
{
    if (!theParser)
        return theLineNumber;
    return XML_GetCurrentLineNumber(theParser);
}


/* static */
// Pumps the whole document of the constructer's dataline through the
// constructer's expat parser, PARSE_BUFSIZE bytes at a time.
//   constructer - the TreeConstructer, passed as void*
// Returns OK when the document was parsed completely, NOT_OK otherwise.
// The 'xml' prefix is declared around the parse so that it is always bound.
// NOTE(review): the error paths return without calling popCurrent(), and the
// parser is released here only when reading from the dataline fails -
// confirm that callers expect exactly this.
eFlag TreeConstructer::feedDocumentToParser(void* constructer)
{
    Bool quit = FALSE, firstTime = TRUE;
    int res;
    int bytes = 0;

    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    SituationObj &situation_ = *(this_ -> theSituation);

    if (situation_.isError())
        return NOT_OK;
    XML_Parser parser = NZ( this_ -> theParser );

    situation_.pushCurrent();
    situation_.setCurrFile(this_ -> theDataLine -> fullUri);

    // this is to add the 'xml' namespace declarations
    //
    tcStartNamespace(constructer, "xml", theXMLNamespace);

    while (!quit)
    {
        char *buf = (char*) XML_GetBuffer(parser, PARSE_BUFSIZE);
        // without a buffer nothing can be read; treat it like a failed
        // parse step so that expat's own error code gets reported below
        res = 0;
        if (buf)
        {
            bytes = this_ -> theDataLine -> get(buf, PARSE_BUFSIZE);
            if (situation_.isError())
            {
                XML_ParserFree(parser);
                return NOT_OK;
            }
            // a short read marks the last chunk
            quit = (Bool) (bytes < PARSE_BUFSIZE);
            // res = XML_Parse(parser,buf,bytes,quit);
            res = XML_ParseBuffer(parser,bytes,quit);
            // a callback may have reported an error of its own
            if (situation_.isError())
                return NOT_OK;
        }
        if (!res)
        {
            // situation_.setCurrFile(t -> name); - already set
            // hack to avoid an apparent bug in expat causing crashes when an UTF-8 text
            // happens to start with a byte order mark by mistake
            // (the three bytes are only inspected when they were really read)
            if (!(buf && firstTime && bytes >= 3 &&
                  buf[0] == (char) 0xEF && buf[1] == (char) 0xBB && buf[2] == (char) 0xBF))
                situation_.setCurrLine(XML_GetCurrentLineNumber(parser));
            int code = XML_GetErrorCode(parser);
            Str eCodeStr, eNameStr;
            eCodeStr = code;
            eNameStr = (char*) XML_ErrorString(code);
            Err2(this_ -> theSituation, E_XML, eCodeStr, eNameStr);
            // Err2 presumably returns NOT_OK by itself (the Err1 calls in
            // setQNameFromExpat are used that way). The two statements below
            // only matter if it does not: never keep feeding after a failure.
            XML_ParserFree(parser);
            return NOT_OK;
        }
        firstTime = FALSE;
    }
    // remove the 'xml' namespace declarations
    //
    tcEndNamespace(constructer, "xml");

    situation_.popCurrent();
    return OK;
}

//
//  tcStartDocument
//  callback for the document start event
//

/* static */
void TreeConstructer::tcStartDocument(
    void* constructer)
{
    // Nothing happens at the start of a document. The callback exists
    // because myHandlerRecord needs an entry for this event.
}



//
//  tcStartElement
//  callback for the element start event
//

/* static */
// Creates the vertex for a starting element, appends it to the tree and then
// creates and appends one Attribute vertex per attribute.
//   constructer - the TreeConstructer, passed as void*
//   elName      - element name, decoded by setQNameFromExpat()
//   atts        - NULL-terminated array of alternating names and values
// Any failure is recorded in the situation object and the callback simply
// returns; vertices appended before the failure stay in the tree.
void TreeConstructer::tcStartElement(
    void *constructer,const char *elName,const char **atts)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    Tree *t = this_ -> theTree;
    char **p = (char**) atts;
    // opCode is assigned only on the XSL path and is read only there
    XSL_OP opCode;
    XSL_ATT attCode;
    BOOL itsXSL = FALSE;
    Vertex *v;
    Attribute *a;
    // q is reused: first for the element name, then for each attribute name
    QName q(this_ -> theProcessor);
    int elemLine;

    if (this_ -> theSituation -> isError())
        return;
    this_ -> theSituation -> setCurrLine(
        elemLine = this_ -> getCurrentLineNumber());
    if (setQNameFromExpat(this_, q, elName))
        return;
    // in a tree flagged XSLTree, names in the XSL namespace become
    // XSLElement vertices; a local name that lookup() does not know is an error
    if ((t -> XSLTree) && (q.uri == this_ -> theProcessor -> stdPhrase(PHRASE_XSL_NAMESPACE)))
    {
        itsXSL = TRUE;
        opCode = (XSL_OP) lookup((char*) q.getLocal(),xslOpNames);
        if (opCode == XSL_NONE)
        {
            this_ -> theSituation -> error(ET_BAD_XSL, 
					   "" /**theEmptyString*/, 
					   "" /**theEmptyString*/);
            return;
        };
        v = new(this_ -> theProcessor -> getArena()) XSLElement(q, t, opCode, this_ -> theProcessor);
    }
    else
        v = new(this_ -> theProcessor -> getArena()) Element(q, t, this_ -> theProcessor);
    v -> lineno = elemLine;
    t -> appendVertex(v);
    // hand the pending namespace declarations to the new element, then
    // determine the prefix of its name from them
    t -> pendingNS.giveCurrent(toE(v) -> namespaces, t);
    toE(v) -> name.findPrefix(toE(v) -> namespaces);
    
    // p[0] is an attribute name, p[1] its value
    while(*p)
    {
        if (setQNameFromExpat(this_, q, (char *)p[0]))
            return;
        q.findPrefix(toE(v) -> namespaces);
        // attribute codes are looked up for XSL elements only
        attCode = (itsXSL ? 
            (XSL_ATT) lookup((char*)q.getLocal(),xslAttNames) : XSLA_NONE);
        a = new(this_ -> theProcessor -> getArena()) 
            Attribute(q,p[1],attCode,this_ -> theProcessor);
        a -> lineno = this_ -> getCurrentLineNumber();
        t -> appendVertex(a);
        p += 2;
    };
    
    if (itsXSL)
    {
        toX(v) -> checkAtts(); 
        // also check if a toplevel element does not have a non-stylesheet parent
        toX(v) -> checkToplevel(); 
    }
    else
    {
        // other elements of an XSLTree get buildExpr() called on every
        // attribute
        if (t -> XSLTree)
        {
            int k, 
                kLimit = toE(v) -> atts.number();
            for (k = 0; k < kLimit; k++)
                toA(toE(v) -> atts[k]) -> buildExpr(
                    TRUE, EX_NONE);
            // this calls situation.error() on error
        }
    }
}


//
//  tcEndElement
//  callback for the element end event
//

/* static */
void TreeConstructer::tcEndElement(
    void* constructer, const char* name)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;

    Vertex *v = NZ( t -> stackTop );

    t -> flushPendingText();
    
    if (t -> XSLTree)
        t -> stripped += 
        (isXSLElement(v)? toX(v) : cast(Daddy*, v))
            -> strip();

    if(isXSLElement(v))
    {
        // situation.error() is called in the following
        if (toX(v) -> checkChildren())
            return;
    }
    if (this_ -> theProcessor -> processVertexAfterParse(v, t, this_))
        return;
    
    // t -> popVertex();    this is done in processVertexAfterParse
}


//
//  tcStartNamespace
//  callback for the namespace scope start event
//

/* static */
// Records a namespace declaration that comes into scope: a NmSpace vertex is
// appended to the tree's list of pending namespaces, from where the next
// element picks it up.
//   constructer - the TreeConstructer, passed as void*
//   prefix      - declared prefix, NULL for the default namespace
//   uri         - namespace URI; expat passes NULL when the default
//                 namespace is being undeclared (xmlns="")
// A missing prefix or URI is represented by UNDEF_PHRASE. Warnings are
// issued for the obsolete XSLT namespace and for an 'xsl' prefix bound to
// some other URI than the XSLT one.
void TreeConstructer::tcStartNamespace(
    void* constructer, const char* prefix, const char* uri)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;

    Vertex *newv;

    Phrase prefixPh, uriPh;
    if (prefix)
        this_ -> theProcessor -> dict().insert(prefix, prefixPh);
    else
        prefixPh = UNDEF_PHRASE;
    // the URI may be NULL as well; do not hand a NULL string to the dictionary
    if (uri)
        this_ -> theProcessor -> dict().insert(uri, uriPh);
    else
        uriPh = UNDEF_PHRASE;

    t -> pendingNS.appendAndSetOrdinal(
        newv = new(this_ -> theProcessor -> getArena()) NmSpace(
            prefixPh, uriPh, this_ -> theProcessor));
    newv -> lineno = this_ -> getCurrentLineNumber();

    // warn on obsolete namespace
    if (uri && !strcmp(oldXSLTNamespace, uri)) /* _PH_ */
        Warn1(this_ -> theSituation, W1_OLD_NS_USED, (char*)uri)
    else
    {
        // warn when 'xsl' is bound to something else than the XSLT namespace
        if (prefix && !strcmp(prefix, "xsl") &&
            uri && strcmp(theXSLTNamespace, uri)) /* _PH_ */
            Warn1(this_ -> theSituation, W1_XSL_NOT_XSL, (char*) uri);
    }
}



//
//  tcEndNamespace
//  callback for the namespace scope end event
//

/* static */
void TreeConstructer::tcEndNamespace(
    void* constructer, const char* prefix)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;

#ifdef _DEBUG
    // hope this works
    Phrase prefixPh;
    if (prefix)
        prefixPh = this_ -> theProcessor -> dict().lookup(prefix);
    else
        prefixPh = UNDEF_PHRASE;
    assert(toNS(t -> pendingNS.last()) -> prefix == prefixPh);
#endif
    t -> pendingNS.freelast(FALSE);
};


//
//  tcComment
//  callback for the comment event
//

/* static */
void TreeConstructer::tcComment(
    void* constructer, const char* contents)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;

    if (t -> XSLTree)
        return;

    Comment *newNode;
    newNode = new(this_ -> theProcessor -> getArena()) Comment(contents, this_ -> theProcessor);
    newNode -> lineno = this_ -> getCurrentLineNumber();
    t -> appendVertex(newNode);
};


//
//  tcPI
//  callback for the processing instruction event
//

/* static */
void TreeConstructer::tcPI(
    void* constructer, const char* target, const char* contents)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;

    if (t -> XSLTree)
        return;

    ProcInstr *newNode;
    Phrase targetPh;
    this_ -> theProcessor -> dict().insert(target, targetPh);

    newNode = new(this_ -> theProcessor -> getArena())
        ProcInstr(targetPh, contents, this_ -> theProcessor);
    newNode -> lineno = this_ -> getCurrentLineNumber();
    t -> appendVertex(newNode);
};


//
//  tcCharacters
//  callback for the character data ("text") event
//

/* static */
void TreeConstructer::tcCharacters(
    void* constructer, const char* contents, int length)
{
    TreeConstructer *this_ =
        (TreeConstructer*) constructer;
    if (this_ -> theSituation -> isError())
        return;
    Tree *t = this_ -> theTree;

    Vertex *newVertex;
    if (!!(newVertex = t -> appendText((char *) contents, length)))
        newVertex -> lineno = this_ -> getCurrentLineNumber();
};



//
//  tcEndDocument
//  callback for the document end event
//

/* static */
void TreeConstructer::tcEndDocument(
    void* constructer)
{
    // Nothing happens at the end of a document. The callback exists
    // because myHandlerRecord needs an entry for this event.
}


//
//  tcUnknownEncoding
//  callback for the unknown encoding event (expat)
//  needs to have "enc1250.h" included
//


/* static */
// Fills in expat's description of an encoding that expat does not know.
// Only "windows-1250" and "iso-8859-2" (compared with strEqNoCase) are
// served; for any other name 0 is returned and 'info' is left untouched.
// Bytes below 0x80 map to themselves, except 0x7f which is set to -1; bytes
// from 0x80 up are taken from the table of the encoding.
// Returns 1 when 'info' has been filled in.
int TreeConstructer::tcUnknownEncoding(
    void *encodingHandlerData, const char *name, XML_Encoding *info)
{
    int *upperHalf =
        strEqNoCase((char*) name,"windows-1250") ? Enc1250 :
        strEqNoCase((char*) name,"iso-8859-2") ? EncLatin2 :
        NULL;
    if (!upperHalf)
        return 0;

    int low;
    for (low = 0; low < 0x80; low++)
        info -> map[low] = low;
    int high;
    for (high = 0; high < 0x80; high++)
        info -> map[0x80 + high] = upperHalf[high];
    info -> map[0x7f] = -1;

    // single-byte encodings: no converter and no private data needed
    info -> data = NULL;
    info -> convert = NULL;
    info -> release = NULL;
    return 1;
}

//
//  tcExternalEntityRef
//  callback for the external entity reference event (expat)
//

/* static */
// Parses an external entity into the tree that is currently being built.
// A sub-parser is created from 'parser', the system identifier is resolved
// against 'base', a dataline is obtained from the processor and the entity
// is fed through a temporary TreeConstructer.
// Returns 1 on success and 0 on any failure, which is the convention expat
// uses for this handler. The failure paths used to go through E()/M(),
// whose NOT_OK result is non-zero and would have signalled success to expat.
// NOTE(review): when the entity parse itself fails, the sub-parser is not
// released here because feedDocumentToParser() already releases it on one of
// its error paths - confirm the intended ownership.
int TreeConstructer::tcExternalEntityRef(
    XML_Parser parser, const char* context, const char* base,
    const char* systemId, const char* publicId)
{
    TreeConstructer *this_ =
        (TreeConstructer*) XML_GetUserData(parser);
    if (this_ -> theSituation -> isError())
        return 0;
    Tree *t = this_ -> theTree;

    SituationObj* situation = this_ -> theSituation;

    Log1(situation, L1_READING_EXT_ENTITY, systemId);
    XML_Parser newParser =
        XML_ExternalEntityParserCreate(parser, context, /* encoding= */ NULL);
    DataLine *newDL = NULL;
    Str absolute;
    if (makeAbsoluteURI(systemId, base, absolute) ||
        this_ -> theProcessor -> addLineNoTree(newDL, absolute, t -> XSLTree) ||
        !newParser || !newDL)
    {
        // nothing has used the sub-parser yet, so it is safe to release it
        if (newParser)
            XML_ParserFree(newParser);
        return 0;
    }

    // a local object: it goes away on every path out of this function
    TreeConstructer newTC(this_ -> theProcessor);
    if (newTC.parseDataLineUsingGivenExpat(t, newDL, newParser))
        return 0;

    XML_ParserFree(newParser);
    return 1;
}

/* static */
// Decodes a name as delivered by expat into 'qname_'.
//   this_  - constructer whose processor supplies the situation object,
//            the dictionary and the standard phrases
//   qname_ - receives URI, local part and (in the xml: case) the prefix
//   text   - either "<uri><separator><local>" or a plain name
// Returns OK on success; the Err1 calls are expected to return NOT_OK (the
// callers rely on a non-OK result to detect these errors).
// The string is cut in place while it is examined, although it is received
// as const. Every cut is undone again, except on the bad-prefix error path
// where the shortened string is what gets reported.
eFlag TreeConstructer::setQNameFromExpat(TreeConstructer* this_, QName& qname_, const char* text)
{
    char *p = (char*) text,
        *q = strchr(p, THE_NAMESPACE_SEPARATOR);
    if (q)
    {
        // cut at the separator to obtain the URI, then put back exactly the
        // character that was found there
        const char separator = *q;
        *q = 0;
        qname_.setUri(p);
        *q = separator;
        qname_.setLocal(q+1);
        // a colon in the local part means the name had more than one colon
        if (strchr(q+1,':'))
        {
            DStr msg = "{";
            msg += qname_.getUri();
            msg += "}:";
            msg += qname_.getLocal();
            Err1(this_ -> theProcessor -> situation, E1_EXTRA_COLON, (char *)msg);
        }
    }
    else
    {
        qname_.uri = UNDEF_PHRASE;
        qname_.setLocal(p);
        char *isColon;
        qname_.prefix = UNDEF_PHRASE;
        if (!!(isColon = strchr(p,':')))
        {
            *isColon = 0;

            // fixing what appears a bug in expat - sometimes the xml:lang attr arrives unexpanded
            // apparently only in XSL and when it's not at the top level
            if (!strEqNoCase(p,"xml"))
                Err1(this_ -> theProcessor -> situation, ET_BAD_PREFIX,p)
            else
            {
                // give the name back to expat the way it was received
                *isColon = ':';
                qname_.setLocal(isColon + 1);
                qname_.uri = this_ -> theProcessor -> stdPhrase(PHRASE_XML_NAMESPACE);
                E( this_ -> theProcessor -> dict().insert("xml",qname_.prefix) );
            }
        }
    }
    return OK;
}

// Returns the outputter currently held by this constructer. The member
// starts out as NULL and is set by the parseUsingSAX* functions.
OutputterObj *TreeConstructer::getOutputter()
{
    return theOutputter;
}
