/* RDFHandler.java -- Handler for RDF in XML form.
 Copyright (C) 2005  The University of Sheffield.

 This file is part of the CASheW-s editor.

 The CASheW-s editor is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2, or (at your option)
 any later version.
 
 The CASheW-s editor is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with The CASheW-s editor; see the file COPYING.  If not, write to the
 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 02111-1307 USA.
*/

package nongnu.cashews.rdf;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.logging.Handler;
import java.util.logging.Logger;

import nongnu.cashews.commons.Pair;
import nongnu.cashews.commons.PairMap;
import nongnu.cashews.commons.PairSet;
import nongnu.cashews.commons.PairStack;
import nongnu.cashews.xml.XmlBaseHandler;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

/**
 * This class deals with the parsing of RDF in XML form, as defined by the 
 * <a href="http://www.w3.org/TR/rdf-syntax-grammar/">RDFXML </a> syntax
 * standard.  It turns an XML-based representation of an RDF graph into a
 * <code>Graph</code> object.
 * 
 * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
 * @see Graph
 */
public class RDFHandler
  extends XmlBaseHandler
{
  
  /**
   * The RDF namespace.
   */
  public static final String
    RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
 
  /**
   * The resulting graph.
   *
   * @serial the graph of RDF triples.
   */
  private Graph graph;

  /**
   * A <code>Logger</code> instance to log events generated by the
     * parsing process.
     */
  private Logger rdfLogger;
  
  /**
   * Flag to indicate whether we are inside an RDF document
   * or not.  There is not a one-to-one correspondence between
   * an RDF document and the XML document.  RDF is to usually
   * to be expected inside an RDF element.  However, a single
   * <code>Description</code> element may occur on its own, outside
   * this context.  In either of these cases, this flag should
   * be set. 
   */
  private boolean inRDF;
  
  /**
   * Flag to indicate whether we are inside a subject node or
   * not.
   */
  private boolean inSubject;
  
  /**
   * Flag to indicate whether we are inside a predicate node or
   * not.
   */
  private boolean inPredicate;
  
  /**
   * The current triple.
   */
  private Triple triple;
  
  /**
   * The current subject in the current RDF triple.
   */
  private Subject subject;
  
  /**
   * The current predicate in the current RDF triple.
   */
  private Predicate predicate;
  
  /**
   * The current object in the current RDF triple.
   */
  private RDFObject object;
  
  /**
   * The String form of the URI of the current subject.
   */
  private String subjectURI;

  /**
   * The String form of the URI of the current predicate.
   */
  private String predicateURI;
  
  /**
   * The type of the current literal.
   */
  private Type type;
  
  /**
   * The set of ids used in the RDF document.  RDF IDs must be unique
   * within a particular base URI.
   */
  private PairSet<URI,String> ids;

  /**
   * The stack of current subject and predicate pairs.  This allows
   * us to handle nested triples, where the object of one triple
   * is the triple nested inside that triple.
   */
  private PairStack<Subject,Predicate> currentState;

  /**
   * The set of blank node IDs.
   */
  private Set<String> nodeIDs;

  /**
   * Constructs a new <code>RDFHandler</code>, using the specified
   * handler for log messages.
   *
   * @param handler the handler to use for log messages.
   */
  public RDFHandler(Handler handler)
  {
    super(handler);
    rdfLogger = Logger.getLogger("nongnu.cashews.rdf.RDFHandler");
    rdfLogger.addHandler(handler);
    rdfLogger.setLevel(handler.getLevel());
  }
  
  /**
   * Captures the start of the document and sets up the initial
   * state.
   */
  public void startDocument()
  {
    super.startDocument();
    inSubject = false;
    inRDF = false;
    inPredicate = false;
    predicateURI = null;
    type = null;
    subject = null;
    predicate = null;
    object = null;
    ids = new PairSet<URI,String>();
    graph = new Graph();
    subjectURI = null;
    currentState = new PairStack<Subject,Predicate>();
    nodeIDs = new HashSet<String>();
  }
  
  /**
   * Captures the start of an XML element.
   *
   * @param uri the namespace URI.
   * @param localName the local name of the element inside the namespace.
   * @param qName the local name qualified with the namespace URI.  This
   *              may or may not be provided, as we don't ask for namespace
   *              prefixes.
   * @param attributes the attributes of this element.
   * @throws SAXException if some error occurs in parsing.
   */
  public void startElement(String uri, String localName,
			   String qName, Attributes attributes)
    throws SAXException
  {
    super.startElement(uri, localName, qName, attributes);
    if (uri.equals(RDF_NAMESPACE))
      {
	if (localName.equals("RDF"))
	  {
	    /* rdf:RDF */
	    inRDF = true;
	    rdfLogger.finer("Start of RDF block");
	  }
	else if (localName.equals("Description"))
	  {
	    /* rdf:Description */
	    rdfLogger.finer("Start of RDF description block");
	    inRDF = true;
	    if (!inSubject)
	      parseAttributes(attributes);
	  }
	else if (localName.equals("type") && inSubject)
	  setSubjectType(attributes.getValue(RDF_NAMESPACE, "resource"));
	else
	  rdfLogger.warning("Invalid use of RDF namespace: " + uri +
			    localName);
      }
    else
      {
	if (inRDF)
	  {
	    if (inSubject)
	      {
		/* Predicate element */
		predicateURI = uri + localName;
		rdfLogger.finer("Start of predicate: " + predicateURI);
		predicate = parseRDFURI(predicateURI);
		rdfLogger.fine("Created predicate: " + predicate);
		inSubject = false;
		inPredicate = true;
		/* Check for RDF URI object */
		String value = attributes.getValue(RDF_NAMESPACE,
						   "resource");
		if (value != null)
		  {
		    object = parseRDFURI(value);
		    rdfLogger.fine("Created object: " + object);
		  }
		else
		  {
		    /* Check for blank node object */
		    value = attributes.getValue(RDF_NAMESPACE, "nodeID");
		    if (value != null)
		      {
			object = new Blank(value);
			rdfLogger.fine("Created object: " + object);
		      }
		  }
		/* Check for a type */
		value = attributes.getValue(RDF_NAMESPACE, "datatype");
		if (value != null)
		  type = TypeFactory.getInstance(value);
	      }
	    else
	      {
		/* Abbreviation of typed rdf:Description */
		parseAttributes(attributes);
		subjectURI = uri + localName;
		rdfLogger.finer("Start of subject with type: " + subjectURI);
		setSubjectType(subjectURI);
	      }
	  }
      }
  }
  
  /**
   * Attempts to returns the RDF URI parsed from the specified
   * <code>String</code>.
   *
   * @param value the value to parse.
   * @return an RDF URI.
   * @throws SAXException if the URI can't be parsed.
   */
  private RDFURI parseRDFURI(String value)
    throws SAXException
  {
    try
      {
	return new RDFURI(new URI(value));
      }
    catch (URISyntaxException e)
      {
	throw new SAXException("Failed to parse URI: " + e, e);
      }
  }
  
  /**
   * Captures characters within an XML element.
   *
   * @param ch the array of characters.
   * @param start the start index of the characters to use.
   * @param length the number of characters to use from the start index on.
   * @throws SAXException if some error occurs in parsing.
   */
  public void characters(char[] ch, int start, int length)
    throws SAXException
  {
    super.characters(ch, start, length);
    String value = new String(ch, start, length).trim();
    if (value.length() == 0)
      return;
    rdfLogger.finer("Characters: " + value);
    if (inPredicate)
      {
	if (type == null)
	  object = new Literal(value);
	else
	  object = new Literal(value, type);
	rdfLogger.fine("Created object: " + object);
      }
  }
  
  /**
   * Captures the end of an XML element.
   *
   * @param uri the namespace URI.
   * @param localName the local name of the element inside the namespace.
   * @param qName the local name qualified with the namespace URI.  This
   *              may or may not be provided, as we don't ask for namespace
   *              prefixes.
   * @throws SAXException if some error occurs in parsing.
   */
  public void endElement(String uri, String localName,
			 String qName)
    throws SAXException
  {
    super.endElement(uri, localName, qName);
    if (uri.equals(RDF_NAMESPACE))
      {
	if (localName.equals("RDF"))
	  {
	    inRDF = false;
	    rdfLogger.finer("End of RDF block");
	  }
	else if (localName.equals("Description"))
	  endSubject();
      }
    else if (inPredicate && predicateURI.equals(uri + localName))
      {
	inPredicate = false;
	inSubject = true;
	predicateURI = null;
	type = null;
	triple = new Triple(subject, predicate, object);
	rdfLogger.fine("Created triple: " + triple);
	graph.addTriple(triple);
	rdfLogger.finer("End of predicate block");
      }
    else if (inSubject && subjectURI.equals(uri + localName))
      endSubject();
  }

  /**
   * Retrieves the graph created by the parsing process.
   *
   * @return the graph resulting from the parse.
   */
  public Graph getGraph()
  {
    return graph;
  }

  /**
   * Parses the attribute into a collection, from which used
   * attributes can be removed, and then passes them on to
   * the handler.
   *
   * @param attributes the attributes to sort out.
   * @throws SAXException if some error occurs in parsing.
   */
  private void parseAttributes(Attributes attributes)
    throws SAXException
  {
    PairMap<String,String,String> attribs =
      new PairMap<String,String,String>();
    for (int a = 0; a < attributes.getLength(); ++a)
      attribs.put(attributes.getURI(a), attributes.getLocalName(a),
		  attributes.getValue(a));
    rdfLogger.finer("Attributes: " + attribs);
    handleAttributes(attribs);
  }

  /**
   * Handles the attributes resulting from an RDF element.
   *
   * @param attributes the attributes to sort out.
   * @throws SAXException if some error occurs in parsing.
   */
  protected void handleAttributes(PairMap<String,String,String> attributes)
    throws SAXException
  {
    if (inPredicate)
      {
	/* A nested triple */
	rdfLogger.finer("Left state: " + 
			currentState.push(subject,predicate));
	inPredicate = false;
	subject = null;
	predicate = null;
      }
    /* Check for RDF URI subject */
    String value = attributes.get(RDF_NAMESPACE, "about");
    if (value != null)
      {
	subject = parseRDFURI(value);
	attributes.remove(RDF_NAMESPACE, "about");
      }
    else
      {
	value = attributes.get(RDF_NAMESPACE, "ID");
	if (value != null)
	  {
	    attributes.remove(RDF_NAMESPACE, "ID");
	    boolean added =
	      ids.add(getBaseURI(), value);
	    if (added)
	      subject = new
		RDFURI(getBaseURI().resolve("#" + value));
	    else
	      rdfLogger.severe("Duplicate node ID: " 
			       + getBaseURI().resolve("#" + value));
	  }
	else
	  {
	    /* Check for blank node subject */
	    value = attributes.get(RDF_NAMESPACE, "nodeID");
	    if (value != null)
	      {
		attributes.remove(RDF_NAMESPACE, "nodeID");
		boolean added = nodeIDs.add(value);
		if (added)
		  subject = new Blank(value);
		else
		  rdfLogger.severe("Duplicate blank node ID: " + value);
	      }
	    else
	      subject = new Blank(generateBlankID());
	  }
      }
    if (subject == null)
      rdfLogger.severe("No subject found in RDF " + 
		       "description.");
    else 
      {
	inSubject = true;
	rdfLogger.fine("Created subject: " + subject);
      }
    /* Handle any remaining attributes as property attributes */
    for (Map.Entry<Pair<String,String>,String> entry : attributes.entrySet())
      {
	Pair<String,String> key = entry.getKey();
	triple = new Triple(subject, 
			    parseRDFURI(key.getLeft() + key.getRight()),
			    new Literal(entry.getValue()));
	rdfLogger.fine("Created triple using property attribute: " + triple);
	graph.addTriple(triple);
      }
  }

  /**
   * Sets the type of the subject node to the value
   * parsed from the supplied string.
   *
   * @param value the value to parse.
   */
  private void setSubjectType(String value)
  {
    if (value != null && subject instanceof Node)
      {
	Node node = (Node) subject;
	node.setType(value);
	rdfLogger.fine("Found type: " + node.getType());
      }
  }

  /**
   * Generate an ID for a blank node.
   *
   * @return a blank node ID.
   */
  private String generateBlankID()
  {
    boolean added = false;
    String randomID;
    do
      {
	randomID = Long.toString(Math.round(Math.random() * Long.MAX_VALUE));
	added = nodeIDs.add(randomID);
      } while (!added);
    return randomID;
  }

  /**
   * Handle the end of a subject node, including one that is
   * nested inside another.  Subject nodes take two forms, so
   * it becomes necessary to have common handling for these.
   */
  private void endSubject()
  {
    inSubject = false;
    subjectURI = null;
    predicateURI = null;
    if (currentState.empty())
      subject = null;
    else
      {
	/* Closure of nested triple; return to previous state */
	object = (Node) subject;
	Pair<Subject,Predicate> previous = currentState.pop();
	rdfLogger.finer("New state: " + previous);
	subject = previous.getLeft();
	predicate = previous.getRight(); 
	if (subject instanceof Node)
	  {
	    URI subjectType = ((Node) subject).getType();
	    if (subjectType != null)
	      subjectURI = subjectType.toString();
	  }
	if (predicate instanceof RDFURI)
	  predicateURI = ((RDFURI) predicate).getURI().toString();
	inPredicate = true;
      }
    rdfLogger.finer("End of subject block");
  }

}
