/*
Copyright (C) 2000-2010  Ministere de la culture et de la communication (France), AJLSM
See LICENCE file
 */
package fr.gouv.culture.sdx.search.lucene.query;

import java.io.IOException;
import java.text.Collator;
import java.util.BitSet;
import java.util.Locale;
import java.util.TreeMap;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.regex.JavaUtilRegexCapabilities;
import org.apache.lucene.search.regex.RegexCapabilities;
import org.apache.lucene.search.regex.RegexTermEnum;
import org.apache.regexp.RE;
import org.apache.regexp.RESyntaxException;

import fr.gouv.culture.sdx.exception.SDXException;
import fr.gouv.culture.sdx.exception.SDXExceptionCode;
import fr.gouv.culture.sdx.search.lucene.Field;
import fr.gouv.culture.sdx.utils.Bits;
import fr.gouv.culture.sdx.utils.Utilities;
import fr.gouv.culture.sdx.utils.logging.LoggingUtils;

/*
 * This class extends the SDX Terms class.
 * 
 * It is only worthwhile when the list of terms to return has to be
 * filtered on a value, with or without truncation.
 * In that case it is about 10 times faster than the original Terms class.
 * This performance gain comes from the use of a {@link RegexTermEnum}.
 */
/** A list of terms for a document base, filtered against a regular expression.
 * 
 * <p>This class optimizes {@link Terms} for lists of terms filtered by a value,
 * with or without wildcard. According to its authors it is, in that case, about
 * 10 times faster than {@link Terms}; the gain comes from the use of a
 * {@link RegexTermEnum}.</p>
 * 
 * <p>Typical use: enable logging, then call one of the <code>setUp</code>
 * methods. Every <code>setUp</code> closes the {@link IndexReader} obtained
 * from the {@link SearchLocations} once the list has been built.</p>
 * 
 * <p>NOTE(review): {@link #setRegex(String)} validates the expression with
 * Jakarta Regexp while the default {@link RegexCapabilities} is
 * {@link JavaUtilRegexCapabilities}; the two syntaxes are close but not
 * identical - confirm this is intended.</p>
 * 
 * @author Malo Pichot <malo.pichot@asjlm.com>
 * @since SDX v. 2.4.1
 */
public class RegexTerms extends Terms {
	
	/** Expression used when no filter value is supplied (null or empty): matches every term. */
	private static final String MATCH_ALL_REGEX = ".*";
	
	private RegexCapabilities m_regexImpl; // the regular expression engine given to the RegexTermEnum
	private String m_prefix = ""; // not used by this class at the moment
	private String m_field = ""; // the field name
	private String m_value = ""; // the value as a filter ; its original version (may be null)
	private String m_regex; // the regular expression built from the value (m_value) ; never null once setRegex() succeeded
	private String m_internalFieldName = "";// interned field name ; compared by identity with Term.field()
	private IndexReader m_reader; // the Lucene index reader
	
	/** Creates an empty list of terms.
	 *
	 * <p>A logger must be set and then the list must be set up before use.</p>
	 *
	 * @see #enableLogging
	 * @see #setUp
	 */
	public RegexTerms() {
	}
	
	/** Sets the RegexCapabilities to this RegexTerms
	 * 
	 * <p>If the RegexCapabilities passed to the method is null,
	 * a {@link JavaUtilRegexCapabilities} is used instead.</p>
	 * 
	 * @param regexCapabilities The {@link RegexCapabilities} to set (may be null)
	 */
	public void setRegexCapabilities (RegexCapabilities regexCapabilities) {
		this.m_regexImpl = regexCapabilities;
		if (null == m_regexImpl) {
			m_regexImpl = new JavaUtilRegexCapabilities();
		}
	}
	
	/** Sets the SDX {@link SearchLocations} to this RegexTerms
	 * 
	 * <p>Logs a warning when the {@link SearchLocations} contains more than
	 * one entry.</p>
	 * 
	 * @param searchLocations	The {@link SearchLocations} to set.
	 * 
	 * @see fr.gouv.culture.sdx.search.lucene.query.AbstractResponse#setSearchLocations(fr.gouv.culture.sdx.search.lucene.query.SearchLocations)
	 */
	public void setSearchLocations (SearchLocations searchLocations) {
		super.setSearchLocations(searchLocations);
		// If there is more than one Reader in the searchlocation, then log a warning
		if (super._searchLocations != null && super._searchLocations.size() > 1) {
			SDXException sdx_e = new SDXException(SDXExceptionCode.WARN_USE_FIRST_SEARCHLOCATION,null);
			LoggingUtils.logWarn(this._logger, sdx_e.getMessage(_locale) );
		}
	}
	
	/** Sets the field to this RegexTerms
	 * 
	 * <p>Also stores the interned version of the name and forwards the name
	 * to <code>setFieldFilter</code>.</p>
	 * 
	 * @param field	The field to set
	 * @throws SDXException	If the field name is rejected by <code>Utilities.checkString</code>
	 */
	public void setField (String field) throws SDXException {
		if (!Utilities.checkString(field)) {
			throw new SDXException(super.getLog(), SDXExceptionCode.ERROR_FIELD_NAME_NULL, null, null);
		}
		this.m_field = field;
		this.m_internalFieldName = field.intern();
		setFieldFilter(field);
	}
	
	/** Sets the {@link IndexReader} for this RegexTerms
	 * 
	 * @param reader	The {@link IndexReader} to set
	 */
	public void setIndexReader (IndexReader reader) {
		this.m_reader = reader;
	}
	
	/** Sets the regular expression that may filter the list of terms
	 * 
	 * <p>The original value is kept as is for error reporting. A null or
	 * empty value is not an error: a warning is logged and an expression
	 * matching every term is used, so that the whole field is listed.</p>
	 * 
	 * @param value The regular expression (as a {@link String}) to set. May be null or empty.
	 * @throws SDXException	Throws a SDXException for incorrect regular expression
	 */
	public void setRegex (String value)
		throws SDXException {
		this.m_value = value;
		if (!Utilities.checkString(value)) { // warns a message on the empty value
			SDXException sdx_e = new SDXException(SDXExceptionCode.WARN_STRING_FILTER_NULL,null);
			LoggingUtils.logWarn(this._logger, sdx_e.getMessage(_locale));
		}
		// A null value used to end in a NullPointerException: fall back on "no filter".
		String regex = (value == null || value.length() == 0) ? MATCH_ALL_REGEX : value;
		try { // Tests the value ; it must be a valid regular expression
			org.apache.regexp.REUtil.createRE(regex);
		} catch (RESyntaxException e) {
			String[] args = new String[1];
			args[0] = value;
			throw new SDXException (this._logger, SDXExceptionCode.ERROR_PARSE_REGULAR_EXPRESSION, args, e);
		}
		this.m_regex = regex;
	}
	
	/**
	 * Builds a term list filtered by a SDX {@link Results}, and optionally
	 * a value.
	 *
	 * @param	sLocs	The SDX Search Locations object
	 * @param	sdxResults	The Results object to extract the query from
	 * @param	field	The field name.
	 * @param	value	The value (may be null)
	 * @throws SDXException	If the results or their query are null, or if the list cannot be built
	 */
	public void setUp(SearchLocations sLocs, Results sdxResults, String field, String value) 
		throws SDXException
	{
		if (sdxResults == null) { // no results, hence no query to filter with
			throw new SDXException(null, SDXExceptionCode.ERROR_QUERY_NULL, null, null);
		}
		setUp(sLocs, sdxResults.getQuery(), field, value);
	}

	/**
	 * Builds a term list filtered by a {@link Query}, and optionally 
	 * a value.
	 *
	 * @param	sLocs	The SDX Search Locations object
	 * @param	sdxQuery	The SDX Query object to extract the Lucene Query
	 * @param	field	The field name.
	 * @param	value	The value (may be null)
	 * @throws SDXException	If the query or its Lucene query is null, or if the list cannot be built
	 */
	public void setUp(SearchLocations sLocs, Query sdxQuery, String field, String value) 
		throws SDXException 
	{
		if (sdxQuery==null || sdxQuery.getLuceneQuery()==null) {
			throw new SDXException(null, SDXExceptionCode.ERROR_QUERY_NULL, null, null);
		}
		setUp(sLocs, sdxQuery.getLuceneQuery(), field, value);
	}
	
	/**
	 * Builds a term list filtered by a SDX {@link Query}, and optionally
	 * a value.
	 * 
	 * @param sLocs	The SDX Search Locations object.
	 * @param sdxQuery	The SDX {@link Query}.
	 * @param field	The field name.
	 * @param value	The value (may be null).
	 * @param regexCapabilities	The {@link RegexCapabilities} (may be null).
	 * @throws SDXException	If the query or its Lucene query is null, or if the list cannot be built
	 */
	public void setUp(SearchLocations sLocs, Query sdxQuery, String field, String value, RegexCapabilities regexCapabilities) 
		throws SDXException 
	{
		if (sdxQuery==null || sdxQuery.getLuceneQuery()==null) {
			throw new SDXException(null, SDXExceptionCode.ERROR_QUERY_NULL, null, null);
		}
		setUp(sLocs, sdxQuery.getLuceneQuery(), field, value, regexCapabilities);
	}
	
	/**
	 * Builds a term list filtered by a Lucene {@link org.apache.lucene.search.Query}, and
	 * optionally a value. The default {@link RegexCapabilities} is used.
	 *
	 * @param	sLocs	The SDX Search Locations object.
	 * @param	luceneQuery	The Lucene Query object
	 * @param	field	The field name.
	 * @param	value	The value (may be null).
	 * @throws SDXException	If the list cannot be built
	 */
	public void setUp(SearchLocations sLocs, org.apache.lucene.search.Query luceneQuery, String field, String value) 
		throws SDXException 
	{
		setUp(sLocs, luceneQuery, field, value, null);
	}
	
	/** Builds a term list filtered by a Lucene {@link org.apache.lucene.search.Query}, and
	 * optionally a value.
	 * 
	 * <p>Only the terms that appear in at least one document matching the
	 * query are kept. The index reader is closed before the method returns,
	 * whether it succeeds or not.</p>
	 *
	 * @param	sLocs	The SDX Search Locations object
	 * @param	luceneQuery	The Lucene {@link org.apache.lucene.search.Query} object.
	 * @param	field	The field name.
	 * @param	value	The value (may be null).
	 * @param regexCapabilities	The {@link RegexCapabilities} (may be null).
	 * @throws SDXException	If an argument is invalid or if the index cannot be read
	 */
	public void setUp(SearchLocations sLocs, org.apache.lucene.search.Query luceneQuery, String field, String value, RegexCapabilities regexCapabilities) 
		throws SDXException 
	{

		// Rejects a missing query before any reader is acquired: the filter
		// constructor does not check it and the failure would otherwise be
		// reported later as an index read error.
		if (luceneQuery == null) {
			String[] args = new String[1];
			args[0] = "null";
			throw new SDXException(null, SDXExceptionCode.ERROR_LUCENE_QUERY_NULL, args, null);
		}

		// Checks the search locations, then sets field, regex, capabilities and reader
		initRegexContext(sLocs, field, value, regexCapabilities);

		try {
			initCollator(super._searchLocations.getField(field));
			termList = new TreeMap(sortCollator);

			// Builds the query that filter the list
			QueryWrapperFilter qwf;
			try {
				qwf = new QueryWrapperFilter( luceneQuery );
			} catch (Exception e) {
				String[] args = new String[1];
				args[0] = e.getMessage();
				throw new SDXException(null, SDXExceptionCode.ERROR_LUCENE_QUERY_NULL, args, e);
			}

			try {
				// NOTE(review): getRegexTerms() sorts with the field collator (or a French
				// default), not with sortCollator - confirm both always agree.
				termList = getRegexTerms( qwf.bits(m_reader) );
			} catch (RESyntaxException res) {
				String[] args = new String[1];
				args[0] = this.m_value;
				throw new SDXException(_logger, SDXExceptionCode.ERROR_PARSE_REGULAR_EXPRESSION, args, res);
			} catch (SDXException sdx_e) { // already a SDX exception: passed on unchanged
				throw sdx_e;
			} catch (Exception e) {
				String[] args = new String[2];
				args[0] = describeReaderLocation();
				args[1] = e.getMessage();
				throw new SDXException(null, SDXExceptionCode.ERROR_LUCENE_READ, args, e);
			}
		} finally {
			// freeing resources, whatever happened above
			closeReader();
		}

		super.setNbPages(countPages());

	}
	
	/** Builds a list of terms from a field, optionally filtered by a regular expression.
	 * The default {@link RegexCapabilities} is used.
	 * 
	 * @param sLocs The {@link SearchLocations} that indicates the document base to work on.
	 * @param field The field (as a {@link String}).
	 * @param value The value (as a {@link String}). Must be a valid regular expression. May be null or empty.
	 * @throws SDXException	If the list cannot be built
	 */
	public void setUp (SearchLocations sLocs, String field, String value)
		throws SDXException
	{
		setUp (sLocs, field, value, null);
	}
	
	/** Builds a list of terms from a field, optionally filtered by a regular expression.
	 * 
	 * <p>The term enumeration and the index reader are closed before the
	 * method returns, whether it succeeds or not.</p>
	 * 
	 * @param sLocs The {@link SearchLocations} that indicates the document base to work on.
	 * @param field The field (as a {@link String}).
	 * @param value The value (as a {@link String}). Must be a valid regular expression. May be null or empty.
	 * @param regexCapabilities The {@link RegexCapabilities} to use with the {@link RegexTermEnum} (may be null).
	 * @throws SDXException	If an argument is invalid or if the index cannot be read
	 */
	public void setUp (SearchLocations sLocs, String field, String value, RegexCapabilities regexCapabilities) 
		throws SDXException 
	{

		// Checks the search locations, then sets field, regex, capabilities and reader
		initRegexContext(sLocs, field, value, regexCapabilities);

		try {
			Field _field = this._searchLocations.getField(m_field);

			initCollator(_field);
			termList = new TreeMap(sortCollator);

			RegexTermEnum regexTermEnum = null;
			try {
				// m_regex (not the raw value) so that a null or empty value lists every term
				regexTermEnum = new RegexTermEnum( m_reader, new Term(m_internalFieldName, m_regex), m_regexImpl );
				buildTermList(regexTermEnum);
			} catch (RESyntaxException res) {
				String[] args = new String[1];
				args[0] = this.m_value;
				throw new SDXException(_logger, SDXExceptionCode.ERROR_PARSE_REGULAR_EXPRESSION, args, res);
			} catch (SDXException sdx_e) { // already a SDX exception: passed on unchanged
				throw sdx_e;
			} catch (Exception e) {
				String[] args = new String[2];
				args[0] = describeReaderLocation();
				args[1] = e.getMessage();
				throw new SDXException(null, SDXExceptionCode.ERROR_LUCENE_READ, args, e);
			} finally {
				// the enumeration is closed first, while its reader is still open
				closeRegexTermEnum(regexTermEnum, _field);
			}
		} finally {
			// the reader is closed even when closing the enumeration failed
			closeReader();
		}

		super.setNbPages(countPages());

	}
	
	/* Private method shared by the setUp() methods: validates the search
	 * locations and stores field, regular expression, capabilities, search
	 * locations and index reader, in that order.
	 * 
	 * @throws SDXException	If the search locations are null or if field or value are rejected
	 */
	private void initRegexContext (SearchLocations sLocs, String field, String value, RegexCapabilities regexCapabilities)
		throws SDXException
	{
		if (sLocs == null) { // Throws an exception if SearchLocations is not valid
			throw new SDXException(super.getLog(), SDXExceptionCode.ERROR_SEARCHLOCATIONS_NULL, null, null);
		}
		setField(field);
		setRegex(value);
		setRegexCapabilities(regexCapabilities);
		this.setSearchLocations(sLocs);
		setIndexReader( sLocs.getReader() );
	}
	
	/* Private method to describe where the current reader reads from, for
	 * error messages. Never throws, so that it cannot hide the error being
	 * reported.
	 */
	private String describeReaderLocation () {
		if (m_reader == null) return "null";
		try {
			return m_reader.directory().toString();
		} catch (RuntimeException e) { // no usable directory for this reader
			return String.valueOf(m_reader);
		}
	}
	
	/* Private method to add a term to a list: updates the entry found under
	 * the term text, or creates one. The lookup goes through the comparator
	 * of the map, so terms it considers equal share one entry.
	 * 
	 * @param target	The list of terms to fill.
	 * @param term	The term to add (not null).
	 */
	private void storeTermInfo (TreeMap target, Term term)
		throws SDXException, IOException
	{
		String text = term.text();
		TermInfo info = (TermInfo) target.get(text);
		if (info != null) {
			info.update(m_reader, term);
		}
		else {
			info = new TermInfo();
			info.enableLogging(super.getLog());
			info.setUp(m_reader, term);
			target.put(text, info);
		}
	}
	
	/* Private method to close a term enumeration.
	 * 
	 * @param regexTermEnum	The enumeration to close (may be null).
	 * @param field	The field being listed, used for the error message (may be null).
	 * @throws SDXException	If the enumeration cannot be closed
	 */
	private void closeRegexTermEnum (RegexTermEnum regexTermEnum, Field field)
		throws SDXException
	{
		if (regexTermEnum == null) return;
		try {
			regexTermEnum.close();
		} catch (IOException e) {
			String[] args = new String[2];
			args[0] = (field != null) ? field.getCode() : m_field;
			args[1] = e.getMessage();
			throw new SDXException(null, SDXExceptionCode.ERROR_LUCENE_TERMENUM_CLOSE, args, e);
		}
	}
	
	/* Private method to build the final terms list.
	 * 
	 * Walks the enumeration and stores every term of the current field in
	 * termList; stops at the first entry that is not a term of the field
	 * once at least one term has been collected.
	 * 
	 * @param _regexTermEnum The {@link RegexTermEnum} to walk through (may be null).
	 * @throws SDXException
	 */
	private void buildTermList (RegexTermEnum _regexTermEnum) 
		throws SDXException
	{
		if (_regexTermEnum == null) return;
		try {
			boolean collecting = false;
			do {
				// read once per iteration, so that a previous term is never processed twice
				Term current = _regexTermEnum.term();
				// identity comparison on purpose: both names are interned
				if (current != null && current.field() == m_internalFieldName) {
					collecting = true;
					storeTermInfo(termList, current);
				} else if (collecting) break;
			} while (_regexTermEnum.next());
		} catch (IOException e) {
			String[] args = new String[1];
			args[0] = e.getMessage();
			throw new SDXException(null, SDXExceptionCode.ERROR_GET_TERMS, args, e);
		}
	}
	
	/** Builds a list of terms (as a {@link TreeMap}).
	 * 
	 * <p>The terms of the current field matching the current regular
	 * expression are kept when they appear in at least one document of the
	 * subset. The list is sorted with the collator of the field or, when
	 * there is none, with a French collator. An empty list is returned when
	 * no {@link RegexCapabilities}, reader or regular expression has been set.
	 * The reader is not closed by this method.</p>
	 * 
	 * @param docs	The subset of documents (as a {@link BitSet}) ; null means no restriction.
	 * @return	The list of terms as a {@link TreeMap}, never null.
	 * @throws SDXException	If the terms cannot be read or the enumeration cannot be closed
	 */
	public TreeMap getRegexTerms(BitSet docs) 
		throws SDXException {

		if (null == this.m_regexImpl 
				|| null == m_reader 
				|| null == m_regex ) 
		{
			return new TreeMap(); // TODO : is this the better solution? Harmonize with other lists of terms...
		}
		
		// Checks the expression against the actual regex engine before opening the enumeration
		m_regexImpl.compile(this.m_regex);
		
		Field _field = (this._searchLocations != null) ? this._searchLocations.getField(m_field) : null;

		// Sets the Collator for the final list of terms
		TreeMap result;
		if (_field == null || _field.getCollator() == null)
			result = new TreeMap(Collator.getInstance(new Locale("fr", "FR"))); // TODO : is it OK?
		else
			result = new TreeMap(_field.getCollator());

		RegexTermEnum regexTermEnum = null;
		try {
			regexTermEnum = new RegexTermEnum(this.m_reader, 
											new Term(m_internalFieldName, m_regex), 
											m_regexImpl);
			boolean collecting = false;
			do {
				Term current = regexTermEnum.term();
				// identity comparison on purpose: both names are interned
				if (current != null && m_internalFieldName == current.field()) {

					collecting = true;

					/* We build the bitset for this term and compare
					 * it with the one we have, and then count what's
					 * left.
					 */
					BitSet termDocumentSet = getDocumentSet(m_reader, current);
					if (null != docs) termDocumentSet.and(docs);
					if (Bits.countBits(termDocumentSet) > 0) {
						storeTermInfo(result, current);
					}

				} else if (collecting) break;

			} while (regexTermEnum.next());

			return result;

		} catch (IOException e) {
			String[] args = new String[1];
			args[0] = e.getMessage();
			throw new SDXException(null, SDXExceptionCode.ERROR_GET_TERMS, args, e);
		} finally {
			//freeing resources
			closeRegexTermEnum(regexTermEnum, _field);
		}
	}
	
	/* Private method to close the current IndexReader
	 * 
	 * Does nothing when no reader has been set.
	 * 
	 * @throws SDXException	If the reader cannot be closed
	 */
	private void closeReader() throws SDXException {
		if (m_reader == null) return;
		try {
			m_reader.close();
		} catch (IOException e) {
			String[] args = new String[1];
			args[0] = e.getMessage();
			throw new SDXException(super.getLog(), SDXExceptionCode.ERROR_LUCENE_READER_CLOSE, args, e);
		}
	}

}
