/*
 * Tokenizer.cs
 *
 * This work is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This work is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software 
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 * As a special exception, the copyright holders of this library give
 * you permission to link this library with independent modules to
 * produce an executable, regardless of the license terms of these
 * independent modules, and to copy and distribute the resulting
 * executable under terms of your choice, provided that you also meet,
 * for each linked independent module, the terms and conditions of the
 * license of that module. An independent module is a module which is
 * not derived from or based on this library. If you modify this
 * library, you may extend this exception to your version of the
 * library, but you are not obligated to do so. If you do not wish to
 * do so, delete this exception statement from your version.
 *
 * Copyright (c) 2003 Per Cederberg. All rights reserved.
 */

using System.Collections;
using System.IO;
using System.Text;

namespace Gnu.Grammar {

    /**
     * A character stream tokenizer. This class groups the characters read 
     * from the stream together into tokens ("words"). The grouping is
     * controlled by token patterns that contain either a fixed string to
     * search for, or a regular expression. If the stream of characters 
     * doesn't match any of the token patterns, a parse exception is 
     * thrown. 
     *
     * Instances keep mutable buffering state and no synchronization is 
     * performed by this class.
     *
     * @author   Per Cederberg, <per at percederberg dot net>
     * @version  1.0
     */
    public class Tokenizer {

        /**
         * The maximum number of characters requested from the input 
         * stream in a single read operation.
         */
        private const int ReadBlockSize = 4096;

        /**
         * The list with all token patterns, in the order they were 
         * added. All elements are TokenPattern instances.
         */
        private readonly ArrayList patterns = new ArrayList();

        /**
         * The input stream to read from. When this is set to null, no
         * further input is available.
         */
        private TextReader input = null;

        /**
         * The buffer with previously read characters. Characters are
         * appended in blocks to this buffer, and for every token that
         * is found, its characters are removed from the start of the 
         * buffer.
         */
        private readonly StringBuilder buffer = new StringBuilder();

        /**
         * The scratch block used when reading from the input stream. 
         * It is allocated on the first read, reused for every following 
         * read and released when the end of the stream is reached.
         */
        private char[] readBuffer = null;

        /**
         * The position of the first character in the buffer, counted
         * in characters consumed as tokens since the tokenizer was 
         * created. This value is incremented by the token length for 
         * every token removed from the buffer.
         */
        private int position = 0;

        /**
         * The line number of the first character in the buffer. This 
         * value is set to the end line of each token returned. 
         */
        private int line = 1;

        /**
         * The column number of the first character in the buffer. This 
         * value is set to one past the end column of each token 
         * returned. 
         */    
        private int column = 1;

        /**
         * The cache with regular expression matches. This map is indexed
         * by the token pattern. An entry is either null, meaning that 
         * the last attempt for that pattern did not give an exact match,
         * or a match found further ahead in the buffer, stored with its 
         * position converted to the same scale as the position field. 
         * The whole cache is cleared whenever new characters are 
         * appended to the buffer.
         */
        private readonly Hashtable matchCache = new Hashtable();

        /**
         * Creates a new tokenizer for the specified input stream. The 
         * stream is closed by the tokenizer once its end has been 
         * reached.
         * 
         * @param Input          the input stream to read
         */
        public Tokenizer(TextReader Input) {
            input = Input;
        }

        /**
         * Returns the token pattern with the specified id. If several
         * patterns share the id, the one added first is returned.
         * 
         * @param Id             the token pattern id
         * 
         * @return the token pattern found, or
         *         null if not present
         */
        public TokenPattern GetPattern(int Id) {
            foreach (TokenPattern pattern in patterns) {
                if (pattern.GetId() == Id) {
                    return pattern;
                }
            }
            return null;
        }

        /**
         * Returns the current line number. This number will be the line
         * number of the next token returned.
         * 
         * @return the current line number
         */
        public int GetCurrentLine() {
            return line;
        }

        /**
         * Returns the current column number. This number will be the 
         * column number of the next token returned.
         * 
         * @return the current column number
         */
        public int GetCurrentColumn() {
            return column;
        }

        /**
         * Adds a new token pattern to the tokenizer. The pattern is
         * added last in the list. When two patterns give exact matches 
         * of the same length, the pattern that was added first is the 
         * one chosen.
         * 
         * @param Pattern        the pattern to add
         */
        public void AddPattern(TokenPattern Pattern) {
            patterns.Add(Pattern);
        }

        /**
         * Finds the next token on the stream. This method will return 
         * null when end of file has been reached. It will throw a parse
         * exception if no token matched the input stream, or if a token
         * pattern with the error flag set matched. Any tokens matching a
         * token pattern with the ignore flag set will be silently ignored 
         * and the next token will be returned.
         * 
         * @return the next token found, or 
         *         null if end of file was encountered
         *
         * @throws ParseException if the input stream couldn't be read or
         *             parsed correctly
         */
        public Token Next() {
            while (true) {
                Token  token = NextToken();

                if (token == null) {
                    return null;
                }

                TokenPattern  pattern = token.GetPattern();

                if (pattern.IsError()) {
                    throw new ParseException(
                        ParseException.ErrorType.InvalidToken,
                        pattern.GetErrorMessage(),
                        token.GetStartLine(),
                        token.GetStartColumn());
                }
                if (!pattern.IsIgnore()) {
                    return token;
                }
                // Ignored token, continue with the next one
            }
        }

        /**
         * Finds the next token on the stream, without any filtering on
         * the error or ignore flags. This method will return null when 
         * end of file has been reached. It will throw a parse exception 
         * if no token matched the input stream.
         * 
         * @return the next token found, or 
         *         null if end of file was encountered
         *
         * @throws ParseException if the input stream couldn't be read or
         *             parsed correctly
         */
        private Token NextToken() {
            TokenMatch  match;
            bool        retry;

            // Find longest matching string. While the best match is 
            // flagged as possible (more characters might change it) 
            // and more characters could be read, match again. 
            do {
                retry = false;
                if (buffer.Length == 0) {
                    ReadInput();
                }
                match = FindMatch(buffer.ToString());
                if (match != null && match.IsPossibleMatch()) {
                    retry = ReadInput();
                }
            } while (retry);

            // Return token results. An exact match of length zero is
            // not accepted, since it would remove nothing from the 
            // buffer and the tokenizer would never advance. 
            if (match != null 
             && match.IsExactMatch() 
             && match.Length() > 0) {

                int    length = match.Length();
                Token  token = match.CreateToken(line, column);

                buffer.Remove(0, length);
                position += length;
                line = token.GetEndLine();
                column = token.GetEndColumn() + 1;
                return token;
            }
            if (buffer.Length == 0) {
                return null;
            }
            throw new ParseException(
                ParseException.ErrorType.UnexpectedCharacter,
                buffer[0].ToString(),
                line,
                column);
        }

        /**
         * Reads characters from the input stream and appends them to the 
         * input buffer. This method is safe to call even though the end
         * of file has been reached. When characters are appended the
         * match cache is cleared. When the end of the stream is found 
         * the input stream is closed.
         * 
         * @return true if characters were read and appended, or
         *         false if end of file was found
         * 
         * @throws ParseException if an error was encountered while 
         *             reading the input stream
         */
        private bool ReadInput() {
            int  length;

            // Nothing to do (or allocate) after end of stream 
            if (input == null) {
                return false;
            }

            // Allocate the scratch block on first use only
            if (readBuffer == null) {
                readBuffer = new char[ReadBlockSize];
            }
            try {
                length = input.Read(readBuffer, 0, readBuffer.Length);
            } catch (IOException e) {
                throw new ParseException(
                    ParseException.ErrorType.IO,
                    e.Message,
                    -1,
                    -1);
            }
            if (length > 0) {
                buffer.Append(readBuffer, 0, length);
                matchCache.Clear();
                return true;
            }

            // End of stream, release the reader and the scratch block
            input.Close();
            input = null;
            readBuffer = null;
            return false;
        }

        /**
         * Finds the best matching token from a string. All token 
         * patterns are tried in the order they were added. The first 
         * match found is the initial candidate, and it is replaced only 
         * by a later exact match that is strictly longer. If any of the 
         * token patterns returned a possible match, the possible match 
         * flag will be set in the returned match.
         *  
         * @param Str            the string to match 
         * 
         * @return the best matching token, or
         *         null if no match was found
         */
        private TokenMatch FindMatch(string Str) {
            TokenMatch  bestMatch = null;
            bool        possible = false;

            foreach (TokenPattern pattern in patterns) {
                TokenMatch  match = FindMatch(pattern, Str);

                if (match == null) {
                    continue;
                }
                if (bestMatch == null 
                 || (match.IsExactMatch() 
                  && match.Length() > bestMatch.Length())) {

                    bestMatch = match;
                }
                if (match.IsPossibleMatch()) {
                    possible = true;
                }
            }
            if (bestMatch != null && possible) {
                bestMatch.SetPossible(true);
            }
            return bestMatch;
        }

        /**
         * Returns a pattern match for the specified string. Patterns 
         * that are not regular expressions are matched directly. For
         * regular expression patterns this method will also check and 
         * update the match cache. 
         * 
         * NOTE(review): the caching assumes that a regular expression 
         * pattern reports where in the string its match starts through 
         * TokenMatch.GetPosition(), so that a match ahead of the start 
         * can be saved for later. Confirm against TokenPattern.Match().
         * 
         * @param Pattern        the pattern to check
         * @param Str            the string to check
         * 
         * @return the token match found, or null if none found
         */
        private TokenMatch FindMatch(TokenPattern Pattern, string Str) {
            TokenMatch  match;

            // Check for non-regexp pattern
            if (Pattern.GetPatternType() != TokenPattern.PatternType.RegExp) {
                return Pattern.Match(Str);
            }

            // Check cache for previous match. ContainsKey() is needed
            // as a null value is a valid cache entry. 
            if (matchCache.ContainsKey(Pattern)) {
                match = (TokenMatch) matchCache[Pattern];
                if (match == null || match.GetPosition() > position) {
                    // No exact match known, or match is further ahead
                    return null;
                }
                matchCache.Remove(Pattern);
                if (match.GetPosition() == position) {
                    return match;
                }
                // Cached match was passed by, find a new one below
            }

            // Find pattern match        
            match = Pattern.Match(Str);

            // Cache regular expression match
            if (match == null || !match.IsExactMatch()) {
                matchCache.Add(Pattern, null);
            } else if (match.GetPosition() > 0) {
                // Match ahead of the buffer start, save it with its 
                // position on the same scale as the position field
                match.SetPosition(position + match.GetPosition());
                matchCache.Add(Pattern, match);
                return null;
            }

            return match;
        }
    }
}
