// 
// Copyright (c) 2006-2008 Ben Motmans
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// Author(s):
//    Ben Motmans <ben.motmans@gmail.com>
//

using System;
using System.Text;
using System.Collections.Generic;

namespace Anculus.Core
{
	/// <summary>
	/// Boyer-Moore exact string search, executed over the UTF-8 encoding of the
	/// text and the keyword so that a fixed 256-entry byte alphabet can be used.
	/// </summary>
	/// <remarks>
	/// n and m below are the lengths, in UTF-8 bytes, of the text and the keyword.
	/// Preprocessing time: Θ(m + |Σ|).
	/// Search time: O(n/m) in the best case; O(n·m) in the worst case when all
	/// occurrences are reported (no Galil rule is applied).
	///
	/// Handbook of Exact String-Matching Algorithms - Christian Charras, Thierry Lecroq
	/// (http://igm.univ-mlv.fr/~lecroq/string/node14.html)
	/// </remarks>
	public class BoyerMooreSearchAlgorithm : AbstractSearchAlgorithm
	{
		/// <summary>Number of distinct byte values; the size of the bad-character table.</summary>
		protected const int AlphabetSize = 256;

		// Upper bound for the initial capacity of the result list, so that a large
		// "count" does not force a large allocation before any match is found.
		private const int MaxInitialResultCapacity = 256;

		/// <summary>
		/// Collects the character indices (relative to the beginning of
		/// <paramref name="text"/>) of the occurrences of <paramref name="keyword"/>
		/// found at or after <paramref name="start"/>.
		/// </summary>
		/// <param name="text">The text to search in.</param>
		/// <param name="start">Character index in <paramref name="text"/> where the search begins.</param>
		/// <param name="count">
		/// Maximum number of matches to collect; the search stops as soon as this
		/// many matches were found.
		/// </param>
		/// <param name="keyword">The keyword to look for.</param>
		/// <returns>The match positions in ascending order; empty when there is no match.</returns>
		public override int[] SearchAll (string text, int start, int count, string keyword)
		{
			CheckArguments (text, start, count, keyword);

			// Cap the preallocation: the list grows on demand. A negative count still
			// reaches the List constructor and is rejected there, as before.
			List<int> results = new List<int> (Math.Min (count, MaxInitialResultCapacity));

			foreach (int result in SearchIterator (text, start, keyword)) {
				results.Add (result);
				// NOTE(review): a count of 0 never satisfies this equality, so it does
				// not limit the results - confirm that CheckArguments rejects it.
				if (results.Count == count)
					break;
			}

			return results.ToArray ();
		}

		/// <summary>
		/// Finds the first occurrence of <paramref name="keyword"/> at or after
		/// <paramref name="start"/>.
		/// </summary>
		/// <param name="text">The text to search in.</param>
		/// <param name="start">Character index in <paramref name="text"/> where the search begins.</param>
		/// <param name="keyword">The keyword to look for.</param>
		/// <returns>
		/// The character index of the first match, relative to the beginning of
		/// <paramref name="text"/>, or -1 when there is no match.
		/// </returns>
		public override int SearchFirst (string text, int start, string keyword)
		{
			CheckArguments (text, start, int.MaxValue, keyword);

			// foreach disposes the enumerator, even on the early return.
			foreach (int index in SearchIterator (text, start, keyword))
				return index;

			return -1;
		}

		/// <summary>
		/// Lazily enumerates every (possibly overlapping) occurrence of
		/// <paramref name="keyword"/> in <paramref name="text"/>, beginning at
		/// character index <paramref name="start"/>.
		/// </summary>
		/// <param name="text">The text to search in; must not be null.</param>
		/// <param name="start">Character index in <paramref name="text"/> where the search begins.</param>
		/// <param name="keyword">The keyword to look for; an empty keyword yields no matches.</param>
		/// <returns>
		/// Character indices of the matches, relative to the beginning of
		/// <paramref name="text"/>, in ascending order.
		/// </returns>
		protected static IEnumerable<int> SearchIterator (string text, int start, string keyword)
		{
			// An empty keyword would index suff[-1] during preprocessing, so it is
			// treated as "no match". The second test rejects a start position that
			// leaves fewer characters than the keyword needs.
			if (keyword.Length == 0 || start > (text.Length - keyword.Length))
				yield break;

			// Only the part of the text at or after "start" is encoded and searched.
			byte[] tb = Encoding.UTF8.GetBytes (start == 0 ? text : text.Substring (start));
			int n = tb.Length;

			byte[] kb = Encoding.UTF8.GetBytes (keyword);
			int m = kb.Length;

			int[] gs = new int[m + 1];
			int[] bc = new int[AlphabetSize];

			//preprocessing
			GoodSuffixShift (kb, m, ref gs);
			BadCharacterShift (kb, m, ref bc);

			// Byte offsets are converted back to character indices incrementally:
			// charIndex is the character index (in "text") of byte offset countedBytes.
			// Every match begins on a UTF-8 character boundary, so the bytes between
			// two consecutive matches always decode to a whole number of characters.
			int charIndex = start;
			int countedBytes = 0;

			//search
			int j = 0;
			while (j <= (n - m)) {
				// Compare the keyword against the current window from right to left.
				int i = m - 1;
				while (i >= 0 && kb[i] == tb[i + j])
					--i;

				if (i < 0) {
					charIndex += Encoding.UTF8.GetCharCount (tb, countedBytes, j - countedBytes);
					countedBytes = j;
					yield return charIndex;
					j += gs[0];
				} else {
					// Mismatch at keyword position i: take the larger of the
					// good-suffix shift and the bad-character shift.
					j += Math.Max (gs[i], bc[tb[i + j]] - m + 1 + i);
				}
			}
		}

		/// <summary>
		/// Fills the bad-character table: for every byte value, the distance from
		/// its rightmost occurrence in the first m - 1 keyword bytes to the last
		/// keyword position, or m when the byte does not occur there.
		/// </summary>
		/// <param name="keyword">The keyword bytes.</param>
		/// <param name="m">Number of keyword bytes to consider.</param>
		/// <param name="bc">Table to fill; must hold at least <see cref="AlphabetSize"/> entries.</param>
		protected static void BadCharacterShift (byte[] keyword, int m, ref int[] bc)
		{
			int i;
			for (i = 0; i < AlphabetSize; ++i)
				bc[i] = m;

			// The last keyword byte is skipped on purpose; later (more rightward)
			// occurrences overwrite earlier ones.
			for (i = 0; i < (m - 1); ++i)
				bc[keyword[i]] = m - i - 1;
		}

		/// <summary>
		/// Fills the good-suffix table: gs[i] is the shift applied after a mismatch
		/// at keyword position i; gs[0] is also the shift applied after a full match.
		/// </summary>
		/// <param name="keyword">The keyword bytes.</param>
		/// <param name="m">Number of keyword bytes; must be at least 1.</param>
		/// <param name="gs">Table to fill; must hold at least m entries.</param>
		protected static void GoodSuffixShift (byte[] keyword, int m, ref int[] gs)
		{
			int i, j = 0;
			int[] suff = new int[m + 1];

			CalculateSuffixes (keyword, m, ref suff);

			// Default: shift by the whole keyword length.
			for (i = 0; i < m; ++i)
				gs[i] = m;

			// Case: only a prefix of the keyword matches a suffix of the matched part.
			// suff[i] == i + 1 means keyword[0..i] is also a suffix of the keyword.
			// The extra i == -1 pass only rewrites entries that still hold m with m.
			for (i = (m - 1); i >= -1; --i) {
				if (i == -1 || suff[i] == i + 1) {
					for (; j < (m - 1 - i); ++j) {
						if (gs[j] == m)
							gs[j] = m - 1 - i;
					}
				}
			}

			// Case: the matched suffix re-occurs further left inside the keyword.
			for (i = 0; i <= (m - 2); ++i)
				gs[m - 1 - suff[i]] = m - 1 - i;
		}

		/// <summary>
		/// Computes the suffix table: suff[i] is the length of the longest common
		/// suffix of keyword[0..i] and the whole keyword.
		/// </summary>
		/// <param name="keyword">The keyword bytes.</param>
		/// <param name="m">Number of keyword bytes; must be at least 1 because suff[m - 1] is written.</param>
		/// <param name="suff">Table to fill; must hold at least m entries.</param>
		protected static void CalculateSuffixes (byte[] keyword, int m, ref int[] suff)
		{
			int f = 0, g, i;

			suff[m - 1] = m;
			g = m - 1;
			for (i = (m - 2); i >= 0; --i) {
				// Reuse an already computed value when position i lies inside the
				// window (g, f] that was matched by an earlier comparison run.
				if (i > g && suff[i + m - 1 - f] < i - g) {
					suff[i] = suff[i + m - 1 - f];
				} else {
					if (i < g)
						g = i;
					f = i;
					// Extend the match leftwards by explicit comparison.
					while (g >= 0 && keyword[g] == keyword[g + m - 1 - f])
						--g;
					suff[i] = f - g;
				}
			}
		}
	}
}