# ----------------------------------------------------------
# AdvaS Advanced Search 
# module for the basic string/list methods
#
# (C) 2002 - 2005 Frank Hofmann, Chemnitz, Germany
# email fh@efho.de
# ----------------------------------------------------------

# changed 2004-11-29

import re
import string

def split_line (line):
	"""Break a line of text into its individual words.

	A word is any maximal run of regex word characters and apostrophes,
	so contractions such as "it's" stay in one piece while punctuation
	and whitespace act as separators.

	line -- the text to split

	Returns the list of words in order of appearance (empty when the
	line contains none).
	"""

	# the pattern is applied without any regex flags
	return re.findall(r"[\w']+", line)

def cmp_strings (term1, term2):
	"""Compare two strings after padding the shorter one with spaces.

	The shorter term is right-padded with blanks to the length of the
	longer one, then both are compared position by position.

	term1, term2 -- the strings to compare

	Returns -1 as soon as a character of term1 is smaller than its
	counterpart in term2, 1 as soon as it is larger, and 0 when no
	position differs.
	"""

	# positive: term1 is longer, negative: term2 is longer
	surplus = len(term1) - len(term2)

	if surplus > 0:
		term2 = term2 + " " * surplus
	elif surplus < 0:
		term1 = term1 + " " * (-surplus)

	# both terms have the same length now, so walk them in lockstep
	for left, right in zip(term1, term2):
		if left < right:
			return -1
		if left > right:
			return 1

	# no differing position found
	return 0

def is_comment (line):
	"""Report whether a line of text is a UNIX-style comment.

	Leading whitespace is ignored; the line counts as a comment when
	the first remaining character is "#".

	line -- the text line to inspect

	Returns 1 for a comment line and 0 otherwise.
	"""

	# use the str method rather than string.lstrip(): the module-level
	# function was removed in Python 3 and raised AttributeError there
	stripped = line.lstrip()

	# is comment? (UNIX style)
	if stripped.startswith("#"):
		return 1
	return 0

def kmp_search (text, pattern):
	"""Find every occurrence of pattern in text (Knuth-Morris-Pratt).

	text    -- the sequence to search in
	pattern -- the sequence to look for

	Returns the list of start indices of all occurrences in ascending
	order. Overlapping occurrences are reported, and so are occurrences
	that end exactly at the end of the text. An empty pattern, an empty
	text, or a pattern longer than the text yields an empty list.
	"""

	pattern_len = len(pattern)
	text_len = len(text)

	# nothing can match; also avoids indexing into an empty sequence
	if pattern_len == 0 or pattern_len > text_len:
		return []

	# failure table: fail[k] is the length of the longest proper prefix
	# of pattern[:k] that is also a suffix of it; fail[0] = -1 acts as
	# the sentinel meaning "no prefix left to fall back to"
	fail = [0] * (pattern_len + 1)
	fail[0] = -1
	border = -1
	for idx in range(pattern_len):
		while border >= 0 and pattern[idx] != pattern[border]:
			border = fail[border]
		border = border + 1
		fail[idx + 1] = border

	# search: one forward pass over the text, never moving backwards
	positions = []
	matched = 0
	for idx in range(text_len):
		while matched >= 0 and text[idx] != pattern[matched]:
			matched = fail[matched]
		matched = matched + 1

		if matched == pattern_len:
			positions.append(idx - pattern_len + 1)
			# fall back to the longest border of the whole pattern so
			# that overlapping occurrences are still found
			matched = fail[matched]

	return positions


