#!/usr/bin/python
# -*- coding: UTF-8 -*-

"""
    This file is part of asd.
    
    asd is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    asd Copyright 2005 Antonini Daniele <arpeda@gmail.com>

"""


import string
import struct
import sys
import re
import os.path

sys.path.append("/home/arpeda/my-projects/asd-main/asd/ASD/Indexer/")
import stemmer

# Lessico lookup table: filled further down from lessico.asd, it maps each
# term to the remainder of its line, which is later converted with int() and
# used as a seek position inside document.asd.
dictionary = {}

def usage():
    """ Write a short command-line synopsis to standard error.

        Called when the script is started without any search word, so the
        user is told what was expected. Returns None; terminating the
        program is left to the caller.
    """
    sys.stderr.write( "usage: %s WORD [WORD ...]\n" % sys.argv[0] )
    sys.stderr.write( "Lists the man pages that contain every given word.\n" )
    return

def remove_dups(lst):
    """ Return a new list holding the elements of lst without duplicates.

        The first occurrence of every element is kept and the relative
        order of the survivors is the order they have in lst; lst itself
        is not modified.

        Drawback:
        - Elements must be hashable, so it does not work with lists,
          dicts etc. as list elements (a TypeError is raised).
    """
    seen = {}       # dict used as a set of the elements met so far
    unique = []
    for item in lst:
        if item not in seen:
            seen[item] = None
            unique.append( item )
    return unique

def concatenate( lst1, lst2 ):
    """ Join lst1 and lst2 with + and return the result of passing the
        joined sequence through remove_dups.
    """
    joined = lst1 + lst2
    return remove_dups( joined )

def intersect( lst1, lst2 ):
    """ Select the elements of lst2 that are also contained in lst1.

        The selection is done by the builtin filter(), so the elements
        come out in lst2 order and repeated elements of lst2 are kept.
    """
    def in_first( element ):
        return element in lst1

    return filter( in_first, lst2 )

def difference( lst1, lst2 ):
    """ Select the elements of lst1 that are not contained in lst2
        (lst1 - lst2).

        The selection is done by the builtin filter(), so the elements
        come out in lst1 order.
    """
    def missing_from_second( element ):
        return element not in lst2

    return filter( missing_from_second, lst1 )

def removeEmptyOperation( lst1, op ):
    """ Select the elements of lst1 that differ from op.

        Membership in a one-element container is used for the comparison
        and the builtin filter() does the selection, so the remaining
        elements keep their lst1 order.
    """
    unwanted = [op]

    def is_wanted( element ):
        return element not in unwanted

    return filter( is_wanted, lst1 )

## Check database
##   - the three index files must all exist; stop at the first missing one
##
requiredFiles = ( "/var/cache/man/asd/lessico.asd",
                  "/var/cache/man/asd/document.asd",
                  "/var/cache/man/asd/man_page_title.asd" )

for requiredFile in requiredFiles:
    if not os.path.isfile( requiredFile ):
        sys.exit( "inverted-file not exists: execute create_asdb to create database" )

## Check parameter
##   - at least one argument besides the script name is needed
##
if len(sys.argv) == 1:
    usage(  )
    sys.exit( -1 )

## Read lessico and create hash
##   - every line is split at its first blank: the first field becomes the
##     key, the rest of the line (trailing newline included) the value
##
fileDict = open( '/var/cache/man/asd/lessico.asd','r' )
try:
    for value in fileDict:
        hashValue = string.split( value, ' ', 1 )
        if len( hashValue ) < 2:
            # blank or truncated line: there is nothing to map, so skip it
            # instead of dying with an IndexError
            continue
        dictionary[hashValue[0]] = hashValue[1]
finally:
    # release the file even when reading fails half way through
    fileDict.close()

## Get words to search
##   - every command-line argument may hold several whitespace-separated
##     words, e.g. when the whole query was quoted in the shell
##
wordToSearchTmp = []
wordToSearchStemmed = []

for argument in sys.argv[1:]:
    # split() without a separator also drops leading/trailing blanks and
    # yields nothing for an empty argument, so no word ever carries blanks
    wordToSearchTmp.extend( string.split( argument ) )

if not wordToSearchTmp:
    # only empty or blank arguments were given: treat it like no arguments
    usage(  )
    sys.exit( -1 )

## Stemming word
##
ps = stemmer.PorterStemmer()

for word in wordToSearchTmp:
    word = (ps.stem( word, 0, len(word)-1 )).lower()
    wordToSearchStemmed.append( word )

remove_dups( wordToSearchStemmed )

print "Your words are stemmed in ",
print wordToSearchStemmed

## Searching Fase
##   - checking if words are present in lessico
##
noWord = []
docIDList = []

for word in wordToSearchStemmed:
    if not dictionary.has_key( word ):
        noWord.append(word)

if len(noWord):
    print "No man pages contains:",
    for i in range( 0, len(noWord) ):
        print " " + noWord[i],
    print
    sys.exit( 0 )

## Satisfy query
##   - the first three bytes of document.asd are read as struct format
##     characters: element counter, docID and frequence
##   - the lessico value of a word, converted with int(), is the position
##     of its list: one counter followed by that many (docID, frequence)
##     pairs
##
def _read_doc_ids( postingFile, offset, countFormat, pairFormat, pairSize ):
    """ Return the docIDs (as ints) of the list that starts at offset.

        postingFile  open file object positioned anywhere
        offset       position of the list's element counter
        countFormat  struct format of the element counter
        pairFormat   struct format of one (docID, frequence) pair
        pairSize     number of bytes to read for one pair

        struct.error is raised when the file ends before the list does.
    """
    postingFile.seek( offset )
    rawCount = postingFile.read( struct.calcsize( countFormat ) )
    count = struct.unpack( countFormat, rawCount )[0]

    docIDs = []
    for _ in range( count ):
        rawPair = postingFile.read( pairSize )
        # only the docID is needed here, the frequence is ignored
        docIDs.append( int( struct.unpack( pairFormat, rawPair )[0] ) )
    return docIDs

# binary mode: the content is decoded with struct, not read as text
invertedFile = open( '/var/cache/man/asd/document.asd','rb' )
try:
    numElementType = invertedFile.read( 1 )
    docIDType = invertedFile.read( 1 )
    frequenceType = invertedFile.read( 1 )

    pairFormat = docIDType + frequenceType
    # NOTE(review): with native alignment struct.calcsize( pairFormat ) can
    # be larger than this sum; the sum is kept because it is the size this
    # reader has always used -- confirm against the code writing the file
    pairSize = struct.calcsize( docIDType ) + struct.calcsize( frequenceType )

    docIDList = _read_doc_ids( invertedFile,
                               int( dictionary[wordToSearchStemmed[0]] ),
                               numElementType, pairFormat, pairSize )

    ### intersect docIDList of first word with docIDList of remaining words
    ##
    for word in wordToSearchStemmed[1:]:
        if not docIDList:
            # nothing left to match, the remaining lists cannot add anything
            break
        tmpDocIDList = _read_doc_ids( invertedFile,
                                      int( dictionary[word] ),
                                      numElementType, pairFormat, pairSize )
        docIDList = intersect( docIDList, tmpDocIDList )
finally:
    # close the file even when a seek, read or unpack fails
    invertedFile.close()

## Load man_page_title
##   - every line is split at its first blank: the first field, converted
##     with int(), becomes the key, the rest of the line (trailing newline
##     included) the value
##
hashManPage = {}

fileManPageTitle = open( '/var/cache/man/asd/man_page_title.asd','r' )
try:
    for value in fileManPageTitle:
        hashValue = string.split( value, ' ', 1 )
        if len( hashValue ) < 2:
            # blank or truncated line: there is nothing to map, so skip it
            # instead of dying with an IndexError
            continue
        hashManPage[int(hashValue[0])] = hashValue[1]
finally:
    # release the file even when reading fails half way through
    fileManPageTitle.close(  )

## Print man pages that soddisfy query
##
print
for i in range( 0, len(docIDList) ):
    print hashManPage[docIDList[i]],
