#### engine-userdict2-lib.rb: Library for engine-userdict2.rb
#### $Id: engine-userdict2-lib.rb,v 1.2 2005/03/07 07:51:32 komatsu Exp $
####
#### Copyright (C) 2003 Hiroyuki Komatsu <komatsu@taiyaki.org>
####     All rights reserved.
####     This is free software with ABSOLUTELY NO WARRANTY.
####
#### You can redistribute it and/or modify it under the terms of 
#### the GNU General Public License version 2.

require 'prime/taiyaki.rb'
# Ruby 1.8 multibyte setting: "e" selects EUC for string and regexp handling.
# NOTE(review): Ruby 1.9+ ignores $KCODE — confirm which interpreter this
# library is expected to run on.
$KCODE = "e"

# Process-wide statistics counters, updated as a side effect of writes.
# Number of keys newly stored by PrimeHash#add_key.
$TOTAL = 0
# Total number of chain entries PrimeHash#add_key walked before storing a
# new key.
$OVERED = 0
# Number of calls to PrimeDynamicDictWord#add_word.
$WORDS = 0

# Base class for the binary, append-oriented files that make up the
# dictionary.  It owns the file handle (@file), provides advisory locking
# and a helper that appends a record and reports where it was written.
# Subclasses override initialize_file() to write their initial contents.
class PrimeDynamicFileCore
  # Opens +filename+ for reading and writing in binary mode.  When the file
  # does not exist yet it is created, initialize_file() is called to write
  # its initial contents, and the result is flushed to disk.
  def initialize(filename)
    # File.open (rather than Kernel#open) so that a name starting with "|"
    # can never be interpreted as a command to run.
    if File::exist?(filename) then
      @file = File.open(filename, 'r+b')
    else
      @file = File.open(filename, 'w+b')
      initialize_file()
      @file.flush()
    end
  end

  # Takes an exclusive advisory lock (flock) on the file.
  #
  # With a block: runs the block and always releases the lock afterwards,
  # even when the block raises or leaves through a non-local +return+.
  # Returns the value of the block.
  #
  # Without a block: the lock stays held until unlock() is called.
  def lock()
    @file.flock(File::LOCK_EX)
    if block_given?() then
      begin
        yield
      ensure
        unlock()
      end
    end
  end

  # Flushes buffered writes and then releases the advisory lock.  The flush
  # comes first so that no buffered data reaches the file after another
  # process has been allowed to take the lock.
  def unlock()
    @file.flush()
    @file.flock(File::LOCK_UN)
  end

  # Hook called once when a new file has just been created.
  # The default implementation writes nothing.
  def initialize_file()
    # do nothing
  end

  # Appends +data+ (a String) at the end of the file.
  # Returns the byte offset at which the data starts.
  def append_data(data)
    @file.seek(0, IO::SEEK_END)
    pointer = @file.pos
    @file.write(data)
    return pointer
  end
end

# On-disk chained hash table mapping (key, context) to a 32-bit value.
#
# File layout ("<dictname>-hash"):
#   * @size bucket slots of 4 bytes each; a slot holds the file offset of
#     the first node of its chain, or 0 when the bucket is empty.
#   * appended nodes: next_ptr (4), value (4), key_length (1), key bytes.
#     The stored key is the caller's key followed by the 4-byte big-endian
#     context.
#
# A value of 0 doubles as "not found", so callers must not store 0.
# Keys are treated strictly as byte strings, so lookups behave the same no
# matter which encoding the caller's String carries.
#
# NOTE(review): hash(key) shadows Object#hash with a different arity, so
# instances of this class cannot be used as Hash keys.
# NOTE(review): key_length is a single byte; a key plus context longer than
# 255 bytes is stored with a wrapped length and will never be found again.
class PrimeHash < PrimeDynamicFileCore
  # Opens (or creates) "<dictname>-hash" and loads the permutation table
  # from the "hash-table" file in the same directory as +dictname+.
  def initialize(dictname)
    # @size must be set before super(): creating a new file calls
    # initialize_file(), which needs it.
    @size = 65536
    super(dictname + '-hash')
    @filename_hashtable = File::dirname(dictname) + "/hash-table"
    initialize_table()
  end

  # Writes the empty bucket area: @size slots of four zero bytes.
  def initialize_file()
    @file.seek(0)
    @file.write("\000\000\000\000" * @size)
  end

  # Loads the permutation table from disk, or builds and saves it when the
  # table file does not exist yet.
  def initialize_table()
    if File::exist?(@filename_hashtable) then
      read_table()
    else
      make_table()
      write_table()
    end
  end

  # Builds the permutation of 0...@size used by hash().  The shuffle is
  # fully deterministic, so every process derives the same table.
  def make_table()
    @table = (0...@size).to_a
    k = 7
    4.times {
      @size.times {|i|
        s = @table[i]
        k = (k + s) % @size
        @table[i] = @table[k]
        @table[k] = s
      }
    }
  end

  # Reads the permutation table (big-endian 16-bit entries).  When the file
  # does not hold exactly @size entries (e.g. another process is still
  # writing it) the table is recomputed in memory instead; this is safe
  # because make_table() is deterministic.
  def read_table()
    File.open(@filename_hashtable, 'rb') {|io|
      @table = io.read.unpack('n*')
    }
    make_table() if @table.length != @size
  end

  # Saves the permutation table as big-endian 16-bit entries.
  def write_table()
    File.open(@filename_hashtable, 'wb') {|io|
      io.write(@table.pack('n*'))
    }
  end

  # Returns the byte offset of the bucket slot for +key+.
  # The key is hashed byte by byte through the permutation table; the
  # result is multiplied by 4 because each slot is 4 bytes wide.
  def hash(key)
    value = key.bytesize % @size
    key.each_byte {|byte|
      value = @table[(value + byte) % @size]
    }
    return (value * 4)
  end

  # Looks up +key+ within +context+.
  # Returns the stored value, or 0 when the key is not present.
  def search(key, context = 0)
    # 'a*N' yields a binary string: key bytes followed by the context.
    key = [key, context].pack('a*N')
    @file.seek(hash(key))
    pointer = @file.read(4).unpack('N').first
    while pointer != 0 do
      @file.seek(pointer)
      (nextpointer, value, length) = @file.read(4 + 4 + 1).unpack('NNC')
      key2 = @file.read(length)
      return value if key == key2
      pointer = nextpointer
    end
    return 0
  end

  # Stores +value+ under +key+ within +context+ while holding the file lock.
  # When the key already exists nothing is written; if the existing value
  # differs, an error message is printed to $stderr.
  # Updates the $TOTAL and $OVERED statistics for newly stored keys.
  # The return value is not meaningful.
  def add_key(key, value, context = 0)
    key = [key, context].pack('a*N')

    lock()
    begin
      # +pointer+ is always the offset of the 4-byte field that must be
      # patched to link a new node: first the bucket slot, then the
      # next_ptr field of the last node visited.
      pointer = hash(key)
      @file.seek(pointer)
      nextpointer = @file.read(4).unpack('N').first
      overed = 0
      while nextpointer != 0 do
        overed += 1
        pointer = nextpointer
        @file.seek(pointer)
        (nextpointer, value2, length) = @file.read(4 + 4 + 1).unpack('NNC')
        key2 = @file.read(length)
        if key == key2 then
          if value != value2 then
            $stderr.puts "ERROR: PrimeHash::add_key value mismatch."
          end
          return
        end
      end

      $TOTAL  += 1
      $OVERED += overed
      data = [0, value, key.bytesize, key].pack('NNCa*')
      value_pointer = append_data(data)

      @file.seek(pointer)
      @file.write([value_pointer].pack('N'))
    ensure
      # Runs on every exit path, including the early return above.
      unlock()
    end
  end
end

# Word storage file ("<dictname>-words").
#
# Record layout:
#   pron_length (1), literal_length (1), pos (2), timestamp (4),
#   annotation (4), pron bytes, literal bytes
# Lengths are byte counts; multi-byte integers are big-endian.
#
# NOTE(review): the length fields are single bytes, so a pron or literal
# longer than 255 bytes is stored with a wrapped length.
class PrimeDynamicDictWord < PrimeDynamicFileCore
  # Opens (or creates) the word file for +dictname+.
  def initialize(dictname)
    super(dictname + '-words')
  end

  # Writes the file header.  Because the header occupies offset 0, no word
  # record ever gets pointer 0, which the index classes use as "not found".
  def initialize_file()
    append_data("PRIME DICT WORD")
  end

  # Appends a new word record and increments $WORDS.
  # Returns the byte offset (word pointer) of the new record.
  def add_word(pron, literal, pos, timestamp, annotation)
    $WORDS += 1
    # Byte sizes (not character counts) are stored, and 'a*' copies the raw
    # bytes, so multibyte strings in any encoding are written correctly.
    data = [pron.bytesize, literal.bytesize, pos, timestamp, annotation,
            pron, literal].pack('CCnNNa*a*')
    return append_data(data)
  end

  # Reads the record at +pointer+.
  # Returns [pron, literal, pos, timestamp, annotation]; pron and literal
  # are the raw bytes as read from the file.
  def get_word(pointer)
    @file.seek(pointer)
    # Fixed-size header: 1 + 1 + 2 + 4 + 4 = 12 bytes.
    (pron_length, literal_length, pos, timestamp, annotation) =
      @file.read(12).unpack('CCnNN')
    pron    = @file.read(pron_length)
    literal = @file.read(literal_length)
    return [pron, literal, pos, timestamp, annotation]
  end

  # Overwrites the timestamp field of the record at +pointer+.
  def set_word_timestamp(pointer, timestamp)
    @file.seek(pointer + (1 + 1 + 2)) # pron_len, literal_len, pos
    @file.write([timestamp].pack('N'))
  end

  # Overwrites the annotation field of the record at +pointer+.
  def set_word_annotation(pointer, annotation)
    @file.seek(pointer + (1 + 1 + 2 + 4)) # Above + timestamp
    @file.write([annotation].pack('N'))
  end
end

# INDEX:
# pos_ptr (4), exact_word_ptr (4), prefix_word_ptr (4)
# POS LIST:
# pos (2), next_pos_ptr (4)
# EXACT WORD LIST:
# exact_word (4), next_ext_ptr (4)
# PREFIX WORD LIST:
# prefix_word (4), next_pre_ptr (4)

# WORD:
# pron_length (1), literal_length (1), pos (2), timestamp (4),
# annotation (4), pron (Str), literal (Str)

# Index file ("<dictname>-index") that maps a key to linked lists stored in
# the same file.  A PrimeHash maps (key, context) to the offset of a 12-byte
# index record holding three list heads:
#   pos_ptr (4), exact_word_ptr (4), prefix_word_ptr (4)
# List nodes are appended to the file:
#   POS link:  pos (2), next_ptr (4)
#   word link: word_ptr (4), next_ptr (4)   (exact and prefix lists)
# A pointer value of 0 terminates a list.  All integers are big-endian.
#
# None of the methods here take the file lock themselves.
class PrimeDynamicDictIndex < PrimeDynamicFileCore
  # The PrimeHash used to find index records.
  # NOTE(review): this reader replaces Object#hash on instances.
  attr_reader :hash

  # dictname:: base path; the hash files and "<dictname>-index" derive from it.
  # worddata:: object answering get_word(pointer); used by the search methods.
  def initialize(dictname, worddata)
    initialize_analysis()
    @word = worddata
    @hash = PrimeHash.new(dictname)
    super(dictname + '-index')
  end

  # Writes the file header, so that offset 0 is never a record offset.
  def initialize_file()
    append_data("PRIME DICT INDEX")
  end

  # Resets the in-memory counters reported by print_analysis().
  def initialize_analysis()
    @num_prefix_link = 0
    @num_exact_link  = 0
    @num_pos_link    = 0
    @num_index       = 0
  end

  # Prints how many records of each kind this instance has appended, and
  # the number of bytes they occupy.
  def print_analysis()
    puts "Number of indexes:      #{@num_index}"
    puts "Number of pos link:     #{@num_pos_link}"
    puts "Number of prefix link:  #{@num_prefix_link}"
    puts "Number of exact link:   #{@num_exact_link}"
    puts
    puts "Size of indexes(12):    #{@num_index * 12}"
    puts "Size of pos link(6):    #{@num_pos_link * 6}"
    puts "Size of prefix link(8): #{@num_prefix_link * 8}"
    puts "Size of exact link(8):  #{@num_exact_link * 8}"
  end

  ### Input operation

  # Creates the index record for +key+ in +context+ and registers the word
  # in it.  Returns the offset of the new index record.
  def new_index(word_ptr, pos, key, context = 0)
    pointer = new_index_append_data(key, context)
    add_index(pointer, word_ptr, pos, key, context)
    return pointer
  end

  # Appends an empty index record (three zero pointers) and registers its
  # offset in the hash under (key, context).  Returns the record offset.
  def new_index_append_data(key, context)
    @num_index += 1

    data = [0, 0, 0].pack('NNN')
    pointer = append_data(data)

    @hash.add_key(key, pointer, context)
    return pointer
  end

  # Registers +word_ptr+ / +pos+ in the index record at +pointer+ (POS list
  # and exact-match list) and in the prefix list of every prefix of +key+.
  def add_index(pointer, word_ptr, pos, key, context = 0)
    add_index_pos(pointer, pos)
    add_index_exact_word(pointer, word_ptr)
    add_index_prefix_key(key, word_ptr, context)
  end

  # Adds +word_ptr+ to the prefix list of each prefix of +key+: the empty
  # string, every leading run of characters, and finally the whole key.
  # Index records that do not exist yet are created on the way.
  def add_index_prefix_key(key, word_ptr, context)
    # Set prefix word into each prefix index.
    pre_key = ''
    key.split(//).each {|char|
      pre_ptr = @hash.search(pre_key, context)
      if pre_ptr == 0 then
        pre_ptr = new_index_append_data(pre_key, context)
      end
      add_index_prefix_word(pre_ptr, word_ptr)
      pre_key += char
    }

    # The loop stops one step short; handle the complete key here.
    pre_ptr = @hash.search(pre_key, context)
    if pre_ptr == 0 then
      pre_ptr = new_index_append_data(pre_key, context)
    end
    add_index_prefix_word(pre_ptr, word_ptr)
  end

  # Adds +pos+ to the POS list of the index record at +pointer+ unless it
  # is already present.  New links are inserted at the head of the list.
  def add_index_pos(pointer, pos)
    top_pos_ptr = get_pos_pointer(pointer)
    pos_ptr = top_pos_ptr

    while pos_ptr != 0 do
      (target_pos, next_ptr) = get_pos_link(pos_ptr)
      if pos == target_pos then
        return
      end
      pos_ptr = next_ptr
    end

    @num_pos_link += 1
    data = [pos, top_pos_ptr].pack('nN')
    dict_index_last_pointer = append_data(data)

    set_pos_pointer(pointer, dict_index_last_pointer)
  end

  # Adds +word_ptr+ to the exact-match list of the index record at
  # +pointer+ unless it is already present.  New links are inserted at the
  # head of the list.
  def add_index_exact_word(pointer, word_ptr)
    top_ext_ptr = get_exact_pointer(pointer)
    ext_ptr = top_ext_ptr

    while ext_ptr != 0 do
      (target_word, next_ptr) = get_word_link(ext_ptr)
      if word_ptr == target_word then
        return
      end
      ext_ptr = next_ptr
    end

    @num_exact_link += 1
    data = [word_ptr, top_ext_ptr].pack('NN')
    dict_index_last_pointer = append_data(data)

    set_exact_pointer(pointer, dict_index_last_pointer)
  end

  # Adds +word_ptr+ at the head of the prefix list of the index record at
  # +pointer+.  Nothing happens when the word is already among the first
  # nine entries.
  def add_index_prefix_word(pointer, word_ptr)
    # The number of entries in a prefix list is always 10 or fewer.
    # When the list already holds 10 entries, the last entry is overwritten
    # and reused as the new first entry.

    top_pre_ptr = get_prefix_pointer(pointer)
    next_ptr = top_pre_ptr
    pre_ptr  = nil
    # Inspect at most the first nine links.
    9.times {
      pre_ptr = next_ptr

      if pre_ptr == 0 then
        # Reached the end of a list with fewer than nine links: prepend.
        @num_prefix_link += 1
        data = [word_ptr, top_pre_ptr].pack('NN')
        dict_index_last_pointer = append_data(data)

        set_prefix_pointer(pointer, dict_index_last_pointer)
        return
      end

      (target_word, next_ptr) = get_word_link(pre_ptr)
      if word_ptr == target_word then
        return
      end
    }

    # Here pre_ptr is the ninth link and next_ptr is the tenth link, or 0
    # when the list has exactly nine links.

    # Write 0 to the next link.
    # (The ninth link becomes the tail of the list.)
    @file.seek(pre_ptr + 4)
    @file.write([0].pack('N'))

    data = [word_ptr, top_pre_ptr].pack('NN')
    if next_ptr != 0 then
      # Recycle the detached tenth link as the new head.
      @file.seek(next_ptr)
      @file.write(data)
      set_prefix_pointer(pointer, next_ptr)
    else
      # Only nine links so far: append a tenth one as the new head.
      @num_prefix_link += 1
      dict_index_last_pointer = append_data(data)
      set_prefix_pointer(pointer, dict_index_last_pointer)
    end      
  end

# Illustration of recycling the last link (shown with three links; "=x"
# means "points to the link holding x").
#
# Before: the head points to word1.
# =word1
# top_pre_ptr  [word1 =word2]
#              [word2 =word3]
#              [word3 0]

# After: the slot that held word3 has been rewritten and is the new head;
# its next pointer is the old head, and word2 now ends the list.
# =word3
# top_pre_ptr  [word1 =word2]
#              [word2 0]
#              [word3 =word1]


  ### Output operation

  # Returns the POS values registered for +key+ in +context+, in list
  # order; an empty Array when the key is unknown.
  def search_pos_list(key, context = 0)
    pos_list = []
    pointer = @hash.search(key, context)
    if pointer == 0 then
      return pos_list
    else
      pos_ptr = get_pos_pointer(pointer)

      while pos_ptr != 0 do
        (pos, next_ptr) = get_pos_link(pos_ptr)
        pos_list.push(pos)
        pos_ptr = next_ptr
      end
      return pos_list
    end
  end

  # Returns the entries of the prefix list of +key+ in +context+ as
  # [word_ptr, word_data] pairs, where word_data is what
  # @word.get_word(word_ptr) returns.  A +pos+ of 0 or nil disables the
  # POS filter; otherwise only words whose third field equals +pos+ are
  # returned.
  def search_prefix(key, context = 0, pos = 0)
    pos = 0 if pos.nil?
    word_list = []
    pointer = @hash.search(key, context)
    if pointer == 0 then
      return word_list
    else
      word_ptr = get_prefix_pointer(pointer)

      while word_ptr != 0 do
        (word, next_ptr) = get_word_link(word_ptr)
        word_data = @word.get_word(word)
        if pos == 0 or word_data[2] == pos then
          word_list.push([word, word_data])
        end
        word_ptr = next_ptr
      end
      return word_list
    end
  end

  # Same as search_prefix(), but walks the exact-match list of +key+.
  def search_exact(key, context = 0, pos = 0)
    pos = 0 if pos.nil?
    word_list = []
    pointer = @hash.search(key, context)
    if pointer == 0 then
      return word_list
    else
      word_ptr = get_exact_pointer(pointer)

      while word_ptr != 0 do
        (word, next_ptr) = get_word_link(word_ptr)
        word_data = @word.get_word(word)
        if pos == 0 or word_data[2] == pos then
          word_list.push([word, word_data])
        end
        word_ptr = next_ptr
      end
      return word_list
    end
  end

  ### Index operation
  # Accessors for the three list heads of the index record at +pointer+:
  # POS list at offset 0, exact list at offset 4, prefix list at offset 8.

  # Returns the head of the POS list.
  def get_pos_pointer(pointer)
    @file.seek(pointer)
    return @file.read(4).unpack('N').first
  end

  # Returns the head of the exact-match word list.
  def get_exact_pointer(pointer)
    @file.seek(pointer + 4)
    return @file.read(4).unpack('N').first
  end

  # Returns the head of the prefix word list.
  def get_prefix_pointer(pointer)
    @file.seek(pointer + 8)
    return @file.read(4).unpack('N').first
  end

  # Sets the head of the POS list.
  def set_pos_pointer(pointer, pos_ptr)
    @file.seek(pointer)
    @file.write([pos_ptr].pack('N'))
  end

  # Sets the head of the exact-match word list.
  def set_exact_pointer(pointer, ext_ptr)
    @file.seek(pointer + 4)
    @file.write([ext_ptr].pack('N'))
  end

  # Sets the head of the prefix word list.
  def set_prefix_pointer(pointer, pre_ptr)
    @file.seek(pointer + 8)
    @file.write([pre_ptr].pack('N'))
  end

  ### Index Data operation

  # Reads the word link at +pointer+.  Returns [word_ptr, next_ptr].
  def get_word_link(pointer)
    @file.seek(pointer)
    word_ptr = @file.read(4).unpack('N').first
    next_ptr = @file.read(4).unpack('N').first
    return [word_ptr, next_ptr]
  end

  # Reads the POS link at +pointer+.  Returns [pos, next_ptr]; the first
  # element is the 16-bit POS value itself (despite the local name).
  def get_pos_link(pointer)
    @file.seek(pointer)
    pos_ptr  = @file.read(2).unpack('n').first
    next_ptr = @file.read(4).unpack('N').first
    return [pos_ptr, next_ptr]
  end
end

# Index keyed by pronunciation (files derived from "<dictname>_pron").
class PrimeDynamicDictPron < PrimeDynamicDictIndex
  # dictname:: base path of the dictionary.
  # worddata:: word storage object answering get_word(pointer).
  def initialize(dictname, worddata)
    super(dictname + '_pron', worddata)
  end

  # Registers the word at +word_ptr+ under its pronunciation +pron+ in
  # +context+, creating the index record when it does not exist yet.
  # +literal+ is accepted for signature parity with
  # PrimeDynamicDictLiteral#set_index and is not used here.
  def set_index(word_ptr, pos, pron, literal, context = 0)
    pointer = @hash.search(pron, context)
    if pointer == 0 then
      new_index(word_ptr, pos, pron, context)
    else
      add_index(pointer, word_ptr, pos, pron, context)
    end
  end

  ### Output operation

  # Returns the word pointer of the word with the given pronunciation,
  # literal and POS in +context+, or 0 when there is no such word.
  def search(pron, literal, pos, context = 0)
    pointer = @hash.search(pron, context)
    return 0 if pointer == 0
    return search_literal(pointer, literal, pos)
  end

  # Walks the exact-match list of the index record at +pointer+ and returns
  # the pointer of the first word whose literal and POS match, or 0.
  def search_literal(pointer, literal, pos)
    # Compare raw bytes: the strings read back from the word file carry no
    # text encoding, so on Ruby 1.9+ a plain == against the caller's
    # encoded String would never match non-ASCII text.
    literal_bytes = literal.unpack('C*')
    literal_ptr = get_exact_pointer(pointer)
    while literal_ptr != 0 do
      (word_ptr, next_ptr) = get_word_link(literal_ptr)
      (_w_pron, w_literal, w_pos) = @word.get_word(word_ptr)
      if w_pos == pos && w_literal.unpack('C*') == literal_bytes then
        return word_ptr
      end
      literal_ptr = next_ptr
    end
    return 0
  end
end

# Index keyed by literal, i.e. the written form (files derived from
# "<dictname>_literal").
class PrimeDynamicDictLiteral < PrimeDynamicDictIndex
  # dictname:: base path of the dictionary.
  # worddata:: word storage object answering get_word(pointer).
  def initialize(dictname, worddata)
    super(dictname + '_literal', worddata)
  end

  # Registers the word at +word_ptr+ under its +literal+ in +context+,
  # creating the index record when it does not exist yet.
  # +pron+ is accepted for signature parity with
  # PrimeDynamicDictPron#set_index and is not used here.
  def set_index(word_ptr, pos, pron, literal, context = 0)
    pointer = @hash.search(literal, context)
    if pointer == 0 then
      new_index(word_ptr, pos, literal, context)
    else
      add_index(pointer, word_ptr, pos, literal, context)
    end
  end

  ### Output operation

  # Returns the word pointer of the word with the given literal,
  # pronunciation and POS in +context+, or 0 when there is no such word.
  def search(pron, literal, pos, context = 0)
    pointer = @hash.search(literal, context)
    return 0 if pointer == 0
    return search_pron(pointer, pron, pos)
  end

  # Walks the exact-match list of the index record at +pointer+ and returns
  # the pointer of the first word whose pronunciation and POS match, or 0.
  def search_pron(pointer, pron, pos)
    # Compare raw bytes: the strings read back from the word file carry no
    # text encoding, so on Ruby 1.9+ a plain == against the caller's
    # encoded String would never match non-ASCII text.
    pron_bytes = pron.unpack('C*')
    pron_ptr = get_exact_pointer(pointer)
    while pron_ptr != 0 do
      (word_ptr, next_ptr) = get_word_link(pron_ptr)
      (w_pron, _w_literal, w_pos) = @word.get_word(word_ptr)
      if w_pos == pos && w_pron.unpack('C*') == pron_bytes then
        return word_ptr
      end
      pron_ptr = next_ptr
    end
    return 0
  end
end



# Facade over the three dictionary files: the word storage, the
# pronunciation index and the literal index.
class PrimeDynamicDict
  # Opens (or creates) the word file and both indexes for +dictname+.
  def initialize(dictname)
    @dict_word    = PrimeDynamicDictWord.new(dictname)
    @dict_pron    = PrimeDynamicDictPron.new(dictname,    @dict_word)
    @dict_literal = PrimeDynamicDictLiteral.new(dictname, @dict_word)
  end

  ### Output operation
  # The methods below delegate to the pronunciation index.

  # POS values registered for the pronunciation +key+.
  def search_pos_list(key, context = 0)
    return @dict_pron.search_pos_list(key, context)
  end

  # Words whose pronunciation is exactly +key+.
  def search_exact(key, context = 0, pos = 0)
    return @dict_pron.search_exact(key, context, pos)
  end

  # Words registered in the prefix list of the pronunciation +key+.
  def search_prefix(key, context = 0, pos = 0)
    return @dict_pron.search_prefix(key, context, pos)
  end

  # The methods below delegate to the literal index.

  # POS values registered for the literal +key+.
  def search_literal_pos_list(key, context = 0)
    return @dict_literal.search_pos_list(key, context)
  end

  # Words whose literal is exactly +key+.
  def search_literal_exact(key, context = 0, pos = 0)
    return @dict_literal.search_exact(key, context, pos)
  end

  # Words registered in the prefix list of the literal +key+.
  def search_literal_prefix(key, context = 0, pos = 0)
    return @dict_literal.search_prefix(key, context, pos)
  end


  ### Input operation

  # Records a word.  When the (pron, literal, pos) triple is already known
  # its timestamp and annotation are overwritten; otherwise a new word
  # record is appended.  The word is then indexed under context 0 and, when
  # +context+ is non-zero, under that context as well.
  #
  # All three files are locked for the duration of the update and are
  # unlocked again even when a step raises.
  #
  # Returns the word pointer.
  def learn_word(pron, literal, pos, timestamp = 0, context = 0, annotation = 0)
    context_0 = 0

    begin
      @dict_word.lock()
      @dict_pron.lock()
      @dict_literal.lock()

      word_ptr = @dict_pron.search(pron, literal, pos, context_0)
      if word_ptr == 0 then
        word_ptr = @dict_word.add_word(pron, literal, pos, timestamp, annotation)
      else
        @dict_word.set_word_timestamp(word_ptr, timestamp)
        @dict_word.set_word_annotation(word_ptr, annotation)
      end

      @dict_pron.set_index(   word_ptr, pos, pron, literal, context_0)
      @dict_literal.set_index(word_ptr, pos, pron, literal, context_0)

      ## case: context != 0
      if context != 0 then
        @dict_pron.set_index(   word_ptr, pos, pron, literal, context)
        @dict_literal.set_index(word_ptr, pos, pron, literal, context)
      end

      return word_ptr
    ensure
      # Releasing a lock that was never taken is harmless, so all three
      # files are unlocked no matter how far the update got.
      @dict_word.unlock()
      @dict_pron.unlock()
      @dict_literal.unlock()
    end
  end
end
 
# class PrimeDynamicDict < PrimeDynamicDictIndex
#   def initialize(dictname)
#     worddata = PrimeDynamicDictWord.new(dictname)
#     super(dictname + '-pron', worddata)
#   end

#   ### Input operation

#   def learn_word(pron, literal, pos, timestamp = 0, context = 0, annotation = 0)
#     ## context := 0
#     pointer = @hash.search(pron, 0)
#     if pointer == 0 then
#       word_ptr  = @word.add_word(pron, literal, pos, timestamp, annotation)
#       new_index(word_ptr, pos, pron, 0)
#     else
#       word_ptr = search_literal(pointer, literal, pos)
#       if word_ptr == 0 then
#         word_ptr = @word.add_word(pron, literal, pos, timestamp, annotation)
#         add_index(pointer, word_ptr, pos, pron, 0)
#       else
#         @word.set_word_timestamp(word_ptr, timestamp)
#         @word.set_word_annotation(word_ptr, annotation)
# #        set_word_order()
#       end
#     end

#     ## case: context != 0
#     if context != 0 then
#       pointer = @hash.search(pron, context)
#       if pointer == 0 then
#         new_index(word_ptr, pos, pron, context)
#       else
#         context_word_ptr = search_literal(pointer, literal, pos)
#         if context_word_ptr == 0 then
#           add_index(pointer, word_ptr, pos, pron, context)
#         end
#       end
#     end
#     return word_ptr
#   end

#   ### Output operation
#   def search(pron, literal, pos, context = 0)
# #    puts "pointer = '#{pointer}', literal_ptr = '#{literal_ptr}'"
#     pointer = @hash.search(pron, context)
#     if pointer == 0 then
#       return 0
#     else
#       return search_literal(pointer, literal, pos)
#     end
#   end

#   def search_literal(pointer, literal, pos)
#     #    puts "search_literal(literal_ptr = '#{literal_ptr}', literal = '#{literal}', pos = '#{pos}')"
#     literal_ptr = get_exact_pointer(pointer)
#     while literal_ptr != 0 do
#       (word_ptr, next_ptr) = get_word_link(literal_ptr)
#       (w_pron, w_literal, w_pos, w_ts, w_ann) = @word.get_word(word_ptr)
#       if w_literal == literal and w_pos == pos then
#         return word_ptr
#       end
#       literal_ptr = next_ptr
#     end
#     return 0
#   end
# end

# Manages parts of speech (POS) for a PRIME dictionary: the mapping
# between POS names and their index numbers.
#
# File layout ("<dictname>-pos<suffix>"), one record per POS, in index order:
#   { pos_name_length (1), pos_name (bytes) } ...
# A new file starts with the single entry "-" (index 0).
#
# Names are matched by their bytes, so a name is found again regardless of
# the encoding attached to the caller's String.
class PrimeDictPOS
  # dictname:: base path of the dictionary (required; the nil default only
  #            exists to keep the original signature).
  # suffix::   appended to the "-pos" file suffix.
  #
  # Raises ArgumentError when +dictname+ is nil.
  def initialize (dictname = nil, suffix = "")
    raise ArgumentError, "dictname is required" if dictname.nil?

    @suffix   = "-pos" + suffix
    @dictname = dictname
    @filename = @dictname + @suffix

    @pos_index = []   # index number -> POS name
    @pos_name  = {}   # POS name bytes -> index number

    initialize_file()
    load_file()
  end

  # Opens the POS file in binary mode, creating it with the default "-"
  # entry when it does not exist.  Leaves the file positioned at the start.
  def initialize_file ()
    if File::exist?(@filename) then
      @file = File.open(@filename, 'r+b')
    else
      @file = File.open(@filename, 'w+b')
      @file.write([1, '-'].pack('Ca*'))
      @file.flush()
      @file.seek(0)
    end
  end

  # Reads every record from the current file position to the end of the
  # file and registers it.  Calling it again later picks up records that
  # were appended in the meantime.
  #
  # Returns true when at least one record was loaded, false otherwise.
  def load_file ()
    loaded = false
    until @file.eof? do
      record_start = @file.pos
      length = @file.read(1).unpack('C')[0]
      pos_name = @file.read(length)
      if pos_name.nil? || pos_name.bytesize < length then
        # Incomplete trailing record (e.g. still being written): rewind so
        # that a later call reads it again from its first byte.
        @file.seek(record_start)
        break
      end
      set_pos(pos_name)
      loaded = true
    end
    return loaded
  end

  # Registers +pos_name+ in memory under the next free index number.
  # Returns that index.  Nothing is written to the file.
  def set_pos(pos_name)
    index = @pos_index.length
    @pos_name[byte_key(pos_name)] = index
    @pos_index.push(pos_name)
    return index
  end

  # Registers +pos_name+ and appends it to the file.  Returns its index.
  #
  # Raises ArgumentError when the name is longer than 255 bytes, because
  # the record format stores the length in a single byte.
  def add_pos(pos_name)
    length = pos_name.bytesize
    if length > 255 then
      raise ArgumentError, "POS name is longer than 255 bytes"
    end

    index = set_pos(pos_name)
    @file.seek(0, IO::SEEK_END)
    @file.write([length, pos_name].pack('Ca*'))
    # Flush so that other readers of the file can see the new entry.
    @file.flush()
    return index
  end

  # Returns the index number of +pos_name+.  Unknown names are looked for
  # among newly appended file records first, and added as a new POS when
  # they are still unknown.
  def get_index (pos_name)
    key = byte_key(pos_name)
    if @pos_name.has_key?(key) then
      return @pos_name[key]
    elsif load_file() and @pos_name.has_key?(key) then
      return @pos_name[key]
    else
      return add_pos(pos_name)
    end
  end

  # Returns the POS name for +index+, reloading newly appended file records
  # when the index is not known yet.  Returns nil for an unknown index.
  def get_pos (index)
    if @pos_index.length > index then
      return @pos_index[index]
    elsif load_file() and @pos_index.length > index then
      return @pos_index[index]
    else
      return nil
    end
  end

  private

  # Returns a binary copy of +pos_name+ for use as a lookup key, so names
  # read from the file and names passed by callers compare by bytes.
  def byte_key(pos_name)
    return [pos_name].pack('a*')
  end
end
