# encoding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search
   metadata harvest, extract topics and associated writings from document set
   (topics use topic_register header)

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: simple xml representation (sax style)

=end
module HARVEST_topics
  require_relative 'author_format'                      # author_format.rb
  include SiSU_Viz
  # Drives the topics harvest: collects the relevant header paragraphs of
  # every source file, has Harvest turn them into index entries, then builds
  # the topic index and writes it out as html.
  class Songsheet
    @@the_idx_topics={}
    # opt must respond to +files+ and +f_pths+; env is only passed along to
    # the classes instantiated in #songsheet.
    def initialize(opt,env)
      @opt=opt
      @env=env
      @file_list=opt.files
    end
    # Reads each file described in @opt.f_pths (hashes with :pth, :f and
    # :lng_is), gathers its @title/@creator/@classify paragraphs per language
    # and feeds them through Harvest, Index and Output_index.
    def songsheet
      puts 'topics:'
      idx_array={}
      @opt.f_pths.each do |src|
        name=src[:f]
        filename=src[:pth] + '/' + name
        lang_hash_file_array={}
        File.open(filename,'r') do |file|
          # a blank line ends each chunk handed to the block
          file.each_line("\n\n") do |para|
            case para
            when /^@(?:title|creator|classify):(?:\s|$)/m
              (lang_hash_file_array[src[:lng_is]] ||= []) << para
            when /^@\S+?:(?:\s|$)/m, /^(?:\s*\n|%+ )/
              # other header, blank line or comment: nothing to keep, read on
              # NOTE(review): the second pattern also matches the blank line
              # that ends most chunks, so the break below is rarely reached -
              # confirm whether scanning should stop after the header
            else
              break
            end
          end
        end
        lang_hash_file_array.each_pair do |lang,paras|
          idx_array[lang] ||= []
          harvest=HARVEST_topics::Harvest.new(@opt,@env,paras,filename,name,idx_array,lang)
          idx_array=harvest.extract_harvest
        end
      end
      index=HARVEST_topics::Index.new(@opt,@env,idx_array,@@the_idx_topics)
      the_idx=index.construct_book_topic_index
      HARVEST_topics::Output_index.new(@opt,the_idx).html_print.html_songsheet
    end
  end
  # Pulls title, subtitle, author and topic_register out of one document's
  # header paragraphs and appends one index entry per topic to
  # idx_array[lang].
  class Harvest
    # opt:: options object; only +cmd+ is read, to decide whether documents
    #       with missing fields are reported
    # env:: environment object; +output_dir_structure+ is read
    # data:: header paragraphs (strings) of the document
    # filename:: path of the source file
    # name:: file name, used to derive the output base name
    # idx_array:: hash of language code => array of entries gathered so far
    # lang:: language code the new entries are filed under
    def initialize(opt,env,data,filename,name,idx_array,lang)
      @opt,@env,@data,@filename,@name,@idx_array,@lang=opt,env,data,filename,name,idx_array,lang
    end
    # Returns idx_array with this document's entries appended to
    # idx_array[lang]. A document lacking title, author or topic_register
    # adds nothing (and is reported when @opt.cmd contains V or M).
    def extract_harvest
      data,filename,name,idx_array,lang=@data,@filename,@name,@idx_array,@lang
      @idx_list,@title,@subtitle,@fulltitle,@author_format=nil,nil,nil,nil,nil
      # make sure the language bucket exists even if the caller did not create it
      idx_array[lang] ||= []
      rgx={
        author: /^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m,
        title: /^@title:[ ]+(.+)/,
        subtitle: /^@title:.+?:subtitle:[ ]+(.+?)\n/m,
        idx: /^@classify:.+?:topic_register:[ ]+(.+?)\n/m,
      }
      data.each do |para|
        if (m=rgx[:idx].match(para))
          @idx_list=m[1]
        end
        if (m=rgx[:title].match(para))
          @title=m[1]
        end
        if (m=rgx[:subtitle].match(para))
          @subtitle=m[1]
        end
        if (m=rgx[:author].match(para))
          @author_format=m[1]
        end
        # stop once everything has been found (tests the variables that are
        # actually assigned above)
        break if @title and @subtitle and @author_format and @idx_list
      end
      @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title
      if @title \
      and @author_format \
      and @idx_list
        creator=FORMAT::Author.new(@author_format.strip).author_details
        @authors,@authorship=creator[:authors],creator[:authorship]
        # drop the .sst/.ssm extension together with an optional ~lang marker
        file=name.sub(/(?:~[a-z]{2,3})?\.ss[mt]$/,'')
        page=if @env.output_dir_structure.by_language_code?
          "#{lang}/sisu_manifest.html"
        else
          "sisu_manifest.#{lang}.html"
        end
        # topics are separated by ';'; surrounding blanks and empty topics are dropped
        topics=@idx_list.split(';').map { |t| t.strip }.reject { |t| t.empty? }
        topics.each do |topic|
          idx_array[lang] << { filename: filename, file: file, rough_idx: topic, title: @fulltitle, author: creator, page: page, lang: lang  }
        end
      else
        p "missing required field in #{@filename} - [title]: <<#{@title}>>; [author]: <<#{@author_format}>>; [idx]: <<#{@idx_list}>>" if @opt.cmd.inspect =~/[VM]/
      end
      idx_array[lang].flatten!
      idx_array
    end
  end
  # Builds the nested topic tree from the harvested entries. Each node is a
  # hash holding the documents filed at that node under :md, and its
  # sub-topics under their (capitalised) names.
  class Index
    # idx_array:: hash of language code => array of entries from Harvest
    # the_idx:: hash the tree is built into (language code => topic tree)
    def initialize(opt,env,idx_array,the_idx)
      @opt,@env,@idx_array,@the_idx=opt,env,idx_array,the_idx
      @@the_idx_topics=@the_idx
    end
    # Upper-cases the first character of txt; nil and '' are returned as is.
    def capital(txt)
      return txt if txt.nil? or txt.empty?
      txt[0].chr.capitalize + txt[1,txt.length]
    end
    # Appends the document described by idx to hash (the :md list of a topic
    # node), with its authors rendered as links into the authors page.
    def contents(lang,hash,idx)
      target=if @env.output_dir_structure.by_language_code?
        'authors.html'
      else
        "authors.#{lang}.html"
      end
      names=idx[:author][:last_first_format_a].map do |n|
        # the anchor is the part before the first comma, blanks as underscores
        s=n.sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_')
        %{<a href="#{target}##{s}">#{n}</a>, }
      end.join
      hash << { filename: idx[:filename], file: idx[:file], author: names, title: idx[:title], page: idx[:page] }
    end
    # Files every entry under its topic path and returns the tree.
    # idx[:rough_idx] is read as levels separated by ':' with alternatives at
    # a level separated by '|'. The document is attached to the node(s) of
    # the last level. Any number of levels is handled, and alternatives above
    # the last level place the remaining levels under each alternative.
    def construct_book_topic_index
      @idx_array.each_pair do |lang,entries|
        @the_idx[lang] ||= {}
        entries.each do |idx|
          unless idx[:rough_idx]
            puts "no topic register in: << #{idx[:filename]} >>"
            next
          end
          levels=idx[:rough_idx].scan(/[^:]+/).map { |lev| lev.scan(/[^|]+/) }
          parents=[@the_idx[lang]]
          levels.each_with_index do |alternatives,depth|
            leaf=(depth == levels.length - 1)
            parents=parents.flat_map do |parent|
              alternatives.map do |term|
                node=(parent[capital(term)] ||= { md: [] })
                contents(lang,node[:md],idx) if leaf
                node
              end
            end
          end
        end
      end
      @the_idx
    end
  end
  class Output_index
    require_relative 'i18n'                               # i18n.rb
    # opt:: options object; the_idx:: topic tree keyed by language code.
    # Also fetches the environment, rc settings and skin from SiSU_Env and
    # prepares the alphabet used for the letter headings: @letter starts at
    # '9' and @alph holds the letters that are still to come.
    def initialize(opt,the_idx)
      @opt=opt
      @the_idx=the_idx
      @env=SiSU_Env::Info_env.new
      @rc=SiSU_Env::Get_init.instance.sisu_yaml.rc
      @alph=%w[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
      @letter=@alph.shift
      @vz=SiSU_Env::Get_init.instance.skin
    end
    # Opens one topics file per language in @the_idx, creating its manifest
    # directory when missing, and keeps the handle in @output[lng][:html].
    # When @opt.cmd contains -M a maintenance copy is also opened in the
    # working directory as @output[lng][:html_mnt]. The handles stay open;
    # html_file_close closes them.
    def html_file_open
      @output ||= {}
      @the_idx.keys.each do |lng|
        @output[lng] ||= {}
        base="#{@env.path.webserv}/#{@opt.base_stub}"
        if @env.output_dir_structure.by_language_code?
          harvest_pth="#{base}/#{lng}/manifest"
          file="#{harvest_pth}/topics.html"
        else
          # every other layout shares one manifest directory and puts the
          # language code in the file name
          harvest_pth="#{base}/manifest"
          file="#{harvest_pth}/topics.#{lng}.html"
        end
        FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth)
        puts "file://#{file}"
        @output[lng][:html]=File.new(file,'w')
        if @opt.cmd.inspect =~/-M/
          # NOTE(review): the same path is opened for every language, so with
          # several languages the maintenance copies overwrite each other
          @output[lng][:html_mnt]=File.new("#{@env.path.pwd}/topics.html",'w')
        end
      end
    end
    # Closes, for every language in @the_idx, the html output file and the
    # maintenance copy if one was opened.
    def html_file_close
      @the_idx.each_key do |lng|
        out=@output[lng]
        out[:html].close
        mnt=out[:html_mnt]
        mnt.close if mnt.class==File
      end
    end
    # Defines the html writer methods (html_songsheet and its helpers) and
    # returns self, so callers chain html_print.html_songsheet.
    # The nested defs become ordinary methods once html_print has run;
    # screen_print and screen_print_unsorted define helpers with some of the
    # same names (do_case, do_hash, ...), so whichever ran last is in effect.
    # The maintenance copy (@output[lng][:html_mnt]) is written only where
    # html_file_open actually opened one.
    def html_print
      # Writes the complete topics page for every language.
      def html_songsheet
        html_file_open
        html_head
        html_alph
        html_body
        html_tail
        html_file_close
      end
      # Returns the page header for language lng; type 'maintenance' selects
      # the stylesheet path used by the maintenance copy.
      def html_head_adjust(lng,type='')
        css_path,authors='',''
        if @env.output_dir_structure.by_language_code?
          css_path=(type !~/maintenance/) \
          ? '../../_sisu/css/harvest.css'
          : 'harvest.css'
          authors='authors.html'
        elsif @env.output_dir_structure.by_filetype? \
        or @env.output_dir_structure.by_filename?
          css_path=(type !~/maintenance/) \
          ? '../_sisu/css/harvest.css'
          : 'harvest.css'
          authors="authors.#{lng}.html"
        end
        ln=SiSU_i18n::Languages.new.language.list
        # one link per harvested language, pointing at that language's page
        harvest_languages=@the_idx.keys.map do |code|
          file=if @env.output_dir_structure.by_language_code?
            "../../#{code}/manifest/topics.html"
          else
            "./topics.#{code}.html"
          end
          %{<a href="#{file}">#{ln[code][:t]}</a>&nbsp;&nbsp;&nbsp;}
        end.join
        sv=SiSU_Env::Info_version.instance.get_version
        <<WOK
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>SiSU Metadata Harvest - Topics</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="dc.title" content= "SiSU metadata harvest, Topics - SiSU information Structuring Universe, Structured information Serialised Units" />
<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" />
<meta name="generator" content="#{sv[:project]} #{sv[:version]} of #{sv[:date_stamp]} (n*x and Ruby!)" />
<link rel="generator" href="http://www.jus.uio.no/sisu/SiSU" />
<link rel="stylesheet" href="#{css_path}" type="text/css" />
<link rel="shortcut icon" href="../_sisu/image/rb7.ico" />
</head>
<body bgcolor="#ffffff" text="#000000" link="#003090" lang="en" xml:lang="en">
<a name="top" id="top"></a>
<a name="up" id="up"></a>
<a name="start" id="start"></a>
<h1>SiSU Metadata Harvest - Topics</h1>
<p>[<a href="../index.html">&nbsp;HOME&nbsp;</a>] also see <a href="#{authors}">SiSU Metadata Harvest - Authors</a></p>
<p>#{@env.widget_static.search_form}</p>
<hr />
<p class="tiny">#{harvest_languages}</p>
<hr />
WOK
      end
      # Writes the page header to every language's output.
      def html_head
        @the_idx.keys.each do |lng|
          if @output[lng][:html_mnt].class==File
            @output[lng][:html_mnt] << html_head_adjust(lng,'maintenance')
          end
          @output[lng][:html] << html_head_adjust(lng)
        end
      end
      # Writes the A-Z jump bar (digits are left out) as one paragraph.
      def html_alph
        links=@alph.map do |x|
          (x =~/[0-9]/) \
          ? ''
          : %{<a href="##{x}">#{x}</a>,&nbsp;}
        end
        a='<p>' + links.join + '</p>'
        @the_idx.keys.each do |lng|
          @output[lng][:html_mnt] << a if @output[lng][:html_mnt].class==File
          @output[lng][:html] << a
        end
      end
      # Writes the closing anchors, credits and end tags.
      def html_tail
        a =<<WOK
<hr />
<a name="bottom" id="bottom"></a>
<a name="down" id="down"></a>
<a name="end" id="end"></a>
<a name="finish" id="finish"></a>
<a name="stop" id="stop"></a>
<a name="credits"></a>
#{@vz.credits_sisu}
</body>
</html>
WOK
        @the_idx.keys.each do |lng|
          @output[lng][:html_mnt] << a if @output[lng][:html_mnt].class==File
          @output[lng][:html] << a
        end
      end
      def do_html(lng,html)
        @output[lng][:html] << html
      end
      def do_html_maintenance(lng,html)
        @output[lng][:html_mnt] << html if @output[lng][:html_mnt].class==File
      end
      # Writes string as a paragraph of class attrib to both outputs.
      def do_string(lng,attrib,string)
        html=%{<p class="#{attrib}">#{string}</p>}
        do_html(lng,html)
        do_html_maintenance(lng,html)
      end
      def do_string_default(lng,attrib,string)
        html=%{<p class="#{attrib}">#{string}</p>}
        do_html(lng,html)
      end
      def do_string_maintenance(lng,attrib,string)
        html=%{<p class="#{attrib}">#{string}</p>}
        do_html_maintenance(lng,html)
      end
      # Writes a top level topic with a named anchor, preceded by the letter
      # headings for every letter up to the topic's first character that has
      # not been written yet.
      def do_string_name(lng,attrib,string)
        f=string[/\S/] # nil for a blank topic: no letter heading is advanced
        while f and @letter < f and @alph.length > 0
          @letter=@alph.shift
          heading=%{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>}
          do_html_maintenance(lng,heading)
          do_html(lng,heading)
        end
        name=string.strip.gsub(/\s+/,'_')
        html=%{<p class="#{attrib}"><a name="#{name}">#{string}</a></p>}
        do_html(lng,html)
        do_html_maintenance(lng,html)
      end
      def do_array(lng,lv,array)
        lv+=1
        array.each do |b|
          do_case(lng,lv,b)
        end
      end
      # Writes one document line (title linked to its manifest, then authors).
      def do_hash_md(lng,attrib,hash)
        if @env.output_dir_structure.by_language_code?
          manifest_at=hash[:file] + '.html'
        elsif @env.output_dir_structure.by_filetype?
          manifest_at=hash[:file] + '.' + lng +  '.html'
        elsif @env.output_dir_structure.by_filename?
          manifest_at="../#{hash[:file]}/#{hash[:page]}"
        end
        html=%{<a href="#{manifest_at}">#{hash[:title]}</a> - #{hash[:author]}}
        do_string_default(lng,attrib,html)
      end
      def do_hash_md_maintenance(lng,attrib,hash)
        if @output[lng][:html_mnt].class==File #should not be run for presentation output
          html=%{[<a href="#{hash[:file]}.sst">src</a>]&nbsp;&nbsp;<a href="file://#{@env.path.output}/#{hash[:file]}/#{hash[:page]}">#{hash[:title]}</a> - #{hash[:author]}}
          do_string_maintenance(lng,attrib,html)
        end
      end
      # Walks one node: a topic node (documents under :md, then sub-topics in
      # sorted order) or a document hash (written when its :title is met).
      def do_hash(lng,lv,hash)
        lv+=1
        key=[]
        hash.each_key do |m|
          case m
          when :md
            do_case(lng,lv,hash[m])
          when :title
            do_hash_md(lng,'work',hash)
            do_hash_md_maintenance(lng,'work',hash)
          when :author,:filename,:file,:rough_idx,:page
            # document fields, already covered by the :title branch
          else
            key << m
          end
        end
        key.sort.each do |m|
          attrib="lev#{lv}"
          lv==0 ? do_string_name(lng,attrib,m) : do_string(lng,attrib,m)
          do_case(lng,lv,hash[m])
        end
      end
      # Dispatches on the exact class of a (String, Array or Hash); anything
      # else is ignored.
      def do_case(lng,lv,a)
        y = a.class
        if y==String
          attrib="lev#{lv}"
          lv==0 ? do_string_name(lng,attrib,a) : do_string(lng,attrib,a)
        elsif y==Array
          do_array(lng,lv,a)
        elsif y==Hash
          do_hash(lng,lv,a)
        end
      end
      # Writes the topic tree of every language, top level topics sorted.
      def html_body
        # do_string_name uses up @alph while writing letter headings, so each
        # language starts again from the alphabet as it stood on entry
        alphabet=[@letter] + @alph
        @the_idx.each_pair do |lng,lng_array|
          @alph=alphabet.dup
          @letter=@alph.shift
          lng_array.sort.each do |a|
            do_case(lng,-1,a)
          end
        end
      end
      self
    end
    # Defines the helpers that print the topic tree to stdout, sub-topics in
    # sorted order, and returns self; call cycle on the result to print.
    # Every helper takes the language code first, as its callers pass it.
    def screen_print
      # Prints string indented by four blanks per level.
      def do_string(lng,lv,string)
        puts(' '*(4*lv) + string)
      end
      def do_array(lng,lv,array)
        lv+=1
        array.each do |b|
          do_case(lng,lv,b)
        end
      end
      # Prints one document as "title - author".
      def do_hash_md(lng,lv,hash)
        string=hash[:title] + ' - ' + hash[:author]
        do_string(lng,lv,string)
      end
      # Walks one node: documents under :md first, a document hash when its
      # :title is met, then the sub-topics in sorted order.
      def do_hash(lng,lv,hash)
        lv+=1
        key=[]
        hash.each_key do |m|
          case m
          when :md
            do_case(lng,lv,hash[m])
          when :title
            do_hash_md(lng,lv,hash)
          when :author,:filename,:file,:rough_idx,:page
            # document fields, already covered by the :title branch
          else
            key << m
          end
        end
        key.sort.each do |m|
          do_string(lng,lv,m)
          do_case(lng,lv,hash[m])
        end
      end
      # Dispatches on the exact class of a (String, Array or Hash).
      def do_case(lng,lv,a)
        y = a.class
        if y==String
          do_string(lng,lv,a)
        elsif y==Array
          do_array(lng,lv,a)
        elsif y==Hash
          do_hash(lng,lv,a)
        end
      end
      # Prints the tree of every language in @the_idx.
      def cycle
        @the_idx.keys.each do |lng|
          @the_idx[lng].each do |a|
            do_case(lng,-1,a)
          end
        end
      end
      self
    end
    # Defines the helpers that print the topic tree to stdout in the order
    # the topics are stored, and returns self; call cycle on the result to
    # print.
    def screen_print_unsorted
      # Prints string indented by four blanks per level.
      def do_string(lng,lv,string)
        indent=' '*4
        puts(indent*lv + string)
      end
      def do_array(lng,lv,array)
        deeper=lv + 1
        array.each { |b| do_case(lng,deeper,b) }
      end
      # Prints one document as "title - author".
      def do_hash_md(lng,lv,hash)
        do_string(lng,lv,hash[:title] + ' - ' + hash[:author])
      end
      # Walks one node key by key: documents under :md, a document hash when
      # its :title is met, and each sub-topic as soon as it is encountered.
      def do_hash(lng,lv,hash)
        deeper=lv + 1
        document_fields=[:author,:filename,:file,:rough_idx,:page]
        hash.each_key do |m|
          if m == :md
            do_case(lng,deeper,hash[m])
          elsif m == :title
            do_hash_md(lng,deeper,hash)
          elsif !document_fields.include?(m)
            do_string(lng,deeper,m)
            do_case(lng,deeper,hash[m])
          end
        end
      end
      # Dispatches on the exact class of a (String, Array or Hash).
      def do_case(lng,lv,a)
        kind=a.class
        if kind==String
          do_string(lng,lv,a)
        elsif kind==Array
          do_array(lng,lv,a)
        elsif kind==Hash
          do_hash(lng,lv,a)
        end
      end
      # Prints the tree of every language in @the_idx.
      def cycle
        @the_idx.keys.each do |lng|
          @the_idx[lng].each { |a| do_case(lng,-1,a) }
        end
      end
      self
    end
  end
end
__END__
terms -|_  t{tl1} -|_ {fa}[fa]{filenames and other details}
       |           |_ {tl2} -|_ {fa}[fa]{filenames and other details}
       |           |         |_{tl3} -|_ {fa}[fa]{filenames and other details}
       |           |         |        |_{tl4} - {fa}[fa]{filenames and other details}
       |           |         |        |
       |           |         |        |_{tl4a} - {fa}[fa]{filenames and other details}
       |           |         |        |
       |           |         |        |_{tl4b} - {fa}[fa]{filenames and other details}
       |           |         |        |
       |           |         |        |_ ...
       |           |         |
       |           |         |_{tl3a} - {fa}[fa]{filenames and other details}
       |           |
       |           |_{tl2a} - {fa}[fa]{filenames and other details}
       |
       |_ t{tl1a} -|_ {fa}[fa]{filenames and other details}
                   |_ ...
