=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007 Ralph Amissah All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007 Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licenses/gpl.html>
   <http://www.gnu.org/copyleft/gpl.html>
   <http://www.jus.uio.no/sisu/gpl.fsf>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: common file for xml generation

=end
module SiSU_text_parts
  class Split_text_object
    require "#{SiSU_lib}/param"
    require "#{SiSU_lib}/xml_format"
    include SiSU_Viz
    include SiSU_XML_format
    @@alt_id_count=0
    @@dp=nil
    attr_reader :format,:text,:ocn,:lev_para_ocn
    # Stores the document metadata and the paragraph to be split, and
    # preloads the digest pattern used by the trailer regexes.
    #
    # md::   document metadata object, passed on to Format_scroll later
    # para:: the paragraph text to split
    #
    # @format and @ocn start out as the literal string 'null'. The digest
    # pattern is fetched from SiSU_Env::Info_env only once and then shared
    # through the class variable @@dp.
    def initialize(md,para)
      @md=md
      @para=para
      @format='null'
      @ocn='null'
      #@format,@ocn=nil,nil
      @@dp ||=SiSU_Env::Info_env.new.digest.pattern
      @dp=@@dp
    end
    # Splits @para into @format, @text and @ocn and wraps the result in a
    # SiSU_XML_format::Format_scroll object, stored in @lev_para_ocn.
    #
    # The patterns expect the paragraph to end in a trailer of the form
    # <~N;X;Y><D:D>, where D is the digest pattern held in @dp. N is captured
    # into @ocn (presumably the object citation number -- confirm against the
    # code that writes the trailer).
    #
    # Returns self, so the readers (format, text, ocn, lev_para_ocn) can be
    # chained onto the call.
    def lev_segname_para_ocn #using shared_txt instead, watch #% watch closely
      # Paragraph starts with a level marker ("N~") or a "<:tag>" and carries the trailer.
      if @para =~/^(\d~|<:.+?>).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
        # "N~segname text": format becomes "N~segname".
        if /^([1-6])~(\S+)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
          @format,segname,@text,@ocn=$1,$2,$3,$4
          @format="#@format~#{segname}" #
        # "N~ text": level marker without a segment name.
        elsif  /^([1-6]~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
          @format,@text,@ocn=$1,$2,$3
        # "<:tag> text": the tag content becomes the format.
        elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
          @format,@text,@ocn=$1,$2,$3
        # NOTE(review): unreachable -- any paragraph matching "<~(0);" here also
        # matches the first branch, whose (\d+) accepts "0". The "x<count>"
        # alternative id is therefore never assigned.
        elsif /^([1-6])~(\S+)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
          @@alt_id_count+=1
          @format,segname,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}"
          @format="#@format~#{segname}" #
        # NOTE(review): unreachable for the same reason (shadowed by the second branch).
        elsif  /^([1-6]~)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
          @@alt_id_count+=1
          @format,@text,@ocn=$1,$2,"x#{@@alt_id_count}"
        # NOTE(review): this pattern has only three groups, so the trailing $4 is
        # nil and is discarded by the parallel assignment. The branch also looks
        # shadowed by the "<:tag>" branch above -- confirm.
        elsif /^(?:<:i([1-9])>\s*_\*)\s+(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
          @format,@text,@ocn="_#{$1}\*",$2,$3,$4
        # "_* text": bullet marker becomes the format.
        elsif /^(_\*)\s+(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
          @format,@text,@ocn=$1,$2,$3
        # "<:iN> text": indent tag becomes the format.
        elsif  /<:(i[1-9])>\s*(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
          @format,@text,@ocn=$1,$2,$3
        end
      else
        # No leading marker: everything before the trailer is the text.
        if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
          @text,@ocn=$1,$2
        end
        # No trailer and no empty line in the paragraph.
        # NOTE(review): the lazy (.+?) has nothing after it, so it captures only
        # the first character of @para -- confirm this is intended.
        if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06
          @text=/(.+?)/m.match(@para)[1]
        end
        # A line starting with "N~": the digit becomes the format and the rest
        # (greedy, so including any trailer) becomes the text.
        if /^(\d)~\S*\s+(.+)/m.match(@para)
          @format,@text=$1,$2
        end
      end
      # Build the formatter; when no trailer is present a fixed placeholder
      # string is passed in place of @ocn.
      # NOTE(review): the placeholder is a double-quoted string, not a regex, so
      # "\w" and "\d" collapse to plain "w" and "d" -- confirm what Format_scroll expects.
      @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
        SiSU_XML_format::Format_scroll.new(@md,@format,@text,@ocn)
      else
        SiSU_XML_format::Format_scroll.new(@md,@format,@text,"<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>")
      end
      self
    end
    # Variant of the paragraph splitter that does not read the trailer: it sets
    # @format and @text from @para and stores a
    # SiSU_XML_format::Format_scroll object in @lev_para_ocn.
    #
    # @ocn is never assigned in this method, so it keeps the value given to it
    # in initialize.
    #
    # Returns self, so the readers (format, text, lev_para_ocn) can be chained
    # onto the call.
    def lev_segname_para
      # Paragraph starts with a level marker ("N~") or a "<:tag>".
      if @para =~/^(\d~|<:.+?>).+/
        # "N~segname text": format becomes "N~segname".
        if /^([1-6])~(\S+)\s+(\S.+)/m.match(@para)
          @format,segname,@text=$1,$2,$3
          @format="#@format~#{segname}" #
        # "N~ text": level marker without a segment name.
        elsif /^([1-6]~)\s+(\S.+)/m.match(@para)
          @format,@text=$1,$2
        # "<:tag> text".
        # NOTE(review): the trailing lazy (\S.+?) has nothing after it, so @text
        # receives only the first two characters of the text -- confirm intent.
        elsif /<:(.+?)>\s*(\S.+?)/m.match(@para)
          @format,@text=$1,$2
        # NOTE(review): unreachable -- anything matching this lazy pattern also
        # matches the greedy pattern of the first branch.
        elsif /^([1-6])~(\S+)\s+(\S.+?)/m.match(@para)
          @@alt_id_count+=1
          @format,segname,@text=$1,$2,$3
          @format="#@format~#{segname}" #
        # NOTE(review): unreachable for the same reason (shadowed by the second branch).
        elsif /^([1-6]~)\s+(\S.+?)/m.match(@para)
          @@alt_id_count+=1
          @format,@text=$1,$2
        end
      else
        # NOTE(review): a bare lazy (.+?) captures a single character, so @text
        # becomes the first character of @para here and in the next test.
        if /(.+?)/m.match(@para)
          @text=$1
        end
        # No trailer and no empty line in the paragraph.
        if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06
          @text=/(.+?)/m.match(@para)[1]
        end
        # A line starting with "N~": the digit becomes the format and the rest
        # of the paragraph becomes the text.
        if /^(\d)~\S*\s+(.+)/m.match(@para)
          @format,@text=$1,$2
        end
      end
      # Build the formatter; when no trailer is present a fixed placeholder
      # string is passed in place of @ocn.
      # NOTE(review): the placeholder is a double-quoted string, not a regex, so
      # "\w" and "\d" collapse to plain "w" and "d" -- confirm what Format_scroll expects.
      @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
        SiSU_XML_format::Format_scroll.new(@md,@format,@text,@ocn)
      else
        SiSU_XML_format::Format_scroll.new(@md,@format,@text,"<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>")
      end
      self
    end
  end
end
module SiSU_XML_munge
  class Trans
    require "#{SiSU_lib}/defaults"
    # Gathers the environment objects the markup methods rely on.
    #
    # md:: document metadata; only md.fns is read here.
    #
    # @sys::       SiSU_Env::System_call instance (queried for the locale)
    # @dir::       SiSU_Env::Info_env built for md.fns (queried for image urls)
    # @dp::        digest pattern interpolated into the trailer regexes
    # @url_brace:: url decoration taken from the skin
    def initialize(md)
      @sys=SiSU_Env::System_call.new
      info_env=SiSU_Env::Info_env
      @dir=info_env.new(md.fns)
      @dp=info_env.new.digest.pattern
      skin=SiSU_Viz::Skin.new
      @url_brace=skin.url_decoration
    end
    # Latin-1 characters escaped by the char_enc converters, as
    # [character, code point, HTML entity name] triples.
    LATIN1_ENTITIES=[
      ['¢',162,'cent'],
      ['£',163,'pound'],
      ['¥',165,'yen'],
      ['§',167,'sect'],
      ['©',169,'copy'],
      ['ª',170,'ordf'],
      ['«',171,'laquo'],
      ['®',174,'reg'],
      ['°',176,'deg'],
      ['±',177,'plusmn'],
      ['²',178,'sup2'],
      ['³',179,'sup3'],
      ['µ',181,'micro'],
      ['¶',182,'para'],
      ['¹',185,'sup1'],
      ['º',186,'ordm'],
      ['»',187,'raquo'],
      ['¼',188,'frac14'],
      ['½',189,'frac12'],
      ['¾',190,'frac34'],
      ['×',215,'times'],
      ['÷',247,'divide'],
      ['¿',191,'iquest'],
      ['À',192,'Agrave'],
      ['Á',193,'Aacute'],
      ['Â',194,'Acirc'],
      ['Ã',195,'Atilde'],
      ['Ä',196,'Auml'],
      ['Å',197,'Aring'],
      ['Æ',198,'AElig'],
      ['Ç',199,'Ccedil'],
      ['È',200,'Egrave'],
      ['É',201,'Eacute'],
      ['Ê',202,'Ecirc'],
      ['Ë',203,'Euml'],
      ['Ì',204,'Igrave'],
      ['Í',205,'Iacute'],
      ['Î',206,'Icirc'],
      ['Ï',207,'Iuml'],
      ['Ð',208,'ETH'],
      ['Ñ',209,'Ntilde'],
      ['Ò',210,'Ograve'],
      ['Ó',211,'Oacute'],
      ['Ô',212,'Ocirc'],
      ['Õ',213,'Otilde'],
      ['Ö',214,'Ouml'],
      ['Ø',216,'Oslash'],
      ['Ù',217,'Ugrave'],
      ['Ú',218,'Uacute'],
      ['Û',219,'Ucirc'],
      ['Ü',220,'Uuml'],
      ['Ý',221,'Yacute'],
      ['Þ',222,'THORN'],
      ['ß',223,'szlig'],
      ['à',224,'agrave'],
      ['á',225,'aacute'],
      ['â',226,'acirc'],
      ['ã',227,'atilde'],
      ['ä',228,'auml'],
      ['å',229,'aring'],
      ['æ',230,'aelig'],
      ['ç',231,'ccedil'],
      ['è',232,'egrave'],
      ['é',233,'eacute'],
      ['ê',234,'ecirc'],
      ['ë',235,'euml'],
      ['ì',236,'igrave'],
      ['í',237,'iacute'],
      ['î',238,'icirc'],
      ['ï',239,'iuml'],
      ['ð',240,'eth'],
      ['ñ',241,'ntilde'],
      ['ò',242,'ograve'],
      ['ó',243,'oacute'],
      ['ô',244,'ocirc'],
      ['õ',245,'otilde'],
      ['ö',246,'ouml'],
      ['ø',248,'oslash'],
      ['ù',249,'ugrave'],
      ['ú',250,'uacute'],
      ['û',251,'ucirc'],
      ['ü',252,'uuml'],
      ['ý',253,'yacute'],
      ['þ',254,'thorn'],
      ['ÿ',255,'yuml']
    ].freeze
    # Entry point for the character-encoding converters. Returns self so a
    # converter can be chained onto the call:
    #   Trans.new(md).char_enc.utf8(para)
    #   Trans.new(md).char_enc.html(para)
    def char_enc #character encode
      self
    end
    # Replaces the Latin-1 characters listed in LATIN1_ENTITIES with numeric
    # character references (e.g. "£" becomes "&#163;").
    #
    # para:: string to convert; it is modified in place.
    #
    # Nothing is converted unless @sys.locale names a UTF-8 locale.
    # Returns para.
    def utf8(para='')
      if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn
        LATIN1_ENTITIES.each do |char,code,_name|
          para.gsub!(char,"&##{code};")
        end
      end
      para
    end
    # Replaces the Latin-1 characters listed in LATIN1_ENTITIES with named
    # HTML entities (e.g. "£" becomes "&pound;").
    #
    # para:: string to convert; it is modified in place.
    #
    # Nothing is converted unless @sys.locale names a UTF-8 locale.
    # Returns para.
    def html(para='')
      if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn
        LATIN1_ENTITIES.each do |char,_code,name|
          para.gsub!(char,"&#{name};")
        end
      end
      para
    end
    # Escapes ampersands word by word, in place.
    #
    # wordlist:: array of mutable strings
    #
    # A word that already contains something shaped like an entity ("&...;")
    # is left completely untouched; in every other word each "&" becomes
    # "&amp;". Returns wordlist itself.
    def tidywords(wordlist)
      wordlist.each do |word|
        next if word =~/&\S+;/
        word.gsub!(/&/,'&amp;')
      end
    end
    # Converts SiSU inline markup in para into XML-safe text with xlink
    # <image> and <link> elements.
    #
    # para:: paragraph text. It is re-assembled from its words before any
    #        substitution runs, so the caller's string is not modified.
    #
    # Paragraphs that start with "<:code>" only get their "_<" / "_>" markers
    # and "&nbsp;" converted; all others get image and url markup converted.
    # Returns the converted string.
    def markup(para='')
      #if para !~/^<:code>/
        # Split into words (newlines kept as separate tokens), escape bare
        # ampersands, and re-join with single spaces.
        wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
        para=tidywords(wordlist).join(' ').strip
      #end
      # Escape "<" and ">" that stand alone between whitespace.
      para.gsub!(/(^|\s+)<\s+/,'\1&lt; '); para.gsub!(/\s+>(\s+|$)/,' &gt;\1')
      # Drop page-break tags and "<-#>" / "<~#>" style markers.
      para.gsub!(/<:pb>\s*/,'')
      para.gsub!(/<+[-~]#>+/,'')
      # Drop a "<0;..;..><digest:digest>" trailer.
      para.gsub!(/<0;\w\d+;[um]\d+><#@dp:#@dp>/,'')
      if para !~/^<:code>/
        #embeds a red-bullet image -->
        # "{file.png WxH caption}url": image with explicit dimensions.
        # NOTE(review): the character matched by (?:^|[^_\\]) before "{" is
        # consumed and not restored, and the captured url (\5) is unused.
        para.gsub!(/(?:^|[^_\\])\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/,
	  %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1" width="\\2" height="\\3" />[\\1] \\4})
        # "{file.png caption}url": image without dimensions; only the file name is used.
        para.gsub!(/(?:^|[^_\\])\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,
	  %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1"/>\\1})
        # "{link text}url": named link; trailing punctuation is kept outside the element.
        para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/,
          '\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune
        # Bare url: linked and wrapped in the skin's url decoration.
        para.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,
          %{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3})
        # Url preceded by "_" or "\".
        # NOTE(review): the trailing comment says escaped urls are not linked,
        # yet this replacement emits a <link> element -- confirm intent.
        para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,
	  '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later
        para.gsub!(/&nbsp;/,'&#160;')
        #para.gsub!(/&nbsp;/,' ') #clean
      else
        # The same pair of substitutions is applied twice; presumably the second
        # pass catches markers directly adjacent to one consumed by the first.
        para.gsub!(/(^|[^}])_</m,'\1&lt;'); para.gsub!(/(^|[^}])_>/m,'\1&gt;') #code-block: angle brackets special characters
        para.gsub!(/(^|[^}])_</m,'\1&lt;'); para.gsub!(/(^|[^}])_>/m,'\1&gt;')
        para.gsub!(/&nbsp;/,'&#160;')
      end
      para
    end
    # Lightweight conversion of SiSU inline markup to simple XML tags.
    #
    # para:: paragraph text; the substitutions are applied to it in place.
    #
    # Emphasis markers become <i>, <b>, <u> and <del>, line breaks are
    # normalised to "<br />", page-break and "<-#>" / "<~#>" markers are
    # removed, ampersands are escaped, and image markup is reduced to an
    # <image.path> element. Returns a new string, rebuilt word by word with
    # single spaces between the words.
    def markup_light(para='')
      [
        [/\/\{(.+?)\}\//,'<i>\1</i>'],
        [/[*!]\{(.+?)\}[*!]/,'<b>\1</b>'],
        [/_\{(.+?)\}_/,'<u>\1</u>'],
        [/-\{(.+?)\}-/,'<del>\1</del>'],
        [/<br(\s*\/)?>/,'<br />'],
        [/<:pb>\s*/,''],
        [/<[-~]#>/,''],
        [/(^|\s)&\s+/,'\1&amp; '], #sort
        [/&([^;]{1,5})/,'&amp;\1'] #sort, rough estimate, revisit #WATCH found in node not sax
      ].each do |pattern,replacement|
        para.gsub!(pattern,replacement)
      end
      image_path="<image.path>#{@dir.url.images_local}\/\\1</image.path>"
      para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,image_path)
      para.gsub!(/&nbsp;/,'&#160;')
      #para.gsub!(/&nbsp;/,' ') #clean
      words=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
      tidywords(words).join(' ').strip
    end
    # Conversion of SiSU inline markup for FictionBook output.
    #
    # para:: paragraph text; the substitutions are applied to it in place.
    #
    # Does what markup_light does and additionally reduces footnotes
    # "~{N text}~" to "[N]" and removes every remaining "<:tag>". Returns a
    # new string, rebuilt word by word with single spaces between the words.
    def markup_fictionbook(para='')
      [
        [/~\{([\d*+]+).+?\}~/,'[\1]'],
        [/\/\{(.+?)\}\//,'<i>\1</i>'],
        [/[*!]\{(.+?)\}[*!]/,'<b>\1</b>'],
        [/_\{(.+?)\}_/,'<u>\1</u>'],
        [/-\{(.+?)\}-/,'<del>\1</del>'],
        [/<br(\s*\/)?>/,'<br />'],
        [/<:pb>\s*/,''],
        [/<[-~]#>/,''],
        #temporary -->
        [/<:\S+?>/,''],
        #<-- temporary
        [/<[-~]#>/,''],
        [/(^|\s)&\s+/,'\1&amp; '], #sort
        [/&([^;]{1,5})/,'&amp;\1'] #sort, rough estimate, revisit #WATCH found in node not sax
      ].each do |pattern,replacement|
        para.gsub!(pattern,replacement)
      end
      image_path="<image.path>#{@dir.url.images_local}\/\\1</image.path>"
      para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,image_path)
      para.gsub!(/&nbsp;/,'&#160;')
      #para.gsub!(/&nbsp;/,' ') #clean
      words=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
      tidywords(words).join(' ').strip
    end
    # Escapes every angle bracket in para and then restores the few tags that
    # are allowed inside grouped text: line breaks, xlink <link> elements
    # and <en> elements.
    #
    # para:: text to convert; it is modified in place and returned.
    def markup_group(para='')
      [
        [/</,'&lt;'],
        [/>/,'&gt;'],
        [/&lt;:?br(?:\s+\/)?&gt;/,'<br />'],
        [/&lt;(link xmlns:xlink=".+?")&gt;/,'<\1>'],
        [/&lt;(\/link)&gt;/,'<\1>'],
        [/&lt;(\/?en)&gt;/,'<\1>']
      ].each do |pattern,replacement|
        para.gsub!(pattern,replacement)
      end
      para
    end
  end
end
module SiSU_XML_tags #Format
  require "#{SiSU_lib}/param"
  include SiSU_Param
  include SiSU_Viz
  class RDF
    # Prepares the Dublin Core metadata strings for a document.
    #
    # md::       document metadata object; its dc_* readers, keywords, sfx and
    #            pdf are read here
    # seg_name:: optional prefix for the title; a string, or an array of
    #            strings that are joined together
    # tracker::  accepted for compatibility; not used in this method
    #
    # For every metadata field that is set, two strings are built: an
    # @rdf_* attribute line and a @dc_* <meta> tag line. Fields that are not
    # set leave their @dc_* variable as an empty string. Values placed in
    # <meta> tags are first passed through meta_content_clean.
    def initialize(md='',seg_name=[],tracker=0)
      @dc_title=@dc_subtitle=@dc_creator=@dc_subject=@dc_description=@dc_publisher=@dc_contributor=@dc_date=@dc_date_created=@dc_date_issued=@dc_date_available=@dc_date_valid=@dc_date_modified=@dc_type=@dc_format=@dc_identifier=@dc_source=@dc_language=@dc_relation=@dc_coverage=@dc_rights=@copyright=@owner=@keywords=''
      #seg_name=%{#{@@seg_name[@@tracker]} - } if @@seg_name[@@tracker]
      # Interpolating an Array directly renders it in inspect form on Ruby 1.9
      # and later ("[]" for the default), so join the segments into a plain
      # string before they are placed in front of the title.
      seg_prefix=Array(seg_name).join
      @md=md
      @sfx,@pdf=@md.sfx,@md.pdf
      @rdfurl=%{  rdf:about="http://www.jus.uio.no/lm/toc"\n}
      if @md.dc_title                                                            # DublinCore 1 - title
        @rdf_title=%{    dc.title="#{seg_prefix}#{@md.dc_title}"\n}
        @dc_title=%{  <meta name="dc.title" content="#{seg_prefix}#{@md.dc_title}" />\n}
      end
      if @md.dc_creator                                                          # DublinCore 2 - creator/author (author)
        @rdf_creator=%{    dc.creator="#{@md.dc_creator}"\n}
        content=meta_content_clean(@md.dc_creator)
        @dc_creator=%{  <meta name="dc.creator" content="#{content}" />\n}
      end
      if @md.dc_subject                                                          # DublinCore 3 - subject (us library of congress, eric or udc, or schema???)
        @rdf_subject=%{    dc.subject="#{@md.dc_subject}"\n}
        content=meta_content_clean(@md.dc_subject)
        @dc_subject=%{  <meta name="dc.subject" content="#{content}" />\n}
      end
      if @md.dc_description                                                      # DublinCore 4 - description
        @rdf_description=%{    dc.description="#{@md.dc_description}"\n}
        content=meta_content_clean(@md.dc_description)
        @dc_description=%{  <meta name="dc.description" content="#{content}" />\n}
      end
      if @md.dc_publisher                                                        # DublinCore 5 - publisher (current copy published by)
        @rdf_publisher=%{    dc.publisher="#{@md.dc_publisher}"\n}
        content=meta_content_clean(@md.dc_publisher)
        @dc_publisher=%{  <meta name="dc.publisher" content="#{content}" />\n}
      end
      if @md.dc_contributor                                                      # DublinCore 6 - contributor
        @rdf_contributor=%{    dc.contributor="#{@md.dc_contributor}"\n}
        content=meta_content_clean(@md.dc_contributor)
        @dc_contributor=%{  <meta name="dc.contributor" content="#{content}" />\n}
      end
      if @md.dc_date                                                             # DublinCore 7 - date year-mm-dd
        @rdf_date=%{    dc.date="#{@md.dc_date}"\n}
        @dc_date=%{  <meta name="dc.date" content="#{@md.dc_date}" #{@md.date_scheme} />\n}
      end
      if @md.dc_date_created                                                     # DublinCore 7 - date.created year-mm-dd
        @rdf_date_created=%{    dc.date.created="#{@md.dc_date_created}"\n}
        @dc_date_created=%{  <meta name="dc.date.created" content="#{@md.dc_date_created}" #{@md.date_created_scheme} />\n}
      end
      if @md.dc_date_issued                                                      # DublinCore 7 - date.issued year-mm-dd
        @rdf_date_issued=%{    dc.date.issued="#{@md.dc_date_issued}"\n}
        @dc_date_issued=%{  <meta name="dc.date.issued" content="#{@md.dc_date_issued}" #{@md.date_issued_scheme} />\n}
      end
      if @md.dc_date_available                                                   # DublinCore 7 - date.available year-mm-dd
        @rdf_date_available=%{    dc.date.available="#{@md.dc_date_available}"\n}
        @dc_date_available=%{  <meta name="dc.date.available" content="#{@md.dc_date_available}" #{@md.date_available_scheme} />\n}
      end
      if @md.dc_date_valid                                                       # DublinCore 7 - date.valid year-mm-dd
        @rdf_date_valid=%{    dc.date.valid="#{@md.dc_date_valid}"\n}
        @dc_date_valid=%{  <meta name="dc.date.valid" content="#{@md.dc_date_valid}" #{@md.date_valid_scheme} />\n}
      end
      if @md.dc_date_modified                                                    # DublinCore 7 - date.modified year-mm-dd
        @rdf_date_modified=%{    dc.date.modified="#{@md.dc_date_modified}"\n}
        @dc_date_modified=%{  <meta name="dc.date.modified" content="#{@md.dc_date_modified}" #{@md.date_modified_scheme} />\n}
      end
      if @md.dc_type                                                             # DublinCore 8 - type (genre eg. report, convention etc)
        @rdf_type=%{    dc.type="#{@md.dc_type}"\n}
        content=meta_content_clean(@md.dc_type)
        @dc_type=%{  <meta name="dc.type" content="#{content}" />\n}
      end
      if @md.dc_format                                                           # DublinCore 9 - format (use your mime type)
        @rdf_format=%{    dc.format="#{@md.dc_format}"\n}
        content=meta_content_clean(@md.dc_format)
        @dc_format=%{  <meta name="dc.format" content="#{content}" />\n}
      end
      if @md.dc_identifier                                                       # DublinCore 10 - identifier (your identifier, could use urn which is free)
        @rdf_identifier=%{    dc.identifier="#{@md.dc_identifier}"\n}
        content=meta_content_clean(@md.dc_identifier)
        @dc_identifier=%{  <meta name="dc.identifier" content="#{content}" />\n}
      end
      if @md.dc_source                                                           # DublinCore 11 - source (document source)
        @rdf_source=%{    dc.source="#{@md.dc_source}"\n}
        content=meta_content_clean(@md.dc_source)
        @dc_source=%{  <meta name="dc.source" content="#{content}" />\n}
      end
      if @md.dc_language \
      and @md.dc_language[:name]                                                  # DublinCore 12 - language (English)
        @rdf_language=%{    dc.language="#{@md.dc_language[:name]}"\n}
        @dc_language=%{  <meta name="dc.language" content="#{@md.dc_language[:name]}" />\n}
      end
      if @md.language_original \
      and @md.language_original[:name]
        @rdf_language_original=%{    dc.language="#{@md.language_original[:name]}"\n}
        @language_original=%{  <meta name="dc.language" content="#{@md.language_original[:name]}" />\n}
      end
      if @md.dc_relation                                                         # DublinCore 13 - relation
        @rdf_relation=%{    dc.relation="#{@md.dc_relation}"\n}
        content=meta_content_clean(@md.dc_relation)
        @dc_relation=%{  <meta name="dc.relation" content="#{content}" />\n}
      end
      if @md.dc_coverage                                                         # DublinCore 14 - coverage
        @rdf_coverage=%{    dc.coverage="#{@md.dc_coverage}"\n}
        content=meta_content_clean(@md.dc_coverage)
        @dc_coverage=%{  <meta name="dc.coverage" content="#{content}" />\n}
      end
      if @md.dc_rights                                                           # DublinCore 15 - rights
        @rdf_rights=%{    dc.rights="#{@md.dc_rights}"\n}
        content=meta_content_clean(@md.dc_rights)
        @dc_rights=%{  <meta name="dc.rights" content="#{content}" />\n}
      end
      content=meta_content_clean(@md.keywords)
      @keywords=%{  <meta name="keywords" content="#{content}" />\n} if @md.keywords
      # Skin object; metatag_html reads txt_generator and png_ico from it.
      @vz=SiSU_Env::Get_init.instance.skin
    end
    # Makes a metadata value safe for use inside a double-quoted attribute by
    # turning every double quote into a single quote.
    #
    # content:: the metadata string, or nil
    #
    # Returns a cleaned copy; the string passed in is left unchanged, so
    # frozen strings are accepted and the caller's metadata is not altered as
    # a side effect. nil is passed through as nil.
    def meta_content_clean(content='')
      return content if content.nil?
      content.tr('"',"'")
    end
    # RDF / Dublin Core header for the table of contents.
    #
    # The whole body is commented out (the heredoc that would emit the
    # rdf:Description element is kept below for reference), so the method
    # currently does nothing and returns nil.
    def rdftoc #tocHead #values strung together, because some empty, and resulting output (line breaks) is much better
      # Shape of the intended output:
      #<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
      #         xmlns:dc="http://purl.org/dc/elements/1.1/">
      #  <rdf:Description rdf:about="http://www.jus.uio.no/lm/doc"
      #      dc:creator="Author"
      #      dc:title="Title"
      #      dc:description="Description if any"
      #      dc:date="Publication Date"
      #  />
      #</rdf:RDF>
      #Dublin Core
#### XML only :-( KEEP
#<<WOK
#<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
#     xmlns:dc="http://purl.org/dc/elements/1.1/">
#  <rdf:Description
#    #@rdfurl#@rdf_title#@rdf_subtitle#@rdf_creator#@rdf_subject#@rdf_description#@rdf_publisher#@rdf_contributor#@rdf_date#@rdf_dateCreated#@rdf_dateIssued#@rdf_dateAvailable#@rdf_dateValid#@rdf_dateModified#@rdf_type#@rdf_format#@rdf_identifier#@rdf_source#@rdf_language#@rdf_relation #@rdf_coverage#@rdf_rights
#    />\n
#</rdf:RDF>\n
#WOK
    end
    # RDF header for a segment; simply delegates to rdftoc and returns
    # whatever rdftoc returns.
    def rdfseg #segHead
      rdftoc
    end
    # Builds the XML comment that records how the document was processed.
    #
    # extra:: optional first entry (e.g. the kind of XML representation);
    #         omitted when empty
    #
    # The comment lists the generator (blank when no version is known), the
    # Ruby version, the source file details (blank unless @md.sc_info is
    # set), the generation time and the SiSU url, one entry per line.
    # Returns the comment as a string ending in a newline.
    def comment_xml(extra='')
      version=@md.sisu_version
      generator=if version[:version]
        "Generated by: #{version[:project]} #{version[:version]} of #{version[:date_stamp]} (#{version[:date]})"
      end
      lastdone="Last Generated on: #{Time.now}"
      rubyv="Ruby version: #{@md.ruby_version}"
      sc=''
      if @md.sc_info
        sc="Source file: #{@md.sc_filename} version: #{@md.sc_number} of: #{@md.sc_date}"
      end
      entries=[generator,rubyv,sc,lastdone,'SiSU http://www.jus.uio.no/sisu']
      entries.unshift(extra) unless extra.empty?
      listing=entries.map { |entry| "     * #{entry}\n" }.join
      "<!-- Document processing information:\n#{listing}-->\n"
    end
    # Processing comment for the SAX style XML output (see comment_xml).
    def comment_xml_sax
      comment_xml('SiSU XML, SAX type representation')
    end
    # Processing comment for the Node style XML output (see comment_xml).
    def comment_xml_node
      comment_xml('SiSU XML, Node type representation')
    end
    # Processing comment for the DOM style XML output (see comment_xml).
    def comment_xml_dom
      comment_xml('SiSU XML, DOM type representation')
    end
    # Returns the block of <meta> tags for the document head: every prepared
    # Dublin Core tag run together on one line (each non-empty tag brings its
    # own newline, unset ones contribute nothing), followed by the skin's
    # generator text and icon entry on a line each.
    def metatag_html #values strung together, because some empty, and resulting output (line breaks) is much better
      #{@vz.js_head}
      meta_tags=[
        @dc_title,@dc_subtitle,@dc_creator,@dc_subject,@dc_description,
        @dc_publisher,@dc_contributor,@dc_date,@dc_date_created,
        @dc_date_issued,@dc_date_available,@dc_date_valid,@dc_date_modified,
        @dc_type,@dc_format,@dc_identifier,@dc_source,@dc_language,
        @dc_relation,@dc_coverage,@dc_rights,@copyright,@owner
      ].map { |tag| tag.to_s }.join
      "#{meta_tags}\n#{@vz.txt_generator}\n#{@vz.png_ico}\n"
    end
  end
end
# Defines nothing itself in this file: opening the module only pulls in
# xml_tables from the SiSU library directory (SiSU_lib) at load time.
module SiSU_Tables
  require "#{SiSU_lib}/xml_tables"
end
__END__

