#
# scanmail.rb
#
#   Copyright (c) 1998-2001 Minero Aoki <aamine@dp.u-netsurf.ne.jp>
#
#   This program is free software.
#   You can distribute/modify this program under the terms of
#   the GNU Lesser General Public License version 2 or later.
#

require 'scanner'
begin
  require 'tmail/mails.so'
rescue LoadError


module TMail

  class MailScanner < Scanner

    # Version of this scanner.  Version and Version_R refer to the very
    # same frozen String object.
    Version_R = '0.8.18'.freeze
    Version   = Version_R


    # Creates a scanner for one header body.
    #
    # str::      passed unchanged to the superclass constructor.
    # header::   header type name.  'CTypeH', 'CEncodingH' and
    #            'CDispositionH' switch word scanning to :token mode (every
    #            other value keeps :atom mode); 'RecvH' turns on recv mode.
    # comments:: stored as-is in @comments.
    def initialize( str, header, comments )
      super( str )

      @header   = header
      @comments = comments

      token_headers = [ 'CTypeH', 'CEncodingH', 'CDispositionH' ]
      @atom_mode = token_headers.include?( header ) ? :token : :atom
      @recv_mode = ( 'RecvH' == header )
    end


    # Regular expressions and lookup tables used by the tokenizer.
    #
    # Every pattern starts with \A.  In the patterns that contain an
    # alternation (ATOM, TOKEN, Q_ENT, C_ENT, D_ENT) the \A belongs to the
    # first alternative only; the jisstr alternative has no anchor of its
    # own.
    # NOTE(review): presumably harmless because @scan.scan / @scan.skip match
    # at the current scan position -- confirm against the Scanner superclass.

    # Punctuation accepted in addition to \w.  atomchars contains "/", "="
    # and "?" where tokenchars contains "." instead; both also allow "-".
    atomchars  = Regexp.quote( "\#!$%&`'*+{|}~^/=?" ) + '\\-'
    tokenchars = Regexp.quote( "\#!$%&`'*+{|}~^." )   + '\\-'
    # Raw byte range spliced into the character classes below.
    # NOTE(review): presumably the EUC-JP two-byte character range; this
    # relies on the interpreter's multibyte regexp handling -- verify on the
    # Ruby version actually in use.
    eucchars   = "\xa1\xa1-\xf3\xfe"
    # ESC plus two characters, a run of non-ESC characters, then ESC plus two
    # characters (presumably an ISO-2022-JP escape-delimited segment).
    jisstr     = '\\e..[^\\e]*\\e..'

    # A word in :atom mode / in :token mode.
    ATOM    = /\A[\w#{atomchars}#{eucchars}]+|#{jisstr}/
    TOKEN   = /\A[\w#{tokenchars}#{eucchars}]+|#{jisstr}/
    # Matches only when the whole string consists of decimal digits.
    DIGIT   = /\A\d+\z/

    # An optional line break followed by at least one space or tab.
    LWSP    = /\A(?:\n|\r\n|\r)?[ \t]+/

    # A single backslash (start of an escaped character).
    BACKSLASH = /\A\\/

    # Quoted string: delimiters, and a run of characters other than '"',
    # backslash and ESC (or a jisstr segment).
    BEGIN_Q = /\A"/
    Q_ENT   = /\A[^"\\\e]+|#{jisstr}/
    END_Q   = /\A"/

    # Comment: delimiters, and a run of characters other than parentheses,
    # backslash and ESC (or a jisstr segment).
    BEGIN_C = /\A\(/
    C_ENT   = /\A[^\)\(\\\e]+|#{jisstr}/
    END_C   = /\A\)/

    # Domain literal: delimiters, and a run of characters other than "]" and
    # backslash.  Unlike Q_ENT and C_ENT, ESC is not excluded from the class.
    BEGIN_D = /\A\[/
    D_ENT   = /\A[^\]\\]+|#{jisstr}/
    END_D   = /\A\]/


    # Keywords that scan turns into dedicated token types while in recv
    # mode.  Keys are lower case; scan downcases the word before the lookup.
    RECV_TOKEN = {
      'from' => :FROM,
      'by'   => :BY,
      'via'  => :VIA,
      'with' => :WITH,
      'id'   => :ID,
      'for'  => :FOR
    }

    # Initial token value used by scan before anything has been read.
    OMIT = '$omit'

    # Reads the next token and stores it into +ret+.
    #
    # ret:: an Array filled in place: ret[0] receives the token type and
    #       ret[1] the token value.  The same Array is returned.
    #
    # Token types produced:
    #   :ATOM / :DIGIT   a word in :atom mode (:DIGIT when it is all digits);
    #                    in recv mode a word listed in RECV_TOKEN yields the
    #                    mapped symbol instead of :ATOM
    #   :TOKEN           a word in :token mode
    #   :QUOTED          a quoted string (value comes from #quoted)
    #   :DOMLIT          a domain literal (value comes from #domlit)
    #   a one-char String  any other character; type and value are both
    #                      that character
    #   false            end of input; the value is '$'
    #
    # Comments never become tokens.  A comment is always consumed, and its
    # text is pushed onto @comments only when @comments is set.  (Before this
    # fix a nil @comments left the comment body in the input, so it was
    # tokenized as ordinary words.)
    def scan( ret )
      sret = nil
      vret = OMIT

      until sret do
        unless @scan.rest? then
          sret = false; vret = '$'
          break
        end

        @scan.skip LWSP

        case @atom_mode
        when :atom
          if vret = @scan.scan( ATOM ) then
            if DIGIT === vret then
              sret = :DIGIT
            elsif @recv_mode then
              # Unknown words fall back to a plain :ATOM.
              sret = RECV_TOKEN.fetch( vret.downcase, :ATOM )
            else
              sret = :ATOM
            end
            break
          end

        when :token
          if vret = @scan.scan( TOKEN ) then
            sret = :TOKEN
            break
          end

        else
          bug! 'atom mode is not atom/token'
        end

        if @scan.skip( BEGIN_Q ) then
          sret = :QUOTED
          vret = quoted

        elsif @scan.skip( BEGIN_C ) then
          # Consume the comment unconditionally; only recording it is
          # optional.  sret stays nil, so the loop goes on to the next token.
          text = comment
          @comments.push text if @comments

        elsif @scan.skip( BEGIN_D ) then
          sret = :DOMLIT
          vret = domlit

        else
          # getch yields nil when only skipped whitespace was left; sret then
          # stays nil and the next iteration reports end of input.
          sret = vret = @scan.getch
        end
      end

      ret[0] = sret
      ret[1] = vret
      debug_report ret if @debug

      ret
    end



    private


    # Reads the body of a quoted string up to and including the closing '"'
    # and returns the text without the surrounding quotes.  It is written to
    # start right after the opening '"'.
    #
    # A backslash escapes the next character: the backslash is dropped and
    # the character is copied verbatim.
    #
    # Calls scan_error! when the input ends before the closing quote, which
    # now includes a backslash as the very last character (previously that
    # case raised a TypeError from appending nil).  scan_error! is expected
    # not to return.
    def quoted
      ret = ''
      while true do
        if    temp = @scan.scan( Q_ENT ) then ret << temp
        elsif @scan.skip( END_Q )        then break
        elsif @scan.skip( BACKSLASH )    then
          ch = @scan.getch
          # Nothing follows the backslash: the string is cut short.
          scan_error! "found unterminated quoted-string" unless ch
          ret << ch
        else
          unless @scan.rest? then
            scan_error! "found unterminated quoted-string"
          end
          # Input remains but nothing matched (e.g. an ESC that does not
          # start a jisstr segment, which Q_ENT refuses).
          bug! 'in quoted, no match'
        end
      end

      ret
    end

    
    # Reads the body of a comment up to and including the ")" that balances
    # it, and returns the collected text.  Nesting starts at depth 1, i.e.
    # the method is written to start right after the opening "(".
    #
    # Nested parentheses are tracked for balance but are not copied into the
    # result.  A backslash escapes the next character: the backslash is
    # dropped and the character is copied verbatim.
    #
    # Calls scan_error! when the input ends before the comment is closed,
    # which now includes a backslash as the very last character (previously
    # that case raised a TypeError from appending nil).  scan_error! is
    # expected not to return.
    def comment
      ret = ''
      nest = 1

      while nest > 0 and @scan.rest? do
        if    temp = @scan.scan( C_ENT ) then ret << temp
        elsif @scan.skip( END_C )        then nest -= 1
        elsif @scan.skip( BEGIN_C )      then nest += 1
        elsif @scan.skip( BACKSLASH )    then
          ch = @scan.getch
          # Nothing follows the backslash: the comment is cut short.
          scan_error! "found unterminated comment" unless ch
          ret << ch
        else
          # The loop condition guarantees input remains here, so a miss
          # means no pattern accepts the next character (e.g. an ESC that
          # does not start a jisstr segment).
          bug! 'in comment, no match'
        end
      end
      scan_error! "found unterminated comment" if nest > 0

      ret
    end

    
    # Reads the body of a domain literal up to and including the closing "]"
    # and returns the text without the brackets.  It is written to start
    # right after the opening "[".
    #
    # A backslash escapes the next character: the backslash is dropped and
    # the character is copied verbatim.
    #
    # Calls scan_error! when the input ends before the closing "]", which
    # now includes a backslash as the very last character (previously that
    # case raised a TypeError from appending nil).  scan_error! is expected
    # not to return.
    def domlit
      ret = ''

      loop do
        temp = @scan.scan( D_ENT )
        ret << temp if temp

        if    @scan.skip( END_D )     then break
        elsif @scan.skip( BACKSLASH ) then
          ch = @scan.getch
          # Nothing follows the backslash: the literal is cut short.
          scan_error! "found unterminated domain literal" unless ch
          ret << ch
        else
          unless @scan.rest? then
            scan_error! "found unterminated domain literal"
          end
          bug! 'in domlit, no match'
        end
      end

      ret
    end

  end   # class TMail::MailScanner

end   # module TMail

end   # ifndef mails.so
