# Method library for file collation system

# Copyright (C) 2012 Charles Atkinson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

# Purpose: Ruby code library for the docoll scripts

require 'digest/md5'
require 'digest/sha1'


# Method definitions
# ~~~~~~~~~~~~~~~~~~
# (in alphabetical order)

# Checks that dir exists, is a directory and has the required permissions.
#   dir: path of the directory to check
#   perm: 'w' to also require write permission; read and execute (list)
#     permission are always required
# Returns '' when every check passes, otherwise a string holding one
# "\n  ..." message per problem found.  A missing path or a non-directory is
# reported on its own and no permission checks are then made.
def CheckDir( dir, perm )
  # File.exist? is used because File.exists? was removed in Ruby 3.2
  if ! File.exist?( dir ); return "\n  Directory #{ dir } does not exist" end
  if ! File.directory?( dir ); return "\n  #{ dir } is not a directory" end
  error_msg = ''
  if ! File.readable?( dir ); error_msg += "\n  Directory #{ dir } is not readable" end
  if ! File.executable?( dir ); error_msg += "\n  Directory #{ dir } is not executable (listable)" end
  if perm == 'w' && ! File.writable?( dir ); error_msg += "\n  Directory #{ dir } is not writeable" end
  return error_msg
end


# Validates the directory and logging settings in $parameters.
# Reads the "CollationRootDir", "SourceRootDirs", "LogLevel", "LogToFile" and
# "LogPath" entries.
# Returns '' when everything is valid, otherwise a string holding one
# "\n  ..." message per problem found.
def CheckParameters( )
  error_msg = ''
  error_msg += CheckDir( $parameters[ "CollationRootDir" ], 'w' )
  $parameters[ "SourceRootDirs" ].each \
  do |source_dir|
    error_msg += CheckDir( source_dir, 'r' )
  end
  # Anchored so the whole value must be a level name; an unanchored match
  # would accept any string that merely contains one, such as "VERBOSE_INFO"
  if $parameters[ "LogLevel" ] !~ /\A(?:DEBUG|INFO|WARN|ERROR|FATAL)\z/
      error_msg += "\n  Invalid LogLevel #{ $parameters[ "LogLevel" ] }"
  end
  if $parameters[ "LogToFile" ]
    # Strip the file name, leaving the directory with its trailing /
    dir = $parameters[ "LogPath" ].gsub( %r|[^/]*$|, '' )
    # A log path with no directory part refers to the current directory
    if dir == ''; dir = './' end
    error_msg += CheckDir( dir, 'w' )
  end
  return error_msg
end


# Copies source_path to target_path by running the OS install command with
# its -D and -p options.
# Returns the command's exit status and its combined stdout and stderr.
def CopyFile( source_path, target_path )
  # The -- ends option parsing so a path beginning with - is not taken as options
  install_out = ` install -D -p -- #{ ShellEscape( source_path ) } #{ ShellEscape( target_path ) } 2>&1 `
  return $?.exitstatus, install_out
end


# Creates new_path as a hard link to original_path, first creating the
# directory that will hold new_path if necessary.
# Returns an exit status and a message: those from EnsureDirExists if the
# directory could not be created, otherwise the exit status and combined
# stdout and stderr of the OS ln command.
def CreateHardLink( original_path, new_path )
  # Ensure new_path's directory exists.  File.dirname is used so that a
  # new_path without any / gives "." rather than new_path itself, which would
  # be created as a directory and then receive the link inside it.
  new_path_dir = File.dirname( new_path )
  exitstatus, message = EnsureDirExists( new_path_dir )
  if exitstatus != 0 then return exitstatus, message end

  # TODO: better to use a Ruby method (File.link with a rescue) than an OS
  #   command; an earlier begin-rescue attempt did not work
  # The -- ends option parsing so a path beginning with - is not taken as options
  ln_out = ` ln -- #{ ShellEscape( original_path ) } #{ ShellEscape( new_path ) } 2>&1 `
  return $?.exitstatus, ln_out
end


# Creates directory dir, and any missing parent directories, unless it
# already exists.
# Returns 0 and nil when dir already exists, otherwise the exit status and
# combined stdout and stderr of the OS mkdir command.
def EnsureDirExists( dir )
  # OS mkdir is used because Dir.mkdir does not create missing parent directories
  # Dir.exist? is used because Dir.exists? was removed in Ruby 3.2
  if Dir.exist?( dir ); return 0, nil end
  # The -- ends option parsing so a path beginning with - is not taken as options
  mkdir_out = ` mkdir -p -- #{ ShellEscape( dir ) } 2>&1 `
  return $?.exitstatus, mkdir_out
end


def GetChecksums( path )
  begin
    content = File.read( path )
  rescue => error_info
    $log.write( Log::ERROR, "Unable to read file contents" \
      + "\n  Path: #{ path }" \
      + "\n  #{ error_info }" \
    ) 
    return nil, nil
  end
  md5 = Digest::MD5.hexdigest( content )
  sha1 = Digest::SHA1.hexdigest( content )
  return md5, sha1
end


# Reads the next item from an open configuration file.
#
# Forms recognised:
#   key = value
#   key = [        array; following lines are items, ended by a line holding only ]
#   key = {        hash; following lines are "name => value", ended by a line holding only }
# A line ending with \ is continued on the next data line (the two are joined
# with nothing between them).  Empty lines, and any line containing a #, are
# ignored.
#
#   fd: the open file (or IO-like object responding to readline)
# Returns key, value and error_msg:
#   * item read: key is a String, value a String, Array or Hash, error_msg ""
#   * syntax error: key and value are nil, error_msg describes the error
#   * end of file: key and value are "", error_msg is "" unless the file ended
#     part way through a continuation, array or hash
#
# NOTE(review): a "key =" line with no value and no [ or { takes the next data
#   line as its value -- confirm whether that is intended
def GetConfigFileData( fd )
  # TODO: don't treat # in quoted values as a comment
  # TODO: allow comments after data

  # The line number comes from fd itself when possible so messages do not
  # depend on $INPUT_LINE_NUMBER, which is only set when the English library
  # has been loaded
  line_number = lambda { ( fd.respond_to?( :lineno ) ? fd.lineno : $. ).to_s }

  # Everything the error messages and the end of file handler read is
  # initialised here so it is always defined
  array = nil
  array_start_line = ''
  continuing = false
  data = ''
  getting_array = false
  getting_hash = false
  hash = nil
  hash_start_line = ''
  key = ''
  begin
    while (line = fd.readline)
      line.strip!

      # Ignore comments and empty lines
      if line.index( '#' ) != nil || line == ''; next end

      # This data line satisfies any continuation requested by the previous one
      continuing = false

      # Gather key and value
      data += line
      case data[ -1 ]
        when "\\"
          continuing = true
          data.slice!( -1, 1 )
          data.rstrip!
          next
        when "["
          if getting_array
            return nil, nil, \
              "\n  New array started before end of array started on line #{ array_start_line }"
          end
          if getting_hash
            return nil, nil, \
              "\n  Array started before end of hash started on line #{ hash_start_line }"
          end
          data.slice!( -1, 1 )
          data.rstrip!
          array_start_line = line_number.call
          if data[ -1 ] != "="
            return nil, nil, \
              "\n  Array start line not of format 'key = [' on line #{ array_start_line }"
          end
          getting_array = true
          array = Array.new
        when "]"
          if ! getting_array
            return nil, nil, \
              "\n  Array end on line #{ line_number.call } before array started"
          end
          data.slice!( -1, 1 )
          data.rstrip!
          if data != ''
            return nil, nil, \
              "\n  Data invalidly given before ] on line #{ line_number.call }"
          end
          return key, array, ""
        when "{"
          if getting_array
            return nil, nil, \
              "\n  Hash started before end of array started on line #{ array_start_line }"
          end
          if getting_hash
            return nil, nil, \
              "\n  New hash started before end of hash started on line #{ hash_start_line }"
          end
          data.slice!( -1, 1 )
          data.rstrip!
          hash_start_line = line_number.call
          if data[ -1 ] != "="
            return nil, nil, \
              "\n  Hash start line not of format 'key = {' on line #{ hash_start_line }"
          end
          getting_hash = true
          hash = Hash.new
        when "}"
          if ! getting_hash
            return nil, nil, \
              "\n  Hash end on line #{ line_number.call } before hash started"
          end
          data.slice!( -1, 1 )
          data.rstrip!
          if data != ''
            return nil, nil, \
              "\n  Data invalidly given before } on line #{ line_number.call }"
          end
          return key, hash, ""
      end
      if key == ''
        if ! data.include?( '=' )
          return nil, nil, "\n  No = in #{ data }"
        end
        key, data, rest = data.split( '=' )
        if rest != nil
          return nil, nil, "\n '=' not supported in value (" + data + rest + ')'
        end
        # An empty key would otherwise be mistaken by the caller for end of file
        if key == nil || key.strip == ''
          return nil, nil, "\n  No key before = on line #{ line_number.call }"
        end
        key.rstrip!
        if data == nil
          data = ""
        else
          data.lstrip!
        end
      end
      if data != nil && data != ''
        if getting_array
          array << data
          data = ""
        elsif getting_hash
          # TODO: would be nice to accept "key: value" too
          if ! data.include?( " => " )
            return nil, nil, \
              "\n  Hash key/value on line #{ line_number.call } does not have ' => '"
          end
          hash_key, hash_value = data.split( " => " )
          hash[ hash_key.rstrip ] = hash_value.to_s.lstrip
          data = ""
        else
          return key, data, ""
        end
      end
    end
  rescue EOFError
    # Report an item left unfinished at the end of the file
    error_msg = ''
    if continuing
      error_msg += "\n  End of file found when continuation line expected"
    end
    if getting_array
      error_msg += "\n  End of file found before end of array started on line #{ array_start_line }"
    end
    if getting_hash
      error_msg += "\n  End of file found before end of hash started on line #{ hash_start_line }"
    end
    return "", "", error_msg
  end
end


# Looks up the inode number and modification time of path.
# Returns the inode number and the modification time rounded to whole
# seconds, or nil and nil (after logging an error via $log) if path cannot
# be examined.
def GetInodeAndMtime( path )
  file_status = File.stat( path )
  return file_status.ino, file_status.mtime.round( 0 )
rescue => failure
  $log.write( Log::ERROR, "Unable to stat path\n  Path: #{ path }\n  #{ failure }" )
  return nil, nil
end


# Returns the MIME type of the file at path as reported by the OS file
# command, or "unknown" when the command's output does not look like a MIME
# type.
def GetMIMEtype( path )
  # The OS file command is used rather than one of the Ruby methods because
  # research suggested it is more dependable.
  # The file command does not set a non-zero return code when it is unable to
  # read the file so errors must be detected by examining the output.
  # Normal output patterns seen look like:
  #   text/x-c++; charset=us-ascii\n
  # or
  #   application/msword application/msword\n
  # When unable to read a file, error output seen looks like
  #   regular file, no read permission\n
  # Based on these output patterns, normality is detected by looking for a / in
  # the first word.
  # The validation regexp means:
  #   Starting at the beginning of the output
  #   One or more non-whitespace
  #   Followed by /
  #   Followed by one or more non-whitespace
  #   Optionally followed by a ;
  #   Optionally followed by a space and any number of any character
  #   Followed by the end of line
  # The regexp is anchored at the start so that the / must be in the first
  # word; otherwise a / in a later word, such as a path quoted in an error
  # message, would make the error's first word pass as a MIME type.
  file_out = ` file --brief --mime #{ ShellEscape( path  ) } 2>&1 `
  file_out.chomp!
  if file_out =~ /\A\S+\/\S+;?( .*)?$/
    # Get the first word, removing any trailing ;
    mime_type = file_out.sub( /;? .*$/m, "" )
  else
    mime_type = "unknown"
  end
  return mime_type
end


# Loads $parameters with the default value of every parameter, replacing
# any previous contents.  The log file name is built from the running
# script's name and the current time, and logging to file defaults to on
# only when stdout is not a terminal.
def InitialiseParameters
  default_log_name = "#{ File.basename( $0 ) }.#{ Time.now.strftime( '%y-%m-%d@%H:%M:%S' ) }.log"

  $parameters = {
    "ConfigFile" => "",
    "CollationRootDir" => "/srv/docoll/default/",
    "LeadingDirsToStrip" => [
      %r|/srv/rsync/|,
      %r|^[A-Z]/ |,
      %r|^home/[^/]+/|,
      %r|.*/All Users/Documents/|,
      %r|.*/My Documents/|,
      %r|^My Music/|,
      %r|^My Pictures/|
    ],
    "Database" => {
      :db_name => "",
      :host => "",
      :password => "",
      :port => 0,
      :user => ""
    },
    "ExtensionsToInclude" => [
      ".doc", ".docx",
      ".odp", ".ods", ".odt",
      ".pdf",
      ".pps", ".ppsx", ".ppt", ".pptx",
      ".rtf",
      ".txt",
      ".xls", ".xlsx"
    ],
    "LogLevel" => "INFO",
    "LogPath" => "/var/log/docoll/" + default_log_name,
    "LogToFile" => ( $stdout.tty? ? false : true ),
    "MinimumFileSize" => 256,
    "PathsToExclude" => [ %r|/Thumbs.db$| ],
    "SourceRootDirs" => [ "/srv/rsync/" ]
  }

  # Assigned last, as a statement, so the method's value is this empty array
  $parameters[ "SourcesToExclude" ] = [ ]
end


# Writes every entry of $parameters to $log at INFO level as a single
# message: a "Parameters:" heading followed by one "name: value" line each.
def LogParameters( )
  # TODO: would be nice to show whether the parameters came from defaults,
  #   configuration file or command line
  parameter_lines = $parameters.map { |name, value| "\n  #{ name }: #{ value }" }
  $log.write( Log::INFO, "Parameters:" + parameter_lines.join )
end


# Puts the values in $parameters into the form the rest of the program uses:
#   * directory and file paths are normalised with NormaliseDir/NormalisePath
#   * extensions given as strings are lower-cased in place
#   * leading directory and exclusion patterns given as strings become regexps
# Each array is judged by the class of its first element only: a String first
# element means the array came from the config file and needs converting.
def NormaliseParameters( )
  $parameters[ "CollationRootDir" ] = NormaliseDir( $parameters[ "CollationRootDir" ] )

  config_file = $parameters[ "ConfigFile" ]
  unless config_file == ''
    $parameters[ "ConfigFile" ] = NormalisePath( config_file )
  end

  extensions = $parameters[ "ExtensionsToInclude" ]
  if extensions[ 0 ].class == String
    extensions.each( &:downcase! )
  end

  leading_dirs = $parameters[ "LeadingDirsToStrip" ]
  if leading_dirs[ 0 ].class == String
    # Leading directories are matched case-insensitively
    leading_dirs.map! { |pattern| Regexp.new( pattern, Regexp::IGNORECASE ) }
  end

  exclusions = $parameters[ "PathsToExclude" ]
  if exclusions[ 0 ].class == String
    exclusions.map! { |pattern| Regexp.new( pattern ) }
  end

  $parameters[ "SourceRootDirs" ].map! { |source_dir| NormaliseDir( source_dir ) }
end


# Returns dir normalised by NormalisePath and ending with exactly one /.
def NormaliseDir( dir )
  normalised_dir = NormalisePath( dir )
  # The normalised path may already end with / (the root directory does), so
  # only add one when it is missing
  if ! normalised_dir.end_with?( "/" ); normalised_dir += "/" end
  return normalised_dir
end


# Returns path as an absolute path with every run of / reduced to a single /.
# The string passed in is not changed.
def NormalisePath( path )
  # gsub, not gsub!, so the caller's string is left alone and a frozen
  # string is accepted
  single_slashed_path = path.gsub( %r|/{2,}|, "/" )
  return File.absolute_path( single_slashed_path )
end


# Reads the configuration file at config_path and stores each recognised
# item in $parameters.
#   config_path: path of the configuration file
#   valid_keywords: the keywords allowed in the file, given either as a
#     single array or as separate arguments
# Returns "" when the file was read without error, otherwise a message
# listing the errors found.  Items with valid keywords that precede an error
# are still stored.
def ParseConfigFile( config_path, *valid_keywords )
  # TODO: change from valid_keywords to desired_keywords?
  # TODO: nice to pass name of variable to load with config data rather than assuming $parameters
  # TODO: support "=" in value

  begin
    fd = File.open( config_path, 'r' )
  rescue => error_info
    return "\n  Config file: #{ error_info }"
  end

  # Flattening accepts both ParseConfigFile( path, [ "a", "b" ] ) and
  # ParseConfigFile( path, "a", "b" )
  allowed_keywords = valid_keywords.flatten
  error_msg = ""
  begin
    while true
      key, value, get_error_msg = GetConfigFileData( fd )
      if get_error_msg != ''
        error_msg += get_error_msg
        break
      end
      # An empty key with no error message means end of file
      if key == ""; break end

      # Validate key
      # Line numbers come from fd.lineno so they do not depend on
      # $INPUT_LINE_NUMBER, which needs the English library to be loaded
      if allowed_keywords.include?( key )
        $parameters[ key ] = value
      else
        error_msg += "\n  Invalid keyword '#{ key }' on line number #{ fd.lineno }"
      end

      # Validate value
      if value == ""
        error_msg += "\n  No value on line number #{ fd.lineno }"
      end
    end
  ensure
    # Close the file even if reading it raised an exception
    fd.close
  end
  if error_msg != ''
    error_msg = "Configuration file (#{ config_path }) error(s):" + error_msg
  end
  return error_msg
end


# Returns str wrapped in single quotes so that the shell passes it on
# unchanged as one word.  Each single quote inside str is replaced by '"'"'
# (close the quoting, a double-quoted single quote, reopen the quoting).
def ShellEscape( str )
  quote = "'"
  quoted_body = str.gsub( quote ) { %q('"'"') }
  quote + quoted_body + quote
end


# Converts a date and time string into a Time.
# str is split at each -, : and whitespace character; the pieces are
# converted to integers and passed, in order, to Time.new.  The result is
# rounded to whole seconds.
def StrToTime( str )
  numeric_fields = str.split( /[-\s:]/ ).map( &:to_i )
  return Time.new( *numeric_fields ).round( 0 )
end

