#! /usr/bin/ruby -wEUTF-8:UTF-8

# Copyright (C) 2011 Charles Atkinson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

# Purpose: harmonises the docoll scripts' database with the collation
#   directory tree and the source directory tree(s) in the file system

require 'English'
require 'find'
require 'getoptlong'
require 'pg'

require './CollatedFile'
require './Log'
require './SourceFile'
require './docoll_db_lib'
require './docoll_lib'


# Method definitions
# ~~~~~~~~~~~~~~~~~~
# (in alphabetical order)

# Walks every path found under the collation root directory and reconciles
# it with the database:
#   * empty directories are removed from the file system
#   * paths excluded as "small", "symlink" or "not an included extension"
#     are removed from the file system
#   * every remaining path that has no collated path record in the DB gets
#     one, together with a collated file record when that is missing too
# A problem with an individual path is logged and the walk continues with
# the next path.
# Reads:   $parameters[ "CollationRootDir" ]
# Updates: $num_empty_dirs_removed_from_fs, $num_collated_files_removed_from_fs,
#          $num_collated_files_created, $num_collated_paths_created
def CheckFileSystemCollatedPathsAreInDB( )
  # TODO: any way to get length of an enumerable collection and so drop the next call?
  paths = GetCollationPathsFromTree( $parameters[ "CollationRootDir" ] )
  $log.write( Log::INFO,
    "Found #{ paths.length } paths in collation directory tree; removing" +
    " any of non-included and ensuring rest are in DB"
  )
  paths.each \
  do |path|
    path = File.absolute_path( path )

    # Check path is validly (UTF-8) encoded
    # TODO: make this a library call
    if ! path.valid_encoding?
      $log.write( Log::ERROR, "Invalid encoding in path name #{ path }. Manual renaming may fix this problem" )
      next
    end

    # Exclusions
    reason = GetAnyFileTypeExclusionReason( path )
    case reason
    when "none"
      # Not excluded: carry on to the database checks below
    when "directory"
      # Directories are never recorded in the DB; empty ones are removed
      if ( Dir.entries( path ) - %w{ . .. } ).empty?
        begin
          Dir.delete( path )
        rescue => error_info
          $log.write( Log::ERROR, "Unable to remove empty directory" +
            "\n  Path: #{ path }" +
            "\n  #{ error_info }"
          )
          next
        end
        $num_empty_dirs_removed_from_fs += 1
        $log.write( Log::INFO, "Removed empty directory: #{ path }" )
      end
      next
    when "not a file"
      $log.write( Log::ERROR, "Not a file: #{ path }" )
      next
    when "small", "symlink", "not an included extension"
      # Excluded files do not belong in the collation tree
      begin
        File.delete( path )
      rescue => error_info
        $log.write( Log::ERROR, "Unable to remove file" +
          "\n  Path: #{ path }" +
          "\n  #{ error_info }"
        )
        next
      end
      $num_collated_files_removed_from_fs += 1
      $log.write( Log::WARN, "Removed file (#{ reason }): #{ path }" )
      next
    else
      $log.write( Log::ERROR,
        "CheckFileSystemCollatedPathsAreInDB: programming error:" +
        " unexpected return '#{ reason }' from GetAnyFileTypeExclusionReason" +
        " for path #{ path }"
      )
      next
    end

    # Nothing more to do when the DB already knows this path
    next if IsCollatedPathInDB( path )
    $log.write( Log::INFO, "Collated path not in DB: " + path )

    # Instantiate a containing CollatedFile
    md5, sha1 = GetChecksums( path )
    begin
      collated_file = CollatedFile.new( md5, sha1, path )
    rescue => error_info
      # Report which path failed and why, as the other rescues in this method do
      $log.write( Log::ERROR, "CollatedFile.new failed" +
        "\n  Path: #{ path }" +
        "\n  #{ error_info }"
      )
      next
    end

    # Insert database records
    if ! collated_file.in_db
      $log.write( Log::INFO, "Inserting DB record for collated file:" +
        "\n  inode: #{ collated_file.inode } (Path: #{ path })"
      )
      collated_file.insert_into_db
      $num_collated_files_created += 1
    end
    $log.write( Log::INFO, "Inserting DB record for collated path:" +
      "\n  Path: #{ path }"
    )
    InsertCollatedPathIntoDB( collated_file.inode, path )
    $num_collated_paths_created += 1
  end
end


# Logs the end-of-run summary, closes the database connection (when one was
# opened) and the log, then terminates the process.
#   exitcode: the requested exit status; 0 is changed to 1 when the log has
#     counted any warnings or errors
#   msg: optional; only the first extra argument is used and it is logged
#     at INFO level before anything else
# Does not return: ends by calling exit.
def Finalise( exitcode, *msg )
  # TODO: might be nice to have a quiet option, eg for use with command line errors

  # Log any optional message
  $log.write( Log::INFO, msg.first ) unless msg.empty?

  # Close the database connection
  if $databaseConnected
    $log.write( Log::INFO, "Closing database connection" )
    $conn.close
  end

  # Final logging: what changed in the DB and in the collation tree
  db_summary = [
    "Database record changes:",
    "  Collated files created: #{ $num_collated_files_created }",
    "  Collated files deleted: #{ $num_collated_files_deleted }",
    "  Collated paths created: #{ $num_collated_paths_created }",
    "  Collated paths deleted: #{ $num_collated_paths_deleted }",
    "  Source files deleted: #{ $num_source_files_deleted }"
  ]
  $log.write( Log::INFO, db_summary.join( "\n" ) )
  tree_summary = [
    "Collation directory tree changes:",
    "  Files removed: #{ $num_collated_files_removed_from_fs }",
    "  Directories removed (empty): #{ $num_empty_dirs_removed_from_fs }"
  ]
  $log.write( Log::INFO, tree_summary.join( "\n" ) )

  # Warnings and errors turn a zero exitcode into 1
  warning_count = $log.n_warnings
  if warning_count > 0
    warning_text = \
      warning_count == 1 ? "There was one warning" : "There were #{ warning_count } warnings"
    $log.write( Log::WARN, warning_text )
    exitcode = 1 if exitcode == 0
  end
  error_count = $log.n_errors
  if error_count > 0
    error_text = \
      error_count == 1 ? "There was one error" : "There were #{ error_count } errors"
    $log.write( Log::ERROR, error_text )
    exitcode = 1 if exitcode == 0
  end
  $log.write( Log::INFO, "#{ File.basename( $0 ) }: exiting with exitcode #{ exitcode }" )
  $log.close

  # Bye!
  exit exitcode
end


# Returns an array holding one string per line printed by
# "find <dir> -depth", with the line endings removed.  With -depth, find
# lists a directory's contents before the directory itself.
#   dir: root of the directory tree to list
def GetCollationPathsFromTree( dir )
  # Use the OS find command because Ruby's Find.find does not have a depth
  # option which is necessary when deleting a file or directory results in
  # its parent directory becoming empty
  # NOTE(review): 2>&1 folds anything find writes to stderr into the output,
  # so find's error messages are returned as if they were paths -- confirm
  # that callers cope with that
  find_output = ` find #{ ShellEscape( dir ) } -depth 2>&1 `

  # chomp, not chomp!: chomp! returns nil when the line has no line ending
  # to remove, which would put a nil into the returned array
  return find_output.each_line.map { |line| line.chomp }
end


# Prepares everything the rest of the script needs, in this order: default
# parameters, config file, command line, logging, the globals Finalise
# reports, signal traps, parameter normalisation and checking, and finally
# the database connection and tables.
# Command line errors are logged, followed by the brief usage message and
# Finalise( 1 ).  Config file and parameter errors are logged at FATAL level.
# Sets: $log, $databaseConnected and the $num_* counters
def Initialise
  # Ignore the common signals until logging and the other initialisation
  # required by Finalise( ) is completed
  %w{ INT HUP QUIT TERM }.each { |signal_name| trap( signal_name ) { } }

  # Set default parameters
  InitialiseParameters( )

  # Parse any config file
  # Must do now so config file settings can be overridden by command line
  config_index = ARGV.index( "--config" )
  config_path = config_index.nil? ? nil : ARGV[ config_index + 1 ]
  config_file_error_msg = \
    config_path.nil? ? '' : ParseConfigFile( config_path, $parameters.keys )

  # Parse command line
  # Must do now in case "--help" is given or there are any logging options
  # Save the options and arguments because GetoptLong in ParseCommandLine will empty ARGV :-(
  opts_and_args = ARGV.join( ' ' )
  cmd_line_error_msg = ParseCommandLine( )

  # Set up logging: timestamped when logging to file, plain when to stdout
  now = Time.now.strftime( '%y-%m-%d@%H:%M:%S' )
  # TODO: pass path to $Log.new when it can accept
  # TODO: error trap the File.open (which would be better in the Log class anyway)
  timestamps = $parameters[ "LogToFile" ] ? true : false
  log_fd = timestamps ? File.open( $parameters[ "LogPath" ], 'w' ) : $stdout
  $log = Log.new( log_fd, $parameters[ "LogLevel" ], timestamps )

  # Initialisation required before Finalise can be called
  # (logging FATAL messages calls Finalise)
  $databaseConnected = false
  $num_collated_files_created = 0
  $num_collated_paths_created = 0
  $num_collated_files_deleted = 0
  $num_collated_paths_deleted = 0
  $num_source_files_deleted = 0
  $num_collated_files_removed_from_fs = 0
  $num_empty_dirs_removed_from_fs = 0

  # Report any command line or config errors
  unless cmd_line_error_msg == ''
    $log.write( Log::ERROR, cmd_line_error_msg )
    Usage( "not verbose" )
    Finalise( 1 )
  end
  $log.write( Log::FATAL, config_file_error_msg ) unless config_file_error_msg == ''

  # Set common traps: each signal finalises with its own exit code
  { "HUP" => 129, "INT" => 130, "QUIT" => 131, "TERM" => 143 }.each \
  do |signal_name, exitcode|
    trap( signal_name ) { Finalise( exitcode, "Received signal #{ signal_name }" ) }
  end

  # Log startup message
  $log.write( Log::INFO,
    "#{ File.basename( $0 ) } started at #{ now } by #{ $0 } #{ opts_and_args }"
  )

  # Normalise, log and check $parameters
  NormaliseParameters( )
  LogParameters( )
  error_msg = CheckParameters( )
  $log.write( Log::FATAL, "Parameter error(s):" + error_msg ) unless error_msg == ''

  # Initialisation for database actions
  # TODO: use at_exit to ensure connection dropped?
  ConnectToDB( )
  $databaseConnected = true
  CreateTables( )
end


# Parses the command line options in ARGV into $parameters.
# Returns '' when the command line is valid; otherwise a message that starts
# with "Command line error(s):" followed by one indented line per problem.
# "--help" prints the verbose usage message and exits with status 0.
# Sets (as given): $parameters[ "ConfigFile" ], [ "LogToFile" ], [ "LogPath" ]
#   and [ "LogLevel" ]
# GetoptLong removes the options and option arguments it parses from ARGV.
def ParseCommandLine( )
  # Options that require an argument are marked OPTIONAL_ARGUMENT so this
  #   script can handle missing arguments itself
  opts = GetoptLong.new(
    [ '--config', GetoptLong::OPTIONAL_ARGUMENT ],
    [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
    [ '--log', GetoptLong::OPTIONAL_ARGUMENT ],
    [ '--loglevel', GetoptLong::OPTIONAL_ARGUMENT ]
  )
  # Errors are reported via the returned message, so stop GetoptLong
  # printing its own copy to stderr
  opts.quiet = true

  log_levels = {
    'D' => Log::DEBUG,
    'I' => Log::INFO,
    'W' => Log::WARN,
    'E' => Log::ERROR,
    'F' => Log::FATAL
  }

  # Option arguments that may later be changed are duplicated
  # (strings in ARGV[ ] are frozen)
  error_msg = ''
  parsing_completed = true
  begin
    opts.each \
    do |opt, arg|
      case opt
      when "--config"
        if arg != ''
          $parameters[ "ConfigFile" ] = arg.dup
        else
          error_msg += "\n  '--config' argument missing"
        end
      when "--help"
        Usage( "verbose" )
        exit( 0 )
      when "--log"
        $parameters[ "LogToFile" ] = true
        if arg != ''
          $parameters[ "LogPath" ] = arg.dup
        end
      when "--loglevel"
        if arg == ''
          error_msg += "\n  '--loglevel' argument missing"
        elsif log_levels.key?( arg )
          $parameters[ "LogLevel" ] = log_levels[ arg ]
        else
          error_msg += "\n  Invalid '--loglevel' argument: '#{ arg }'"
        end
      else
        error_msg += "\n  Invalid option, '#{ opt }'"
      end
    end
  rescue GetoptLong::Error => error_info
    # GetoptLong raises for unrecognised, ambiguous or malformed options;
    # report that like any other command line error instead of crashing
    error_msg += "\n  #{ error_info.message }"
    parsing_completed = false
  end

  # After an aborted parse ARGV still holds whatever was not examined, so
  # the trailing argument check would be misleading
  if parsing_completed && ARGV.length != 0
    error_msg += "\n  Invalid argument(s) after options and their arguments: '#{ ARGV.join( ' ' ) }'"
  end
  if error_msg != ''
    error_msg = "Command line error(s):" + error_msg
  end
  return error_msg
end
  

# Writes the usage message to the log at INFO level.
#   verbosity: "verbose" adds a description of every option after the
#     one-line synopsis; any other value gives the synopsis only
# When $log has not been set up yet, this creates a Log on $stdout without
# timestamps, at $parameters[ "LogLevel" ].
def Usage( verbosity )
  # If logging not set up, set up default logging
  # This is required when "--help" is given on the command line
  if $log.nil?
    $log = Log.new( $stdout, $parameters[ "LogLevel" ], false )
  end

  # Display usage
  $log.write( Log::INFO, "Usage: #{File.basename( $0 )} " +
    "[--config config_file] [--help] [--log [log_file]] [--loglevel level]"
  )
  if verbosity == "verbose"
    option_help = [
      "  --config: names the configuration file",
      "  --help: print this help message and exit",
      "  --log",
      "    log_file given: specify the log_file",
      "    log_file not given: log to the default log file",
      "  --loglevel: set lowest log level messages to log.  In order:",
      "    D for debug",
      "    I for information",
      "    W for warning",
      "    E for error",
      "    F for fatal"
    ]
    $log.write( Log::INFO, option_help.join( "\n" ) )
  end
end


# Execute
# ~~~~~~~
# Set up parameters, logging and the database connection
Initialise( )

# File system to DB: tidy the collation tree and record any unrecorded paths
CheckFileSystemCollatedPathsAreInDB( )

# DB to file system: drop the records of collated paths that no longer exist
inodes = LookupAllCollatedFileInodes( )
$log.write( Log::INFO,
  "Found #{ inodes.length } inodes in DB; deleting any of their paths that do not exist"
)
inodes.each \
do |inode|
  # Presumably the number of paths the DB still holds for this inode --
  # TODO confirm against DeleteCollatedFilePathsFromDbIfGone
  next unless DeleteCollatedFilePathsFromDbIfGone( inode ) == 0
  $log.write( Log::INFO, "No paths left for inode #{ inode }; deleting file from DB" )
  DeleteCollatedFileFromDB( inode )
  $num_collated_files_deleted += 1
end
LookupSourcePaths( ) # Deletes DB records if paths do not exist

# Log the summary and exit
Finalise( 0 )
