# Database methods library for the collation system

# Copyright (C) 2011 Charles Atkinson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

# Purpose: Ruby code library for the docoll scripts' PostgreSQL interface

require './Log'

# Initialise table-name constants
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Names of the PostgreSQL tables used by the methods below.  The strings
# are frozen so that they cannot be modified in place by accident.
DB_SOURCE_FILES_TBL = "source_files".freeze
DB_COLLATED_FILES_TBL = "collated_files".freeze
DB_COLLATED_PATHS_TBL = "collated_paths".freeze


# Methods
# ~~~~~~~
# (in alphabetical order)

# Open the connection to the PostgreSQL database and keep it in $conn.
#
# The connection details (host, port, db_name, user, password) are read
# from the "Database" section of $parameters.  An INFO message is logged
# once connected; if PGconn.connect raises PGError, a FATAL message
# carrying the error text is logged instead.
def ConnectToDB
  db_settings = $parameters["Database"]
  db_name = db_settings["db_name"]
  begin
    # The two empty strings fill the third and fourth positional
    # arguments of PGconn.connect, exactly as before.
    $conn = PGconn.connect(
      db_settings["host"],
      db_settings["port"],
      "",
      "",
      db_name,
      db_settings["user"],
      db_settings["password"]
    )
    $log.write(Log::INFO, "Connected to database #{db_name}")
  rescue PGError => failure
    $log.write(Log::FATAL, "Failed to connect to database #{db_name}\n  #{ failure }")
  end
end


# Create a database table by executing the given SQL on $conn.
#
# tableName - name of the table; used only in the log messages
# sql       - the complete "create table" statement to execute
#
# Logs an INFO message when the statement succeeds.  If $conn.exec raises
# PGError, a FATAL message (with the SQL and the error text) is logged
# instead and no success message is written.
def CreateTable( tableName, sql )
  # TODO: the $conn.exec call outputs to stderr; any way to prevent that?
  #   It would upset cron and anyway all output should be logged.
  #   The StringIO attempt commented out below did not work.
  #   Similar issue as Ruby run-time errors going to stderr.
  #stderr_orig = $stderr.dup
  #buffer = ''
  #$stderr = StringIO.open(buffer, "w")
  begin
    res = $conn.exec( sql )
    # Success reporting and result clean-up stay inside the protected
    # section: after a failed statement there is no result to clear and
    # nothing to report as created, even if the FATAL log call returns.
    $log.write( Log::INFO, "Created table '#{ tableName }'" )
    res.clear
  rescue PGError => errmsg
    $log.write( Log::FATAL, "Failed to create table #{ tableName }" +
       "\n  SQL: #{ sql }\n  #{ errmsg }"
    )
  end
end


# Make sure the three tables used by the collation system exist.
#
# For each table in turn (source files, collated files, collated paths)
# ExistsTable? is consulted and CreateTable is called with the matching
# "create table" statement only when the table is missing.
def CreateTables
  table_definitions = [
    [
      DB_SOURCE_FILES_TBL,
      "create table #{ DB_SOURCE_FILES_TBL } (
        inode int NOT NULL,
        md5 char( 32 ) NOT NULL,
        mime_type text NOT NULL,
        mtime timestamp NOT NULL,
        path text UNIQUE,
        sha1 char( 40 ) NOT NULL
      );"
    ],
    [
      DB_COLLATED_FILES_TBL,
      "create table #{ DB_COLLATED_FILES_TBL } (
        inode int NOT NULL,
        md5 char( 32 ) NOT NULL,
        mime_type text NOT NULL,
        mtime timestamp NOT NULL,
        sha1 char( 40 ) NOT NULL,
        UNIQUE ( md5, sha1 )
      );"
    ],
    [
      DB_COLLATED_PATHS_TBL,
      "create table #{ DB_COLLATED_PATHS_TBL } (
        inode int NOT NULL,
        path text UNIQUE
      );"
    ]
  ]

  # The value of the final step (nil when the last table already exists,
  # otherwise whatever CreateTable returned) is the method's result.
  outcome = nil
  table_definitions.each do |table_name, ddl|
    outcome = ExistsTable?(table_name) ? nil : CreateTable(table_name, ddl)
  end
  outcome
end


# Delete the collated-file record with the given inode.
#
# inode - inode number identifying the record in DB_COLLATED_FILES_TBL
#
# The DELETE runs between BEGIN and COMMIT.  An INFO message is logged
# only after the COMMIT has succeeded; if any statement raises PGError, a
# FATAL message with the SQL and the error text is logged instead.
def DeleteCollatedFileFromDB( inode )
  $log.write( Log::DEBUG, "Deleting collated file with inode #{ inode } from the database" )
  sql = "DELETE FROM #{ DB_COLLATED_FILES_TBL }
    WHERE inode = '#{ inode }';"
  begin
    # Each result is released as soon as its statement has completed;
    # none of them carries data that is needed here.
    $conn.exec( "BEGIN;" ).clear
    $conn.exec( sql ).clear
    $conn.exec( "COMMIT;" ).clear

    # Reported here rather than after the rescue so that a failed delete
    # is never announced as done, even if the FATAL log call returns.
    $log.write( Log::INFO, "Deleted collated file from DB: inode #{ inode }" )
  rescue PGError => errmsg
    $log.write( Log::FATAL,
      "Failed to delete collated file from table #{ DB_COLLATED_FILES_TBL }" +
        "\n  SQL: #{ sql }\n  #{ errmsg }"
    )
  end
end


# Delete the collated-path record for the given path.
#
# path - the path whose record is removed from DB_COLLATED_PATHS_TBL
#
# The DELETE runs between BEGIN and COMMIT.  Once the COMMIT has
# succeeded an INFO message is logged and $num_collated_paths_deleted is
# incremented; if any statement raises PGError, a FATAL message with the
# SQL and the error text is logged instead and the counter is untouched.
def DeleteCollatedPathFromDB( path )
  $log.write( Log::DEBUG, "Deleting collated path from the database." +
    "\n  path: #{ path }" )
  # NOTE(review): only apostrophes are escaped here; whether backslashes
  # in a path also need escaping depends on the server's string-literal
  # settings -- confirm.
  escaped_path = path.gsub( /'/,'\'\'' ) # escape the ' chars for SQL
  sql = "DELETE FROM #{ DB_COLLATED_PATHS_TBL }
    WHERE path = '#{ escaped_path }';"
  begin
    # Each result is released as soon as its statement has completed
    $conn.exec( "BEGIN;" ).clear
    $conn.exec( sql ).clear
    $conn.exec( "COMMIT;" ).clear

    # Success is recorded here rather than after the rescue so that a
    # failed delete is neither announced nor counted, even if the FATAL
    # log call returns.
    $log.write( Log::INFO, "Deleted collated path from DB: #{ path }" )
    $num_collated_paths_deleted += 1
  rescue PGError => errmsg
    $log.write( Log::FATAL,
      "Failed to delete collated path from table #{ DB_COLLATED_PATHS_TBL }" +
        "\n  SQL: #{ sql }\n  #{ errmsg }"
    )
  end
end


# Remove from the database every collated path recorded for an inode
# that no longer exists in the file system.
#
# inode - inode number whose paths are looked up in DB_COLLATED_PATHS_TBL
#
# Returns the number of recorded paths that still exist.
#
# A PGError from the lookup is logged as FATAL.
# NOTE(review): the code after the rescue needs a result object, so it
# assumes the FATAL log call does not return -- confirm in Log.
def DeleteCollatedFilePathsFromDbIfGone( inode )
  sql = "SELECT path
    FROM #{DB_COLLATED_PATHS_TBL}
    WHERE inode = '#{ inode }';"
  begin
    res = $conn.exec( sql )
  rescue PGError => errmsg
    $log.write( Log::FATAL,
      "Failed to lookup paths from table #{DB_COLLATED_PATHS_TBL}" +
        "\n  SQL: #{ sql }\n  #{ errmsg }"
    )
  end
  paths_left = 0
  res.each do |row|
    # TODO: more efficient to do a query than an exec? (would not need to lookup data by name string)
    path = row[ "path" ].to_s
    # File.exist? is used because File.exists? is no longer available in
    # current Ruby versions.
    if File.exist?( path )
      paths_left += 1
    else
      $log.write( Log::INFO, "Deleting path from DB (does not exist): #{ path }" )
      DeleteCollatedPathFromDB( path )
    end
  end
  # Release the result now that every row has been examined
  res.clear
  return paths_left
end


# Remove the source-file record for the given path.
#
# path - path of the source file whose record is deleted from
#        DB_SOURCE_FILES_TBL
#
# The path and the generated SQL are logged at DEBUG level.  The DELETE
# is run between BEGIN and COMMIT; a PGError from any of the three
# statements is logged as FATAL together with the SQL and the error text.
def DeleteSourceFileFromDB(path)
  $log.write(Log::DEBUG, "DeleteSourceFileFromDB: #{ path }")
  # Double every apostrophe so the path can sit inside an SQL literal
  quoted_path = path.gsub(/'/, '\'\'')
  sql = "DELETE FROM #{ DB_SOURCE_FILES_TBL }
    WHERE path = '#{ quoted_path }';"
  $log.write(Log::DEBUG, "DeleteSourceFileFromDB: SQL is:\n  #{ sql }")
  begin
    # None of the results carries data needed here, so each one is
    # released straight after its statement has run.
    $conn.exec("BEGIN;").clear
    $conn.exec(sql).clear
    $conn.exec("COMMIT;").clear
  rescue PGError => failure
    $log.write(
      Log::FATAL,
      "Failed to delete source file record from table #{ DB_SOURCE_FILES_TBL }" +
        "\n  SQL: #{ sql }\n  #{ failure }"
    )
  end
end


# Report whether a table with the given name is listed in pg_tables.
#
# tablename - the table name to look for
#
# Returns true when a row of pg_tables has that "tablename", else false.
def ExistsTable?(tablename)
  listing = $conn.exec("select * from pg_tables;")
  is_present = false
  listing.each do |entry|
    is_present = (entry["tablename"] == tablename)
    # No need to look at the remaining rows once a match is found
    break if is_present
  end
  listing.clear
  is_present
end


# Add a record to DB_COLLATED_FILES_TBL.
#
# inode, md5, mime_type, mtime, sha1 - the values stored in the columns
#   of the same names; each is interpolated into the SQL as a quoted
#   literal
#
# The INSERT is run between BEGIN and COMMIT.  A PGError from any of the
# three statements is logged as FATAL with the SQL and the error text.
def InsertCollatedFileIntoDB(inode, md5, mime_type, mtime, sha1)
  $log.write(Log::DEBUG, "Inserting collated file with inode #{ inode } into the database")
  sql = "INSERT INTO #{ DB_COLLATED_FILES_TBL }
    ( inode, md5, mime_type, mtime, sha1 )
    VALUES ( '#{ inode }', '#{ md5 }', '#{ mime_type }', '#{ mtime }', '#{ sha1 }' );"
  begin
    # The results hold nothing of interest; release each immediately
    $conn.exec("BEGIN;").clear
    $conn.exec(sql).clear
    $conn.exec("COMMIT;").clear
  rescue PGError => failure
    $log.write(
      Log::FATAL,
      "Failed to insert collated file into table #{ DB_COLLATED_FILES_TBL }" +
        "\n  SQL: #{ sql }\n  #{ failure }"
    )
  end
end


# Add a record to DB_COLLATED_PATHS_TBL linking a path to an inode.
#
# inode - inode number stored in the "inode" column
# path  - path stored in the "path" column
#
# The INSERT is run between BEGIN and COMMIT.  A PGError from any of the
# three statements is logged as FATAL with the SQL and the error text.
def InsertCollatedPathIntoDB(inode, path)
  # Double every apostrophe so the path can sit inside an SQL literal
  quoted_path = path.gsub(/'/, '\'\'')
  sql = "INSERT INTO #{ DB_COLLATED_PATHS_TBL }
    ( inode, path )
    VALUES ( '#{ inode }', '#{ quoted_path }' );
  "
  begin
    # The results hold nothing of interest; release each immediately
    $conn.exec("BEGIN;").clear
    $conn.exec(sql).clear
    $conn.exec("COMMIT;").clear
  rescue PGError => failure
    $log.write(
      Log::FATAL,
      "Failed to insert collated path into table #{ DB_COLLATED_PATHS_TBL }" +
        "\n  SQL: #{ sql }\n  #{ failure }"
    )
  end
end


# Add a record to DB_SOURCE_FILES_TBL.
#
# inode, md5, mime_type, mtime, path, sha1 - the values stored in the
#   columns of the same names; apostrophes in path are doubled before it
#   is placed in the SQL
#
# The INSERT runs between BEGIN and COMMIT.  $num_source_files_created is
# incremented only after the COMMIT has succeeded; if any statement
# raises PGError, a FATAL message with the SQL and the error text is
# logged instead and the counter is left unchanged.
def InsertSourceFileIntoDB( inode, md5, mime_type, mtime, path, sha1 )
  $log.write( Log::DEBUG,
    "Inserting source file record into the database for #{ path }"
  )
  escaped_path = path.gsub( /'/,'\'\'' ) # escape the ' chars for SQL
  sql = "INSERT INTO #{ DB_SOURCE_FILES_TBL }
    ( inode, md5, mime_type, mtime, path, sha1 )
    VALUES ( '#{ inode }', '#{ md5 }', '#{ mime_type }', '#{ mtime }', 
      '#{ escaped_path }', '#{ sha1 }' );
  "
  begin
    # Each result is released as soon as its statement has completed
    $conn.exec( "BEGIN;" ).clear
    $conn.exec( sql ).clear
    $conn.exec( "COMMIT;" ).clear

    # Counted here rather than after the rescue so that a failed insert
    # is not counted as created, even if the FATAL log call returns.
    $num_source_files_created += 1
  rescue PGError => errmsg
    $log.write( Log::FATAL,
      "Failed to insert source file into table #{ DB_SOURCE_FILES_TBL }" +
        "\n  SQL: #{ sql }\n  #{ errmsg }"
    )
  end
end


# Report whether a path is recorded in DB_COLLATED_PATHS_TBL.
#
# path - the path to look for; apostrophes are doubled before it is
#        placed in the SQL
#
# Returns true when exactly one record matches and false when none does.
# Any other number of matches is logged as FATAL and false is returned.
#
# A PGError from the lookup is logged as FATAL.
# NOTE(review): the code after the rescue needs a result object, so it
# assumes the FATAL log call does not return -- confirm in Log.
def IsCollatedPathInDB( path )
  escaped_path = path.gsub( /'/,'\'\'' ) # escape the ' chars for SQL
  # TODO: no need to get path from DB, just want to know if matching record exists
  sql = "SELECT path
    FROM #{DB_COLLATED_PATHS_TBL}
    WHERE path = '#{escaped_path}';"
  begin
    res = $conn.exec( sql )
  rescue PGError => errmsg
    $log.write( Log::FATAL,
      "Failed to lookup collated paths from table #{DB_COLLATED_PATHS_TBL}" +
        "\n  SQL: #{ sql }\n  #{ errmsg }"
    )
  end
  # Only the row count is needed, so the result can be released at once
  tuples = res.num_tuples
  res.clear

  case tuples
  when 0
    return false
  when 1
    return true
  else
    # The message names the table that was actually queried
    $log.write( Log::FATAL, "Invalid number of tuples returned when looking up collated path" +
      " by path from table #{DB_COLLATED_PATHS_TBL}: #{ tuples }" )
    return false
  end
end


# Fetch the inode of every record in DB_COLLATED_FILES_TBL.
#
# Returns an Array holding each inode value converted to a String, in the
# order the rows are yielded by the result.
#
# A PGError from the lookup is logged as FATAL.
# NOTE(review): the code after the rescue needs a result object, so it
# assumes the FATAL log call does not return -- confirm in Log.
def LookupAllCollatedFileInodes( )
  sql = "SELECT inode
    FROM #{DB_COLLATED_FILES_TBL};"
  begin
    res = $conn.exec( sql )
  rescue PGError => errmsg
    $log.write( Log::FATAL,
      "Failed to lookup collated file inodes from table #{DB_COLLATED_FILES_TBL}" +
        "\n  SQL: #{ sql }\n  #{ errmsg }"
    )
  end

  inodes = []
  res.each do |row|
    # TODO: more efficient to do a query than an exec? (would not need to lookup data by name string)
    # Appending in place avoids building a new array for every row
    inodes << row[ "inode" ].to_s
  end
  # Release the result now that the values have been copied out
  res.clear
  return inodes
end


# Find the collated-file record with the given pair of checksums.
#
# md5, sha1 - the checksums to match against DB_COLLATED_FILES_TBL
#
# Returns [ false, nil, nil, nil ] when no record matches and
# [ true, inode, mime_type, mtime ] when exactly one does; inode is
# converted with to_i and mtime is passed through StrToTime.  Any other
# number of matches is logged as FATAL.  The SQL is logged at DEBUG
# level, and a PGError from the lookup is logged as FATAL.
def LookupCollatedFileByChecksums(md5, sha1)
  sql = "SELECT inode, mime_type, mtime
    FROM #{ DB_COLLATED_FILES_TBL }
    WHERE md5 = '#{ md5 }' and sha1 = '#{ sha1 }';"
  $log.write(Log::DEBUG, "LookupCollatedFileByChecksums: SQL is:\n  #{ sql }")
  matches = nil
  begin
    matches = $conn.exec(sql)
  rescue PGError => failure
    $log.write(
      Log::FATAL,
      "Failed to lookup collated file by checksums from table #{DB_COLLATED_FILES_TBL}" +
        "\n  SQL: #{ sql }\n  #{ failure }"
    )
  end
  match_count = matches.num_tuples

  # Nothing found
  if match_count == 0
    matches.clear
    return [false, nil, nil, nil]
  end

  # More than one record for the same pair of checksums
  unless match_count == 1
    return $log.write(
      Log::FATAL,
      "Invalid number of tuples returned when looking up collated file" +
        " by checksums from table #{DB_COLLATED_FILES_TBL}: #{matches.num_tuples}"
    )
  end

  # Exactly one record: columns follow the SELECT list
  found_inode = matches.getvalue(0, 0).to_i
  found_mime_type = matches.getvalue(0, 1)
  found_mtime = StrToTime(matches.getvalue(0, 2))
  matches.clear
  [true, found_inode, found_mime_type, found_mtime]
end


# Find the collated-file record with the given inode.
#
# inode - inode number to match against DB_COLLATED_FILES_TBL
#
# Returns [ false, nil, nil, nil, nil ] when no record matches and
# [ true, mime_type, mtime, md5, sha1 ] when exactly one does; mtime is
# passed through StrToTime.  Any other number of matches is logged as
# FATAL.
#
# A PGError from the lookup is logged as FATAL.
# NOTE(review): the code after the rescue needs a result object, so it
# assumes the FATAL log call does not return -- confirm in Log.
def LookupCollatedFileByInode( inode )
  sql = "SELECT md5, mime_type, mtime, sha1
    FROM #{DB_COLLATED_FILES_TBL}
    WHERE inode = '#{inode}';"
  #$log.write( Log::DEBUG, "LookupCollatedFileByInode: SQL is:\n  #{ sql }" )
  begin
    res = $conn.exec( sql )
  rescue PGError => errmsg
    $log.write( Log::FATAL,
      "Failed to lookup collated file by inode from table #{DB_COLLATED_FILES_TBL}" +
        "\n  SQL: #{ sql }\n  #{ errmsg }"
    )
  end
  tuples = res.num_tuples

  case tuples
  when 0
    res.clear
    return [ false, nil, nil, nil, nil ]
  when 1
    # Column numbers follow the SELECT list: md5, mime_type, mtime, sha1.
    # The query returns no path column, so sha1 is column 3.
    md5       = res.getvalue( 0, 0 )
    mime_type = res.getvalue( 0, 1 )
    mtime     = StrToTime( res.getvalue( 0, 2 ) )
    sha1      = res.getvalue( 0, 3 )
    res.clear
    return [ true, mime_type, mtime, md5, sha1 ]
  else
    # Release the result before reporting; the count is already in hand
    res.clear
    $log.write( Log::FATAL, "Invalid number of tuples returned when looking up collated file" +
      " by inode from table #{DB_COLLATED_FILES_TBL}: #{ tuples }" )
  end
end


# Find one existing path recorded for the given inode.
#
# inode - inode number to match against DB_COLLATED_PATHS_TBL
#
# The recorded paths are examined in result order.  Each one that does
# not exist in the file system is removed with DeleteCollatedPathFromDB
# and the removal is logged; the first one that does exist is returned.
# Returns nil when none of the recorded paths exists.
#
# A PGError from the lookup is logged as FATAL.
# NOTE(review): the code after the rescue needs a result object, so it
# assumes the FATAL log call does not return -- confirm in Log.
def LookupCollatedPathByInode( inode )
  sql = "SELECT path
    FROM #{DB_COLLATED_PATHS_TBL}
    WHERE inode = '#{inode}';"
  begin
    res = $conn.exec( sql )
  rescue PGError => errmsg
    $log.write( Log::FATAL,
      "Failed to lookup collated paths by inode from table #{DB_COLLATED_PATHS_TBL}" +
        "\n  SQL: #{ sql }\n  #{ errmsg }"
    )
  end
  # Copy the paths out and release the result straight away, so that it
  # is cleared whichever way the method returns.
  candidates = []
  res.each do |row|
    # TODO: more efficient to do a query than an exec? (would not need to lookup data by name string)
    candidates << row[ "path" ].to_s
  end
  res.clear

  candidates.each do |path|
    # File.exist? is used because File.exists? is no longer available in
    # current Ruby versions.
    if File.exist?( path )
      $log.write( Log::DEBUG, "LookupCollatedPathByInode: returning #{ path }" )
      return path
    end
    DeleteCollatedPathFromDB( path )
    $log.write( Log::INFO, "Deleted from DB (does not exist): #{ path }" )
  end
  $log.write( Log::DEBUG, "LookupCollatedPathByInode: returning nil" )
  return nil
end


# List the existing paths recorded for the given inode.
#
# inode - inode number to match against DB_COLLATED_PATHS_TBL
#
# Every recorded path that does not exist in the file system is removed
# with DeleteCollatedPathFromDB and the removal is logged.
#
# Returns an Array of the recorded paths that do exist, in result order.
#
# A PGError from the lookup is logged as FATAL.
# NOTE(review): the code after the rescue needs a result object, so it
# assumes the FATAL log call does not return -- confirm in Log.
def LookupCollatedPathsByInode( inode )
  sql = "SELECT path
    FROM #{DB_COLLATED_PATHS_TBL}
    WHERE inode = '#{inode}';"
  begin
    res = $conn.exec( sql )
  rescue PGError => errmsg
    $log.write( Log::FATAL,
      "Failed to lookup collated paths by inode from table #{DB_COLLATED_PATHS_TBL}" +
        "\n  SQL: #{ sql }\n  #{ errmsg }"
    )
  end
  paths = []
  res.each do |row|
    # TODO: more efficient to do a query than an exec? (would not need to lookup data by name string)
    path = row[ "path" ].to_s
    # File.exist? is used because File.exists? is no longer available in
    # current Ruby versions.
    if File.exist?( path )
      # Appending in place avoids building a new array for every row
      paths << path
    else
      DeleteCollatedPathFromDB( path )
      $log.write( Log::INFO, "Deleted from DB (does not exist): #{ path }" )
    end
  end
  # Release the result now that every row has been examined
  res.clear
  return paths
end


# Find the source-file record with the given path.
#
# path - the path to match against DB_SOURCE_FILES_TBL; apostrophes are
#        doubled before it is placed in the SQL
#
# Returns [ false, nil, nil, nil, nil ] when no record matches and
# [ true, inode, md5, mime_type, mtime, sha1 ] when exactly one does;
# inode is converted with to_i and mtime is passed through StrToTime.
# Any other number of matches is logged as FATAL.
#
# A PGError from the lookup is logged as FATAL.
# NOTE(review): the code after the rescue needs a result object, so it
# assumes the FATAL log call does not return -- confirm in Log.
def LookupSourceFileByPath( path )
  escaped_path = path.gsub( /'/,'\'\'' ) # escape the ' chars for SQL
  sql = "SELECT inode, md5, mime_type, mtime, sha1
    FROM #{ DB_SOURCE_FILES_TBL }
    WHERE path = '#{ escaped_path }';"
  begin
    res = $conn.exec( sql )
  rescue PGError => errmsg
    $log.write( Log::FATAL,
      "Failed to lookup source file by path from table #{ DB_SOURCE_FILES_TBL }" +
        "\n  SQL: #{ sql }\n  #{ errmsg }"
    )
  end
  tuples = res.num_tuples

  case tuples
  when 0
    res.clear
    return [ false, nil, nil, nil, nil ]
  when 1
    # Column numbers follow the SELECT list
    inode     = res.getvalue( 0, 0 ).to_i
    md5       = res.getvalue( 0, 1 )
    mime_type = res.getvalue( 0, 2 )
    mtime     = StrToTime( res.getvalue( 0, 3 ) )
    sha1      = res.getvalue( 0, 4 )
    res.clear
    return [ true, inode, md5, mime_type, mtime, sha1 ]
  else
    # Release the result before reporting; the count is already in hand
    res.clear
    $log.write( Log::FATAL, "Invalid number of tuples returned when looking up source file" +
      " by path from table #{ DB_SOURCE_FILES_TBL }: #{ tuples }" )
  end
end


# List the paths of the source files recorded in DB_SOURCE_FILES_TBL
# that still exist in the file system.
#
# Every recorded path that does not exist is removed with
# DeleteSourceFileFromDB, counted in $num_source_files_deleted and
# logged.  The number of records found is logged first.
#
# Returns an Array of the recorded paths that do exist, in result order.
#
# A PGError from the lookup is logged as FATAL.
# NOTE(review): the code after the rescue needs a result object, so it
# assumes the FATAL log call does not return -- confirm in Log.
def LookupSourcePaths( )
  sql = "SELECT path
    FROM #{DB_SOURCE_FILES_TBL};"
  begin
    res = $conn.exec( sql )
  rescue PGError => errmsg
    # The message names the table that was actually queried
    $log.write( Log::FATAL,
      "Failed to lookup source paths from table #{DB_SOURCE_FILES_TBL}" +
        "\n  SQL: #{ sql }\n  #{ errmsg }"
    )
  end

  $log.write( Log::INFO,
    "Found #{ res.num_tuples } source files in DB;" +
    " deleting any that do not exist in source directory tree(s)"
  )
  paths = []
  res.each do |row|
    # TODO: more efficient to do a query than an exec? (would not need to lookup data by name string)
    path = row[ "path" ].to_s
    # File.exist? is used because File.exists? is no longer available in
    # current Ruby versions.
    if File.exist?( path )
      # Appending in place avoids building a new array for every row
      paths << path
    else
      DeleteSourceFileFromDB( path )
      $num_source_files_deleted += 1
      $log.write( Log::INFO, "Deleted from DB (file does not exist): #{ path }" )
    end
  end
  # Release the result now that every row has been examined
  res.clear
  return paths
end


# Overwrite the details held for a collated file.
#
# inode                       - selects the record(s) of
#                               DB_COLLATED_FILES_TBL to update
# md5, mime_type, mtime, sha1 - new values for the columns of the same
#                               names
#
# A single UPDATE statement is executed and its result released.  A
# PGError from the statement is logged as FATAL with the SQL and the
# error text.
def UpdateCollatedFileInDB(inode, md5, mime_type, mtime, sha1)
  $log.write(Log::DEBUG, "Updating collated file record for inode #{ inode }")
  sql = "UPDATE #{ DB_COLLATED_FILES_TBL }
    SET md5 = '#{ md5 }', mime_type = '#{ mime_type }', mtime = '#{ mtime }', 
      sha1 = '#{ sha1 }'
    WHERE inode = '#{ inode }';
  "
  outcome = nil
  begin
    outcome = $conn.exec(sql)
  rescue PGError => failure
    $log.write(
      Log::FATAL,
      "Failed to update collated file in table #{ DB_COLLATED_FILES_TBL }" +
        "\n  SQL: #{ sql }\n  #{ failure }"
    )
  end
  outcome.clear
end


# Overwrite the details held for a source file.
#
# inode                             - selects the record(s) of
#                                     DB_SOURCE_FILES_TBL to update
# md5, mime_type, mtime, path, sha1 - new values for the columns of the
#                                     same names; apostrophes in path
#                                     are doubled before it is placed in
#                                     the SQL
#
# A single UPDATE statement is executed and its result released.  A
# PGError from the statement is logged as FATAL with the SQL and the
# error text.
def UpdateSourceFileInDB(inode, md5, mime_type, mtime, path, sha1)
  $log.write(Log::DEBUG, "Updating source file record in the database for #{ path }")
  # Double every apostrophe so the path can sit inside an SQL literal
  quoted_path = path.gsub(/'/, '\'\'')
  sql = "UPDATE #{ DB_SOURCE_FILES_TBL }
    SET md5 = '#{ md5 }', mime_type = '#{ mime_type }', mtime = '#{ mtime }', 
      path = '#{ quoted_path }', sha1 = '#{ sha1 }'
    WHERE inode = '#{ inode }';
  "
  outcome = nil
  begin
    outcome = $conn.exec(sql)
  rescue PGError => failure
    $log.write(
      Log::FATAL,
      "Failed to update source file in table #{ DB_SOURCE_FILES_TBL }" +
        "\n  SQL: #{ sql }\n  #{ failure }"
    )
  end
  outcome.clear
end
