#!/usr/bin/perl -w

# apt-cacher-cleanup.pl
# Script to clean the cache for the Apt-cacher package caching system.
#
# Copyright (C) 2005, Eduard Bloch <blade@debian.org>
# Copyright (C) 2002-03, Jonathan Oxer <jon@debian.org>
# Portions  (C) 2002, Jacob Lundberg <jacob@chaos2.org>
# Distributed under the terms of the GNU Public Licence (GPL).


# Use locking so that no files are lost when someone re-downloads the index
# files at the very moment the cleanup runs.
# use IO::Handle;

use strict;
use Cwd 'abs_path';

use Fcntl ':flock';
use IO::Handle;
use POSIX;
use Getopt::Long qw(:config no_ignore_case bundling pass_through);

# Defaults for the command line switches; GetOptions() below overrides them.
my $configfile = '/etc/apt-cacher/apt-cacher.conf';
my $nice_mode=0;
my $verbose=0;
my $help;
my $force;
my $sim_mode=0;
my $offline=0;

# Getopt::Long option specification => variable receiving the parsed value.
my %options = (
    "h|help" => \$help,
    "n|nice" => \$nice_mode,
    "v|verbose" => \$verbose,
    "f|force" => \$force,
    "c|config-file=s" => \$configfile,
    "s|simulate" => \$sim_mode,
    "o|offline" => \$offline
);


# Show the usage text (and terminate) when the command line cannot be parsed.
help() unless ( GetOptions(%options));
if ($sim_mode) {
  # Simulation is pointless without seeing the planned actions.
  $verbose = 1;
  print "Simulation mode. Just printing what would be done.\n";
}
help() if ($help);

# Work with an absolute path because the script changes directory later on.
# abs_path() returns undef when the path cannot be resolved; in that case the
# path given by the user is kept so that later error messages (and the command
# line built in get()) still name a file instead of an empty string.
{
    my $resolved = abs_path($configfile);
    $configfile = $resolved if defined $resolved;
}

# Print the usage summary and terminate the script.
#
# The text is raised through die(), so the script ends with a non-zero exit
# status unless a caller traps the exception. Takes no arguments and never
# returns normally.
sub help {
    my $usage = <<"EOM";
    Usage: $0 [ -n ] [ -s|v ] [ -o ] [ -f ] [ -c configfile ]
    -n : nice mode, refresh index files first, then renice to 20 and continue
    -s : simulate mode, just print what would be done to package files
    -o : offline mode, don't update index files. Overrides offline_mode from configfile
    -v : verbose mode
    -f : force executing, disable sanity checks
EOM
    die $usage;
}

# Progress output helper: hands its argument list to print(), but only when
# $verbose is set; otherwise the arguments are discarded.
sub printmsg {
    my @pieces = @_;
    $verbose and print @pieces;
}

#use strict;
#############################################################################
### configuration ###########################################################
# Include the library for the config file parser
# Make the shared apt-cacher helper library loadable and pull it in.
push @INC, '/usr/share/apt-cacher/';
require 'apt-cacher-lib.pl';

# Parsed configuration (hash reference). It is a package variable rather than
# a lexical because setup_ownership needs it to be global.
our $cfg;

# A failure inside read_config is trapped here and reported below; there is no
# sensible way to continue without a configuration.
eval { $cfg = read_config($configfile) };
die "Could not read config file: $@" if $@;

# Tell the library which file backs the global lock.
my $globlockfile = "$cfg->{cache_dir}/private/exlock";
define_global_lockfile($globlockfile);

# Leave quietly unless the configuration asks for cache cleaning.
exit 0 unless $cfg->{clean_cache} eq 1;

# The configuration can switch offline mode on (it cannot switch it off).
$offline = 1 if $cfg->{offline_mode};

# Change uid and gid.
setup_ownership($cfg);

#############################################################################

# File names (relative to the packages directory) that must survive the
# cleanup. Index files are entered further down; extract_sums() is handed a
# reference to this hash as well.
my %valid;

# Sanity check first, before anything on disk is touched: refuse to work with
# root privileges when the configuration names no user to switch to.
# --force disables the check.
if($> == 0 && !$$cfg{user} && !$force) {
    die "Running $0 as root\nand no effective user has been specified. Aborting.\nPlease set the effective user in $configfile\n";
}

# Scratch directory for the hard-linked copies of the index files. Leftovers
# of an earlier run are purged.
my $tempdir="$$cfg{cache_dir}/temp";
mkdir $tempdir if !-d $tempdir;
die "Could not create tempdir $tempdir\n" if !-d $tempdir;
unlink (<$tempdir/*>);

### Preparation of the package lists ########################################

# All relative paths used below assume the packages directory as cwd, and it
# has to be writable for the cleanup to make sense.
chdir "$$cfg{cache_dir}/packages" && -w "." || die "Could not enter the cache dir";

# Ask apt-cacher to (re)fetch one file.
#
# Starts /usr/share/apt-cacher/apt-cacher with "-i -c <configfile>" and
# REMOTE_ADDR=CLEANUPREFRESH in its environment, and writes a minimal GET
# request for the given path to its standard input.
#
# Parameters:
#   $path_info - request path or URL of the file to refresh. Trailing line
#                endings are stripped. Further arguments are ignored.
#
# Dies when the helper cannot be started, the request cannot be delivered or
# the helper exits with a non-zero status - unless --force was given, in
# which case the failure is ignored.
sub get {
    my ($path_info) = @_;

    # Paths read from .complete files may still carry their line ending. Left
    # in place it would end the request headers before "Connection: Close".
    $path_info = '' unless defined $path_info;
    $path_info =~ s/[\r\n]+\z//;

    # The command line is interpreted by the shell (environment assignment and
    # redirection), so the config path has to be quoted for it.
    (my $quoted_config = $configfile) =~ s/'/'\\''/g;
    my $command = "REMOTE_ADDR=CLEANUPREFRESH /usr/share/apt-cacher/apt-cacher -i -c '$quoted_config' >/dev/null";

    printmsg "GET $path_info\n";

    # A failed open must count as a failed refresh; $? would otherwise still
    # hold the status of some earlier child process.
    my $ok = 0;
    if (open(my $fh, '|-', $command)) {
        my $sent = print {$fh} "GET $path_info\r\nConnection: Close\r\n\r\n";
        my $closed = close($fh);    # waits for the child and sets $?
        $ok = ($sent && $closed && $? == 0) ? 1 : 0;
    }

    if(!$ok && !$force) {
	die "Unable to update $path_info . Network problems?\nRun $0 with -v to get more details.\nCleanup aborted since cached data may be damaged.\n";
    }
}



# file state decisions, lock that area
# Take the list of index files while holding the global lock, so that it is
# not read while another process is changing the file state.
set_global_lock(": file state decision");
my @ifiles=(<*es.gz>, <*es.bz2>, <*es>, <*_Index>);
release_global_lock();

for my $index (@ifiles) {

   # preserve the index files
   $valid{$index}=1;

   # now refresh them, unless disabled by -o or the configuration
   if($offline) {
     printmsg "Offline: Using existing $index\n";
     next;
   }

   printmsg "Downloading: $index\n";

   # If the request path is stored in the .complete file, better use that.
   my $url;
   my $record = "../private/$index.complete";
   if(-s $record) {
      if(open(my $tmp, '<', $record)) {
	 $url = <$tmp>;
	 close $tmp;
	 # Drop the line ending; it must not end up inside the request line.
	 $url =~ s/[\r\n]+\z// if defined $url;
      }
      else {
	 warn "Cannot read $record: $!\n";
      }
   }

   # Otherwise (or when the stored path is unusable) rebuild the path from
   # the cache file name: a leading slash, and every "_" turned into "/".
   if(!defined $url || $url eq '') {
      ($url = "/$index") =~ tr{_}{/};
   }

   get($url);
}

# Nice mode: lower the scheduling priority for the remaining work.
setpriority 0, 0, 20 if $nice_mode;

# process the list of index files we already know
# Collect the names of all files referenced by the index files into %valid.
# Every index file is hard-linked into $tempdir first (under the global lock
# and an exclusive flock) and the copy is what gets parsed.
for my $file (@ifiles) {
    printmsg "Reading: $file\n";

    # get both locks and create a temp. copy
    my $tmpfile= "$tempdir/$file";
    my $problem;
    set_global_lock(": temporary copy");
    if(open(my $lck, '<', $file)) {
	flock($lck, LOCK_EX);
	link($file, $tmpfile)
	    or $problem = "Cannot link $file $tmpfile: $!. Check permissions. $$cfg{cache_dir} must be single filesystem.";
	flock($lck, LOCK_UN);
	close($lck);
    }
    else {
	$problem = "Error: cannot open $file for locking: $!";
    }
    release_global_lock();

    if(defined $problem) {
	# Skipping the index silently would leave all its packages out of
	# %valid, and the purge below would then delete them. Treat this like
	# an unusable index file: stop, unless the user insists.
	die("$problem\nUse --force if the repository is no longer interesting. \nExiting to prevent deletion of cache contents.\n") unless $force;
	print "$problem\nForced ignoring index file $file. All packages referenced by it will be lost!\n";
	next;
    }

    if(-e $tmpfile && -z $tmpfile && $tmpfile=~/(gz|bz2)$/) {
	# moo, junk, empty file, most likely leftovers from previous versions
	# of apt-cacher-cleanup where the junk was "protected" from being
	# deleted. Purge later by not having in %valid.
	# delete $valid{$file}; <- will be recreated RSN either way
      die("Found empty index file $file. Delete this manually or use --force if the repository is no longer interesting. \nExiting to prevent deletion of cache contents.\n") unless $force;
      print "Forced ignoring empty index file $file, apparently undownloadable. All packages referenced by it will be lost!\n";

    }
    else {
	# extract_sums() comes from apt-cacher-lib.pl; a false return value is
	# treated as fatal so that nothing gets deleted on incomplete data.
	extract_sums($tmpfile, \%valid) || die("Error processing $file in $$cfg{cache_dir}/packages, cleanup stopped.\nRemove the file if the repository is no longer interesting and the packages pulled from it are to be removed.\n");
    }
}

printmsg sprintf("Found %d valid file entries\n", scalar keys %valid);
#print join("\n",keys %valid);

# Walk over everything in the current directory that looks like a package,
# source or compressed file. Index files are safe because they were entered
# into %valid earlier; anything else that is not listed there goes away,
# together with its header file and its .complete marker.
for my $candidate (map { glob($_) } '*.deb', '*.udeb', '*.bz2', '*.gz', '*.dsc') {
    next if defined $valid{$candidate};

    # In simulation mode the removal is only announced, not carried out.
    unlink($candidate, "../headers/$candidate", "../private/$candidate.complete")
	unless $sim_mode;
    printmsg "Removing source: $candidate and company...\n";
}

# similar thing for possibly remaining cruft
# Continue in the headers directory; it has to be writable as well.
chdir "$$cfg{cache_dir}/headers" && -w "." || die "Could not enter the cache dir";

# Headers for previously expired files. The pattern list matches the one used
# for the packages directory (including *.udeb), so header files of expired
# udebs are cleaned up too.
for my $header (<*.deb>, <*.udeb>, <*.bz2>, <*.gz>, <*.dsc>) {
   if(! defined($valid{$header})) {
      # Nothing is deleted in simulation mode, the action is only reported.
      unlink $header, "../private/$header.complete" unless $sim_mode;
      printmsg "Removing expired headers: $header and company...\n";
   }
}

# also remove void .complete files, created by broken versions of apt-cacher in rare conditions
# Continue in the private directory, where the .complete markers live.
chdir "$$cfg{cache_dir}/private" && -w "." || die "Could not enter the cache dir";

# A marker is only legitimate when the file it belongs to is still valid and
# both its package file and its header file exist. The pattern list covers the
# same file types as the packages pass, including udebs.
for my $marker (<*.deb.complete>, <*.udeb.complete>, <*.bz2.complete>, <*.gz.complete>, <*.dsc.complete>) {
   # Name of the cached file this marker belongs to.
   (my $name = $marker) =~ s/\.complete\z//;
   if(! (defined($valid{$name}) && -e "../packages/$name" && -e "../headers/$name") ) {
      printmsg "Removing: $marker\n";
      unlink $marker unless $sim_mode;
   }
}

# last step, kill some zombies

# Reference point for the age calculation below.
my $now = time();

# Notify files whose last modification lies more than 48 full hours back are
# considered orphaned: even the largest package should be downloadable in two
# days or so.
for my $notify (glob '*.notify') {
    my $mtime = (stat $notify)[9];
    my $age_hours = int(($now - $mtime) / 3600);
    next unless $age_hours > 48;

    printmsg "Removing orphaned notify file: $notify\n";
    unlink $notify unless $sim_mode;
}

#&set_global_lock(": cleanup zombies");

# Back to the packages directory. This must not fail silently: the loop below
# deletes zero-length files in the current directory, and after a failed
# chdir that would still be the private directory.
chdir("$$cfg{cache_dir}/packages")
    or die "Could not enter the cache dir $$cfg{cache_dir}/packages: $!\n";

for my $name (<*>) {
    # Candidates are empty files that nobody else holds a lock on. A file
    # that is locked elsewhere (presumably by a running download) makes the
    # non-blocking flock fail and is left alone.
    next unless -z $name;

    # Three-argument open: the name comes from the cache directory and must
    # never be interpreted as an open mode or a pipe command.
    my $fromfile;
    next unless open($fromfile, '<', $name);

    if(flock($fromfile, LOCK_EX|LOCK_NB)) {
	# double-check, may have changed while locking
	if(-z $name) {
	    printmsg "Removing zombie files: $name and company...\n";
	    unlink $name, "../headers/$name", "../private/$name.complete" unless $sim_mode;
	}
	flock($fromfile, LOCK_UN);
    }
    # Release the handle on every path, not only after a removal.
    close($fromfile);
}

# Drop the temporary hard links created while reading the index files.
unlink (<$tempdir/*>);
