#!/usr/bin/perl -w
#
# mkjigsnap
#
# (c) 2004-2011 Steve McIntyre <steve@einval.com>
#
# Server-side wrapper; run this on a machine with a mirror to set up
# the snapshots for jigit / jigdo downloading
#
# GPL v2 - see COPYING 
#
# This script can be run in two modes:
#
# 1. To build a jigit .conf file for a single jigdo file:
#    add the "-n" option with a CD name on the command line
#    and only specify a single jigdo to work with using "-j".
#
# 2. To build a snapshot tree for (potentially multiple) jigdo files:
#    do *not* specify the "-n" option, and list as many jigdo files as
#    desired, either on the command line using multiple "-j <jigdo>" options
#    or (better) via a file listing them with the "-J" option.
#
# Some things needed:
#   (single-jigdo mode only) the CD name of the jigit
#   (single-jigdo mode only) the output location; where the jigdo, template
#      file and snapshot will be written
#   (single-jigdo mode only) the locations of the input jigdo and template
#      files
#   the location of the mirror
#   the keyword(s) to look for (e.g. Debian)
#   the snapshot dirname (e.g. today's date)
#
# Example #1: (single-jigdo mode, used for Ubuntu jigit generation)
#
#   mkjigsnap -o /tmp/mjs-test -n mjs-test -m /tmp/mirror \
#        -j ~/jigdo/update/debian-update-3.0r2.01-i386.jigdo \
#        -t ~/jigdo/update/debian-update-3.0r2.01-i386.template \
#        -k Debian -k Non-US \
#        -d 20041017
#
#   (This creates a single jigit conf file using the supplied jigdo/template
#    file pair, looking for jigdo references to files in the "Debian" and
#    "Non-US" areas. Output the files into /tmp/mjs-test and call them
#    "mjs-test.<ext>", creating a snapshot of the needed files in
#    /tmp/mjs-test/20041017 by linking files from /tmp/mirror as needed.)
#
# Example #2: (multi-jigdo mode, as run to keep
#              http://us.cdimage.debian.org/cdimage/snapshot/ up to date)
#
# mkjigsnap -m /org/ftp/debian -J ~/jigdo.list \
#      -k Debian \
#      -d /org/jigdo-area/snapshot/Debian \
#      -f ~/mkjigsnap-failed.log \
#      -i ~/mkjigsnap-ignore.list
#
#   (This reads in all the jigdo files listed in ~/jigdo.list, building a
#    list of all the files referenced in the "Debian" area. It will then
#    attempt to build a snapshot tree of all those files under
#    /org/jigdo-area/snapshot/Debian by linking from /org/ftp/debian. Any
#    files that are missing will be listed into the output "missing" file
#    ~/mkjigsnap-failed.log for later checking, UNLESS they are already listed
#    in the "ignore" file ~/mkjigsnap-ignore.list.)
#      

use strict;
use Getopt::Long;
use File::Basename;
use File::Find;
use File::Copy;
use Compress::Zlib;
Getopt::Long::Configure ('no_ignore_case');
Getopt::Long::Configure ('no_auto_abbrev');

# Timestamps ("date -u" output) recorded as each phase of the run
# completes; they are all printed together in the final summary.
my $startdate = `date -u`;
my ($jlistdonedate, $parsedonedate, $snapdonedate);

# Operating mode ("multi" unless a CD name is given, see below) and the
# dry-run / verbose flags
my ($mode, $dryrun, $verbose) = ("multi", 0, 0);

# Command-line settings, plus the status returned by GetOptions()
my $result;
my (@jigdos, @keywords, @mirrors);
my ($dirname, $failedfile, $ignorefile, $jigdolist, $mirror, $cdname,
    $outdir, $tempdir, $template);

# Running totals used in progress and summary messages
my ($num_jigdos, $num_unsorted, $num_unique, $old_deleted) = (0, 0, 0, 0);

# Bookkeeping shared between the subroutines below
my @failed_files;   # files we could not link and that are not ignored
my %ignored_fails;  # entries loaded from the ignore file
my %file_list;      # output paths that belong in the snapshot tree
my %ref;            # files referenced by the jigdo file(s)

# Parse the command line. Option letters are case-sensitive and cannot be
# abbreviated (see the Getopt::Long::Configure calls above).
$result = GetOptions("d=s" => \$dirname,
                     "f=s" => \$failedfile,
                     "i=s" => \$ignorefile,
                     "J=s" => \$jigdolist,
                     "j=s" => \@jigdos,
                     "k=s" => \@keywords,
                     "m=s" => \@mirrors,
                     "N"   => \$dryrun,
                     "n=s" => \$cdname,
                     "o=s" => \$outdir,
                     "T=s" => \$tempdir,
                     "t=s" => \$template,
                     "v"   => \$verbose);

# GetOptions has already reported what it did not like. Do not carry on
# with a half-understood command line: a later stage deletes files.
if (!$result) {
    die "Failed to parse the command line options; aborting\n";
}

# Sanity-check arguments
if (!defined ($dirname)) {
    die "You must specify the snapshot directory name!\n";
}
if (!@keywords) {
    die "You must specify the keywords to match!\n";
}
if (!@mirrors) {
    die "You must specify the location(s) of the mirror(s)!\n";
}
if (@jigdos) {
    $num_jigdos += scalar(@jigdos);
}
if (defined($jigdolist)) {
    # Count the whitespace-separated words in the list file (what
    # "wc -w" reports), without handing the file name to a shell
    open(my $listfh, "<", $jigdolist)
        or die "Can't open file $jigdolist: $!\n";
    while (defined(my $line = <$listfh>)) {
        my @words = split(' ', $line);
        $num_jigdos += scalar(@words);
    }
    close($listfh);
}
if ($num_jigdos == 0) {
    die "No jigdo file(s) specified!\n";
}

# Giving a CD name is what selects single-jigdo mode
if (defined($cdname)) {
    $mode = "single";
}

if ($mode eq "single") {
    # $cdname is necessarily defined here; check the other things that
    # single-jigdo mode needs
    if (!defined($outdir)) {
        die "You must specify where to set up the snapshot!\n";
    }
    if (!defined($template)) {
        die "You must specify the template file!\n";
    }
    if ($num_jigdos != 1) {
        die "More than one jigdo file specified ($num_jigdos) in single-jigdo mode!\n";
    }
    # In single-jigdo mode, the snapshot directory is relative to the
    # output dir
    $dirname="$outdir/$dirname";
} else {
    # $cdname is necessarily undefined here; reject the other
    # single-jigdo-only options
    if (defined($outdir)) {
        die "Output dir is meaningless for multi-jigdo mode!\n";
    }
    if (defined($template)) {
        die "Template file name is meaningless for multi-jigdo mode!\n";
    }
}

# Make a dir tree
#
# Creates directory $input and any missing parents, one path component
# at a time. Does nothing if $input is already a directory. With
# $verbose set the request is logged; with $dryrun set the tree is only
# reported, not created.
#
# A component that cannot be created (and does not already exist) is
# reported on stderr and the rest of the path is abandoned.
#
# Arguments: $input - the directory path to create
sub mkdirs {
    my $input = shift;
    my $dir;

    return if (-d $input);

    if ($verbose) {
        print "mkdirs($input)\n";
    }
    if ($dryrun) {
        print "DRYRUN: not making directory tree $input\n";
        return;
    }

    foreach my $component (split /\//, $input) {
        $dir = defined($dir) ? join ("/", $dir, $component) : $component;

        # An absolute path starts with an empty component: nothing to
        # create for that one
        next if ($dir eq "");

        if (!mkdir($dir)) {
            # Save the reason now; the -d test below may overwrite $!
            my $reason = $!;
            # Failing on a parent that is already there is expected, so
            # only complain if the directory really is missing
            if (! -d $dir) {
                warn "mkdirs: failed to create $dir: $reason\n";
                return;
            }
        }
    }
    return;
}

# File::Find callback: remove one file if the snapshot no longer needs it
#
# Called by find() for each entry below the snapshot directory. A regular
# file whose full name ($File::Find::name) is not a key of %file_list is
# deleted (or, in dry-run mode, only reported) and counted in
# $old_deleted. A running count is printed after every 1000 files.
sub delete_redundant {
    return unless (-f $_);
    return if (defined($file_list{$File::Find::name}));

    if ($verbose) {
        print "delete_redundant($File::Find::name)\n";
    }
    if ($dryrun) {
        print "DRYRUN: not deleting $File::Find::name\n";
    } else {
        # find() has changed into the directory holding this entry, so
        # the file must be addressed as $_: $File::Find::name would be
        # resolved against the wrong directory whenever the snapshot
        # directory was given as a relative path.
        if (!unlink($_)) {
            warn "Failed to delete $File::Find::name: $!\n";
            return;
        }
    }
    $old_deleted++;
    if ( !($old_deleted % 1000) ) {
        print "$old_deleted\n";
    }
    return;
}

# Load the list of files whose absence should not be reported
#
# Reads $inputfile line by line and records each line, without its
# trailing newline, as a key of %ignored_fails. Prints how many lines
# were loaded. If the file cannot be opened, nothing is loaded and the
# sub returns quietly.
#
# Arguments: $inputfile - path of the ignore file
sub parse_ignore_file {
    my $inputfile = shift;
    my $num_ignored_loaded = 0;

    # Three-argument open on a private handle: the file name is taken
    # literally and no global filehandle is left open behind us
    open(my $inlist, "<", $inputfile) or return;
    while (defined (my $pkg = <$inlist>)) {
        chomp $pkg;
        $ignored_fails{$pkg}++;
        $num_ignored_loaded++;
    }
    close($inlist);
    print "parse_ignore_file: loaded $num_ignored_loaded entries from file $inputfile\n";
}

# Build (or refresh) the snapshot tree
#
# For every file recorded in %ref, make sure it exists below the snapshot
# directory ($dirname), hard-linking it from the first mirror in @mirrors
# that can supply it. Each mirror is tried at the file's own relative
# path (resolving one level of symlink by hand) and then by bare file
# name directly under the mirror root.
#
# Files that cannot be linked are counted as ignored if they appear in
# %ignored_fails; otherwise they are collected in @failed_files and
# either written to $failedfile or, if no such file was requested,
# reported on stdout. Afterwards the tree is walked with
# delete_redundant() to remove files that are no longer wanted.
#
# Takes no arguments; works on the file-level state. In dry-run mode
# nothing is linked or deleted.
sub generate_snapshot_tree () {
    my $done = 0;
    my $failed = 0;
    my $ignored = 0;

    # File::Find drops a trailing "/" from its starting directory. The
    # names it reports must be identical to the keys stored in
    # %file_list, or the clean-up pass would delete every file we have
    # just linked, so normalise the directory once and use the same
    # value for both.
    my $snapdir = $dirname;
    $snapdir =~ s{(?<=[^/])/+$}{};
    my $prefix = ($snapdir eq "/") ? "/" : $snapdir . "/";

    $| = 1;

    # Sorting is important here for performance, to help with
    # directory lookups
    foreach my $file (sort (keys %ref)) {
        my $outfile = $prefix . $file;

        $file_list{$outfile}++;
        if ($verbose) {
            print "file_list hash updated for $outfile\n";
        }
        if (! -e $outfile) {
            my $dir = dirname($file);
            my $filename = basename($file);
            my $link_ok = 0;

            mkdirs($prefix . $dir);

            foreach my $mirror (@mirrors) {
                my $infile = $mirror . "/" . $file;
                if (-l $infile) {
                    # Link to the symlink's target, not the symlink
                    my $link = readlink($infile);
                    if (defined($link)) {
                        if ($link =~ m#^/#) {
                            $infile = $link;
                        } else {
                            $infile = dirname($infile) . "/" . $link;
                        }
                    }
                }
                if ($verbose) {
                    print "look for $file:\n";
                }
                if ($dryrun) {
                    # Pretend the first candidate worked
                    print "DRYRUN: not linking $infile to $outfile\n";
                    $link_ok = 1;
                    last;
                }
                if ($verbose) {
                    print "  try $infile\n";
                }
                if (link ($infile, $outfile)) {
                    $link_ok = 1;
                    last;
                }

                # Not at the expected path; look for the bare file name
                # at the top of this mirror
                $infile = $mirror . "/" . $filename;
                if ($verbose) {
                    print "  fallback: try $infile\n";
                }
                if (link ($infile, $outfile)) {
                    $link_ok = 1;
                    last;
                }
            }
            if ($link_ok == 0) {
                if ($ignored_fails{$file}) {
                    $ignored++;
                } else {
                    if (!defined($failedfile)) {
                        # No logfile, print to stdout then
                        print "\nFailed to create link $outfile\n";
                    }
                    $failed++;
                    push (@failed_files, $file);
                }
            } else {
                if ($ignored_fails{$file}) {
                    print "\n$file marked as failed, but we found it anyway!\n";
                }
            }
        }
        $done++;
        if ( !($done % 10000) ) {
            print "$done done, ignored $ignored, failed $failed out of $num_unique\n";
        }
    }
    print "  Finished: $done/$num_unique, $failed failed, ignored $ignored\n\n";

    if (defined($failedfile) && ($failed > 0)) {
        print "Writing list of failed files to $failedfile\n";
        open(my $fail_log, ">", $failedfile)
            or die "Failed to open $failedfile: $!\n";
        foreach my $missing (@failed_files) {
            print $fail_log "$missing\n";
        }
        # Buffered write errors only show up when the file is closed
        close($fail_log)
            or warn "Failed to write $failedfile: $!\n";
    }

    # Now walk the tree and delete files that we no longer need
    print "Scanning for now-redundant files\n";
    find(\&delete_redundant, $snapdir);
    print "  Finished: $old_deleted old files removed\n";
}

# Parse jigdo_list file if we have one: every line longer than one
# character is taken as the name of a jigdo file and added to @jigdos
if (defined($jigdolist)) {
    if ($verbose) {
        print "Checking for jigdos in $jigdolist\n";
    }
    # Three-argument open on a private handle, so the file name is taken
    # literally and the global $_ is left alone
    open (my $inlist, "<", $jigdolist)
        or die "Can't open file $jigdolist: $!\n";
    while (defined(my $entry = <$inlist>)) {
        chomp $entry;
        if (length($entry) > 1) {
            push (@jigdos, $entry);
        }
    }
    close($inlist);
}
$jlistdonedate = `date -u`;

if ($verbose) {
    print "Working on $num_jigdos jigdo file(s)\n";
}
# Walk through the list of jigdos, parsing as we go
my $num_parsed = 0;
if ($verbose) {
    print "Reading / parsing jigdo file(s)\n";
}

# With no file names at all, zcat would sit reading our stdin
if (!@jigdos) {
    die "No jigdo file(s) specified!\n";
}

# A line of interest is 22 characters, then "=<keyword>:<file>".
# Compile one pattern per keyword up front, with the keyword quoted so
# that it can only ever match literally.
my @keyword_patterns;
foreach my $keyword (@keywords) {
    my $quoted = quotemeta($keyword);
    push (@keyword_patterns, qr/^.{22}=${quoted}:(.*)$/);
}

# Run zcat directly (list form, no shell), so that file names containing
# spaces or shell metacharacters are passed through untouched. "-f" lets
# it cope with jigdo files that are not compressed.
open (my $injig, "-|", "zcat", "-f", @jigdos)
    or die "Can't run zcat on the jigdo file(s): $!\n";
while (defined(my $line = <$injig>)) {
    my $file;
    chomp $line;
    # If several keywords match, the last one listed wins
    foreach my $pattern (@keyword_patterns) {
        if ($line =~ $pattern) {
            $file = $1;
        }
    }
    if (defined($file)) {
        $num_unsorted++;
        if (!exists $ref{$file}) {
            $num_unique++;
            $ref{$file} = 1;
        }
    }
}
# close() on a pipe fails if the command exited with an error. Carry on
# with what was read, but make sure the problem is seen.
if (!close($injig)) {
    warn "zcat reported a problem reading the jigdo file(s) (status $?)\n";
}
$parsedonedate = `date -u`;

if ($num_unique < 5) {
    die "Only $num_unique for the snapshot? Something is wrong; abort!\n";
}

# Make sure the snapshot directory is there before we start. A dry run
# cannot create it, so there is nothing useful left to do in that case.
unless (-d $dirname) {
    print "$dirname does not exist\n";
    die "DRYRUN: not making it, so aborting\n" if ($dryrun);
    mkdirs($dirname);
}

# Load the list of known-missing files, if we were given one
parse_ignore_file($ignorefile) if (defined($ignorefile));

# Do the real work
print "Trying to snapshot-link $num_unique files into $dirname\n";
generate_snapshot_tree();
$snapdonedate = `date -u`;

# Summarise when each phase finished; the timestamps still carry the
# newline from "date"
foreach my $stamp ($startdate, $jlistdonedate, $parsedonedate, $snapdonedate) {
    chomp $stamp;
}
print "$startdate: startup\n",
      "$jlistdonedate: found $num_jigdos jigdo files\n",
      "$parsedonedate: found $num_unsorted files referenced in those jigdo files, $num_unique unique\n",
      "$snapdonedate: snapshot done\n";

# Single-jigdo mode only: write <cdname>.jigdo (a copy of the input jigdo
# with its Template= line pointing at <cdname>.template), copy the
# template alongside it and write the matching <cdname>.conf
if ($mode eq "single") {
    if ($dryrun) {
        print "DRYRUN: Not creating files in $outdir\n";
    } else {
        foreach my $jigdo (@jigdos) {
            my ($gzin, $gzout, $line, $status);
            $gzin = gzopen($jigdo, "rb") or
                die "Unable to open jigdo file $jigdo for reading: $!\n";
            $gzout = gzopen("$outdir/$cdname.jigdo", "wb9") or
                die "Unable to open new jigdo file $outdir/$cdname.jigdo for writing: $!\n";
            # gzreadline returns the number of bytes read, 0 at end of
            # file and a negative value on error
            while (($status = $gzin->gzreadline($line)) > 0) {
                $line =~ s:^Template=.*$:Template=$cdname.template:;
                my $written = $gzout->gzwrite($line);
                if (!defined($written) || $written <= 0) {
                    die "Failed to write to $outdir/$cdname.jigdo: " .
                        $gzout->gzerror() . "\n";
                }
            }
            if ($status < 0) {
                die "Failed to read jigdo file $jigdo: " .
                    $gzin->gzerror() . "\n";
            }
            $gzin->gzclose();
            # gzclose flushes any pending output and returns 0 on success
            if ($gzout->gzclose() != 0) {
                die "Failed to finish writing $outdir/$cdname.jigdo\n";
            }
        }
        copy("$template", "$outdir/$cdname.template") or
            die "Failed to copy template file $template: $!\n";
        open (my $conf, ">", "$outdir/$cdname.conf") or
            die "Failed to open conf file $outdir/$cdname.conf for writing: $!\n";
        print $conf "JIGDO=$cdname.jigdo\n";
        print $conf "TEMPLATE=$cdname.template\n";
        # NOTE(review): in single-jigdo mode $dirname has already been
        # prefixed with "$outdir/" during argument checking, so this
        # value contains the whole output path - confirm that is what
        # the consumer of the conf file expects.
        print $conf "SNAPSHOT=snapshot/$dirname\n";
        close($conf) or
            die "Failed to write conf file $outdir/$cdname.conf: $!\n";
        print "Jigdo files, config and snapshot made in $outdir\n";
    }
}
