554 lines
17 KiB
Perl
Executable File
554 lines
17 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
#
|
|
# mkjigsnap
|
|
#
|
|
# (c) 2004-2019 Steve McIntyre <steve@einval.com>
|
|
#
|
|
# Server-side wrapper; run this on a machine with a mirror to set up
|
|
# the snapshots for jigit / jigdo downloading
|
|
#
|
|
# GPL v2 - see COPYING
|
|
#
|
|
# This script can be run in two modes:
|
|
#
|
|
# 1. To build a jigit .conf file for a single jigdo file:
|
|
# add the "-n" option with a CD name on the command line
|
|
# and only specify a single jigdo to work with using "-j".
|
|
#
|
|
# 2. To build a snapshot tree for (potentially multiple) jigdo files:
|
|
# do *not* specify the "-n" option, and list as many jigdo files as
|
|
# desired, either on the command line using multiple "-j <jigdo>" options
|
|
# or (better) via a file listing them with the "-J" option.
|
|
#
|
|
# Some things needed:
|
|
# (single-jigdo mode only) the CD name of the jigit
|
|
# (single-jigdo mode only) the output location; where the jigdo, template
|
|
# file and snapshot will be written
|
|
# (single-jigdo mode only) the locations of the input jigdo and template
|
|
# files
|
|
# the location of the mirror
|
|
# the keyword(s) to look for (e.g. Debian)
|
|
# the snapshot dirname (e.g. today's date)
|
|
#
|
|
# Example #1: (single-jigdo mode, used for Ubuntu jigit generation)
|
|
#
|
|
# mkjigsnap -o /tmp/mjs-test -n mjs-test -m /tmp/mirror \
|
|
# -j ~/jigdo/update/debian-update-3.0r2.01-i386.jigdo \
|
|
# -t ~/jigdo/update/debian-update-3.0r2.01-i386.template \
|
|
# -k Debian -k Non-US \
|
|
# -d 20041017
|
|
#
|
|
# (This creates a single jigit conf file using the supplied jigdo/template
|
|
# file pair, looking for jigdo references to files in the "Debian" and
|
|
# "Non-US" areas. Output the files into /tmp/mjs-test and call them
|
|
# "mjs-test.<ext>", creating a snapshot of the needed files in
|
|
# /tmp/mjs-test/20041017 by linking files from /tmp/mirror as needed.)
|
|
#
|
|
# Example #2: (multi-jigdo mode, as run to keep
|
|
# http://us.cdimage.debian.org/cdimage/snapshot/ up to date)
|
|
#
|
|
# mkjigsnap -m /org/ftp/debian -J ~/jigdo.list \
|
|
# -k Debian \
|
|
# -d /org/jigdo-area/snapshot/Debian \
|
|
# -f ~/mkjigsnap-failed.log \
|
|
# -i ~/mkjigsnap-ignore.list
|
|
#
|
|
# (This reads in all the jigdo files listed in ~/jigdo.list, building a
|
|
# list of all the files referenced in the "Debian" area. It will then
|
|
# attempt to build a snapshot tree of all those files under
|
|
# /org/jigdo-area/snapshot/Debian by linking from /org/ftp/debian. Any
|
|
# files that are missing will be listed into the output "missing" file
|
|
# ~/mkjigsnap-failed.log for later checking, UNLESS they are already listed
|
|
# in the "ignore" file ~/mkjigsnap-ignore.list.)
|
|
#
|
|
|
|
use strict;
|
|
use Getopt::Long;
|
|
use File::Basename;
|
|
use File::Find;
|
|
use File::Copy;
|
|
use Compress::Zlib;
|
|
# Option names are case-sensitive (-n vs -N, -j vs -J, ...) and must be
# given in full, never abbreviated.
Getopt::Long::Configure('no_ignore_case', 'no_auto_abbrev');

# Operating mode: "multi" unless a CD name is supplied with -n, in which
# case the argument checks further down switch it to "single".
my $mode = "multi";

# Behaviour switches, set by -N and -v respectively
my ($dryrun, $verbose) = (0, 0);

# Timestamps taken at the end of each phase, reported at the end of the run
my $startdate = qx(date -u);
my ($jlistdonedate, $parsedonedate, $snapdonedate);

# Inputs collected from the command line
my (@jigdos, @keywords, @mirrors);
my $single_jigdo;
my ($dirname, $failedfile, $ignorefile, $jigdolist, $mirror, $cdname,
    $outdir, $tempdir, $template, $check_checksums, $checksum_out,
    $backref_file);
my $result;

# Counters
my ($num_jigdos, $num_unsorted, $num_unique, $old_deleted) = (0, 0, 0, 0);

# Files that could not be linked / that failed checksum validation
my (@failed_files, @ck_failed_files);

# Lookup tables:
#   %ignored_fails  file names loaded from the ignore list
#   %file_list      every output path that belongs in the snapshot
#   %ref            file name -> { checksum type -> checksum } from the jigdos
#   %jigdo_backref  file name -> text listing the jigdo files that mention it
my (%ignored_fails, %file_list, %ref, %jigdo_backref);

# Command-line switches and the variables they fill in
my @option_spec = (
    "b=s" => \$backref_file,
    "c"   => \$check_checksums,
    "C=s" => \$checksum_out,
    "d=s" => \$dirname,
    "f=s" => \$failedfile,
    "i=s" => \$ignorefile,
    "J=s" => \$jigdolist,
    "j=s" => \@jigdos,
    "k=s" => \@keywords,
    "m=s" => \@mirrors,
    "N"   => \$dryrun,
    "n=s" => \$cdname,
    "o=s" => \$outdir,
    "T=s" => \$tempdir,
    "t=s" => \$template,
    "v"   => \$verbose,
);

GetOptions(@option_spec)
    or die "Error in command line arguments, bailing out\n";
|
|
|
|
# Sanity-check arguments
if (!defined ($dirname)) {
    die "You must specify the snapshot directory name!\n";
}
if (!@keywords) {
    die "You must specify the keywords to match!\n";
}
if (!@mirrors) {
    die "You must specify the location(s) of the mirror(s)!\n";
}
if (@jigdos) {
    $num_jigdos += scalar(@jigdos);
}
if (defined($jigdolist)) {
    # Count the entries in the list file ourselves rather than shelling
    # out to "wc -w": the file name comes straight from the command line
    # (so must not be interpolated into a shell command), and the list is
    # consumed one entry per line with lines of length <= 1 skipped, so
    # counting the same way keeps the total honest.
    open(my $list_fh, '<', $jigdolist)
        or die "Can't open file $jigdolist: $!\n";
    while (defined(my $entry = <$list_fh>)) {
        chomp $entry;
        if (length($entry) > 1) {
            $num_jigdos++;
        }
    }
    close($list_fh);
}
if ($num_jigdos == 0) {
    die "No jigdo file(s) specified!\n";
}

# Giving a CD name with -n is what selects single-jigdo mode
if (defined($cdname)) {
    $mode = "single";
}

if ($mode eq "single") {
    # $cdname is necessarily defined here, so only the other
    # single-mode requirements need checking.
    if (!defined($outdir)) {
        die "You must specify where to set up the snapshot!\n";
    }
    if (!defined($template)) {
        die "You must specify the template file!\n";
    }
    if ($num_jigdos != 1) {
        die "More than one jigdo file specified ($num_jigdos) in single-jigdo mode!\n";
    }
    # In single-jigdo mode, the snapshot directory is relative to the
    # output dir
    $dirname = "$outdir/$dirname";
    # And store the path to the jigdo file for later use
    $single_jigdo = $jigdos[0];
} else {
    # $cdname is necessarily undefined here; reject the other options
    # that only make sense together with it.
    if (defined($outdir)) {
        die "Output dir is meaningless for multi-jigdo mode!\n";
    }
    if (defined($template)) {
        die "Template file name is meaningless for multi-jigdo mode!\n";
    }
}
|
|
|
|
# Make a dir tree
#
# Creates $input and any missing parent directories, one path component
# at a time. Does nothing if $input already exists as a directory. In
# dry-run mode only reports what would have been created. Prints a
# warning on stderr if the directory still does not exist afterwards.
sub mkdirs {
    my $input = shift;

    return if -d $input;

    if ($verbose) {
        print "mkdirs($input)\n";
    }
    if ($dryrun) {
        print "DRYRUN: not making directory tree $input\n";
        return;
    }

    my $dir;
    my $last_error;
    foreach my $component (split /\//, $input) {
        $dir = defined($dir) ? join("/", $dir, $component) : $component;
        # An absolute path yields an empty leading component; there is
        # nothing to create for it, nor for levels that already exist.
        next if $dir eq "" or -d $dir;
        mkdir($dir) or $last_error = $!;
    }

    # Individual mkdir failures are harmless if the tree exists in the
    # end (e.g. somebody else created it meanwhile); only complain when
    # the requested directory is really missing.
    if (! -d $input) {
        warn "mkdirs: failed to create $input"
            . (defined($last_error) ? ": $last_error" : "") . "\n";
    }
    return;
}
|
|
|
|
# File::Find callback: remove any plain file under the snapshot tree
# that is not listed in %file_list. Counts each such file in
# $old_deleted and prints the running total every 1000 files. In
# dry-run mode the file is only reported, not removed.
sub delete_redundant {
    return unless -f $_;
    return if defined($file_list{$File::Find::name});

    if ($verbose) {
        print "delete_redundant($File::Find::name)\n";
    }
    if (!$dryrun) {
        # find() is called without no_chdir, so it has changed into the
        # directory holding this file: $_ is the name that is valid from
        # here, whereas $File::Find::name is relative to the starting
        # directory and would not resolve when the snapshot directory
        # was given as a relative path.
        unlink($_)
            or warn "Failed to delete $File::Find::name: $!\n";
    } else {
        print "DRYRUN: not deleting $File::Find::name\n";
    }
    $old_deleted++;
    if ( !($old_deleted % 1000) ) {
        print "$old_deleted\n";
    }
}
|
|
|
|
# Load the list of files whose absence should not be reported as a
# failure. Each line of $inputfile (minus its newline) becomes a key in
# %ignored_fails. Returns quietly, loading nothing, if the file cannot
# be opened.
sub parse_ignore_file {
    my $inputfile = shift;
    my $num_ignored_loaded = 0;

    # Three-argument open so that odd characters in the file name cannot
    # be mistaken for an open mode; lexical handle so it cannot collide
    # with other handles in the script.
    open(my $ignore_fh, '<', $inputfile) or return;
    while (defined (my $pkg = <$ignore_fh>)) {
        chomp $pkg;
        $ignored_fails{$pkg}++;
        $num_ignored_loaded++;
    }
    close($ignore_fh);
    print "parse_ignore_file: loaded $num_ignored_loaded entries from file $inputfile\n";
}
|
|
|
|
# Iff we have a checksum of the right type, calculate the checksum of
# the file on disk and validate
#
# Arguments:
#   $file       name of the file as recorded in %ref
#   $full_path  location of the file on disk
#   $type       "md5" or "sha256"
#
# Returns:
#    0  checksum matches, or %ref holds no checksum of this type
#   -1  checksum differs from the one recorded in %ref
#   -2  no checksum could be extracted from the tool's output
sub validate_checksum($$$) {
    my ($file, $full_path, $type) = @_;

    # External tool and length of the encoded checksum for each type
    my %tool_for = (
        "md5"    => [ "jigsum",        22 ],
        "sha256" => [ "jigsum-sha256", 43 ],
    );
    my $checksum = "";

    if (! exists $ref{$file}{$type}) {
        return 0; # Nothing to compare, so we're good!
    }

    if (exists $tool_for{$type}) {
        my ($tool, $sumlen) = @{ $tool_for{$type} };

        # Single-quote the path for the shell so that spaces or shell
        # metacharacters in a file name cannot break (or hijack) the
        # command line.
        (my $quoted_path = $full_path) =~ s/'/'\\''/g;
        my $jigsum = `$tool '$quoted_path' 2>/dev/null`;
        if (defined($jigsum) && $jigsum =~ m/^(.{$sumlen}) /) {
            $checksum = $1;
        }
    }

    if (length($checksum) < 2) {
        # Didn't find a checksum in the jigsum output, so failed
        return -2;
    }

    # Recorded and calculated checksums have the same fixed length, so
    # a plain string comparison is all that is needed.
    if ($ref{$file}{$type} ne $checksum) {
        return -1;
    }

    return 0;
}
|
|
|
|
# Try to hard-link one file into the snapshot from each mirror in turn.
#
# For every mirror, first try the file at its full relative path
# (following one level of symlink, if it is one), then fall back to
# looking for just the file name directly under the mirror root.
# In dry-run mode nothing is linked and the first candidate is reported
# as if it had worked.
#
# Returns 1 if a link was made (or would have been, in dry-run mode),
# 0 if no mirror could provide the file.
sub _link_from_mirrors {
    my ($relpath, $outfile) = @_;
    my $filename = basename($relpath);

    foreach my $mirror (@mirrors) {
        my $infile = $mirror . "/" . $relpath;
        if (-l $infile) {
            my $target = readlink($infile);
            if ($target =~ m#^/#) {
                $infile = $target;
            } else {
                $infile = dirname($infile) . "/" . $target;
            }
        }
        if ($verbose) {
            print "look for $relpath:\n";
        }
        if ($dryrun) {
            print "DRYRUN: not linking $infile to $outfile\n";
            return 1;
        }

        if ($verbose) {
            print " try $infile\n";
        }
        return 1 if link($infile, $outfile);

        $infile = $mirror . "/" . $filename;
        if ($verbose) {
            print " fallback: try $infile\n";
        }
        return 1 if link($infile, $outfile);
    }
    return 0;
}

# Build / refresh the snapshot tree under $dirname.
#
# For every file named in %ref: record its output path in %file_list,
# link it in from a mirror if it is not there yet, and (with -c)
# validate its checksums. Afterwards write the list of files that could
# not be linked to $failedfile (if one was given), and finally walk the
# tree deleting any file that is not in %file_list.
sub generate_snapshot_tree () {
    my ($done, $failed, $ignored, $ck_failed) = (0, 0, 0, 0);

    $| = 1;

    # Sorting is important here for performance, to help with
    # directory lookups
    foreach my $relpath (sort (keys %ref)) {
        my $outfile = $dirname . "/" . $relpath;

        $file_list{$outfile}++;
        if ($verbose) {
            print "file_list hash updated for $outfile\n";
        }

        if (! -e $outfile) {
            mkdirs($dirname . "/" . dirname($relpath));

            if (_link_from_mirrors($relpath, $outfile)) {
                if ($ignored_fails{$relpath}) {
                    print "\n$relpath marked as failed, but we found it anyway!\n";
                }
            } elsif ($ignored_fails{$relpath}) {
                $ignored++;
            } else {
                if (!defined($failedfile)) {
                    # No logfile, print to stdout then
                    print "\nFailed to create link $outfile\n";
                }
                $failed++;
                push (@failed_files, $relpath);
            }
        }

        if (-e $outfile && $check_checksums) {
            # Only bother with sha256 if md5 had nothing to complain about
            my $csum_result = validate_checksum($relpath, $outfile, "md5");
            if (0 == $csum_result) {
                $csum_result = validate_checksum($relpath, $outfile, "sha256");
            }
            if ($csum_result == -1) {
                print "\nChecksum failure: $relpath\n";
                $ck_failed++;
                push (@ck_failed_files, $relpath);
            } elsif ($csum_result == -2) {
                print "\nFailed to jigsum $relpath\n";
            }
        }

        $done++;
        if ( !($done % 10000) or ($check_checksums && !($done % 100))) {
            print "$done done, ignored $ignored, failed $failed ck_failed $ck_failed out of $num_unique\n";
        }
    }
    print " Finished: $done/$num_unique, $failed failed, $ck_failed ck_failed, ignored $ignored\n\n";

    # The back-reference file is only produced together with the failure
    # log, and only when something actually failed.
    if (defined($failedfile) && ($failed > 0)) {
        my ($fail_fh, $backref_fh);

        print "Writing list of failed files to $failedfile\n";
        open($fail_fh, '>', $failedfile)
            or die "Failed to open $failedfile: $!\n";
        if ($backref_file) {
            open($backref_fh, '>', $backref_file)
                or die "Failed to open $backref_file: $!\n";
        }
        foreach my $missing (@failed_files) {
            print $fail_fh "$missing\n";
            if ($backref_file) {
                print $backref_fh "$missing:\n";
                print $backref_fh $jigdo_backref{$missing};
            }
        }
        # Buffered write errors only show up at close time
        close($fail_fh)
            or warn "Error writing $failedfile: $!\n";
        if ($backref_file) {
            close($backref_fh)
                or warn "Error writing $backref_file: $!\n";
        }
    }

    # Now walk the tree and delete files that we no longer need
    print "Scanning for now-redundant files\n";
    find(\&delete_redundant, $dirname);
    print " Finished: $old_deleted old files removed\n";
}
|
|
|
|
# Parse jigdo_list file if we have one: every line longer than one
# character names a jigdo file and is added to @jigdos.
if (defined($jigdolist)) {
    if ($verbose) {
        print "Checking for jigdos in $jigdolist\n";
    }
    open (my $list_fh, '<', $jigdolist)
        or die "Can't open file $jigdolist: $!\n";
    while (defined(my $entry = <$list_fh>)) {
        chomp $entry;
        if (length($entry) > 1) {
            push (@jigdos, $entry);
        }
    }
    close ($list_fh);

    # @jigdos now holds everything we are going to work on (from -j and
    # from the list file), so report that number rather than an earlier
    # estimate of the list file's size.
    $num_jigdos = scalar(@jigdos);
}
$jlistdonedate = `date -u`;

print "Working on $num_jigdos jigdo file(s)\n";
# Walk through the list of jigdos, parsing as we go
my $num_parsed = 0;
print "Reading / parsing jigdo file(s)\n";
|
|
|
|
# Read every jigdo file (via "zcat -f", which copes with both compressed
# and plain files) and collect, in %ref, the checksum of each file
# referenced under one of the requested keywords.
foreach my $injig (sort @jigdos) {
    $num_parsed++;

    # List-form pipe open: the file name is handed to zcat as a single
    # argument without going through a shell.
    my $jig_fh;
    if (!open ($jig_fh, '-|', 'zcat', '-f', $injig)) {
        warn "Unable to run zcat on $injig: $!\n";
        next;
    }

    while (defined(my $line = <$jig_fh>)) {
        my ($file, $jigsum, $type);
        chomp $line;

        foreach my $keyword (@keywords) {
            # Look for a jigdo format v1 match first, with
            # base64(ish)-encoded md5 checksums (22 chars before the
            # "="). Otherwise, look for a jigdo format v2 match, with
            # base64(ish)-encoded sha256 checksums (43 chars before
            # the "="). Note that the keyword is used as a regular
            # expression fragment here.
            foreach my $format ([22, "md5"], [43, "sha256"]) {
                my ($sumlen, $sumtype) = @$format;
                if ($line =~ m/^(.{$sumlen})=$keyword:(.*)$/) {
                    $jigsum = $1;
                    $file = $2;
                    $file =~ s?^/??;
                    $type = $sumtype;
                }
            }
        }
        next if !defined($file);

        $num_unsorted++;
        # Only count a ref of any kind as unique
        if (!exists $ref{$file}) {
            $num_unique++;
        }
        # Even though we have to treat different checksums
        # differently
        if (!exists $ref{$file}{$type}) {
            $ref{$file}{$type} = $jigsum;
        } elsif (!($ref{$file}{$type} =~ /\Q$jigsum\E/ )) {
            print " ERROR: $file referenced again with different checksum!\n";
            print " (old " . $ref{$file}{$type} . " new $jigsum)\n";
        }
        if ($backref_file) {
            # Remember which jigdo files mention this file, one per line
            if (!defined $jigdo_backref{$file}) {
                $jigdo_backref{$file} = " $injig\n";
            } else {
                $jigdo_backref{$file} .= " $injig\n";
            }
        }
        if (!($num_unsorted % 100000) ) {
            print " found $num_unsorted total, $num_unique unique file refs, $num_parsed / $num_jigdos jigdo files ($injig)\n";
        }
    }

    # Closing a pipe reports the child's exit status, which is the only
    # way to notice that zcat could not read the file.
    if (!close($jig_fh)) {
        warn "zcat -f $injig failed (exit status " . ($? >> 8) . ")\n";
    }
}
$parsedonedate = `date -u`;
print " found $num_unsorted total, $num_unique unique file refs in $num_jigdos jigdo files\n";
|
|
|
|
# If asked to (-C), dump every checksum we know about, one
# "<checksum> <file>" line per checksum, sorted by file name.
if ($checksum_out) {
    open(my $ck_fh, '>', $checksum_out)
        or die "Can't open $checksum_out for writing: $!\n";
    foreach my $file (sort (keys %ref)) {
        foreach my $type ("md5", "sha256") {
            if (exists $ref{$file}{$type}) {
                print $ck_fh $ref{$file}{$type} . " $file\n";
            }
        }
    }
    # Buffered write errors (e.g. disk full) only surface at close time
    close($ck_fh)
        or die "Error writing $checksum_out: $!\n";
}
|
|
|
|
# Refuse to go on with an implausibly small set of files
die "Only $num_unique for the snapshot? Something is wrong; abort!\n"
    if ($num_unique < 5);

# Now look at the snapshot dir; create it if needed (a dry run cannot
# continue without it)
unless (-d $dirname) {
    print "$dirname does not exist\n";
    die "DRYRUN: not making it, so aborting\n" if $dryrun;
    mkdirs($dirname);
}

parse_ignore_file($ignorefile) if defined($ignorefile);

print "Trying to snapshot-link $num_unique files into $dirname\n";
print " (and checksumming every file, so this may take a while)\n"
    if $check_checksums;
generate_snapshot_tree();
$snapdonedate = `date -u`;

# Strip the newline that "date" leaves on each timestamp
foreach my $stamp ($startdate, $jlistdonedate, $parsedonedate, $snapdonedate) {
    chomp $stamp;
}

# Summary of when each phase finished
foreach my $phase (
    [ $startdate,     "startup" ],
    [ $jlistdonedate, "found $num_jigdos jigdo files" ],
    [ $parsedonedate, "found $num_unsorted files referenced in those jigdo files, $num_unique unique" ],
    [ $snapdonedate,  "snapshot done" ],
) {
    my ($when, $what) = @$phase;
    print "$when: $what\n";
}
|
|
|
|
# Single-jigdo mode only: write the jigdo file (with its Template= line
# pointing at the renamed template), a copy of the template and the
# jigit .conf file into $outdir, all named after $cdname.
if ($mode eq "single") {
    if ($dryrun) {
        print "DRYRUN: Not creating files in $outdir\n";
    } else {
        my $line;
        # Compress::Zlib reports its errors in $gzerrno rather than $!
        my $gzin = gzopen($single_jigdo, "rb") or
            die "Unable to open jigdo file $single_jigdo for reading: $gzerrno\n";
        my $gzout = gzopen("$outdir/$cdname.jigdo", "wb9") or
            die "Unable to open new jigdo file $outdir/$cdname.jigdo for writing: $gzerrno\n";
        while ($gzin->gzreadline($line) > 0) {
            $line =~ s:^Template=.*$:Template=$cdname.template:;
            $gzout->gzwrite($line) or
                die "Failed to write to $outdir/$cdname.jigdo: $gzerrno\n";
        }
        # gzclose is the documented way to close these handles; it
        # returns 0 on success.
        $gzin->gzclose();
        $gzout->gzclose() == 0 or
            die "Failed to finish writing $outdir/$cdname.jigdo: $gzerrno\n";

        copy("$template", "$outdir/$cdname.template") or
            die "Failed to copy template file $template: $!\n";

        open (my $conf_fh, '>', "$outdir/$cdname.conf") or
            die "Failed to open conf file $outdir/$cdname.conf for writing: $!\n";
        print $conf_fh "JIGDO=$cdname.jigdo\n";
        print $conf_fh "TEMPLATE=$cdname.template\n";
        # NOTE(review): in single-jigdo mode $dirname has already been
        # prefixed with $outdir by the argument checks, so this writes
        # "snapshot/<outdir>/<name>" -- confirm that is what jigit expects.
        print $conf_fh "SNAPSHOT=snapshot/$dirname\n";
        close($conf_fh) or
            die "Failed to write conf file $outdir/$cdname.conf: $!\n";

        print "Jigdo files, config and snapshot made in $outdir\n";
    }
}
|