#!/usr/bin/perl

use strict;

($main::progname = $0) =~ s/^.*\/([^\/]+)/$1/;
$main::svn_id = '$HeadURL$ $LastChangedRevision$';
$main::version_scheme = 'svn';
$main::release_version = 'ADE_APP_TOKEN_RELEASEID';

use IO::Dir;

#  Only this fraction of the media capacity given on the command line is
#  actually used as the size limit for one chunkgroup.
my($tar_media_capacity_ratio) = 0.75;

#  NOTE(review): the ade_msg_*() helpers called below are not defined in this
#  file and nothing here calls main(); presumably both are supplied by the
#  surrounding ADE framework -- confirm. The code also relies on
#  ade_msg_error() not returning -- confirm.

##########################################################################
#
#  main(<options> ..., <tag>, <media-capacity>, <chunkgroup-num>, <dir> ...)
#
#  Splits the given directory trees into "chunks" (files or directories
#  smaller than the chunkgroup size limit), packs the chunks into
#  "chunkgroups" that each stay below the limit, and prints the
#  space-separated, sorted chunk names of chunkgroup <chunkgroup-num>.
#
#  With -g the computed chunkgroups are written to a cache file in /tmp;
#  without it the computed chunkgroups are compared against that cache and
#  any difference is reported through ade_msg_error().
#
#  Returns 0.
#
##########################################################################

sub main {
    my @args = @_;

    ##########################################################################
    #
    #  PROCESS OPTIONS
    #
    ##########################################################################

    #  Default values for options
    my $opt_generate_cache = 0;

    #  Loop over each option
    while (defined($args[0]) && $args[0] =~ /^-./) {
        my $opt = shift @args;

        #  standard options
        if ($opt =~ /^(?:-V|--version)$/) {
            &ade_msg_version(\&app_version);
        } elsif ($opt =~ /^(?:-d|--debug)(.*)/) {
            #  Test the length, not the truth, of the attached level so
            #  that "-d0" is honoured instead of swallowing the next word.
            $main::verboselevel = (length($1) ? $1 : shift @args);
        } elsif ($opt =~ /^(?:-v|--verbose)$/) {
            $main::verboselevel = 3;
        } elsif ($opt =~ /^(?:-h|--help)$/) {
            &ade_msg_usage(\&app_usage, 0);
        } elsif ($opt =~ /^(?:-g|--generate-cache)$/) {
            $opt_generate_cache = 1;
        } elsif ($opt =~ /^(?:-p|--list-paths)$/) {
            &ade_msg_listpaths(\&app_listpaths);
        } else {
            &ade_msg_usage(\&app_usage, 1);
        }
    }

    ##########################################################################
    #
    #  PROCESS ARGUMENTS
    #
    ##########################################################################

    #  At least one directory is required; a directory called "0" is valid.
    (defined($args[3]) && length($args[3])) || &ade_msg_usage(\&app_usage, 1);
    my $tag = shift @args;
    my $max_chunkgroup_size = int((shift @args) * $tar_media_capacity_ratio);
    my $chunkgroup_num = shift @args;
    my @dirs = @args;
    &ade_msg_debug(10, "main: tag=$tag, max_chunkgroup_size=$max_chunkgroup_size, chunkgroup_num=$chunkgroup_num, dirs=" . join(' ', @dirs));

    ##########################################################################
    #
    #  GUTS STARTS HERE
    #
    ##########################################################################

    #  Map every path reported by 'du -a' to the size 'du' printed for it.
    &ade_msg_info("loading 'du' stats ...");
    my %path_size_map;
    foreach my $dir (@dirs) {
        #  List-form pipe open: the directory name never passes through a
        #  shell, so spaces and metacharacters in it are harmless.
        open(my $du_pipe, '-|', 'du', '-a', '--', $dir) || &ade_msg_error("du: failed");
        while (my $line = <$du_pipe>) {
            chomp($line);
            #  Everything after the size column is the path; paths may
            #  contain whitespace. Lines in any other format are ignored.
            next unless ($line =~ /^(\d+)\s+(.+)$/);
            $path_size_map{$2} = $1;
        }
        close($du_pipe) || &ade_msg_error($! ? $! : "'du' exited with code $?");
    }

    &ade_msg_info("chunkifying ...");
    my @chunks = ();
    foreach my $dir (@dirs) {
        &ade_msg_debug(10, "main: chunkifying $dir ...");
        &recurse($dir, \%path_size_map, \@chunks, $max_chunkgroup_size);
    }

    &ade_msg_info("reordering chunks to ensure better chunkgroup packing ...");
    @chunks = sort { $path_size_map{$b} <=> $path_size_map{$a} } (@chunks);
    foreach my $chunk (@chunks) {
        &ade_msg_debug(10, "main: reordered: name=$chunk, size=$path_size_map{$chunk}");
    }

    #  Repeatedly sweep the (largest first) chunk list, moving every chunk
    #  that still fits into the current chunkgroup. A sweep that adds nothing
    #  means the current chunkgroup is complete. Consumed chunks are marked
    #  by undefining their slot.
    &ade_msg_info("grouping chunks ...");
    my @chunkgroups;
    my $current_chunkgroup_size = 0;
    my @current_chunkgroup_chunks = ();
    my $empty;
    do {
        my $added = 0;
        $empty = 1;
        foreach my $i (0 .. $#chunks) {
            next if (!defined($chunks[$i]));
            $empty = 0;
            &ade_msg_debug(10, "main: considering chunk $chunks[$i] ...");
            if ($current_chunkgroup_size + $path_size_map{$chunks[$i]} < $max_chunkgroup_size) {
                &ade_msg_debug(10, "main: adding chunk $chunks[$i] to chunkgroup ...");
                $current_chunkgroup_size += $path_size_map{$chunks[$i]};
                push(@current_chunkgroup_chunks, $chunks[$i]);
                $added = 1;
                undef($chunks[$i]);
            }
        }
        if (!$added) {
            &ade_msg_debug(10, "main: chunkgroup full, outputting ordered chunkgroup ...");
            push(@chunkgroups, [ sort(@current_chunkgroup_chunks) ]);
            $current_chunkgroup_size = 0;
            @current_chunkgroup_chunks = ();
        }
    } until ($empty);

    #  Cache format: one chunkgroup per line, chunk names separated by a
    #  single space.
    my $chunkgroups_cache_file = "/tmp/$main::progname.$tag.cache";
    if ($opt_generate_cache) {
        open(my $cache_fh, '>', $chunkgroups_cache_file) || &ade_msg_error("$chunkgroups_cache_file: can't write");
        foreach my $group_ref (@chunkgroups) {
            print $cache_fh join(' ', sort(@{$group_ref})) . "\n";
        }
        #  Buffered write errors only surface at close time.
        close($cache_fh) || &ade_msg_error("$chunkgroups_cache_file: can't write");
    } else {
        &ade_msg_info("loading cached chunkgroups from $chunkgroups_cache_file for comparison ...");
        my @cached_chunkgroups;
        open(my $cache_fh, '<', $chunkgroups_cache_file) || &ade_msg_error("$chunkgroups_cache_file: can't read");
        while (my $line = <$cache_fh>) {
            chomp($line);
            push(@cached_chunkgroups, [ split(/ /, $line) ]);
        }
        close($cache_fh);

        &ade_msg_info("verifying chunkgroups have not changed since cached ...");
        &ade_msg_error("chunkgroup count has changed!") if ($#cached_chunkgroups != $#chunkgroups);
        foreach my $i (0 .. $#chunkgroups) {
            my $chunkgroup_ref = $chunkgroups[$i];
            my $cached_chunkgroup_ref = $cached_chunkgroups[$i];
            &ade_msg_error("chunk count for chunkgroup $i has changed!") if ($#{$chunkgroup_ref} != $#{$cached_chunkgroup_ref});
            foreach my $j (0 .. $#{$chunkgroup_ref}) {
                &ade_msg_debug(10, "main: comparing cg=$i, c=$j: ${$chunkgroup_ref}[$j], ${$cached_chunkgroup_ref}[$j] ...");
                &ade_msg_error("chunk names in chunkgroup $i have changed!") if (${$chunkgroup_ref}[$j] ne ${$cached_chunkgroup_ref}[$j]);
            }
        }
    }

    #  The requested chunkgroup must be a plain non-negative index of a
    #  chunkgroup that exists (defined(@array) is not valid in current Perl).
    &ade_msg_error("$chunkgroup_num: no such chunkgroup")
        if ($chunkgroup_num !~ /^\d+$/ || !defined($chunkgroups[$chunkgroup_num]));
    print join(' ', sort(@{$chunkgroups[$chunkgroup_num]})) . "\n";

    return(0);
}

##########################################################################
#
#  recurse(<item>, <path-size-map-ref>, <chunks-ref>, <max-chunkgroup-size>)
#
#  Appends <item> to the array referenced by <chunks-ref> if its size in the
#  hash referenced by <path-size-map-ref> is below <max-chunkgroup-size>.
#  Otherwise <item> must be a directory, and each of its entries (except
#  "." and "..") is processed the same way. An item that has no entry in
#  the map compares as size zero and is accepted.
#
#  Dies if an oversized item is not a directory or cannot be opened.
#
##########################################################################

sub recurse {
    my ($item, $path_size_map_ref, $chunks_ref, $max_chunkgroup_size) = @_;

    if (${$path_size_map_ref}{$item} < $max_chunkgroup_size) {
        &ade_msg_debug(10, "recurse: chunk $item is small enough, accepting ...");
        push(@{$chunks_ref}, $item);
        return;
    }

    #  (The original followed this die with an ade_msg_error() call that
    #  could never be reached; it has been dropped.)
    die "$item is too big and it isn't a directory!" if (! -d $item);

    &ade_msg_debug(10, "recurse: chunk $item is too big, crumbling ...");
    my $d = IO::Dir->new($item) or die "$item: can't open directory: $!";
    #  A lexical entry variable keeps the nested calls from sharing $_.
    while (defined(my $entry = $d->read)) {
        next if ($entry eq "." || $entry eq "..");
        &recurse("$item/$entry", $path_size_map_ref, $chunks_ref, $max_chunkgroup_size);
    }
    $d->close;
    return;
}

#  Stores $main::version in the scalar referenced by the first argument.
#  Returns 0.

sub app_version {
    my($version_ref) = @_;

    ${$version_ref} = $main::version;

    return(0);
}

#  Prints the usage message to standard output. Returns 0.

sub app_usage {
    print "Usage: $main::progname [ <options> ] <tag> <media-capacity> <chunkgroup-num> <dir> ...\n";
    print "\n";
    #  standard options
    print "Options: -V | --version display version information\n";
    print " -v | --verbose verbose\n";
    print " -d | --debug set debug level\n";
    print " -h | --help display this text\n";
    print " -p | --list-paths list paths used by this program\n";
    #  application-specific options
    print " -g | --generate-cache generate cache file\n";
    print "\n";

    return(0);
}

#  Lists the paths used by this program; currently nothing is reported.
#  Returns 0.

sub app_listpaths {
    #  If there is only one log file use this
    #print "Log-File: $main::log_file\n"
    #  If there are several, then just report the directory
    #print "Log-Directory: $main::log_dir\n"

    return(0);
}