# Name of this package package FAD; # Packages required by this package use strict; use warnings; use lib substr(`ade-config ade_include_prefix`,0,-1) . '/include'; ## no critic (ProhibitBacktickOperators) use ADE; use File::Find; use Data::Dumper; use Fatal qw( close unlink ); # obviate checking close()'s return code # Declare exports BEGIN { use base qw( Exporter ); our @EXPORT_OK = qw(diff dump load collect_info insert_info reindex clone $IT_FILE $IT_DIR $IT_PIPE $IT_SYMLINK $IT_BLOCK $IT_CHAR $IT_SOCKET $CT_DELETED $CT_ADDED $CT_TYPE $CT_OWNER $CT_GROUP $CT_MODE $CT_LINKS $CT_CONT_MAJMIN $CT_CONT_CRC $CT_CONT_SYMLINK $BY_REQUEST_NOT_CHECKED $NOT_READABLE); } END { } # Instantiate exported data our($CT_DELETED, $CT_ADDED, $CT_TYPE, $CT_OWNER, $CT_GROUP, $CT_MODE, $CT_LINKS, $CT_CONT_MAJMIN, $CT_CONT_CRC, $CT_CONT_SYMLINK); our($IT_FILE, $IT_DIR, $IT_PIPE, $IT_SYMLINK, $IT_BLOCK, $IT_CHAR, $IT_SOCKET); # Initialise exported data ($CT_DELETED, $CT_ADDED, $CT_TYPE, $CT_OWNER, $CT_GROUP, $CT_MODE, $CT_LINKS, $CT_CONT_MAJMIN, $CT_CONT_CRC, $CT_CONT_SYMLINK) = qw(d a t u g m l s C S); ($IT_FILE, $IT_DIR, $IT_PIPE, $IT_SYMLINK, $IT_BLOCK, $IT_CHAR, $IT_SOCKET) = qw(f d p l b c s); # Instantiate private data my($app_svnid, $BY_REQUEST_NOT_CHECKED, $NOT_READABLE, $magic_number, $fad_version, %fad_defined_errors); # Initialise private data $app_svnid = '$HeadURL$ $LastChangedRevision$'; ## no critic (RequireInterpolationOfMetachars) # File system object type descriptors ($BY_REQUEST_NOT_CHECKED, $NOT_READABLE) = (-1, -2); # won't clash with valid file contents $magic_number = 'FaDFiLe'; $fad_version = '4'; # Error codes (this will be registered with ADE's error handling routines) %fad_defined_errors = ( fad_err_misc => { fmt => '%s' }, fad_err_access => { fmt => '%s: can\'t %s' }, ); ############################################################################## # # PUBLIC FUNCTIONS # ############################################################################## sub diff ## no critic (ProhibitExcessComplexity) { my($errstack_ref, $old_store_ref, $new_store_ref, $cb_fnc_ref, $cb_param) = @_; my($advance_old_index_offset_flag, $advance_new_index_offset_flag); my(%change); my($key, $filename, $d, $rc); my($old_name, $new_name, $old_type, $new_type, $old_owner, $new_owner, $old_group, $new_group, $old_mode, $new_mode, $old_links, $new_links, $old_content, $new_content, @old_other_names, @new_other_names, $new_other_names_index, $old_other_names_index); my(%old_hash_by_filename, %new_hash_by_filename, @new_index_by_filename, @old_index_by_filename, $new_index_offset, $old_index_offset); ADE::debug($errstack_ref, 40, 'diff: sof'); $d = Data::Dumper->new([$old_store_ref]); ADE::debug($errstack_ref, 40, 'diff: dumped hash is ' . $d->Dump); undef $d; # Generate hash by filename and index of the old data. if (($rc=reindex($errstack_ref, $old_store_ref, \%old_hash_by_filename, \@old_index_by_filename)) != $ADE::OK) { return($rc); } $d = Data::Dumper->new([\%old_hash_by_filename]); ADE::debug($errstack_ref, 40, 'diff: dumped hash is ' . $d->Dump); undef $d; $d = Data::Dumper->new([\@old_index_by_filename]); ADE::debug($errstack_ref, 40, 'diff: dumped index is ' . $d->Dump); undef $d; $d = Data::Dumper->new([$new_store_ref]); ADE::debug($errstack_ref, 40, 'diff: dumped hash is ' . $d->Dump); undef $d; # Generate hash by filename and index of the new data. if (($rc=reindex($errstack_ref, $new_store_ref, \%new_hash_by_filename, \@new_index_by_filename)) != $ADE::OK) { return($rc); } $d = Data::Dumper->new([\%new_hash_by_filename]); ADE::debug($errstack_ref, 40, 'diff: dumped hash is ' . $d->Dump); undef $d; $d = Data::Dumper->new([\@new_index_by_filename]); ADE::debug($errstack_ref, 40, 'diff: dumped index is ' . $d->Dump); undef $d; #for ($old_index_offset = 0, $new_index_offset = 0; $old_index_offset < $#old_index_by_filename+1 or $new_index_offset < $#new_index_by_filename+1; $old_index_offset += $advance_old_index_offset_flag, $new_index_offset += $advance_new_index_offset_flag) { $old_index_offset = 0; $new_index_offset = 0; while (1) { if ($old_index_offset == $#old_index_by_filename+1 and $new_index_offset == $#new_index_by_filename+1) { last; } # no changes for the current listings-pair detected yet ... undef %change; if ($old_index_offset < $#old_index_by_filename+1) { $old_name = $old_index_by_filename[$old_index_offset]; @old_other_names = sort @{${$old_store_ref}{$old_hash_by_filename{$old_name}}{'names'}}; $old_type = ${$old_store_ref}{$old_hash_by_filename{$old_name}}{'data'}{'type'}; $old_owner = ${$old_store_ref}{$old_hash_by_filename{$old_name}}{'data'}{'owner'}; $old_group = ${$old_store_ref}{$old_hash_by_filename{$old_name}}{'data'}{'group'}; $old_mode = ${$old_store_ref}{$old_hash_by_filename{$old_name}}{'data'}{'mode'}; $old_links = ${$old_store_ref}{$old_hash_by_filename{$old_name}}{'data'}{'links'}; $old_content = ${$old_store_ref}{$old_hash_by_filename{$old_name}}{'data'}{'content'}; ADE::debug($errstack_ref, 4, "diff: old_name=$old_name, old_index_offset=$old_index_offset, #old_index_by_filename=" . ($#old_index_by_filename+1) . ", old_type=$old_type, old_owner=$old_owner, old_group=$old_group, old_links=$old_links, old_content=$old_content"); } else { ADE::debug($errstack_ref, 4, 'diff: no old data left to read'); } if ($new_index_offset < $#new_index_by_filename+1) { $new_name = $new_index_by_filename[$new_index_offset]; @new_other_names = sort @{${$new_store_ref}{$new_hash_by_filename{$new_name}}{'names'}}; $new_type = ${$new_store_ref}{$new_hash_by_filename{$new_name}}{'data'}{'type'}; $new_owner = ${$new_store_ref}{$new_hash_by_filename{$new_name}}{'data'}{'owner'}; $new_group = ${$new_store_ref}{$new_hash_by_filename{$new_name}}{'data'}{'group'}; $new_mode = ${$new_store_ref}{$new_hash_by_filename{$new_name}}{'data'}{'mode'}; $new_links = ${$new_store_ref}{$new_hash_by_filename{$new_name}}{'data'}{'links'}; $new_content = ${$new_store_ref}{$new_hash_by_filename{$new_name}}{'data'}{'content'}; ADE::debug($errstack_ref, 4, "diff: new_name=$new_name, new_index_offset=$new_index_offset, #new_index_by_filename=" . ($#new_index_by_filename+1) . ", new_type=$new_type, new_owner=$new_owner, new_group=$new_group, new_links=$new_links, new_content=$new_content"); } else { ADE::debug($errstack_ref, 4, 'diff: no new data left to read'); } ADE::debug($errstack_ref, 4, 'diff: checking for differences ...'); # We do this in a new block because we will want to jump forward when there is no # point in checking other change types (e.g. filetype changes, file added, file deleted). # Doing this with 'next' is not what we want because there is code to execute at # after this block but before we loop round. CHECK_CHANGES: { # if file is in the other but not in the first then we have an added file if ($new_index_offset == $#new_index_by_filename+1 or ($old_index_offset != $#old_index_by_filename+1 and $old_name lt $new_name)) { ADE::debug($errstack_ref, 30, "diff: $old_name: deleted"); $change{'file'} = $old_name; push @{$change{'changes'}}, { type => $CT_DELETED }; $advance_old_index_offset_flag = 1; $advance_new_index_offset_flag = 0; last CHECK_CHANGES; } # if file is in one but not in other then we have a deleted file if ($old_index_offset == $#old_index_by_filename+1 or ($new_index_offset != $#new_index_by_filename+1 and $old_name gt $new_name)) { ADE::debug($errstack_ref, 30, "diff: $new_name: added"); $change{'file'} = $new_name; push @{$change{'changes'}}, { type => $CT_ADDED }; $advance_old_index_offset_flag = 0; $advance_new_index_offset_flag = 1; last CHECK_CHANGES; } # otherwise we have entries in each listing for the current file, but # those listings may be different so here check if ($old_type ne $new_type) { ADE::debug($errstack_ref, 30, "diff: $old_name: type changed"); $change{'file'} = $old_name; push @{$change{'changes'}}, { type => $CT_TYPE, old => $old_type, new => $new_type}; $advance_old_index_offset_flag = 1; $advance_new_index_offset_flag = 1; last CHECK_CHANGES; } if ($old_owner ne $new_owner) { ADE::debug($errstack_ref, 30, "diff: $old_name: owner changed"); $change{'file'} = $old_name; push @{$change{'changes'}}, { type => $CT_OWNER, old => $old_owner, new => $new_owner }; } if ($old_group ne $new_group) { ADE::debug($errstack_ref, 30, "diff: $old_name: group changed"); $change{'file'} = $old_name; push @{$change{'changes'}}, { type => $CT_GROUP, old => $old_group, new => $new_group }; } if ($old_mode ne $new_mode) { ADE::debug($errstack_ref, 30, "diff: $old_name: mode changed"); $change{'file'} = $old_name; push @{$change{'changes'}}, { type => $CT_MODE, old => $old_mode, new => $new_mode }; } if ($old_links != 1 or $new_links != 1) { ADE::debug($errstack_ref, 4, 'diff: investigating hard link lists ..'); # Scan sorted list of names looking for differences #for ($old_other_names_index=0, $new_other_names_index=0; $old_other_names_index<$#old_other_names+1 and $new_other_names_index<$#new_other_names+1; $old_other_names_index++, $new_other_names_index++) { $old_other_names_index=0; $new_other_names_index=0; while (1) { if ($old_other_names_index == $#old_other_names+1 or $new_other_names_index == $#new_other_names+1) { last; } if ($old_other_names[$old_other_names_index] ne $new_other_names[$new_other_names_index]) { last; } $old_other_names_index++; $new_other_names_index++; } # If we broke out of that loop early then it means a mismatch was found. if ($old_other_names_index<$#old_other_names+1 or $new_other_names_index<$#new_other_names+1) { ADE::debug($errstack_ref, 30, "diff: $old_name: links changed"); $change{'file'} = $old_name; # brackets for precendence not functionalisation of 'defined'. push @{$change{'changes'}}, { type => $CT_LINKS, old => join (',',@old_other_names), new => join ',',@new_other_names }; } } # if either file was unreadable then error (5 of 9 cases) # (note that if the whole record from 1st file is identical to # the second and they (both) contain $NOT_READABLE then we don't # get this far because identical records are skipped higher up. if ($old_content ne $new_content and ($old_content eq $NOT_READABLE or $new_content eq $NOT_READABLE)) { ADE::error($errstack_ref, 'fad_err_misc', "$old_name or $new_name was unreadable when read"); return($ADE::FAIL); # second case is that both files are marked $BY_REQUEST_NOT_CHECKED # which is fine, no action necessary. } elsif ($old_content ne $new_content and ($old_content eq $BY_REQUEST_NOT_CHECKED and $new_content eq $BY_REQUEST_NOT_CHECKED)) { ADE::debug($errstack_ref, 40, 'diff: two not crc\'d files'); # third case is one has $BY_REQUEST_NOT_CHECKED and the other # doesn't. This is an error. } elsif ($old_content ne $new_content and ($old_content eq $BY_REQUEST_NOT_CHECKED or $new_content eq $BY_REQUEST_NOT_CHECKED)) { ADE::error($errstack_ref, 'fad_err_misc', "only one of $old_name and $new_name was not CRC'ed"); return($ADE::FAIL); } elsif ($old_content ne $new_content and $old_type eq $IT_FILE) { ADE::debug($errstack_ref, 30, "diff: $old_name: content crc changed"); $change{'file'} = $old_name; push @{$change{'changes'}}, { type => $CT_CONT_CRC, old => $old_content, new => $new_content}; } elsif ($old_content ne $new_content and $old_type eq $IT_SYMLINK) { ADE::debug($errstack_ref, 30, "diff: $old_name: symlink target changed"); $change{'file'} = $old_name; push @{$change{'changes'}}, { type => $CT_CONT_SYMLINK, old => $old_content, new => $new_content }; } elsif ($old_content ne $new_content and $old_type eq $IT_BLOCK or $old_type eq $IT_CHAR) { ADE::debug($errstack_ref, 30, "diff: $old_name: major/minor changed changed"); $change{'file'} = $old_name; push @{$change{'changes'}}, { type => $CT_CONT_MAJMIN, old => $old_content, new => $new_content}; } # All other sorts of differences will mean we advance to the next file on *both* lists. $advance_old_index_offset_flag = 1; $advance_new_index_offset_flag = 1; } # report changes if any detected if (%change and ($rc=&{$cb_fnc_ref}($errstack_ref, $cb_param, \%change)) != $ADE::OK) { ADE::debug($errstack_ref, 40, 'diff: cb_fnc_ref() failed, returning failure ...'); return($rc); } $old_index_offset += $advance_old_index_offset_flag; $new_index_offset += $advance_new_index_offset_flag; } # If we get this far then success ADE::debug($errstack_ref, 40, 'diff: returning success ...'); return($ADE::OK); } sub dump { my($errstack_ref, $out_handle, $data_store_ref) = @_; my($d, $rc); if (($rc=_fad_header_write($errstack_ref, $out_handle)) != $ADE::OK) { return($rc); } $d = Data::Dumper->new([$data_store_ref]); print $out_handle $d->Dump; undef $d; return($ADE::OK); } sub load { my($errstack_ref, $in_handle, $old_store_ref, $header_hashref, $regexp) = @_; my($code, $VAR1, $key); my(%old_hash_by_filename, %new_hash_by_filename, $rc); ADE::debug($errstack_ref, 40, 'load: sof'); # Generate hash by filename and index of the old data. if (($rc=reindex($errstack_ref, $old_store_ref, \%old_hash_by_filename, undef)) != $ADE::OK) { return($rc); } # Read new data's header. if (($rc=_fad_header_read($errstack_ref, $in_handle, $header_hashref)) != $ADE::OK) { return($rc); } # Read new data's body. ADE::debug($errstack_ref, 40, 'load: slurping ...'); { local $/ = undef; $code = scalar <$in_handle>; } ADE::debug($errstack_ref, 40, "load: code=[$code]"); eval $code; ## no critic (ProhibitStringyEval, RequireCheckingReturnValueOfEval) # Generate hash by filename and index of the new data. if (($rc=reindex($errstack_ref, $VAR1, \%new_hash_by_filename, undef)) != $ADE::OK) { return($rc); } # Loop over each new item, rejecting it if it is to be filtered # out, flagging an error if it exists already, otherwise inserting it. ADE::debug($errstack_ref, 40, 'load: looping over new names ...'); foreach my $filename (keys %new_hash_by_filename) { ADE::debug($errstack_ref, 40, "load: processing $filename ..."); if (defined $regexp and $filename !~ /$regexp/) { ADE::debug($errstack_ref, 40, "load: $filename: filtered out ('cos doesn't match '$regexp')"); next; } elsif (defined $old_hash_by_filename{$filename}) { ADE::error($errstack_ref, 'fad_err_misc', "$filename: characteristics for this file loaded already"); return($ADE::FAIL); } else { ADE::debug($errstack_ref, 40, "load: $filename: inserting ..."); ${$old_store_ref}{$new_hash_by_filename{$filename}} = ${$VAR1}{$new_hash_by_filename{$filename}}; } } return($ADE::OK); } sub collect_info ## no critic (ProhibitExcessComplexity) { my($errstack_ref, $filename, $suppress_crcs, $info_hash_ref_ref) = @_; my($linktext, $type, $dev, $inode, $nlink, $uid, $gid, $rdev); my($size, $atime, $mtime, $ctime, $blksize, $blocks); my ($buf, $contents, $mode, $crc_handle); ADE::debug($errstack_ref, 40, "collect_info: processing $filename ..."); # For all fs entities we identify their type. How do you spot a # symbolic link? Assume it is one and try seeing where it points. if ($linktext = readlink $filename) { $type = $IT_SYMLINK; ($dev,$inode,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks) = lstat $filename; # Otherwise stat like a file/dir. } elsif (stat $filename) { ($dev,$inode,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks) = stat _; $type = $IT_FILE if -f _; $type = $IT_DIR if -d _; $type = $IT_PIPE if -p _; $type = $IT_SOCKET if -S _; $type = $IT_BLOCK if -b _; $type = $IT_CHAR if -c _; # If stat fails, then return *success* but flag to the caller what happened by setting undef. } else { ${$info_hash_ref_ref} = undef; return($ADE::OK); } # Now based on what it was, certain bits of information # are ignored, and certain extra information is ascertained. For # a symlink, we note the destination it points to. if ($type eq $IT_SYMLINK) { # Can symbolic links by hard linked ? $nlink = 1; $contents = $linktext; $uid = 0; $gid = 0; # For a directory there is nothing else to note. } elsif ($type eq $IT_DIR) { # Can directories by hard linked ? $nlink = 1; $contents = 0; # For a file we get the CRC, if we can, unless instructed not to. } elsif ($type eq $IT_FILE and $suppress_crcs) { $contents = $BY_REQUEST_NOT_CHECKED; # Openable file } elsif ($type eq $IT_FILE and open $crc_handle, '<', $filename) { $contents = 0; while (read $crc_handle, $buf, 262144) { $contents += unpack '%32C*', $buf; $contents = $contents % 32767; } close $crc_handle; # Unopenable file (return success but flag it to caller). } elsif ($type eq $IT_FILE) { ${$info_hash_ref_ref} = undef; return($ADE::OK); # For device files we save the major and minode numbers } elsif ($type eq $IT_BLOCK) { $contents = $rdev; } elsif ($type eq $IT_CHAR) { $contents = $rdev; # For a socket there's nothing. } elsif ($type eq $IT_SOCKET) { $contents = 0; # For a pipe there's nothing. } elsif ($type eq $IT_PIPE) { $contents = 0; } # Additionally the mode contains some junk at the high order # bits so we mask them off. $mode &= oct 7777; ${$info_hash_ref_ref} = { names => [ $filename ], data => { type => $type, owner => $uid, group => $gid, mode => $mode, content => $contents, links => $nlink, dev => $dev, inode => $inode } }; return($ADE::OK); } sub insert_info { my($errstack_ref, $data_store_ref, $info_ref) = @_; my($dev, $inode); $dev = ${$info_ref}{'data'}{'dev'}; $inode = ${$info_ref}{'data'}{'inode'}; if (not defined ${$data_store_ref}{$dev,$inode}) { ${$data_store_ref}{$dev,$inode} = $info_ref; } else { push @{${$data_store_ref}{$dev,$inode}{'names'}}, ${$info_ref}{'names'}[0]; } ADE::debug($errstack_ref, 10, 'insert_info: currently known names now: ' . join ', ', @{${$data_store_ref}{$dev,$inode}{'names'}}); return($ADE::OK); } sub reindex { my($errstack_ref, $store_ref, $namehash_ref, $orderednamearray_ref) = @_; # In practice populating a name hash is the minimum this function could do # (remember the ordered name array's values are keys in the namehash). # More importantly it is also absolutely required to populate a orderednamearray. if (not defined $namehash_ref) { ADE::internal($errstack_ref, 'reindex: namehash_ref not defined'); } foreach my $key (keys %{$store_ref}) { foreach my $filename (@{${$store_ref}{$key}{'names'}}) { ${$namehash_ref}{$filename} = $key; } } if (defined $orderednamearray_ref) { @{$orderednamearray_ref} = sort { $a cmp $b } keys %{$namehash_ref}; } return($ADE::OK); } sub clone { my($errstack_ref, $src_store_ref, $dst_store_ref) = @_; my($VAR1, $d); $d = Data::Dumper->new([$src_store_ref]); # Create a reference to a copy of the hash. eval $d->Dump; ## no critic (ProhibitStringyEval, RequireCheckingReturnValueOfEval) undef $d; # Copy the anoymous hash to a real hash. %{$dst_store_ref} = %{$VAR1}; return($ADE::OK); } ############################################################################## # # PRIVATE FUNCTIONS # ############################################################################## sub _fad_header_write { my($errstack_ref, $out_handle) = @_; # Statutory headers print $out_handle "$magic_number\n"; print $out_handle "FAD-Version: $fad_version\n"; # Version-specific headers print $out_handle 'Unix-Time: ' . time . "\n"; # End of headers marker print $out_handle "EOH\n"; return($ADE::OK); } sub _fad_header_read { my($errstack_ref, $fad_handle, $read_header_ref) = @_; my($key, $val, %read_header, $read_header_magic_number, $read_header_line, $had_eoh); ADE::debug($errstack_ref, 40, '_fad_header_read: reading magic number'); if (!($read_header_magic_number = <$fad_handle>)) { ADE::error($errstack_ref, 'fad_err_misc', 'can\'t read magic number'); return($ADE::FAIL); } chomp $read_header_magic_number; # Read in remaining header lines - up to 'EOH', and validate ADE::debug($errstack_ref, 40, '_fad_header_read: looping over remaining lines ...'); $had_eoh = 0; while ((not $had_eoh) and (not eof $fad_handle)) { ADE::debug($errstack_ref, 40, '_fad_header_read: about to read a line'); $read_header_line = <$fad_handle>; chomp $read_header_line; ($key, $val) = split /:\s*/, $read_header_line, 2; $val = 0 if (defined $key and not defined $val); if ($key eq 'EOH') { $had_eoh = 1; } else { $read_header{$key} = $val; ADE::debug($errstack_ref, 40, "_fad_header_read: read header (key=$key, val=$val)"); } } # If (had EOF and) not yet had end-of-headers marker then error. ADE::debug($errstack_ref, 40, '_fad_header_read: checking headers were terminated ...'); if (!$had_eoh) { ADE::debug($errstack_ref, 40, '_fad_header_read: headers were not terminated'); ADE::error($errstack_ref, 'fad_err_misc', 'headers not terminated'); return($ADE::FAIL); } ADE::debug($errstack_ref, 40, '_fad_header_read: headers were terminated'); # Validate headers ADE::debug($errstack_ref, 40, '_fad_header_read: validating magic number ...'); if ($read_header_magic_number ne $magic_number) { ADE::error($errstack_ref, 'fad_err_misc', 'mismatching magic number'); return($ADE::FAIL); } ADE::debug($errstack_ref, 40, '_fad_header_read: validating FAD version ...'); if (($read_header{'FAD-Version'} ne $fad_version)) { ADE::error($errstack_ref, 'fad_err_misc', 'mismatching FAD version'); return($ADE::FAIL); } # the remaining headers are simply ignored ADE::debug($errstack_ref, 40, '_fad_header_read: validating free fields'); # delete the headers which are not the concern of other programs delete $read_header{'FAD-Version'}; delete $read_header{'EOH'}; # The rest are for things higher up (e.g. fadscan uses Unix-Time in # the report headers. if (defined $read_header_ref) { %{$read_header_ref} = %read_header; } return($ADE::OK); } sub _fad_complex_initialisations { my ($errstack_ref) = @_; shift; ADE::register_error_types(\%fad_defined_errors); return($ADE::OK); } ############################################################################## # # CODE # ############################################################################## _fad_complex_initialisations(undef); ############################################################################## # # NON-ZERO MODULE EXIT CODE # ############################################################################## 1; # don't forget to return a true value from the file