#!/usr/bin/perl -w
#
# double.pl - find files with identical content and optionally move the
# duplicates into a ".double" directory below the current directory.
#
# Files are first matched by MD5 digest and then compared byte for byte, so
# a digest collision is reported ("same md5sum as") but never treated as a
# duplicate.

use strict;
use warnings;

use Getopt::Std;
use Digest::MD5;
use File::Path qw(make_path);

# Print the usage text.
#
# Getopt::Std calls this for --help and passes an output filehandle as the
# first argument; when called without arguments the text goes to STDOUT.
#
# NOTE(review): the synopsis line and the "<dir>" placeholder were
# reconstructed from a damaged copy of this heredoc; the option descriptions
# are original. Confirm the exact wording against the upstream script.
sub HELP_MESSAGE {
    my ($fh) = @_;
    $fh = \*STDOUT if !defined $fh;
    print {$fh} <<'EOF';
usage: double.pl [-mrsv] [<dir> ...]
       double.pl [-h|--help]
       double.pl [-V|--version]

  -m             move duplicate files into .double in the current directory
  -r             recursive, descend into subdirectories
  -s             rebuild the directory structure in .double; without this,
                 files with the same name get overwritten in .double
                 without any confirmation
  -v             be verbose, print what is being done
  <dir> ...      search beneath these directories; default: current directory
  -h, --help     print this help
  -V, --version  print version number, then exit
EOF
    return;
}

# Print the program name and version.
#
# Getopt::Std calls this for --version (and before the help text for --help)
# with an output filehandle as the first argument; default is STDOUT.
sub VERSION_MESSAGE {
    my ($fh) = @_;
    $fh = \*STDOUT if !defined $fh;
    print {$fh} "double.pl 20230908\n";
    return;
}

# Read the whole content of $path in binary mode.
#
# Returns the content as a string, or undef (with $! set) when the file
# cannot be opened or read.
sub _slurp {
    my ($path) = @_;
    open(my $fh, "<", $path) or return;
    binmode($fh);
    # Slurp mode yields '' for an empty file; undef therefore means a
    # genuine read error.
    my $data = do { local $/; <$fh> };
    close($fh);
    return $data;
}

# Scan one directory for duplicate files.
#
#   $dir        directory to scan; the empty string means the current
#               directory (paths are then printed without a prefix)
#   $checksums  hash ref mapping an MD5 digest to the first file seen with
#               that digest; shared across calls so that duplicates are
#               detected across directories
#   $opts       hash ref of command line switches:
#                 m  move duplicates into .double
#                 r  descend into subdirectories
#                 s  recreate $dir below .double when moving
#                 v  be verbose
#
# Problems with individual files or directories are reported with warn()
# and the scan continues. Returns nothing.
sub traverse {
    my ($dir, $checksums, $opts) = @_;

    # A length test rather than a truth test: a directory called "0" is a
    # perfectly valid argument.
    my $has_dir    = defined $dir && length $dir;
    my $actual_dir = $has_dir ? $dir : ".";
    print "doing $actual_dir\n" if $opts->{v};

    my $dh;
    if (!opendir($dh, $actual_dir)) {
        warn "$actual_dir: $!";
        return;
    }
    my @entries = sort readdir($dh);
    closedir($dh);

    my @subdirs;
    ENTRY:
    foreach my $entry (@entries) {
        my $file = $has_dir ? "$dir/$entry" : $entry;

        if (-d $file) {
            # Never descend into our own output directory.
            if ($entry ne "." && $entry ne ".." && $entry ne ".double") {
                push @subdirs, $file;
            }
            next ENTRY;
        }
        if (!-f $file) {
            print "$file: not a file\n";
            next ENTRY;
        }
        if (!-s $file) {
            print "$file: zero size\n";
            next ENTRY;
        }

        my $data = _slurp($file);
        if (!defined $data) {
            warn "$file: $!";
            next ENTRY;
        }
        my $sum = Digest::MD5->new()->add($data)->digest;

        # exists, not truth: the remembered file may be called "0".
        if (!exists $checksums->{$sum}) {
            $checksums->{$sum} = $file;
            next ENTRY;
        }
        my $old_file = $checksums->{$sum};

        # Overlapping start directories make us visit the same path twice;
        # a file is not a duplicate of itself and must not be moved away.
        next ENTRY if $file eq $old_file;

        my $old_data = _slurp($old_file);
        if (!defined $old_data) {
            warn "$old_file: $!";
            next ENTRY;
        }
        if ($data ne $old_data) {
            print "$file: same md5sum as $old_file\n";
            next ENTRY;
        }
        print "$file: $old_file is equal\n";
        next ENTRY if !$opts->{m};

        my $move_dir = ".double";
        $move_dir .= "/$dir" if $has_dir && $opts->{s};

        my $errors;
        make_path($move_dir, { error => \$errors, verbose => $opts->{v} });
        if ($errors && @$errors) {
            foreach my $error (@$errors) {
                # Each entry is a one-pair hash: file => message. An empty
                # file name marks a general error not tied to one path.
                my ($errfile, $message) = %$error;
                if (defined $errfile && length $errfile) {
                    warn "$errfile: $message";
                }
                else {
                    warn "$move_dir: $message";
                }
            }
            next ENTRY;
        }

        if (!rename($file, "$move_dir/$entry")) {
            warn "$file -> $move_dir/$entry: $!";
            next ENTRY;
        }
        print "$file -> $move_dir/$entry\n" if $opts->{v};
    }

    if ($opts->{r}) {
        foreach my $subdir (@subdirs) {
            traverse($subdir, $checksums, $opts);
        }
    }
    return;
}

# Parse the command line and scan every start directory.
sub main {
    # Make Getopt::Std print --help / --version to STDOUT and exit.
    $Getopt::Std::STANDARD_HELP_VERSION = 1;

    my %opts;
    exit(1) if !getopts("hmrsvV", \%opts);
    if ($opts{h}) {
        HELP_MESSAGE();
        exit(0);
    }
    if ($opts{V}) {
        VERSION_MESSAGE();
        exit(0);
    }

    # No argument means "current directory", spelled as the empty string so
    # that reported paths carry no "./" prefix.
    my @start_dirs = @ARGV ? @ARGV : ("");

    my %checksums;
    foreach my $start_dir (@start_dirs) {
        # Drop one trailing slash, but leave a lone "/" untouched.
        (my $dir = $start_dir) =~ s{\A(.+)/\z}{$1}s;
        traverse($dir, \%checksums, \%opts);
    }
    return;
}

# Run only when executed as a script, so the subs can be loaded on their own.
main() unless caller;