Here is a Perl script that I use to automatically replace my duplicate files with hard links.
user@host:~$ SEARCH=/data; find $SEARCH -not -empty -type f -printf %s\\n | sort -rn | uniq -d | xargs -I{} -n1 find $SEARCH -type f -size {}c -print0 | xargs -0 md5sum | sort | uniq -w32 --all-repeated=separate > duplicated_files
user@host:~$ sed -i 's/[0-9a-f]*\s\s//' duplicated_files
user@host:~$ deduphard.pl duplicated_files
#!/usr/bin/perl
# Role : Perl script which replaces duplicated files with hard links
# Author : http://shebangthedolphins.net/
# Instructions :
# - create a file of duplicated files :
#     SEARCH=/data; find $SEARCH -not -empty -type f -printf %s\\n | sort -rn | uniq -d | xargs -I{} -n1 find $SEARCH -type f -size {}c -print0 | xargs -0 md5sum | sort | uniq -w32 --all-repeated=separate > duplicated_files
# - format the file so that it can be used here, by deleting the hash data :
#     sed -i 's/[0-9a-f]*\s\s//' fichier
# 1.0 first version
#
# Input format: groups of file paths, one path per line, with groups
# separated by an empty line. The first path of each group is kept as the
# reference file; every following path of the group is replaced by a hard
# link to that reference.

use strict;
use warnings;

my $fichier = $ARGV[0];
defined $fichier
    or die "Usage: $0 duplicated_files\n";

my $firstline;     # reference (source) path of the current group
my $temoin = 0;    # 1 once the current group has a reference path, 0 after an empty line

open(my $fh, '<', $fichier) or die "Error: $!";    # open the list file

while (my $ligne = <$fh>) {    # read the list line by line
    # chomp only strips a trailing newline; chop would also eat the last
    # character of a path when the final line has no newline.
    chomp $ligne;

    if (lignevide($ligne)) {
        # An empty line ends the current group.
        $temoin = 0;
        next;
    }

    if (!$temoin) {
        # First non-empty line after a separator: remember it as the reference.
        $firstline = $ligne;
        $temoin    = 1;
        next;
    }

    my $source      = $firstline;
    my $destination = $ligne;
    print "hardlink : ln $source $destination\n\n";    # print information

    # List-form system() bypasses the shell, so quotes, '$' or backticks in
    # a file name cannot be interpreted as shell syntax. '--' prevents a
    # path starting with '-' from being parsed as an option.
    # -f : delete destination files; -i : ask before delete
    my $status = system('ln', '-fi', '--', $source, $destination);
    if ($status == -1) {
        warn "Could not run ln: $!\n";
    }
    elsif ($status != 0) {
        warn "ln failed for '$destination' (exit status " . ($status >> 8) . ")\n";
    }
}

close($fh) or warn "Error while closing $fichier: $!\n";

# Check whether a line is empty.
# Parameter : the line to test (with or without its trailing newline).
# Returns   : 1 if the line is empty, 0 otherwise.
sub lignevide {
    my ($func_ligne) = @_;
    return $func_ligne =~ m/^$/ ? 1 : 0;
}
Contact :