rss logo

Intro

Here is a Perl script that I use to automatically replace my duplicated files with hard links.

Configuration

Instructions

user@host:~$ SEARCH=/data; find $SEARCH -not -empty -type f -printf %s\\n | sort -rn | uniq -d | xargs -I{} -n1 find $SEARCH -type f -size {}c -print0 | xargs -0 md5sum | sort | uniq -w32 --all-repeated=separate > duplicated_files
user@host:~$ sed -i 's/[0-9a-f]*\s\s//' duplicated_files

Usage

user@host:~$ deduphard.pl duplicated_files

Code

#!/usr/bin/perl
# Role : Perl script which replaces duplicated files with hard links
# Author : http://shebangthedolphins.net/
# Instructions  : 
# - create a file of duplicated files : SEARCH=/data; find $SEARCH -not -empty -type f -printf %s\\n | sort -rn | uniq -d | xargs -I{} -n1 find $SEARCH -type f -size {}c -print0 | xargs -0 md5sum | sort | uniq -w32 --all-repeated=separate > duplicated_files
# - strip the leading MD5 hashes from that file so that only the file paths remain
#  * sed -i 's/[0-9a-f]*\s\s//' duplicated_files
# 1.0 first version

use strict;
use warnings;

# Main program: read the list of duplicated files named by the first
# command-line argument and hard-link every duplicate to the first file of
# its group.
#
# Expected input: one path per line. Consecutive non-empty lines form a group
# of identical files; groups are separated by an empty line. The first line of
# a group is the link source, every following line of the group is replaced by
# a hard link to that source.
my $fichier = $ARGV[0];
defined $fichier
	or die "Usage: $0 duplicated_files\n";

my $firstline;		# reference path: first line of the current group
my $temoin = 0;		# 1 once the current group has its reference line

open(my $fh, '<', $fichier) or die "Error: $!";
while (my $ligne = <$fh>) {
	# chomp strips only a trailing newline, so a last line without one
	# keeps its final character (chop would cut the file name short).
	chomp($ligne);

	if (lignevide($ligne)) {	# empty line: the current group is over
		$temoin = 0;
		next;
	}
	if ($temoin == 0) {		# first line of a new group: remember it
		$firstline = $ligne;
		$temoin = 1;
		next;
	}

	# Any further line of the group is a duplicate of the reference line.
	my $source      = $firstline;
	my $destination = $ligne;
	print "hardlink : ln $source $destination\n\n";	#print information

	# List form of system: no shell is involved, so quotes, '$' or
	# backticks in a file name cannot break or inject into the command.
	# '--' keeps a path starting with '-' from being read as an option.
	# -f : delete destination files; -i ask before delete
	system('ln', '-fi', '--', $source, $destination);
	if ($? == -1) {
		warn "Error: cannot run ln: $!\n";
	}
	elsif ($? != 0) {
		warn "Error: ln did not succeed for $destination\n";
	}
}
close($fh);

# lignevide(LINE)
# Tell whether LINE is an empty line.
# Returns 1 when LINE is the empty string or consists of a single newline
# only, 0 for anything else (including lines made of spaces).
sub lignevide {
	my ($candidate) = @_;
	return $candidate =~ m/^$/ ? 1 : 0;
}
Creative Commons License
This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.

Contact :

contact mail address