From f0ea5246398f08506dab796048624651c8908e30 Mon Sep 17 00:00:00 2001 From: Samuel Tardieu Date: Sun, 5 Jan 2020 11:24:58 +0100 Subject: [PATCH] Make remove-duplicates behaviour reproducible and add test --- doc/remove-duplicates.md | 7 +++---- scripts/remove-duplicates | 19 +++++++++++++------ tests/Makefile.am | 2 +- tests/remove-duplicates.test | 29 +++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 11 deletions(-) create mode 100755 tests/remove-duplicates.test diff --git a/doc/remove-duplicates.md b/doc/remove-duplicates.md index af291c1..53c1650 100644 --- a/doc/remove-duplicates.md +++ b/doc/remove-duplicates.md @@ -1,6 +1,6 @@ %REMOVE-DUPLICATES(1) User Manuals %Samuel Tardieu -%November 12, 2016 +%January 5, 2020 # NAME @@ -13,8 +13,7 @@ remove-duplicate [*-f*] # DESCRIPTION Removes duplicates of the same file in the current directory if *-f* -is given. If *-f* is not given, duplicate will be identified twice -(once in every direction). +is given. If *-f* is not given, duplicates will be identified. # OPTIONS @@ -22,7 +21,7 @@ is given. If *-f* is not given, duplicate will be identified twice # COPYRIGHT -Copyright (c) 2004-2016 Samuel Tardieu . +Copyright (c) 2004-2020 Samuel Tardieu . This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
diff --git a/scripts/remove-duplicates b/scripts/remove-duplicates
index b085f1f..04db0fe 100755
--- a/scripts/remove-duplicates
+++ b/scripts/remove-duplicates
@@ -11,9 +11,12 @@ import os
 
 
 
-def check_duplicate(orig, copy):
+def check_duplicate(orig, content, copy):
+    """Remove `copy` if its bytes equal `content`, as read from `orig`."""
     try:
-        if open(orig).read() == open(copy).read():
+        with open(copy, 'rb') as f:
+            identical = f.read() == content
+        if identical:
             print("Removing %s which is a copy of %s" % (copy, orig))
             os.unlink(copy)
     except:
@@ -28,10 +31,30 @@ def aggregate():
     return d
 
 def remove_duplicates(d):
-    for v in d.values():
-        while v:
-            del v[0]
-            for c in v[1:]: check_duplicate(v[0], c)
+    """Delete redundant copies inside each group of candidate files.
+
+    The values of `d` are collections of file names.  Each group is
+    walked in sorted order and check_duplicate() compares every file
+    with the files sorted after it, so among identical files the one
+    whose name sorts first is kept.
+    """
+    # Sort the names inside each group before sorting the groups: the
+    # order of the groups then no longer depends on the order in which
+    # aggregate() collected the names.
+    for v in sorted(sorted(names) for names in d.values()):
+        if len(v) < 2:
+            continue
+        for (i, f1) in enumerate(v[:-1]):
+            try:
+                # Read bytes: text mode can fail to decode binary files.
+                with open(f1, 'rb') as f:
+                    content = f.read()
+                for f2 in v[i+1:]:
+                    check_duplicate(f1, content, f2)
+            except IOError:
+                # f1 cannot be read, e.g. it was already removed as a
+                # copy of an earlier file.
+                continue
 
 if __name__ == '__main__':
     remove_duplicates(aggregate())
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 77e0910..1347444 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1,4 +1,4 @@
-TESTS = chdir-ok.test chdir-not-ok.test
+TESTS = chdir-ok.test chdir-not-ok.test remove-duplicates.test
 XFAIL_TESTS = chdir-not-ok.test
 
 EXTRA_DIST = *.test
diff --git a/tests/remove-duplicates.test b/tests/remove-duplicates.test
new file mode 100755
index 0000000..c9d1135
--- /dev/null
+++ b/tests/remove-duplicates.test
@@ -0,0 +1,35 @@
+#! /bin/sh
+#
+# Check that remove-duplicates -f keeps the first (sorted) file of every
+# set of identical files and leaves distinct files alone.
+
+topsrcdir=$(cd "$(dirname "$0")/.." && pwd)
+REMOVE_DUPLICATES="$topsrcdir/scripts/remove-duplicates"
+# Absolute scratch directory, so the cleanup trap still finds it after cd.
+workdir="$PWD/$(basename "$0").dir"
+trap 'rm -rf "$workdir"' INT QUIT TERM EXIT
+mkdir "$workdir"
+cd "$workdir" || exit 99
+
+echo foo > foo1.txt
+echo foo > foo2.txt
+echo foo > foo3.txt
+echo foo > foo4.txt
+echo bar > bar1.txt
+echo bar > bar2.txt
+echo baz > baz1.txt
+echo zab > baz2.txt
+
+# Three copies of foo and one copy of bar: four removals are reported.
+[ $("$REMOVE_DUPLICATES" -f | wc -l) = 4 ] || exit 1
+
+[ $(ls foo?.txt | wc -l) = 1 ] || exit 2
+
+[ -f foo1.txt ] || exit 3
+
+[ $(ls bar?.txt | wc -l) = 1 ] || exit 4
+
+[ -f bar1.txt ] || exit 5
+
+# baz1.txt and baz2.txt differ, so both must survive.
+[ $(ls baz?.txt | wc -l) = 2 ] || exit 6