summary refs log tree commit diff
path: root/fdupes.c
diff options
context:
space:
mode:
Diffstat (limited to 'fdupes.c')
-rw-r--r-- fdupes.c 948
1 files changed, 679 insertions, 269 deletions
diff --git a/fdupes.c b/fdupes.c
index b52f723..db22010 100644
--- a/fdupes.c
+++ b/fdupes.c
@@ -1,4 +1,4 @@
-/* FDUPES Copyright (c) 1999 Adrian Lopez
+/* FDUPES Copyright (c) 1999-2002 Adrian Lopez
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation files
@@ -26,26 +26,39 @@
#include <dirent.h>
#include <unistd.h>
#include <stdlib.h>
+#ifndef OMIT_GETOPT_LONG
#include <getopt.h>
+#endif
#include <string.h>
#include <errno.h>
+#include <libgen.h>
-#ifndef EXTERNAL_MD5
#include "md5/md5.h"
-#endif
#define ISFLAG(a,b) ((a & b) == b)
#define SETFLAG(a,b) (a |= b)
-#define F_RECURSE 0x001
-#define F_HIDEPROGRESS 0x002
-#define F_DSAMELINE 0x004
-#define F_FOLLOWLINKS 0x008
-#define F_DELETEFILES 0x010
-#define F_EXCLUDEEMPTY 0x020
-#define F_CONSIDERHARDLINKS 0x040
-#define F_SHOWSIZE 0x080
-#define F_OMITFIRST 0x100
+/* Option bits stored in the global `flags` word. Each constant occupies a
+   distinct bit so that values can be combined with SETFLAG() and tested
+   with ISFLAG(). */
+#define F_RECURSE 0x0001
+#define F_HIDEPROGRESS 0x0002
+#define F_DSAMELINE 0x0004
+#define F_FOLLOWLINKS 0x0008
+#define F_DELETEFILES 0x0010
+#define F_EXCLUDEEMPTY 0x0020
+#define F_CONSIDERHARDLINKS 0x0040
+#define F_SHOWSIZE 0x0080
+#define F_OMITFIRST 0x0100
+#define F_RECURSEAFTER 0x0200
+#define F_NOPROMPT 0x0400
+#define F_SUMMARIZEMATCHES 0x0800
+#define F_EXCLUDEHIDDEN 0x1000
+#define F_PERMISSIONS 0x2000
+#define F_REVERSE 0x4000
+#define F_IMMEDIATE 0x8000
+
+/* Sort key used when ordering the files of a duplicate set. main() starts
+   with ORDER_TIME and switches according to the value given to the 'o'
+   option ("time" or "name"). */
+typedef enum {
+ ORDER_TIME = 0,
+ ORDER_NAME
+} ordertype_t;
char *program_name;
@@ -55,11 +68,36 @@ unsigned long flags = 0;
#define INPUT_SIZE 256
+#define PARTIAL_MD5_SIZE 4096
+
+#define MD5_DIGEST_LENGTH 16
+
+/*
+
+TODO: Partial sums (for working with very large files).
+
+typedef struct _signature
+{
+ md5_state_t state;
+ md5_byte_t digest[16];
+} signature_t;
+
+typedef struct _signatures
+{
+ int num_signatures;
+ signature_t *signatures;
+} signatures_t;
+
+*/
+
typedef struct _file {
char *d_name;
off_t size;
- char *crcsignature;
+ md5_byte_t *crcpartial;
+ md5_byte_t *crcsignature;
+ dev_t device;
ino_t inode;
+ time_t mtime;
int hasdupes; /* true only if file is first on duplicate chain */
struct _file *duplicates;
struct _file *next;
@@ -67,19 +105,10 @@ typedef struct _file {
+/* Node of the binary tree that checkmatch() walks while looking for
+   duplicates: it descends into `left` when the file under test compares
+   lower than `file` and into `right` when it compares higher. */
typedef struct _filetree {
file_t *file;
-#ifdef EXPERIMENTAL_RBTREE
- unsigned char color;
- struct _filetree *parent;
-#endif
struct _filetree *left;
struct _filetree *right;
} filetree_t;
-#ifdef EXPERIMENTAL_RBTREE
-#define COLOR_RED 0
-#define COLOR_BLACK 1
-#endif
-
void errormsg(char *message, ...)
{
va_list ap;
@@ -130,6 +159,14 @@ off_t filesize(char *filename) {
return s.st_size;
}
+/* Return the st_dev value that stat() reports for `filename`, or 0 when
+   the file cannot be stat'ed. */
+dev_t getdevice(char *filename) {
+  struct stat info;
+
+  return (stat(filename, &info) == 0) ? info.st_dev : 0;
+}
+
ino_t getinode(char *filename) {
struct stat s;
@@ -138,6 +175,70 @@ ino_t getinode(char *filename) {
return s.st_ino;
}
+/* Return the st_mtime value that stat() reports for `filename`, or 0 when
+   the file cannot be stat'ed. */
+time_t getmtime(char *filename) {
+  struct stat info;
+  time_t modified = 0;
+
+  if (stat(filename, &info) == 0)
+    modified = info.st_mtime;
+
+  return modified;
+}
+
+/* Make a deep copy of an argument vector: a newly allocated array of argc
+   pointers, each pointing at a newly allocated copy of the matching argv
+   string. The caller owns the array and every string in it.
+
+   Reports "out of memory!" through errormsg() and exits with status 1 if
+   any allocation fails. */
+char **cloneargs(int argc, char **argv)
+{
+  char **copy;
+  int i;
+
+  copy = (char **) malloc(sizeof(char*) * argc);
+  if (copy == NULL) {
+    errormsg("out of memory!\n");
+    exit(1);
+  }
+
+  i = 0;
+  while (i < argc) {
+    size_t bytes = strlen(argv[i]) + 1; /* include the terminating NUL */
+
+    copy[i] = (char*) malloc(bytes);
+    if (copy[i] == NULL) {
+      free(copy);
+      errormsg("out of memory!\n");
+      exit(1);
+    }
+
+    memcpy(copy[i], argv[i], bytes);
+    i++;
+  }
+
+  return copy;
+}
+
+/* Return the index of the first element of argv[start .. argc-1] that is
+   exactly equal to `arg`. When nothing matches, the index at which the scan
+   stopped is returned: argc, or `start` itself if start is already >= argc. */
+int findarg(char *arg, int start, int argc, char **argv)
+{
+  int pos = start;
+
+  while (pos < argc && strcmp(argv[pos], arg) != 0)
+    pos++;
+
+  return pos;
+}
+
+/* Find the first non-option argument after specified option.
+
+   `option` is located in oldargv (searching from index 1). Then, for each
+   newargv[x] starting at x = optind, the position of that argument in
+   oldargv is looked up, and x is returned as soon as that position lies
+   beyond the position of `option`. main() passes a copy of argv taken
+   before option parsing as oldargv and the parsed argv as newargv
+   (presumably because option parsing may reorder argv -- confirm).
+
+   Returns an index into newargv. If no argument qualifies, or `option` does
+   not occur in oldargv, the loop's final index is returned (argc whenever
+   optind <= argc).
+
+   Note: the `optind` parameter shadows the global of the same name inside
+   this function. */
+int nonoptafter(char *option, int argc, char **oldargv,
+ char **newargv, int optind)
+{
+ int x;
+ int targetind;
+ int testind;
+ int startat = 1;
+
+ /* findarg() yields argc when `option` is absent, so nothing can exceed it */
+ targetind = findarg(option, 1, argc, oldargv);
+
+ for (x = optind; x < argc; x++) {
+ testind = findarg(newargv[x], startat, argc, oldargv);
+ if (testind > targetind) return x;
+ /* later lookups resume from the position that was just matched */
+ else startat = testind;
+ }
+
+ return x;
+}
+
int grokdir(char *dir, file_t **filelistp)
{
DIR *cd;
@@ -149,6 +250,7 @@ int grokdir(char *dir, file_t **filelistp)
struct stat linfo;
static int progress = 0;
static char indicator[] = "-\\|/";
+ char *fullname, *name;
cd = opendir(dir);
@@ -172,8 +274,10 @@ int grokdir(char *dir, file_t **filelistp)
exit(1);
} else newfile->next = *filelistp;
+ newfile->device = 0;
newfile->inode = 0;
newfile->crcsignature = NULL;
+ newfile->crcpartial = NULL;
newfile->duplicates = NULL;
newfile->hasdupes = 0;
@@ -192,6 +296,17 @@ int grokdir(char *dir, file_t **filelistp)
strcat(newfile->d_name, "/");
strcat(newfile->d_name, dirinfo->d_name);
+ if (ISFLAG(flags, F_EXCLUDEHIDDEN)) {
+ fullname = strdup(newfile->d_name);
+ name = basename(fullname);
+ if (name[0] == '.' && strcmp(name, ".") && strcmp(name, "..") ) {
+ free(newfile->d_name);
+ free(newfile);
+ continue;
+ }
+ free(fullname);
+ }
+
if (filesize(newfile->d_name) == 0 && ISFLAG(flags, F_EXCLUDEEMPTY)) {
free(newfile->d_name);
free(newfile);
@@ -232,25 +347,22 @@ int grokdir(char *dir, file_t **filelistp)
return filecount;
}
-#ifndef EXTERNAL_MD5
-
-/* If EXTERNAL_MD5 is not defined, use L. Peter Deutsch's MD5 library.
- */
-char *getcrcsignature(char *filename)
+md5_byte_t *getcrcsignatureuntil(char *filename, off_t max_read)
{
- int x;
off_t fsize;
off_t toread;
md5_state_t state;
- md5_byte_t digest[16];
+ static md5_byte_t digest[MD5_DIGEST_LENGTH];
static md5_byte_t chunk[CHUNK_SIZE];
- static char signature[16*2 + 1];
- char *sigp;
FILE *file;
md5_init(&state);
+
fsize = filesize(filename);
+
+ if (max_read != 0 && fsize > max_read)
+ fsize = max_read;
file = fopen(filename, "rb");
if (file == NULL) {
@@ -259,9 +371,10 @@ char *getcrcsignature(char *filename)
}
while (fsize > 0) {
- toread = (fsize % CHUNK_SIZE) ? (fsize % CHUNK_SIZE) : CHUNK_SIZE;
+ toread = (fsize >= CHUNK_SIZE) ? CHUNK_SIZE : fsize;
if (fread(chunk, toread, 1, file) != 1) {
errormsg("error reading from file %s\n", filename);
+ fclose(file);
return NULL;
}
md5_append(&state, chunk, toread);
@@ -270,61 +383,44 @@ char *getcrcsignature(char *filename)
md5_finish(&state, digest);
- sigp = signature;
-
- for (x = 0; x < 16; x++) {
- sprintf(sigp, "%02x", digest[x]);
- sigp = strchr(sigp, '\0');
- }
-
fclose(file);
- return signature;
+ return digest;
}
-#endif /* [#ifndef EXTERNAL_MD5] */
-
-#ifdef EXTERNAL_MD5
-
-/* If EXTERNAL_MD5 is defined, use md5sum program to calculate signatures.
- */
-char *getcrcsignature(char *filename)
+md5_byte_t *getcrcsignature(char *filename)
{
- static char signature[256];
- char *command;
- char *separator;
- FILE *result;
+ return getcrcsignatureuntil(filename, 0);
+}
- command = (char*) malloc(strlen(filename)+strlen(EXTERNAL_MD5)+2);
- if (command == NULL) {
- errormsg("out of memory\n");
- exit(1);
- }
+md5_byte_t *getcrcpartialsignature(char *filename)
+{
+ return getcrcsignatureuntil(filename, PARTIAL_MD5_SIZE);
+}
- sprintf(command, "%s %s", EXTERNAL_MD5, filename);
+int md5cmp(const md5_byte_t *a, const md5_byte_t *b)
+{
+ int x;
- result = popen(command, "r");
- if (result == NULL) {
- errormsg("error invoking %s\n", EXTERNAL_MD5);
- exit(1);
+ for (x = 0; x < MD5_DIGEST_LENGTH; ++x)
+ {
+ if (a[x] < b[x])
+ return -1;
+ else if (a[x] > b[x])
+ return 1;
}
-
- free(command);
- if (fgets(signature, 256, result) == NULL) {
- errormsg("error generating signature for %s\n", filename);
- return NULL;
- }
- separator = strchr(signature, ' ');
- if (separator) *separator = '\0';
+ return 0;
+}
- pclose(result);
+void md5copy(md5_byte_t *to, const md5_byte_t *from)
+{
+ int x;
- return signature;
+ for (x = 0; x < MD5_DIGEST_LENGTH; ++x)
+ to[x] = from[x];
}
-#endif /* [#ifdef EXTERNAL_MD5] */
-
void purgetree(filetree_t *checktree)
{
if (checktree->left != NULL) purgetree(checktree->left);
@@ -334,140 +430,17 @@ void purgetree(filetree_t *checktree)
free(checktree);
}
-#ifdef EXPERIMENTAL_RBTREE
-/* Use a red-black tree structure to store file information.
- */
-
-void rotate_left(filetree_t **root, filetree_t *node)
-{
- filetree_t *subject;
-
- subject = node->right;
- node->right = subject->left;
-
- if (subject->left != NULL) subject->left->parent = node;
- subject->parent = node->parent;
-
- if (node->parent == NULL) {
- *root = subject;
- } else {
- if (node == node->parent->left)
- node->parent->left = subject;
- else
- node->parent->right = subject;
- }
-
- subject->left = node;
- node->parent = subject;
-}
-
-void rotate_right(filetree_t **root, filetree_t *node)
-{
- filetree_t *subject;
-
- subject = node->left;
- node->left = subject->right;
-
- if (subject->right != NULL) subject->right->parent = node;
- subject->parent = node->parent;
-
- if (node->parent == NULL) {
- *root = subject;
- } else {
- if (node == node->parent->left)
- node->parent->left = subject;
- else
- node->parent->right = subject;
- }
-
- subject->right = node;
- node->parent = subject;
-}
-
-#define TREE_LEFT -1
-#define TREE_RIGHT 1
-#define TREE_ROOT 0
-
-void registerfile(filetree_t **root, filetree_t *parent, int loc, file_t *file)
+void getfilestats(file_t *file)
{
- filetree_t *node;
- filetree_t *uncle;
-
file->size = filesize(file->d_name);
file->inode = getinode(file->d_name);
-
- node = (filetree_t*) malloc(sizeof(filetree_t));
- if (node == NULL) {
- errormsg("out of memory!\n");
- exit(1);
- }
-
- node->file = file;
- node->left = NULL;
- node->right = NULL;
- node->parent = parent;
- node->color = COLOR_RED;
-
- if (loc == TREE_ROOT)
- *root = node;
- else if (loc == TREE_LEFT)
- parent->left = node;
- else
- parent->right = node;
-
- while (node != *root && node->parent->color == COLOR_RED) {
- if (node->parent->parent == NULL) return;
-
- if (node->parent == node->parent->parent->left) {
- uncle = node->parent->parent->right;
- if (uncle == NULL) return;
-
- if (uncle->color == COLOR_RED) {
- node->parent->color = COLOR_BLACK;
- uncle->color = COLOR_BLACK;
- node->parent->parent->color = COLOR_RED;
- node = node->parent->parent;
- } else {
- if (node == node->parent->right) {
- node = node->parent;
- rotate_left(root, node);
- }
- node->parent->color = COLOR_BLACK;
- node->parent->parent->color = COLOR_RED;
- rotate_right(root, node->parent->parent);
- }
- } else {
- uncle = node->parent->parent->left;
- if (uncle == NULL) return;
-
- if (uncle->color == COLOR_RED) {
- node->parent->color = COLOR_BLACK;
- uncle->color = COLOR_BLACK;
- node->parent->parent->color = COLOR_RED;
- node = node->parent->parent;
- } else {
- if (node == node->parent->right) {
- node = node->parent;
- rotate_left(root, node);
- }
- node->parent->color = COLOR_BLACK;
- node->parent->parent->color = COLOR_RED;
- rotate_right(root, node->parent->parent);
- }
- }
- }
-
- (*root)->color = COLOR_BLACK;
+ file->device = getdevice(file->d_name);
+ file->mtime = getmtime(file->d_name);
}
-#endif /* [#ifdef EXPERIMENTAL_RBTREE] */
-
-#ifndef EXPERIMENTAL_RBTREE
-
int registerfile(filetree_t **branch, file_t *file)
{
- file->size = filesize(file->d_name);
- file->inode = getinode(file->d_name);
+ getfilestats(file);
*branch = (filetree_t*) malloc(sizeof(filetree_t));
if (*branch == NULL) {
@@ -482,20 +455,61 @@ int registerfile(filetree_t **branch, file_t *file)
return 1;
}
-#endif /* [#ifndef EXPERIMENTAL_RBTREE] */
+/* Compare the mode, owner and group that stat() reports for two files.
+
+   Returns 1 when st_mode, st_uid and st_gid are all equal, 0 when any of
+   them differs, and -1 when either file cannot be stat'ed. Because -1 is
+   non-zero, a caller that tests the result with `!` treats a stat failure
+   the same way as a match. */
+int same_permissions(char* name1, char* name2)
+{
+  struct stat first;
+  struct stat second;
+
+  if (stat(name1, &first) != 0 || stat(name2, &second) != 0)
+    return -1;
+
+  if (first.st_mode != second.st_mode) return 0;
+  if (first.st_uid != second.st_uid) return 0;
+  if (first.st_gid != second.st_gid) return 0;
+
+  return 1;
+}
+
+/* Tell whether `file` refers to the same on-disk object as the file stored
+   in `checktree` or as any entry on that file's duplicates chain.
+
+   Two files are taken to be the same object when both their inode and
+   their device numbers are equal. The inode/device of `file` are read
+   freshly via getinode()/getdevice(); those of the tree entries come from
+   their cached fields.
+
+   Returns 1 on a match, 0 otherwise. */
+int is_hardlink(filetree_t *checktree, file_t *file)
+{
+  file_t *member;
+  ino_t inode = getinode(file->d_name);
+  dev_t device = getdevice(file->d_name);
+
+  if (checktree->file->inode == inode && checktree->file->device == device)
+    return 1;
+
+  if (!checktree->file->hasdupes)
+    return 0;
+
+  /* hasdupes is set, so walk the chain starting at its first entry */
+  member = checktree->file->duplicates;
+  do {
+    if (member->inode == inode && member->device == device)
+      return 1;
+
+    member = member->duplicates;
+  } while (member != NULL);
+
+  return 0;
+}
-file_t *checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
+file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
{
int cmpresult;
- char *crcsignature;
+ md5_byte_t *crcsignature;
off_t fsize;
- /* If inodes are equal one of the files is a hard link, which
- is usually not accidental. We don't want to flag them as
- duplicates, unless the user specifies otherwise. */
+ /* If device and inode fields are equal one of the files is a
+ hard link to the other or the files have been listed twice
+ unintentionally. We don't want to flag these files as
+ duplicates unless the user specifies otherwise.
+ */
- if (!ISFLAG(flags, F_CONSIDERHARDLINKS) && getinode(file->d_name) ==
- checktree->file->inode) return NULL;
+ if (!ISFLAG(flags, F_CONSIDERHARDLINKS) && is_hardlink(checktree, file))
+ return NULL;
fsize = filesize(file->d_name);
@@ -503,57 +517,97 @@ file_t *checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
cmpresult = -1;
else
if (fsize > checktree->file->size) cmpresult = 1;
+ else
+ if (ISFLAG(flags, F_PERMISSIONS) &&
+ !same_permissions(file->d_name, checktree->file->d_name))
+ cmpresult = -1;
else {
- if (checktree->file->crcsignature == NULL) {
- crcsignature = getcrcsignature(checktree->file->d_name);
- if (crcsignature == NULL) return NULL;
+ if (checktree->file->crcpartial == NULL) {
+ crcsignature = getcrcpartialsignature(checktree->file->d_name);
+ if (crcsignature == NULL) {
+ errormsg ("cannot read file %s\n", checktree->file->d_name);
+ return NULL;
+ }
- checktree->file->crcsignature = (char*) malloc(strlen(crcsignature)+1);
- if (checktree->file->crcsignature == NULL) {
+ checktree->file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
+ if (checktree->file->crcpartial == NULL) {
errormsg("out of memory\n");
exit(1);
}
- strcpy(checktree->file->crcsignature, crcsignature);
+ md5copy(checktree->file->crcpartial, crcsignature);
}
- if (file->crcsignature == NULL) {
- crcsignature = getcrcsignature(file->d_name);
- if (crcsignature == NULL) return NULL;
+ if (file->crcpartial == NULL) {
+ crcsignature = getcrcpartialsignature(file->d_name);
+ if (crcsignature == NULL) {
+ errormsg ("cannot read file %s\n", file->d_name);
+ return NULL;
+ }
- file->crcsignature = (char*) malloc(strlen(crcsignature)+1);
- if (file->crcsignature == NULL) {
+ file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
+ if (file->crcpartial == NULL) {
errormsg("out of memory\n");
exit(1);
}
- strcpy(file->crcsignature, crcsignature);
+ md5copy(file->crcpartial, crcsignature);
}
- cmpresult = strcmp(file->crcsignature, checktree->file->crcsignature);
+ cmpresult = md5cmp(file->crcpartial, checktree->file->crcpartial);
+ /*if (cmpresult != 0) errormsg(" on %s vs %s\n", file->d_name, checktree->file->d_name);*/
+
+ if (cmpresult == 0) {
+ if (checktree->file->crcsignature == NULL) {
+ crcsignature = getcrcsignature(checktree->file->d_name);
+ if (crcsignature == NULL) return NULL;
+
+ checktree->file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
+ if (checktree->file->crcsignature == NULL) {
+ errormsg("out of memory\n");
+ exit(1);
+ }
+ md5copy(checktree->file->crcsignature, crcsignature);
+ }
+
+ if (file->crcsignature == NULL) {
+ crcsignature = getcrcsignature(file->d_name);
+ if (crcsignature == NULL) return NULL;
+
+ file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
+ if (file->crcsignature == NULL) {
+ errormsg("out of memory\n");
+ exit(1);
+ }
+ md5copy(file->crcsignature, crcsignature);
+ }
+
+ cmpresult = md5cmp(file->crcsignature, checktree->file->crcsignature);
+ /*if (cmpresult != 0) errormsg("P on %s vs %s\n",
+ file->d_name, checktree->file->d_name);
+ else errormsg("P F on %s vs %s\n", file->d_name,
+ checktree->file->d_name);
+ printf("%s matches %s\n", file->d_name, checktree->file->d_name);*/
+ }
}
if (cmpresult < 0) {
if (checktree->left != NULL) {
return checkmatch(root, checktree->left, file);
} else {
-#ifndef EXPERIMENTAL_RBTREE
registerfile(&(checktree->left), file);
-#else
- registerfile(root, checktree, TREE_LEFT, file);
-#endif
return NULL;
}
} else if (cmpresult > 0) {
if (checktree->right != NULL) {
return checkmatch(root, checktree->right, file);
} else {
-#ifndef EXPERIMENTAL_RBTREE
registerfile(&(checktree->right), file);
-#else
- registerfile(root, checktree, TREE_RIGHT, file);
-#endif
return NULL;
}
- } else return checktree->file;
+ } else
+ {
+ getfilestats(file);
+ return &checktree->file;
+ }
}
/* Do a bit-for-bit comparison in case two different files produce the
@@ -561,8 +615,8 @@ file_t *checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
int confirmmatch(FILE *file1, FILE *file2)
{
- unsigned char c1;
- unsigned char c2;
+ unsigned char c1[CHUNK_SIZE];
+ unsigned char c2[CHUNK_SIZE];
size_t r1;
size_t r2;
@@ -570,17 +624,55 @@ int confirmmatch(FILE *file1, FILE *file2)
fseek(file2, 0, SEEK_SET);
do {
- r1 = fread(&c1, sizeof(c1), 1, file1);
- r2 = fread(&c2, sizeof(c2), 1, file2);
+ r1 = fread(c1, sizeof(unsigned char), sizeof(c1), file1);
+ r2 = fread(c2, sizeof(unsigned char), sizeof(c2), file2);
- if (c1 != c2) return 0; /* file contents are different */
- } while (r1 && r2);
+ if (r1 != r2) return 0; /* file lengths are different */
+ if (memcmp (c1, c2, r1)) return 0; /* file contents are different */
+ } while (r2);
- if (r1 != r2) return 0; /* file lengths are different */
-
return 1;
}
+/* Print a one-line summary of the duplicates found in the `files` list.
+
+   Every list entry flagged with hasdupes counts as one set; every entry on
+   its duplicates chain counts as one duplicate file and contributes the
+   size of the set's first file to the byte total. The total is printed in
+   bytes, kilobytes or megabytes depending on its magnitude, or
+   "No duplicates found." when there are no sets. */
+void summarizematches(file_t *files)
+{
+  int numsets = 0;
+  int numfiles = 0;
+  double numbytes = 0.0;
+  file_t *head;
+  file_t *dupe;
+
+  for (head = files; head != NULL; head = head->next) {
+    if (!head->hasdupes)
+      continue;
+
+    numsets++;
+
+    for (dupe = head->duplicates; dupe != NULL; dupe = dupe->duplicates) {
+      numfiles++;
+      numbytes += head->size;
+    }
+  }
+
+  if (numsets == 0) {
+    printf("No duplicates found.\n\n");
+    return;
+  }
+
+  if (numbytes < 1024.0)
+    printf("%d duplicate files (in %d sets), occupying %.0f bytes.\n\n", numfiles, numsets, numbytes);
+  else if (numbytes <= (1000.0 * 1000.0))
+    printf("%d duplicate files (in %d sets), occupying %.1f kilobytes\n\n", numfiles, numsets, numbytes / 1000.0);
+  else
+    printf("%d duplicate files (in %d sets), occupying %.1f megabytes\n\n", numfiles, numsets, numbytes / (1000.0 * 1000.0));
+}
+
void printmatches(file_t *files)
{
file_t *tmpfile;
@@ -588,7 +680,7 @@ void printmatches(file_t *files)
while (files != NULL) {
if (files->hasdupes) {
if (!ISFLAG(flags, F_OMITFIRST)) {
- if (ISFLAG(flags, F_SHOWSIZE)) printf("%ld byte%seach:\n", files->size,
+ if (ISFLAG(flags, F_SHOWSIZE)) printf("%lld byte%seach:\n", (long long int)files->size,
(files->size != 1) ? "s " : " ");
if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &files->d_name);
printf("%s%c", files->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
@@ -607,7 +699,64 @@ void printmatches(file_t *files)
}
}
-void autodelete(file_t *files)
+/*
+#define REVISE_APPEND "_tmp"
+char *revisefilename(char *path, int seq)
+{
+ int digits;
+ char *newpath;
+ char *scratch;
+ char *dot;
+
+ digits = numdigits(seq);
+ newpath = malloc(strlen(path) + strlen(REVISE_APPEND) + digits + 1);
+ if (!newpath) return newpath;
+
+ scratch = malloc(strlen(path) + 1);
+ if (!scratch) return newpath;
+
+ strcpy(scratch, path);
+ dot = strrchr(scratch, '.');
+ if (dot)
+ {
+ *dot = 0;
+ sprintf(newpath, "%s%s%d.%s", scratch, REVISE_APPEND, seq, dot + 1);
+ }
+
+ else
+ {
+ sprintf(newpath, "%s%s%d", path, REVISE_APPEND, seq);
+ }
+
+ free(scratch);
+
+ return newpath;
+} */
+
+/* Create `newfile` as a hard link to `oldfile` and verify the result.
+
+   The device and inode of `oldfile` are sampled before link() is called;
+   afterwards the same values are read for `newfile` and compared.
+
+   Returns 1 when link() succeeds and `newfile` has the same device and
+   inode as `oldfile` had, 0 when link() fails or the identities differ. */
+int relink(char *oldfile, char *newfile)
+{
+  dev_t olddev = getdevice(oldfile);
+  ino_t oldino = getinode(oldfile);
+  dev_t newdev;
+  ino_t newino;
+
+  if (link(oldfile, newfile) != 0)
+    return 0;
+
+  /* make sure we're working with the right file (the one we created) */
+  newdev = getdevice(newfile);
+  newino = getinode(newfile);
+
+  return (newdev == olddev && newino == oldino) ? 1 : 0;
+}
+
+void deletefiles(file_t *files, int prompt, FILE *tty)
{
int counter;
int groups = 0;
@@ -661,27 +810,36 @@ void autodelete(file_t *files)
counter = 1;
dupelist[counter] = files;
- printf("[%d] %s\n", counter, files->d_name);
+ if (prompt) printf("[%d] %s\n", counter, files->d_name);
tmpfile = files->duplicates;
while (tmpfile) {
dupelist[++counter] = tmpfile;
- printf("[%d] %s\n", counter, tmpfile->d_name);
+ if (prompt) printf("[%d] %s\n", counter, tmpfile->d_name);
tmpfile = tmpfile->duplicates;
}
- printf("\n");
+ if (prompt) printf("\n");
+
+ if (!prompt) /* preserve only the first file */
+ {
+ preserve[1] = 1;
+ for (x = 2; x <= counter; x++) preserve[x] = 0;
+ }
+
+ else /* prompt for files to preserve */
do {
printf("Set %d of %d, preserve files [1 - %d, all]",
curgroup, groups, counter);
- if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%ld byte%seach)", files->size,
+ if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%lld byte%seach)", (long long int)files->size,
(files->size != 1) ? "s " : " ");
printf(": ");
fflush(stdout);
- fgets(preservestr, INPUT_SIZE, stdin);
+ if (!fgets(preservestr, INPUT_SIZE, tty))
+ preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */
i = strlen(preservestr) - 1;
@@ -694,8 +852,11 @@ void autodelete(file_t *files)
}
preservestr = tstr;
- if (!fgets(preservestr + i + 1, INPUT_SIZE, stdin))
- break; /* stop if fgets fails -- possible EOF? */
+ if (!fgets(preservestr + i + 1, INPUT_SIZE, tty))
+ {
+ preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */
+ break;
+ }
i = strlen(preservestr)-1;
}
@@ -723,8 +884,12 @@ void autodelete(file_t *files)
if (preserve[x])
printf(" [+] %s\n", dupelist[x]->d_name);
else {
- printf(" [-] %s\n", dupelist[x]->d_name);
- remove(dupelist[x]->d_name);
+ if (remove(dupelist[x]->d_name) == 0) {
+ printf(" [-] %s\n", dupelist[x]->d_name);
+ } else {
+ printf(" [!] %s ", dupelist[x]->d_name);
+ printf("-- unable to delete file!\n");
+ }
}
}
printf("\n");
@@ -738,19 +903,126 @@ void autodelete(file_t *files)
free(preservestr);
}
+/* Comparison callback that looks only at whether `f2` has a successor on
+   its duplicates chain; `f1` is not examined.
+
+   Without F_REVERSE it returns 1 when f2->duplicates is set and -1
+   otherwise; with F_REVERSE the two results are swapped. */
+int sort_pairs_by_arrival(file_t *f1, file_t *f2)
+{
+  int forward = !ISFLAG(flags, F_REVERSE);
+
+  if (f2->duplicates != 0)
+    return forward ? 1 : -1;
+
+  return forward ? -1 : 1;
+}
+
+/* Comparison callback ordering two files by their cached mtime.
+
+   Returns -1 when f1 is older than f2, 1 when it is newer and 0 when both
+   mtimes are equal; the sign is inverted when F_REVERSE is set. */
+int sort_pairs_by_mtime(file_t *f1, file_t *f2)
+{
+  int direction = ISFLAG(flags, F_REVERSE) ? -1 : 1;
+
+  if (f1->mtime < f2->mtime) return -direction;
+  if (f1->mtime > f2->mtime) return direction;
+
+  return 0;
+}
+
+/* Comparison callback ordering two files by path name (strcmp order).
+
+   Returns a negative, zero or positive value as f1's name sorts before,
+   equal to or after f2's name. The result is inverted when F_REVERSE is
+   set, so that the reverse option affects name ordering the same way it
+   affects sort_pairs_by_mtime(). */
+int sort_pairs_by_filename(file_t *f1, file_t *f2)
+{
+  int order = strcmp(f1->d_name, f2->d_name);
+
+  if (ISFLAG(flags, F_REVERSE)) {
+    /* strcmp() only promises the sign of its result, so map it to
+       -1/0/1 rather than negating an arbitrary int */
+    return (order > 0) ? -1 : (order < 0) ? 1 : 0;
+  }
+
+  return order;
+}
+
+/* Insert `newmatch` into the duplicates chain whose head is *matchlist,
+   keeping the chain ordered according to `comparef`.
+
+   The chain is walked from its head; `newmatch` is placed in front of the
+   first entry for which comparef(newmatch, entry) <= 0, or appended after
+   the last entry if there is none. When `newmatch` becomes the new head,
+   *matchlist is updated and the hasdupes flag moves from the old head to
+   `newmatch`.
+
+   NOTE(review): on the append path newmatch->duplicates is not assigned
+   here; this appears to rely on it already being NULL -- confirm. */
+void registerpair(file_t **matchlist, file_t *newmatch,
+ int (*comparef)(file_t *f1, file_t *f2))
+{
+ file_t *traverse;
+ file_t *back;
+
+ (*matchlist)->hasdupes = 1;
+
+ /* `back` trails one entry behind `traverse`; 0 means traverse is the head */
+ back = 0;
+ traverse = *matchlist;
+ while (traverse)
+ {
+ if (comparef(newmatch, traverse) <= 0)
+ {
+ /* newmatch sorts at or before traverse: link it in ahead of traverse */
+ newmatch->duplicates = traverse;
+
+ if (back == 0)
+ {
+ *matchlist = newmatch; /* update pointer to head of list */
+
+ newmatch->hasdupes = 1;
+ traverse->hasdupes = 0; /* flag is only for first file in dupe chain */
+ }
+ else
+ back->duplicates = newmatch;
+
+ break;
+ }
+ else
+ {
+ /* reached the end of the chain without finding a slot: append */
+ if (traverse->duplicates == 0)
+ {
+ traverse->duplicates = newmatch;
+
+ if (back == 0)
+ traverse->hasdupes = 1;
+
+ break;
+ }
+ }
+
+ back = traverse;
+ traverse = traverse->duplicates;
+ }
+}
+
+/* Decide which of two matching files to keep, delete the other one from
+   disk and report both actions on stdout.
+
+   `duplicate` is deleted when comparef(duplicate, *existing) >= 0;
+   otherwise *existing is deleted and *existing is redirected to
+   `duplicate`. Unless F_HIDEPROGRESS is set, the progress line on stderr is
+   blanked first. A failed remove() is reported with a "[!]" line. */
+void deletesuccessor(file_t **existing, file_t *duplicate,
+  int (*comparef)(file_t *f1, file_t *f2))
+{
+  file_t *survivor;
+  file_t *victim;
+  int order = comparef(duplicate, *existing);
+
+  if (order < 0) {
+    survivor = duplicate;
+    victim = *existing;
+
+    *existing = duplicate;
+  } else {
+    survivor = *existing;
+    victim = duplicate;
+  }
+
+  if (!ISFLAG(flags, F_HIDEPROGRESS))
+    fprintf(stderr, "\r%40s\r", " ");
+
+  printf(" [+] %s\n", survivor->d_name);
+
+  if (remove(victim->d_name) != 0) {
+    printf(" [!] %s ", victim->d_name);
+    printf("-- unable to delete file!\n");
+  } else {
+    printf(" [-] %s\n", victim->d_name);
+  }
+
+  printf("\n");
+}
+
void help_text()
{
printf("Usage: fdupes [options] DIRECTORY...\n\n");
- printf(" -r --recurse \tinclude files residing in subdirectories\n");
+ printf(" -r --recurse \tfor every directory given follow subdirectories\n");
+ printf(" \tencountered within\n");
+ printf(" -R --recurse: \tfor each directory given after this option follow\n");
+ printf(" \tsubdirectories encountered within (note the ':' at\n");
+ printf(" \tthe end of the option, manpage for more details)\n");
printf(" -s --symlinks \tfollow symlinks\n");
printf(" -H --hardlinks \tnormally, when two or more files point to the same\n");
printf(" \tdisk area they are treated as non-duplicates; this\n");
printf(" \toption will change this behavior\n");
printf(" -n --noempty \texclude zero-length files from consideration\n");
+ printf(" -A --nohidden \texclude hidden files from consideration\n");
printf(" -f --omitfirst \tomit the first file in each set of matches\n");
printf(" -1 --sameline \tlist each set of matches on a single line\n");
printf(" -S --size \tshow size of duplicate files\n");
+ printf(" -m --summarize \tsummarize dupe information\n");
printf(" -q --quiet \thide progress indicator\n");
printf(" -d --delete \tprompt user for files to preserve and delete all\n");
printf(" \tothers; important: under particular circumstances,\n");
@@ -758,8 +1030,22 @@ void help_text()
printf(" \twith -s or --symlinks, or when specifying a\n");
printf(" \tparticular directory more than once; refer to the\n");
printf(" \tfdupes documentation for additional information\n");
+ /*printf(" -l --relink \t(description)\n");*/
+ printf(" -N --noprompt \ttogether with --delete, preserve the first file in\n");
+ printf(" \teach set of duplicates and delete the rest without\n");
+ printf(" \tprompting the user\n");
+ printf(" -I --immediate \tdelete duplicates as they are encountered, without\n");
+ printf(" \tgrouping into sets; implies --noprompt\n");
+ printf(" -p --permissions \tdon't consider files with different owner/group or\n");
+ printf(" \tpermission bits as duplicates\n");
+ printf(" -o --order=BY \tselect sort order for output, linking and deleting; by\n");
+ printf(" \tmtime (BY='time'; default) or filename (BY='name')\n");
+ printf(" -i --reverse \treverse order while sorting\n");
printf(" -v --version \tdisplay fdupes version\n");
printf(" -h --help \tdisplay this help message\n\n");
+#ifdef OMIT_GETOPT_LONG
+ printf("Note: Long options are not supported in this fdupes build.\n\n");
+#endif
}
int main(int argc, char **argv) {
@@ -769,30 +1055,56 @@ int main(int argc, char **argv) {
FILE *file2;
file_t *files = NULL;
file_t *curfile;
- file_t *match = NULL;
+ file_t **match = NULL;
filetree_t *checktree = NULL;
int filecount = 0;
int progress = 0;
-
+ char **oldargv;
+ int firstrecurse;
+ ordertype_t ordertype = ORDER_TIME;
+
+#ifndef OMIT_GETOPT_LONG
static struct option long_options[] =
{
{ "omitfirst", 0, 0, 'f' },
{ "recurse", 0, 0, 'r' },
+ { "recursive", 0, 0, 'r' },
+ { "recurse:", 0, 0, 'R' },
+ { "recursive:", 0, 0, 'R' },
{ "quiet", 0, 0, 'q' },
{ "sameline", 0, 0, '1' },
{ "size", 0, 0, 'S' },
{ "symlinks", 0, 0, 's' },
{ "hardlinks", 0, 0, 'H' },
+ { "relink", 0, 0, 'l' },
{ "noempty", 0, 0, 'n' },
+ { "nohidden", 0, 0, 'A' },
{ "delete", 0, 0, 'd' },
{ "version", 0, 0, 'v' },
{ "help", 0, 0, 'h' },
+ { "noprompt", 0, 0, 'N' },
+ { "immediate", 0, 0, 'I'},
+ { "summarize", 0, 0, 'm'},
+ { "summary", 0, 0, 'm' },
+ { "permissions", 0, 0, 'p' },
+ { "order", 1, 0, 'o' },
+ { "reverse", 0, 0, 'i' },
{ 0, 0, 0, 0 }
};
+#define GETOPT getopt_long
+#else
+#define GETOPT getopt
+#endif
program_name = argv[0];
- while ((opt = getopt_long(argc, argv, "frq1SsHndvh", long_options, NULL)) != EOF) {
+ oldargv = cloneargs(argc, argv);
+
+ while ((opt = GETOPT(argc, argv, "frRq1SsHlnAdvhNmpo:i"
+#ifndef OMIT_GETOPT_LONG
+ , long_options, NULL
+#endif
+ )) != EOF) {
switch (opt) {
case 'f':
SETFLAG(flags, F_OMITFIRST);
@@ -800,6 +1112,9 @@ int main(int argc, char **argv) {
case 'r':
SETFLAG(flags, F_RECURSE);
break;
+ case 'R':
+ SETFLAG(flags, F_RECURSEAFTER);
+ break;
case 'q':
SETFLAG(flags, F_HIDEPROGRESS);
break;
@@ -818,6 +1133,9 @@ int main(int argc, char **argv) {
case 'n':
SETFLAG(flags, F_EXCLUDEEMPTY);
break;
+ case 'A':
+ SETFLAG(flags, F_EXCLUDEHIDDEN);
+ break;
case 'd':
SETFLAG(flags, F_DELETEFILES);
break;
@@ -827,8 +1145,34 @@ int main(int argc, char **argv) {
case 'h':
help_text();
exit(1);
+ case 'N':
+ SETFLAG(flags, F_NOPROMPT);
+ break;
+ case 'I':
+ SETFLAG(flags, F_IMMEDIATE);
+ break;
+ case 'm':
+ SETFLAG(flags, F_SUMMARIZEMATCHES);
+ break;
+ case 'p':
+ SETFLAG(flags, F_PERMISSIONS);
+ break;
+ case 'o':
+ if (!strcasecmp("name", optarg)) {
+ ordertype = ORDER_NAME;
+ } else if (!strcasecmp("time", optarg)) {
+ ordertype = ORDER_TIME;
+ } else {
+ errormsg("invalid value for --order: '%s'\n", optarg);
+ exit(1);
+ }
+ break;
+ case 'i':
+ SETFLAG(flags, F_REVERSE);
+ break;
+
default:
- fprintf(stderr, "Try `fdupes --help' for more information\n");
+ fprintf(stderr, "Try `fdupes --help' for more information.\n");
exit(1);
}
}
@@ -838,19 +1182,51 @@ int main(int argc, char **argv) {
exit(1);
}
- for (x = optind; x < argc; x++) filecount += grokdir(argv[x], &files);
+ if (ISFLAG(flags, F_RECURSE) && ISFLAG(flags, F_RECURSEAFTER)) {
+ errormsg("options --recurse and --recurse: are not compatible\n");
+ exit(1);
+ }
+
+ if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) {
+ errormsg("options --summarize and --delete are not compatible\n");
+ exit(1);
+ }
+
+ if (ISFLAG(flags, F_RECURSEAFTER)) {
+ firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv, optind);
+
+ if (firstrecurse == argc)
+ firstrecurse = nonoptafter("-R", argc, oldargv, argv, optind);
+
+ if (firstrecurse == argc) {
+ errormsg("-R option must be isolated from other options\n");
+ exit(1);
+ }
+
+ /* F_RECURSE is not set for directories before --recurse: */
+ for (x = optind; x < firstrecurse; x++)
+ filecount += grokdir(argv[x], &files);
+
+ /* Set F_RECURSE for directories after --recurse: */
+ SETFLAG(flags, F_RECURSE);
- if (!files) exit(0);
+ for (x = firstrecurse; x < argc; x++)
+ filecount += grokdir(argv[x], &files);
+ } else {
+ for (x = optind; x < argc; x++)
+ filecount += grokdir(argv[x], &files);
+ }
+
+ if (!files) {
+ if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
+ exit(0);
+ }
curfile = files;
while (curfile) {
if (!checktree)
-#ifndef EXPERIMENTAL_RBTREE
registerfile(&checktree, curfile);
-#else
- registerfile(&checktree, NULL, TREE_ROOT, curfile);
-#endif
else
match = checkmatch(&checktree, checktree, curfile);
@@ -860,18 +1236,21 @@ int main(int argc, char **argv) {
curfile = curfile->next;
continue;
}
-
- file2 = fopen(match->d_name, "rb");
+
+ file2 = fopen((*match)->d_name, "rb");
if (!file2) {
fclose(file1);
curfile = curfile->next;
continue;
}
-
+
if (confirmmatch(file1, file2)) {
- match->hasdupes = 1;
- curfile->duplicates = match->duplicates;
- match->duplicates = curfile;
+ if (ISFLAG(flags, F_DELETEFILES) && ISFLAG(flags, F_IMMEDIATE))
+ deletesuccessor(match, curfile,
+ (ordertype == ORDER_TIME) ? sort_pairs_by_mtime : sort_pairs_by_filename );
+ else
+ registerpair(match, curfile,
+ (ordertype == ORDER_TIME) ? sort_pairs_by_mtime : sort_pairs_by_filename );
}
fclose(file1);
@@ -889,16 +1268,47 @@ int main(int argc, char **argv) {
if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
- if (ISFLAG(flags, F_DELETEFILES)) autodelete(files);
- else printmatches(files);
+ if (ISFLAG(flags, F_DELETEFILES))
+ {
+ if (ISFLAG(flags, F_NOPROMPT))
+ {
+ deletefiles(files, 0, 0);
+ }
+ else
+ {
+ if (freopen("/dev/tty", "r", stdin) == 0)
+ {
+ errormsg("could not open terminal for input\n");
+ exit(1);
+ }
+
+ deletefiles(files, 1, stdin);
+ }
+ }
+
+ else
+
+ if (ISFLAG(flags, F_SUMMARIZEMATCHES))
+ summarizematches(files);
+
+ else
+
+ printmatches(files);
while (files) {
curfile = files->next;
free(files->d_name);
+ free(files->crcsignature);
+ free(files->crcpartial);
free(files);
files = curfile;
}
-
+
+ for (x = 0; x < argc; x++)
+ free(oldargv[x]);
+
+ free(oldargv);
+
purgetree(checktree);
return 0;