diff options
Diffstat (limited to 'fdupes.c')
-rw-r--r-- | fdupes.c | 948 |
1 files changed, 679 insertions, 269 deletions
@@ -1,4 +1,4 @@ -/* FDUPES Copyright (c) 1999 Adrian Lopez +/* FDUPES Copyright (c) 1999-2002 Adrian Lopez Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files @@ -26,26 +26,39 @@ #include <dirent.h> #include <unistd.h> #include <stdlib.h> +#ifndef OMIT_GETOPT_LONG #include <getopt.h> +#endif #include <string.h> #include <errno.h> +#include <libgen.h> -#ifndef EXTERNAL_MD5 #include "md5/md5.h" -#endif #define ISFLAG(a,b) ((a & b) == b) #define SETFLAG(a,b) (a |= b) -#define F_RECURSE 0x001 -#define F_HIDEPROGRESS 0x002 -#define F_DSAMELINE 0x004 -#define F_FOLLOWLINKS 0x008 -#define F_DELETEFILES 0x010 -#define F_EXCLUDEEMPTY 0x020 -#define F_CONSIDERHARDLINKS 0x040 -#define F_SHOWSIZE 0x080 -#define F_OMITFIRST 0x100 +#define F_RECURSE 0x0001 +#define F_HIDEPROGRESS 0x0002 +#define F_DSAMELINE 0x0004 +#define F_FOLLOWLINKS 0x0008 +#define F_DELETEFILES 0x0010 +#define F_EXCLUDEEMPTY 0x0020 +#define F_CONSIDERHARDLINKS 0x0040 +#define F_SHOWSIZE 0x0080 +#define F_OMITFIRST 0x0100 +#define F_RECURSEAFTER 0x0200 +#define F_NOPROMPT 0x0400 +#define F_SUMMARIZEMATCHES 0x0800 +#define F_EXCLUDEHIDDEN 0x1000 +#define F_PERMISSIONS 0x2000 +#define F_REVERSE 0x4000 +#define F_IMMEDIATE 0x8000 + +typedef enum { + ORDER_TIME = 0, + ORDER_NAME +} ordertype_t; char *program_name; @@ -55,11 +68,36 @@ unsigned long flags = 0; #define INPUT_SIZE 256 +#define PARTIAL_MD5_SIZE 4096 + +#define MD5_DIGEST_LENGTH 16 + +/* + +TODO: Partial sums (for working with very large files). + +typedef struct _signature +{ + md5_state_t state; + md5_byte_t digest[16]; +} signature_t; + +typedef struct _signatures +{ + int num_signatures; + signature_t *signatures; +} signatures_t; + +*/ + typedef struct _file { char *d_name; off_t size; - char *crcsignature; + md5_byte_t *crcpartial; + md5_byte_t *crcsignature; + dev_t device; ino_t inode; + time_t mtime; int hasdupes; /* true only if file is first on duplicate chain */ struct _file *duplicates; struct _file *next; @@ -67,19 +105,10 @@ typedef struct _file { typedef struct _filetree { file_t *file; -#ifdef EXPERIMENTAL_RBTREE - unsigned char color; - struct _filetree *parent; -#endif struct _filetree *left; struct _filetree *right; } filetree_t; -#ifdef EXPERIMENTAL_RBTREE -#define COLOR_RED 0 -#define COLOR_BLACK 1 -#endif - void errormsg(char *message, ...) { va_list ap; @@ -130,6 +159,14 @@ off_t filesize(char *filename) { return s.st_size; } +dev_t getdevice(char *filename) { + struct stat s; + + if (stat(filename, &s) != 0) return 0; + + return s.st_dev; +} + ino_t getinode(char *filename) { struct stat s; @@ -138,6 +175,70 @@ ino_t getinode(char *filename) { return s.st_ino; } +time_t getmtime(char *filename) { + struct stat s; + + if (stat(filename, &s) != 0) return 0; + + return s.st_mtime; +} + +char **cloneargs(int argc, char **argv) +{ + int x; + char **args; + + args = (char **) malloc(sizeof(char*) * argc); + if (args == NULL) { + errormsg("out of memory!\n"); + exit(1); + } + + for (x = 0; x < argc; x++) { + args[x] = (char*) malloc(strlen(argv[x]) + 1); + if (args[x] == NULL) { + free(args); + errormsg("out of memory!\n"); + exit(1); + } + + strcpy(args[x], argv[x]); + } + + return args; +} + +int findarg(char *arg, int start, int argc, char **argv) +{ + int x; + + for (x = start; x < argc; x++) + if (strcmp(argv[x], arg) == 0) + return x; + + return x; +} + +/* Find the first non-option argument after specified option. */ +int nonoptafter(char *option, int argc, char **oldargv, + char **newargv, int optind) +{ + int x; + int targetind; + int testind; + int startat = 1; + + targetind = findarg(option, 1, argc, oldargv); + + for (x = optind; x < argc; x++) { + testind = findarg(newargv[x], startat, argc, oldargv); + if (testind > targetind) return x; + else startat = testind; + } + + return x; +} + int grokdir(char *dir, file_t **filelistp) { DIR *cd; @@ -149,6 +250,7 @@ int grokdir(char *dir, file_t **filelistp) struct stat linfo; static int progress = 0; static char indicator[] = "-\\|/"; + char *fullname, *name; cd = opendir(dir); @@ -172,8 +274,10 @@ int grokdir(char *dir, file_t **filelistp) exit(1); } else newfile->next = *filelistp; + newfile->device = 0; newfile->inode = 0; newfile->crcsignature = NULL; + newfile->crcpartial = NULL; newfile->duplicates = NULL; newfile->hasdupes = 0; @@ -192,6 +296,17 @@ int grokdir(char *dir, file_t **filelistp) strcat(newfile->d_name, "/"); strcat(newfile->d_name, dirinfo->d_name); + if (ISFLAG(flags, F_EXCLUDEHIDDEN)) { + fullname = strdup(newfile->d_name); + name = basename(fullname); + if (name[0] == '.' && strcmp(name, ".") && strcmp(name, "..") ) { + free(newfile->d_name); + free(newfile); + continue; + } + free(fullname); + } + if (filesize(newfile->d_name) == 0 && ISFLAG(flags, F_EXCLUDEEMPTY)) { free(newfile->d_name); free(newfile); @@ -232,25 +347,22 @@ int grokdir(char *dir, file_t **filelistp) return filecount; } -#ifndef EXTERNAL_MD5 - -/* If EXTERNAL_MD5 is not defined, use L. Peter Deutsch's MD5 library. - */ -char *getcrcsignature(char *filename) +md5_byte_t *getcrcsignatureuntil(char *filename, off_t max_read) { - int x; off_t fsize; off_t toread; md5_state_t state; - md5_byte_t digest[16]; + static md5_byte_t digest[MD5_DIGEST_LENGTH]; static md5_byte_t chunk[CHUNK_SIZE]; - static char signature[16*2 + 1]; - char *sigp; FILE *file; md5_init(&state); + fsize = filesize(filename); + + if (max_read != 0 && fsize > max_read) + fsize = max_read; file = fopen(filename, "rb"); if (file == NULL) { @@ -259,9 +371,10 @@ char *getcrcsignature(char *filename) } while (fsize > 0) { - toread = (fsize % CHUNK_SIZE) ? (fsize % CHUNK_SIZE) : CHUNK_SIZE; + toread = (fsize >= CHUNK_SIZE) ? CHUNK_SIZE : fsize; if (fread(chunk, toread, 1, file) != 1) { errormsg("error reading from file %s\n", filename); + fclose(file); return NULL; } md5_append(&state, chunk, toread); @@ -270,61 +383,44 @@ char *getcrcsignature(char *filename) md5_finish(&state, digest); - sigp = signature; - - for (x = 0; x < 16; x++) { - sprintf(sigp, "%02x", digest[x]); - sigp = strchr(sigp, '\0'); - } - fclose(file); - return signature; + return digest; } -#endif /* [#ifndef EXTERNAL_MD5] */ - -#ifdef EXTERNAL_MD5 - -/* If EXTERNAL_MD5 is defined, use md5sum program to calculate signatures. - */ -char *getcrcsignature(char *filename) +md5_byte_t *getcrcsignature(char *filename) { - static char signature[256]; - char *command; - char *separator; - FILE *result; + return getcrcsignatureuntil(filename, 0); +} - command = (char*) malloc(strlen(filename)+strlen(EXTERNAL_MD5)+2); - if (command == NULL) { - errormsg("out of memory\n"); - exit(1); - } +md5_byte_t *getcrcpartialsignature(char *filename) +{ + return getcrcsignatureuntil(filename, PARTIAL_MD5_SIZE); +} - sprintf(command, "%s %s", EXTERNAL_MD5, filename); +int md5cmp(const md5_byte_t *a, const md5_byte_t *b) +{ + int x; - result = popen(command, "r"); - if (result == NULL) { - errormsg("error invoking %s\n", EXTERNAL_MD5); - exit(1); + for (x = 0; x < MD5_DIGEST_LENGTH; ++x) + { + if (a[x] < b[x]) + return -1; + else if (a[x] > b[x]) + return 1; } - - free(command); - if (fgets(signature, 256, result) == NULL) { - errormsg("error generating signature for %s\n", filename); - return NULL; - } - separator = strchr(signature, ' '); - if (separator) *separator = '\0'; + return 0; +} - pclose(result); +void md5copy(md5_byte_t *to, const md5_byte_t *from) +{ + int x; - return signature; + for (x = 0; x < MD5_DIGEST_LENGTH; ++x) + to[x] = from[x]; } -#endif /* [#ifdef EXTERNAL_MD5] */ - void purgetree(filetree_t *checktree) { if (checktree->left != NULL) purgetree(checktree->left); @@ -334,140 +430,17 @@ void purgetree(filetree_t *checktree) free(checktree); } -#ifdef EXPERIMENTAL_RBTREE -/* Use a red-black tree structure to store file information. - */ - -void rotate_left(filetree_t **root, filetree_t *node) -{ - filetree_t *subject; - - subject = node->right; - node->right = subject->left; - - if (subject->left != NULL) subject->left->parent = node; - subject->parent = node->parent; - - if (node->parent == NULL) { - *root = subject; - } else { - if (node == node->parent->left) - node->parent->left = subject; - else - node->parent->right = subject; - } - - subject->left = node; - node->parent = subject; -} - -void rotate_right(filetree_t **root, filetree_t *node) -{ - filetree_t *subject; - - subject = node->left; - node->left = subject->right; - - if (subject->right != NULL) subject->right->parent = node; - subject->parent = node->parent; - - if (node->parent == NULL) { - *root = subject; - } else { - if (node == node->parent->left) - node->parent->left = subject; - else - node->parent->right = subject; - } - - subject->right = node; - node->parent = subject; -} - -#define TREE_LEFT -1 -#define TREE_RIGHT 1 -#define TREE_ROOT 0 - -void registerfile(filetree_t **root, filetree_t *parent, int loc, file_t *file) +void getfilestats(file_t *file) { - filetree_t *node; - filetree_t *uncle; - file->size = filesize(file->d_name); file->inode = getinode(file->d_name); - - node = (filetree_t*) malloc(sizeof(filetree_t)); - if (node == NULL) { - errormsg("out of memory!\n"); - exit(1); - } - - node->file = file; - node->left = NULL; - node->right = NULL; - node->parent = parent; - node->color = COLOR_RED; - - if (loc == TREE_ROOT) - *root = node; - else if (loc == TREE_LEFT) - parent->left = node; - else - parent->right = node; - - while (node != *root && node->parent->color == COLOR_RED) { - if (node->parent->parent == NULL) return; - - if (node->parent == node->parent->parent->left) { - uncle = node->parent->parent->right; - if (uncle == NULL) return; - - if (uncle->color == COLOR_RED) { - node->parent->color = COLOR_BLACK; - uncle->color = COLOR_BLACK; - node->parent->parent->color = COLOR_RED; - node = node->parent->parent; - } else { - if (node == node->parent->right) { - node = node->parent; - rotate_left(root, node); - } - node->parent->color = COLOR_BLACK; - node->parent->parent->color = COLOR_RED; - rotate_right(root, node->parent->parent); - } - } else { - uncle = node->parent->parent->left; - if (uncle == NULL) return; - - if (uncle->color == COLOR_RED) { - node->parent->color = COLOR_BLACK; - uncle->color = COLOR_BLACK; - node->parent->parent->color = COLOR_RED; - node = node->parent->parent; - } else { - if (node == node->parent->right) { - node = node->parent; - rotate_left(root, node); - } - node->parent->color = COLOR_BLACK; - node->parent->parent->color = COLOR_RED; - rotate_right(root, node->parent->parent); - } - } - } - - (*root)->color = COLOR_BLACK; + file->device = getdevice(file->d_name); + file->mtime = getmtime(file->d_name); } -#endif /* [#ifdef EXPERIMENTAL_RBTREE] */ - -#ifndef EXPERIMENTAL_RBTREE - int registerfile(filetree_t **branch, file_t *file) { - file->size = filesize(file->d_name); - file->inode = getinode(file->d_name); + getfilestats(file); *branch = (filetree_t*) malloc(sizeof(filetree_t)); if (*branch == NULL) { @@ -482,20 +455,61 @@ int registerfile(filetree_t **branch, file_t *file) return 1; } -#endif /* [#ifndef EXPERIMENTAL_RBTREE] */ +int same_permissions(char* name1, char* name2) +{ + struct stat s1, s2; + + if (stat(name1, &s1) != 0) return -1; + if (stat(name2, &s2) != 0) return -1; + + return (s1.st_mode == s2.st_mode && + s1.st_uid == s2.st_uid && + s1.st_gid == s2.st_gid); +} + +int is_hardlink(filetree_t *checktree, file_t *file) +{ + file_t *dupe; + ino_t inode; + dev_t device; + + inode = getinode(file->d_name); + device = getdevice(file->d_name); + + if ((inode == checktree->file->inode) && + (device == checktree->file->device)) + return 1; + + if (checktree->file->hasdupes) + { + dupe = checktree->file->duplicates; + + do { + if ((inode == dupe->inode) && + (device == dupe->device)) + return 1; + + dupe = dupe->duplicates; + } while (dupe != NULL); + } + + return 0; +} -file_t *checkmatch(filetree_t **root, filetree_t *checktree, file_t *file) +file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file) { int cmpresult; - char *crcsignature; + md5_byte_t *crcsignature; off_t fsize; - /* If inodes are equal one of the files is a hard link, which - is usually not accidental. We don't want to flag them as - duplicates, unless the user specifies otherwise. */ + /* If device and inode fields are equal one of the files is a + hard link to the other or the files have been listed twice + unintentionally. We don't want to flag these files as + duplicates unless the user specifies otherwise. + */ - if (!ISFLAG(flags, F_CONSIDERHARDLINKS) && getinode(file->d_name) == - checktree->file->inode) return NULL; + if (!ISFLAG(flags, F_CONSIDERHARDLINKS) && is_hardlink(checktree, file)) + return NULL; fsize = filesize(file->d_name); @@ -503,57 +517,97 @@ file_t *checkmatch(filetree_t **root, filetree_t *checktree, file_t *file) cmpresult = -1; else if (fsize > checktree->file->size) cmpresult = 1; + else + if (ISFLAG(flags, F_PERMISSIONS) && + !same_permissions(file->d_name, checktree->file->d_name)) + cmpresult = -1; else { - if (checktree->file->crcsignature == NULL) { - crcsignature = getcrcsignature(checktree->file->d_name); - if (crcsignature == NULL) return NULL; + if (checktree->file->crcpartial == NULL) { + crcsignature = getcrcpartialsignature(checktree->file->d_name); + if (crcsignature == NULL) { + errormsg ("cannot read file %s\n", checktree->file->d_name); + return NULL; + } - checktree->file->crcsignature = (char*) malloc(strlen(crcsignature)+1); - if (checktree->file->crcsignature == NULL) { + checktree->file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t)); + if (checktree->file->crcpartial == NULL) { errormsg("out of memory\n"); exit(1); } - strcpy(checktree->file->crcsignature, crcsignature); + md5copy(checktree->file->crcpartial, crcsignature); } - if (file->crcsignature == NULL) { - crcsignature = getcrcsignature(file->d_name); - if (crcsignature == NULL) return NULL; + if (file->crcpartial == NULL) { + crcsignature = getcrcpartialsignature(file->d_name); + if (crcsignature == NULL) { + errormsg ("cannot read file %s\n", file->d_name); + return NULL; + } - file->crcsignature = (char*) malloc(strlen(crcsignature)+1); - if (file->crcsignature == NULL) { + file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t)); + if (file->crcpartial == NULL) { errormsg("out of memory\n"); exit(1); } - strcpy(file->crcsignature, crcsignature); + md5copy(file->crcpartial, crcsignature); } - cmpresult = strcmp(file->crcsignature, checktree->file->crcsignature); + cmpresult = md5cmp(file->crcpartial, checktree->file->crcpartial); + /*if (cmpresult != 0) errormsg(" on %s vs %s\n", file->d_name, checktree->file->d_name);*/ + + if (cmpresult == 0) { + if (checktree->file->crcsignature == NULL) { + crcsignature = getcrcsignature(checktree->file->d_name); + if (crcsignature == NULL) return NULL; + + checktree->file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t)); + if (checktree->file->crcsignature == NULL) { + errormsg("out of memory\n"); + exit(1); + } + md5copy(checktree->file->crcsignature, crcsignature); + } + + if (file->crcsignature == NULL) { + crcsignature = getcrcsignature(file->d_name); + if (crcsignature == NULL) return NULL; + + file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t)); + if (file->crcsignature == NULL) { + errormsg("out of memory\n"); + exit(1); + } + md5copy(file->crcsignature, crcsignature); + } + + cmpresult = md5cmp(file->crcsignature, checktree->file->crcsignature); + /*if (cmpresult != 0) errormsg("P on %s vs %s\n", + file->d_name, checktree->file->d_name); + else errormsg("P F on %s vs %s\n", file->d_name, + checktree->file->d_name); + printf("%s matches %s\n", file->d_name, checktree->file->d_name);*/ + } } if (cmpresult < 0) { if (checktree->left != NULL) { return checkmatch(root, checktree->left, file); } else { -#ifndef EXPERIMENTAL_RBTREE registerfile(&(checktree->left), file); -#else - registerfile(root, checktree, TREE_LEFT, file); -#endif return NULL; } } else if (cmpresult > 0) { if (checktree->right != NULL) { return checkmatch(root, checktree->right, file); } else { -#ifndef EXPERIMENTAL_RBTREE registerfile(&(checktree->right), file); -#else - registerfile(root, checktree, TREE_RIGHT, file); -#endif return NULL; } - } else return checktree->file; + } else + { + getfilestats(file); + return &checktree->file; + } } /* Do a bit-for-bit comparison in case two different files produce the @@ -561,8 +615,8 @@ file_t *checkmatch(filetree_t **root, filetree_t *checktree, file_t *file) int confirmmatch(FILE *file1, FILE *file2) { - unsigned char c1; - unsigned char c2; + unsigned char c1[CHUNK_SIZE]; + unsigned char c2[CHUNK_SIZE]; size_t r1; size_t r2; @@ -570,17 +624,55 @@ int confirmmatch(FILE *file1, FILE *file2) fseek(file2, 0, SEEK_SET); do { - r1 = fread(&c1, sizeof(c1), 1, file1); - r2 = fread(&c2, sizeof(c2), 1, file2); + r1 = fread(c1, sizeof(unsigned char), sizeof(c1), file1); + r2 = fread(c2, sizeof(unsigned char), sizeof(c2), file2); - if (c1 != c2) return 0; /* file contents are different */ - } while (r1 && r2); + if (r1 != r2) return 0; /* file lengths are different */ + if (memcmp (c1, c2, r1)) return 0; /* file contents are different */ + } while (r2); - if (r1 != r2) return 0; /* file lengths are different */ - return 1; } +void summarizematches(file_t *files) +{ + int numsets = 0; + double numbytes = 0.0; + int numfiles = 0; + file_t *tmpfile; + + while (files != NULL) + { + if (files->hasdupes) + { + numsets++; + + tmpfile = files->duplicates; + while (tmpfile != NULL) + { + numfiles++; + numbytes += files->size; + tmpfile = tmpfile->duplicates; + } + } + + files = files->next; + } + + if (numsets == 0) + printf("No duplicates found.\n\n"); + else + { + if (numbytes < 1024.0) + printf("%d duplicate files (in %d sets), occupying %.0f bytes.\n\n", numfiles, numsets, numbytes); + else if (numbytes <= (1000.0 * 1000.0)) + printf("%d duplicate files (in %d sets), occupying %.1f kilobytes\n\n", numfiles, numsets, numbytes / 1000.0); + else + printf("%d duplicate files (in %d sets), occupying %.1f megabytes\n\n", numfiles, numsets, numbytes / (1000.0 * 1000.0)); + + } +} + void printmatches(file_t *files) { file_t *tmpfile; @@ -588,7 +680,7 @@ void printmatches(file_t *files) while (files != NULL) { if (files->hasdupes) { if (!ISFLAG(flags, F_OMITFIRST)) { - if (ISFLAG(flags, F_SHOWSIZE)) printf("%ld byte%seach:\n", files->size, + if (ISFLAG(flags, F_SHOWSIZE)) printf("%lld byte%seach:\n", (long long int)files->size, (files->size != 1) ? "s " : " "); if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &files->d_name); printf("%s%c", files->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n'); @@ -607,7 +699,64 @@ void printmatches(file_t *files) } } -void autodelete(file_t *files) +/* +#define REVISE_APPEND "_tmp" +char *revisefilename(char *path, int seq) +{ + int digits; + char *newpath; + char *scratch; + char *dot; + + digits = numdigits(seq); + newpath = malloc(strlen(path) + strlen(REVISE_APPEND) + digits + 1); + if (!newpath) return newpath; + + scratch = malloc(strlen(path) + 1); + if (!scratch) return newpath; + + strcpy(scratch, path); + dot = strrchr(scratch, '.'); + if (dot) + { + *dot = 0; + sprintf(newpath, "%s%s%d.%s", scratch, REVISE_APPEND, seq, dot + 1); + } + + else + { + sprintf(newpath, "%s%s%d", path, REVISE_APPEND, seq); + } + + free(scratch); + + return newpath; +} */ + +int relink(char *oldfile, char *newfile) +{ + dev_t od; + dev_t nd; + ino_t oi; + ino_t ni; + + od = getdevice(oldfile); + oi = getinode(oldfile); + + if (link(oldfile, newfile) != 0) + return 0; + + /* make sure we're working with the right file (the one we created) */ + nd = getdevice(newfile); + ni = getinode(newfile); + + if (nd != od || oi != ni) + return 0; /* file is not what we expected */ + + return 1; +} + +void deletefiles(file_t *files, int prompt, FILE *tty) { int counter; int groups = 0; @@ -661,27 +810,36 @@ void autodelete(file_t *files) counter = 1; dupelist[counter] = files; - printf("[%d] %s\n", counter, files->d_name); + if (prompt) printf("[%d] %s\n", counter, files->d_name); tmpfile = files->duplicates; while (tmpfile) { dupelist[++counter] = tmpfile; - printf("[%d] %s\n", counter, tmpfile->d_name); + if (prompt) printf("[%d] %s\n", counter, tmpfile->d_name); tmpfile = tmpfile->duplicates; } - printf("\n"); + if (prompt) printf("\n"); + + if (!prompt) /* preserve only the first file */ + { + preserve[1] = 1; + for (x = 2; x <= counter; x++) preserve[x] = 0; + } + + else /* prompt for files to preserve */ do { printf("Set %d of %d, preserve files [1 - %d, all]", curgroup, groups, counter); - if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%ld byte%seach)", files->size, + if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%lld byte%seach)", (long long int)files->size, (files->size != 1) ? "s " : " "); printf(": "); fflush(stdout); - fgets(preservestr, INPUT_SIZE, stdin); + if (!fgets(preservestr, INPUT_SIZE, tty)) + preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */ i = strlen(preservestr) - 1; @@ -694,8 +852,11 @@ void autodelete(file_t *files) } preservestr = tstr; - if (!fgets(preservestr + i + 1, INPUT_SIZE, stdin)) - break; /* stop if fgets fails -- possible EOF? */ + if (!fgets(preservestr + i + 1, INPUT_SIZE, tty)) + { + preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */ + break; + } i = strlen(preservestr)-1; } @@ -723,8 +884,12 @@ void autodelete(file_t *files) if (preserve[x]) printf(" [+] %s\n", dupelist[x]->d_name); else { - printf(" [-] %s\n", dupelist[x]->d_name); - remove(dupelist[x]->d_name); + if (remove(dupelist[x]->d_name) == 0) { + printf(" [-] %s\n", dupelist[x]->d_name); + } else { + printf(" [!] %s ", dupelist[x]->d_name); + printf("-- unable to delete file!\n"); + } } } printf("\n"); @@ -738,19 +903,126 @@ void autodelete(file_t *files) free(preservestr); } +int sort_pairs_by_arrival(file_t *f1, file_t *f2) +{ + if (f2->duplicates != 0) + return !ISFLAG(flags, F_REVERSE) ? 1 : -1; + + return !ISFLAG(flags, F_REVERSE) ? -1 : 1; +} + +int sort_pairs_by_mtime(file_t *f1, file_t *f2) +{ + if (f1->mtime < f2->mtime) + return !ISFLAG(flags, F_REVERSE) ? -1 : 1; + else if (f1->mtime > f2->mtime) + return !ISFLAG(flags, F_REVERSE) ? 1 : -1; + + return 0; +} + +int sort_pairs_by_filename(file_t *f1, file_t *f2) +{ + return strcmp(f1->d_name, f2->d_name); +} + +void registerpair(file_t **matchlist, file_t *newmatch, + int (*comparef)(file_t *f1, file_t *f2)) +{ + file_t *traverse; + file_t *back; + + (*matchlist)->hasdupes = 1; + + back = 0; + traverse = *matchlist; + while (traverse) + { + if (comparef(newmatch, traverse) <= 0) + { + newmatch->duplicates = traverse; + + if (back == 0) + { + *matchlist = newmatch; /* update pointer to head of list */ + + newmatch->hasdupes = 1; + traverse->hasdupes = 0; /* flag is only for first file in dupe chain */ + } + else + back->duplicates = newmatch; + + break; + } + else + { + if (traverse->duplicates == 0) + { + traverse->duplicates = newmatch; + + if (back == 0) + traverse->hasdupes = 1; + + break; + } + } + + back = traverse; + traverse = traverse->duplicates; + } +} + +void deletesuccessor(file_t **existing, file_t *duplicate, + int (*comparef)(file_t *f1, file_t *f2)) +{ + file_t *to_keep; + file_t *to_delete; + + if (comparef(duplicate, *existing) >= 0) + { + to_keep = *existing; + to_delete = duplicate; + } + else + { + to_keep = duplicate; + to_delete = *existing; + + *existing = duplicate; + } + + if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " "); + + printf(" [+] %s\n", to_keep->d_name); + if (remove(to_delete->d_name) == 0) { + printf(" [-] %s\n", to_delete->d_name); + } else { + printf(" [!] %s ", to_delete->d_name); + printf("-- unable to delete file!\n"); + } + + printf("\n"); +} + void help_text() { printf("Usage: fdupes [options] DIRECTORY...\n\n"); - printf(" -r --recurse \tinclude files residing in subdirectories\n"); + printf(" -r --recurse \tfor every directory given follow subdirectories\n"); + printf(" \tencountered within\n"); + printf(" -R --recurse: \tfor each directory given after this option follow\n"); + printf(" \tsubdirectories encountered within (note the ':' at\n"); + printf(" \tthe end of the option, manpage for more details)\n"); printf(" -s --symlinks \tfollow symlinks\n"); printf(" -H --hardlinks \tnormally, when two or more files point to the same\n"); printf(" \tdisk area they are treated as non-duplicates; this\n"); printf(" \toption will change this behavior\n"); printf(" -n --noempty \texclude zero-length files from consideration\n"); + printf(" -A --nohidden \texclude hidden files from consideration\n"); printf(" -f --omitfirst \tomit the first file in each set of matches\n"); printf(" -1 --sameline \tlist each set of matches on a single line\n"); printf(" -S --size \tshow size of duplicate files\n"); + printf(" -m --summarize \tsummarize dupe information\n"); printf(" -q --quiet \thide progress indicator\n"); printf(" -d --delete \tprompt user for files to preserve and delete all\n"); printf(" \tothers; important: under particular circumstances,\n"); @@ -758,8 +1030,22 @@ void help_text() printf(" \twith -s or --symlinks, or when specifying a\n"); printf(" \tparticular directory more than once; refer to the\n"); printf(" \tfdupes documentation for additional information\n"); + /*printf(" -l --relink \t(description)\n");*/ + printf(" -N --noprompt \ttogether with --delete, preserve the first file in\n"); + printf(" \teach set of duplicates and delete the rest without\n"); + printf(" \tprompting the user\n"); + printf(" -I --immediate \tdelete duplicates as they are encountered, without\n"); + printf(" \tgrouping into sets; implies --noprompt\n"); + printf(" -p --permissions \tdon't consider files with different owner/group or\n"); + printf(" \tpermission bits as duplicates\n"); + printf(" -o --order=BY \tselect sort order for output, linking and deleting; by\n"); + printf(" \tmtime (BY='time'; default) or filename (BY='name')\n"); + printf(" -i --reverse \treverse order while sorting\n"); printf(" -v --version \tdisplay fdupes version\n"); printf(" -h --help \tdisplay this help message\n\n"); +#ifdef OMIT_GETOPT_LONG + printf("Note: Long options are not supported in this fdupes build.\n\n"); +#endif } int main(int argc, char **argv) { @@ -769,30 +1055,56 @@ int main(int argc, char **argv) { FILE *file2; file_t *files = NULL; file_t *curfile; - file_t *match = NULL; + file_t **match = NULL; filetree_t *checktree = NULL; int filecount = 0; int progress = 0; - + char **oldargv; + int firstrecurse; + ordertype_t ordertype = ORDER_TIME; + +#ifndef OMIT_GETOPT_LONG static struct option long_options[] = { { "omitfirst", 0, 0, 'f' }, { "recurse", 0, 0, 'r' }, + { "recursive", 0, 0, 'r' }, + { "recurse:", 0, 0, 'R' }, + { "recursive:", 0, 0, 'R' }, { "quiet", 0, 0, 'q' }, { "sameline", 0, 0, '1' }, { "size", 0, 0, 'S' }, { "symlinks", 0, 0, 's' }, { "hardlinks", 0, 0, 'H' }, + { "relink", 0, 0, 'l' }, { "noempty", 0, 0, 'n' }, + { "nohidden", 0, 0, 'A' }, { "delete", 0, 0, 'd' }, { "version", 0, 0, 'v' }, { "help", 0, 0, 'h' }, + { "noprompt", 0, 0, 'N' }, + { "immediate", 0, 0, 'I'}, + { "summarize", 0, 0, 'm'}, + { "summary", 0, 0, 'm' }, + { "permissions", 0, 0, 'p' }, + { "order", 1, 0, 'o' }, + { "reverse", 0, 0, 'i' }, { 0, 0, 0, 0 } }; +#define GETOPT getopt_long +#else +#define GETOPT getopt +#endif program_name = argv[0]; - while ((opt = getopt_long(argc, argv, "frq1SsHndvh", long_options, NULL)) != EOF) { + oldargv = cloneargs(argc, argv); + + while ((opt = GETOPT(argc, argv, "frRq1SsHlnAdvhNmpo:i" +#ifndef OMIT_GETOPT_LONG + , long_options, NULL +#endif + )) != EOF) { switch (opt) { case 'f': SETFLAG(flags, F_OMITFIRST); @@ -800,6 +1112,9 @@ int main(int argc, char **argv) { case 'r': SETFLAG(flags, F_RECURSE); break; + case 'R': + SETFLAG(flags, F_RECURSEAFTER); + break; case 'q': SETFLAG(flags, F_HIDEPROGRESS); break; @@ -818,6 +1133,9 @@ int main(int argc, char **argv) { case 'n': SETFLAG(flags, F_EXCLUDEEMPTY); break; + case 'A': + SETFLAG(flags, F_EXCLUDEHIDDEN); + break; case 'd': SETFLAG(flags, F_DELETEFILES); break; @@ -827,8 +1145,34 @@ int main(int argc, char **argv) { case 'h': help_text(); exit(1); + case 'N': + SETFLAG(flags, F_NOPROMPT); + break; + case 'I': + SETFLAG(flags, F_IMMEDIATE); + break; + case 'm': + SETFLAG(flags, F_SUMMARIZEMATCHES); + break; + case 'p': + SETFLAG(flags, F_PERMISSIONS); + break; + case 'o': + if (!strcasecmp("name", optarg)) { + ordertype = ORDER_NAME; + } else if (!strcasecmp("time", optarg)) { + ordertype = ORDER_TIME; + } else { + errormsg("invalid value for --order: '%s'\n", optarg); + exit(1); + } + break; + case 'i': + SETFLAG(flags, F_REVERSE); + break; + default: - fprintf(stderr, "Try `fdupes --help' for more information\n"); + fprintf(stderr, "Try `fdupes --help' for more information.\n"); exit(1); } } @@ -838,19 +1182,51 @@ int main(int argc, char **argv) { exit(1); } - for (x = optind; x < argc; x++) filecount += grokdir(argv[x], &files); + if (ISFLAG(flags, F_RECURSE) && ISFLAG(flags, F_RECURSEAFTER)) { + errormsg("options --recurse and --recurse: are not compatible\n"); + exit(1); + } + + if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) { + errormsg("options --summarize and --delete are not compatible\n"); + exit(1); + } + + if (ISFLAG(flags, F_RECURSEAFTER)) { + firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv, optind); + + if (firstrecurse == argc) + firstrecurse = nonoptafter("-R", argc, oldargv, argv, optind); + + if (firstrecurse == argc) { + errormsg("-R option must be isolated from other options\n"); + exit(1); + } + + /* F_RECURSE is not set for directories before --recurse: */ + for (x = optind; x < firstrecurse; x++) + filecount += grokdir(argv[x], &files); + + /* Set F_RECURSE for directories after --recurse: */ + SETFLAG(flags, F_RECURSE); - if (!files) exit(0); + for (x = firstrecurse; x < argc; x++) + filecount += grokdir(argv[x], &files); + } else { + for (x = optind; x < argc; x++) + filecount += grokdir(argv[x], &files); + } + + if (!files) { + if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " "); + exit(0); + } curfile = files; while (curfile) { if (!checktree) -#ifndef EXPERIMENTAL_RBTREE registerfile(&checktree, curfile); -#else - registerfile(&checktree, NULL, TREE_ROOT, curfile); -#endif else match = checkmatch(&checktree, checktree, curfile); @@ -860,18 +1236,21 @@ int main(int argc, char **argv) { curfile = curfile->next; continue; } - - file2 = fopen(match->d_name, "rb"); + + file2 = fopen((*match)->d_name, "rb"); if (!file2) { fclose(file1); curfile = curfile->next; continue; } - + if (confirmmatch(file1, file2)) { - match->hasdupes = 1; - curfile->duplicates = match->duplicates; - match->duplicates = curfile; + if (ISFLAG(flags, F_DELETEFILES) && ISFLAG(flags, F_IMMEDIATE)) + deletesuccessor(match, curfile, + (ordertype == ORDER_TIME) ? sort_pairs_by_mtime : sort_pairs_by_filename ); + else + registerpair(match, curfile, + (ordertype == ORDER_TIME) ? sort_pairs_by_mtime : sort_pairs_by_filename ); } fclose(file1); @@ -889,16 +1268,47 @@ int main(int argc, char **argv) { if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " "); - if (ISFLAG(flags, F_DELETEFILES)) autodelete(files); - else printmatches(files); + if (ISFLAG(flags, F_DELETEFILES)) + { + if (ISFLAG(flags, F_NOPROMPT)) + { + deletefiles(files, 0, 0); + } + else + { + if (freopen("/dev/tty", "r", stdin) == 0) + { + errormsg("could not open terminal for input\n"); + exit(1); + } + + deletefiles(files, 1, stdin); + } + } + + else + + if (ISFLAG(flags, F_SUMMARIZEMATCHES)) + summarizematches(files); + + else + + printmatches(files); while (files) { curfile = files->next; free(files->d_name); + free(files->crcsignature); + free(files->crcpartial); free(files); files = curfile; } - + + for (x = 0; x < argc; x++) + free(oldargv[x]); + + free(oldargv); + purgetree(checktree); return 0; |