/*
 * POSIX multiple file binary compare utility.
 * Reports identical files, with option to hard link them together.
 * Files to compare must be specified on the command line.
 *
 * Copyright (C) 2005-2009 George Gesslein II
 * Web site: www.mathomatic.org
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/*
 * When the executable is named "dups", output is in the UNIX style
 * of listing only the file names of the duplicates. Otherwise, output
 * statistics and everything.
 *
 * To compile:
 *
 *	cc -Wall -O mycmp.c -o mycmp
 *
 * Usage: mycmp [ options ] file1 file2 ...
 * Options:
 *   -h   Make hard links between duplicate files on the same device.
 *   -p   Check permissions, if perms differ, files differ.
 *   -q   Quiet mode (no output).
 *   -s   Suppress warning messages.
 *
 * Compare two or more binary files.
 * All specified files are compared with each other.
 * Identical files are reported.
 */

/*
 * Changes:
 * 11/06/05 - fixed an inefficiency and bug with the -h option. Now checks that files
 *            are on the same device before trying to hard link.
 * 11/06/05 - messages localized with _("").
 * 11/06/05 - don't try to hard link if already linked. Fixes a major bug.
 * 11/13/05 - Added check for a programming error and exit value is set.
 * 1/28/06 - Added -q option.
 * 5/27/06 - Improved an error message and cleanup.
 * 5/28/06 - Added assert() and -p option for safer hard linking.
 * 11/22/08 - Speed up by using memcmp(3).
 */

/*
 * Ask for the POSIX.1-2008 API so that strdup(), getopt(), link() and the
 * S_ISREG()/S_ISDIR() macros are declared even when the compiler is run in
 * a strict ISO C mode (e.g. -std=c11).  Must precede every #include.
 */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE	200809L
#endif

#include <sys/types.h>
#include <sys/stat.h>

#include <assert.h>
#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define false	0
#define true	1

#if	I18N	/* internationalization */
#include <libintl.h>
#define _(str)	gettext(str)
#else
#define _(str)	str
#endif

int	cmp_two(char *filename1, char *filename2);
void	usage(void);

#define BUFSIZE	16384

static char	buf[2][BUFSIZE];	/* one read buffer per file being compared */
size_t		size1, size2;		/* byte counts from the most recent pair of fread() calls */

char	*prog_name;	/* name of this program */
int	dups_flag;	/* terse mode */
int	silent;		/* set this true to suppress warning messages */
int	hflag;		/* make hard links */
int	qflag;		/* quiet flag */
int	pflag;		/* check permissions flag */

/*
 * Return true if both stat results carry the same mode, owner and group.
 * This is the test the -p option adds on top of the content comparison.
 */
static int
same_perms(const struct stat *a, const struct stat *b)
{
	return a->st_mode == b->st_mode
	    && a->st_uid == b->st_uid
	    && a->st_gid == b->st_gid;
}

/*
 * Compare two open streams from their current positions to end of file.
 *
 * Return 1 if the contents are identical, 0 if they differ, and -1 if
 * either stream reported a read error.  On -1 the caller can use ferror()
 * on each stream to find out which one failed.
 *
 * fread() returns a short (possibly zero) count both at end of file and on
 * error, so ferror() must be consulted before a pair of zero counts is
 * accepted as "both files ended, everything matched".
 */
static int
same_contents(FILE *fp1, FILE *fp2)
{
	for (;;) {
		size1 = fread(buf[0], 1, BUFSIZE, fp1);
		size2 = fread(buf[1], 1, BUFSIZE, fp2);
		if (ferror(fp1) || ferror(fp2))
			return -1;
		if (size1 != size2)
			return false;
		if (size1 == 0)
			return true;
		if (memcmp(buf[0], buf[1], size1) != 0)
			return false;
	}
}

/*
 * Program entry point.
 *
 * Parses the options, then either hands exactly two files to cmp_two()
 * (when not in "dups" or -h mode) or compares every named file against
 * every earlier file of the same size, reporting or hard linking the
 * duplicates that are found.
 *
 * The filesize[] array is indexed like argv[] and doubles as a state table:
 *	 > 0	size of a regular file that takes part in the comparisons
 *	   0	zero length file; never compared
 *	  -1	file could not be stat'ed, opened or read, or (without -h)
 *		is another link to a file that appears earlier in argv[]
 *	  -2	not a regular file
 */
int
main(int argc, char *argv[])
{
	int		i, j;
	int		matched;
	int		rc;
	int		i_failed;
	FILE		*fp1, *fp2;
	struct stat	sb, sb2;
	long		total, compared, matches, linked, links;	/* file counts */
	long long	space_savings;
	long long	*filesize;	/* array of file sizes */
	int		ev = 0;		/* exit value */

	prog_name = strdup((argc > 0 && argv[0] != NULL) ? basename(argv[0]) : "mycmp");
	if (prog_name == NULL) {
		fprintf(stderr, _("%s: Not enough memory.\n"), "mycmp");
		exit(2);
	}
	if (strcmp(prog_name, "dups") == 0) {
		dups_flag = true;
	}
	while ((i = getopt(argc, argv, "pqsh")) >= 0) {
		switch (i) {
		case 'p':
			pflag = true;
			break;
		case 'q':
			qflag = true;
			/* fallthrough: quiet mode also suppresses the warnings */
		case 's':
			silent = true;
			break;
		case 'h':
			hflag = true;
			break;
		default:
			usage();
		}
	}
	if ((argc - optind) < 2)
		usage();
	if (!dups_flag && !hflag && (argc - optind) == 2) {
		exit(cmp_two(argv[optind], argv[optind + 1]));
	}
	filesize = calloc((size_t) argc, sizeof *filesize);
	if (filesize == NULL) {
		fprintf(stderr, _("%s: Not enough memory.\n"), prog_name);
		exit(2);
	}
	matches = 0;
	linked = 0;
	links = 0;
	space_savings = 0;
	if (hflag) {
		/* only record the sizes; existing links are recognized during the comparisons */
		for (i = optind; i < argc; i++) {
			if (stat(argv[i], &sb)) {
				if (!silent)
					fprintf(stderr, _("%s: Cannot stat \"%s\". File skipped.\n"), prog_name, argv[i]);
				filesize[i] = -1;
			} else if (!S_ISREG(sb.st_mode)) {
				filesize[i] = -2;
			} else {
				filesize[i] = sb.st_size;
			}
		}
	} else {
		/* first, mark all the linked files, so only one copy is compared */
		for (i = optind; i < argc; i++) {
			if (filesize[i] < 0)
				continue;	/* already marked as a link to an earlier file */
			if (stat(argv[i], &sb)) {
				if (!silent)
					fprintf(stderr, _("%s: Cannot stat \"%s\". File skipped.\n"), prog_name, argv[i]);
				filesize[i] = -1;
				continue;
			}
			if (!S_ISREG(sb.st_mode)) {
				if (!silent) {
					if (S_ISDIR(sb.st_mode))
						fprintf(stderr, _("%s: \"%s\" is a directory. Not compared.\n"), prog_name, argv[i]);
					else
						fprintf(stderr, _("%s: \"%s\" is not a regular file. Not compared.\n"), prog_name, argv[i]);
				}
				filesize[i] = -2;
				continue;
			}
			if (sb.st_size == 0) {
				if (!silent)
					fprintf(stderr, _("%s: File \"%s\" is zero length. Not compared.\n"), prog_name, argv[i]);
			}
			filesize[i] = sb.st_size;
			for (j = i + 1; j < argc; j++) {
				if (stat(argv[j], &sb2))
					continue;	/* reported when the outer loop reaches it */
				if (sb.st_dev == sb2.st_dev && sb.st_ino == sb2.st_ino) {
					links++;
					filesize[j] = -1;	/* don't compare links with each other */
				}
			}
		}
	}
	/* do the comparisons on same size files */
	for (i = optind + 1; i < argc; i++) {
		if (filesize[i] <= 0)
			continue;
		fp2 = NULL;	/* argv[i] is opened only once a same-size candidate shows up */
		for (j = optind; j < i; j++) {
			if (filesize[j] <= 0 || filesize[j] != filesize[i])
				continue;
			if (stat(argv[i], &sb) || (fp2 == NULL && (fp2 = fopen(argv[i], "rb")) == NULL)) {
				if (!silent)
					fprintf(stderr, _("%s: Cannot open \"%s\". File skipped.\n"), prog_name, argv[i]);
				filesize[i] = -1;
				break;
			}
			rewind(fp2);	/* fp2 is reused for every candidate j */
			if (stat(argv[j], &sb2) || (fp1 = fopen(argv[j], "rb")) == NULL) {
				if (!silent)
					fprintf(stderr, _("%s: Cannot open \"%s\". File skipped.\n"), prog_name, argv[j]);
				filesize[j] = -1;
				continue;
			}
			/* two names for one inode are identical by definition */
			matched = (sb.st_dev == sb2.st_dev && sb.st_ino == sb2.st_ino);
			if (!matched && (!pflag || same_perms(&sb, &sb2))) {
				rc = same_contents(fp1, fp2);
				if (rc < 0) {
					/*
					 * A read error must never count as a match:
					 * with -h a match unlinks argv[i].
					 */
					i_failed = ferror(fp2);
					if (!silent)
						fprintf(stderr, _("%s: Cannot read \"%s\". File skipped.\n"),
						    prog_name, i_failed ? argv[i] : argv[j]);
					fclose(fp1);
					if (i_failed) {
						filesize[i] = -1;
						break;
					}
					filesize[j] = -1;
					continue;
				}
				matched = rc;
			}
			fclose(fp1);
			if (!matched)
				continue;
			matches++;
			if (!hflag) {
				/* links were filtered out by the first pass, so this would be a programming error */
				assert(sb.st_dev != sb2.st_dev || sb.st_ino != sb2.st_ino);
				if (!qflag) {
					if (dups_flag) {
						printf("%s %s\n", argv[j], argv[i]);
					} else {
						printf(_("\"%s\" is identical to \"%s\" (%lld bytes).\n"), argv[j], argv[i], filesize[i]);
					}
				}
				space_savings += filesize[i];
			} else if (sb.st_dev == sb2.st_dev && sb.st_ino != sb2.st_ino) {
				/* hard link the two identical files */
				fclose(fp2);
				fp2 = NULL;
				/*
				 * NOTE: not atomic; if link() fails after unlink()
				 * succeeded, the name argv[i] is gone (its contents
				 * still exist as argv[j]).
				 */
				if (unlink(argv[i]) || link(argv[j], argv[i])) {
					fprintf(stderr, _("%s: Cannot make hard link between \"%s\" and \"%s\"!\n"), prog_name, argv[j], argv[i]);
					exit(2);
				} else {
					linked++;
				}
			}
			break;	/* one match per file is enough */
		}
		if (fp2)
			fclose(fp2);
	}
	for (i = optind, compared = 0, total = 0; i < argc; i++) {
		if (filesize[i] > 0) {
			compared++;
		}
		if (filesize[i] >= -1) {
			total++;
		}
	}
	if (!dups_flag && !qflag) {
		if (compared < 2) {
			printf(_("No files compared.\n"));
		} else if (matches == 0 && links == 0) {
			printf(_("All %ld files are different.\n"), compared);
		} else {
			if (hflag) {
				printf(_("%ld duplicates linked.\n"), linked);
			} else {
				printf(_("\n%ld total files.\n"), total);
				printf(_("%ld files compared.\n"), compared);
				printf(_("%ld duplicates and %ld links.\n"), matches, links);
				if (space_savings) {
					printf(_("Space savings by eliminating duplicates: %lld bytes.\n"), space_savings);
				}
			}
		}
	}
	free(filesize);
	exit(ev);
}

/*
 * Compare two binary files.
 * Return 0 if identical, 1 if different, 2 if error.
 *
 * Every byte pair is examined so the number of differing bytes can be
 * reported.  Unless qflag is set, a summary is written to standard output.
 * When pflag is set, files with identical contents but a different mode,
 * owner or group are reported as different.  A file that cannot be
 * stat'ed, opened or read is an error (2); both streams are closed on
 * every return path.
 */
int
cmp_two(char *filename1, char *filename2)
{
	int		c1, c2;
	FILE		*fp1, *fp2;
	long long	mismatches;
	struct stat	sb, sb2;
	int		ev = 0;
	long long	nc;
	int		same_flag;
	int		read_failed = false;

	if (stat(filename1, &sb) || (fp1 = fopen(filename1, "rb")) == NULL) {
		fprintf(stderr, _("%s: Cannot open file \"%s\".\n"), prog_name, filename1);
		return(2);
	}
	if (stat(filename2, &sb2) || (fp2 = fopen(filename2, "rb")) == NULL) {
		fprintf(stderr, _("%s: Cannot open file \"%s\".\n"), prog_name, filename2);
		fclose(fp1);
		return(2);
	}
	same_flag = (sb.st_dev == sb2.st_dev && sb.st_ino == sb2.st_ino);
	if (same_flag && !silent) {
		fprintf(stderr, _("Warning: Specified filenames point to the same file!\n"));
	}
	mismatches = 0;
	nc = 0;
	for (;;) {
		c1 = getc(fp1);
		c2 = getc(fp2);
		if (c1 == EOF || c2 == EOF) {
			/* getc() returns EOF for a read error too; tell the two apart */
			if (ferror(fp1) || ferror(fp2)) {
				read_failed = true;
			} else if (c1 != EOF || c2 != EOF) {
				if (!qflag) {
					printf(_("Files are different sizes.\n"));
				}
				ev = 1;
			}
			break;
		}
		if (c1 != c2) {
			mismatches++;
			ev = 1;
		}
		nc++;
	}
	if (read_failed) {
		fprintf(stderr, _("%s: Error reading file \"%s\".\n"), prog_name,
		    ferror(fp1) ? filename1 : filename2);
	}
	fclose(fp1);
	fclose(fp2);
	if (read_failed) {
		return(2);
	}
	if (!qflag) {
		printf(_("%lld bytes compared.\n"), nc);
		if (mismatches) {
			printf(_("%lld bytes differ.\n"), mismatches);
		}
		if (ev == 0) {
			printf(_("Files are identical.\n"));
		} else {
			printf(_("\"%s\" (%lld bytes) and\n\"%s\" (%lld bytes) differ.\n"),
			    filename1, (long long) sb.st_size, filename2, (long long) sb2.st_size);
		}
	}
	if (ev == 0) {
		if (pflag && !same_perms(&sb, &sb2)) {
			if (!qflag) {
				printf(_("Permissions differ.\n"));
			}
			ev = 1;
		}
	}
	return(ev);
}

/*
 * Print the version and usage summary on standard output, then terminate
 * the program with exit status 2.  Does not return.
 */
void
usage(void)
{
	printf("mycmp version 1.1\n");
	printf("Usage: %s [ options ] file1 file2 ...\n", prog_name);
	printf("\nOptions:\n");
	printf(" -h Make hard links between duplicate files on the same device.\n");
	printf(" -p Check permissions, if perms differ, files differ.\n");
	printf(" -q Quiet mode (no output).\n");
	printf(" -s Suppress warning messages.\n");
	printf("\nCompare two or more binary files.\n");
	printf("All specified files are compared with each other.\n");
	printf("Identical files are reported.\n");
	printf("Does not compare directories or zero length files.\n");
	exit(2);
}