/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor Boston, MA 02110-1301, USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "filelist.h" #include "group.h" /* opts */ enum msglevel { normal = 0, verbose, debug } msglevel = normal; const char *root = NULL; /* vars */ magic_t mcookie; filelist_t *flist; /* funcs */ static void usage(int exitcode) { fprintf(stderr, "Usage: simdb-fdupes [path]\n" " -h This help\n" " -d Max difference in images (in percents: 0-50)\n" " -v Verbose messages\n" ); exit(exitcode); } static void log_msg(enum msglevel l, const char *fmt, ...) { va_list ap; if (l > msglevel) return; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); } static int progressbar(const char *prefix, int width, int num, int total, int lastpct) { char bar[width + 1]; int pct = 0, fill = 0; if (width <= 0 || total <= 0) return 0; pct = 100 * ((float) num / total); if (pct == lastpct) return lastpct; if (pct == 100) { lastpct = pct; } fill = (int) ((pct * width) / 100); memset(bar, ' ', sizeof(char) * width); memset(bar, '=', sizeof(char) * fill); bar[fill] = '>'; bar[width] = '\0'; log_msg(verbose, "\r* %s: % 3d%% [%s]", prefix, pct, bar); return pct; } static int ftw_handler(const char *path, const struct stat *sb, int typeflag) { const char *mime = NULL; (void)(sb); /* unused */ if (typeflag == FTW_D) return 0; if (typeflag == FTW_DNR) { fprintf(stderr, "! can't read: %s\n", path); return 0; } if (typeflag == FTW_NS) { fprintf(stderr, "! can't stat: %s\n", path); return 0; } mime = magic_file(mcookie, path); if (mime == NULL) { fprintf(stderr, "! can't detect mimetype of file %s\n", path); return 0; } if (strncmp(mime, "image/", 6) != 0) return 0; /* not an image */ log_msg(debug, "~ found image file: %s\n", path); if (!filelist_append(flist, path)) { fprintf(stderr, "! can't add file to queue: out of memory"); return -1; /* stop ftw() */ } return 0; } static void make_samples(filelist_t *list, simdb_t *simdb) { const char *path; int ret = 0, pct = 0; assert(list != NULL); assert(simdb != NULL); for (int num = 1; num <= list->size; num++) { path = filelist_get(list, num); pct = progressbar("making samples", 50, num, list->size, pct); ret = simdb_record_add(simdb, num, path, 0); if (ret < 0) { fprintf(stderr, "\r! can't add file #%d '%s' -- %s\n", num, path, simdb_error(ret)); simdb_record_del(simdb, num); filelist_del(flist, num); } } log_msg(verbose, "\n"); /* force newline after progress messages */ return; } static group_t * make_groups(filelist_t *list, simdb_t *simdb, int maxdiff) { simdb_search_t search; group_t *groups = NULL, *group, **map = NULL; int pct = 0, inum, gnum = 1; /* next group number */ assert(list != NULL); assert(simdb != NULL); if ((map = calloc(flist->size + 1, sizeof(group_t *))) == NULL) { fprintf(stderr, "! can't allocate groups map: out-of-memory\n"); return NULL; } simdb_search_init(&search); search.d_ratio = maxdiff / (float) 100; search.d_bitmap = maxdiff / (float) 100; for (int num = 1; num < list->size; num++) { if (!filelist_get(flist, num)) continue; /* file was not sampled */ if (map[num]) continue; /* this image already in some group */ pct = progressbar("grouping images", 50, num, list->size, pct); simdb_search_byid(simdb, &search, num); if (search.found <= 0) continue; /* nothing similar found in database */ group = NULL; /* try to find existing group */ for (int i = 0; i < search.found; i++) { inum = search.matches[i].num; if (map[inum] == NULL) continue; /* found some group */ group = map[inum]; break; } /* create new group if not found any */ if (!group) { if ((group = group_create(gnum++, 0)) == NULL) { fprintf(stderr, "\n! can't create new image group: out-of-memory\n"); break; } group->next = groups; groups = group; } group_append(group, num); /* place in map pointer to group for each found image */ for (int i = 0; i < search.found; i++) { inum = search.matches[i].num; group_append(group, inum); map[inum] = group; } } log_msg(verbose, "\n"); /* force newline after progress messages */ simdb_search_free(&search); free(map); return groups; } static void print_groups(filelist_t *list, group_t *groups) { int inum = 0; assert(list != NULL); for (group_t *group = groups; group != NULL; group = group->next) { for (int i = 0; i < group->size; i++) { inum = group->ids[i]; puts(filelist_get(list, inum)); } puts(""); /* force newline after group */ } } static void free_groups(group_t *groups) { group_t *group = groups, *next = NULL; while (group != NULL) { next = group->next; group_free(group); free(group); group = next; } } int main(int argc, char **argv) { simdb_t *simdb = NULL; group_t *groups = NULL; char tempdb[] = "/tmp/simdb-XXXXXX"; char path[PATH_MAX] = ""; int opt = -1, ret = 0, maxdiff = 7; if (argc <= 1) usage(EXIT_FAILURE); while ((opt = getopt(argc, argv, "hd:v")) != -1) { switch (opt) { case 'v': if (msglevel < debug) msglevel++; break; case 'd': maxdiff = atoi(optarg); break; case 'h': usage(EXIT_SUCCESS); break; default : usage(EXIT_FAILURE); break; } } if (optind < argc) { root = argv[optind]; } else { usage(EXIT_FAILURE); } if (maxdiff < 0 || maxdiff > 50) { fprintf(stderr, "! '-d' option should be in range [0, 50]\n"); return EXIT_FAILURE; } /* resolve root path */ if (realpath(root, path) == NULL) { perror("Can't resolve given path"); exit(EXIT_FAILURE); } /* load magic database */ if ((mcookie = magic_open(MAGIC_MIME_TYPE)) == NULL) { perror("can't open magic database"); return EXIT_FAILURE; } if (magic_load(mcookie, NULL) < 0) { fprintf(stderr, "! can't load magic database: %s\n", magic_error(mcookie)); return EXIT_FAILURE; } /* make images filelist */ if ((flist = filelist_create(1000)) == NULL) { fprintf(stderr, "! can't create filelist struct: out-of-memory?\n"); return EXIT_FAILURE; } log_msg(verbose, "* scanning for images\n"); if (ftw(path, &ftw_handler, 20) < 0) { fprintf(stderr, "! ftw() error, aborting\n"); return EXIT_FAILURE; } else { log_msg(verbose, "* found %d images after initial scan\n", flist->size); } magic_close(mcookie); mkstemp(tempdb); unlink(tempdb); if (!simdb_create(tempdb)) { fprintf(stderr, "! can't create temporary simdb\n"); return EXIT_FAILURE; } if ((simdb = simdb_open(tempdb, SIMDB_FLAG_WRITE | SIMDB_FLAG_LOCKNB, &ret)) == NULL) { fprintf(stderr, "! can't open temporary simdb: %s\n", simdb_error(ret)); return EXIT_FAILURE; } make_samples(flist, simdb); groups = make_groups(flist, simdb, maxdiff); if (groups) { log_msg(verbose, "* found image groups:\n"); print_groups(flist, groups); free_groups(groups); } simdb_close(simdb); unlink(tempdb); filelist_free(flist); free(flist); return EXIT_SUCCESS; }