Alex 'AdUser' Z
8 years ago
1 changed files with 322 additions and 0 deletions
@ -0,0 +1,322 @@
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify |
||||
* it under the terms of the GNU General Public License as published by |
||||
* the Free Software Foundation; either version 2 of the License, or |
||||
* (at your option) any later version. |
||||
* |
||||
* This program is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
* GNU General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU General Public License |
||||
* along with this program; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA |
||||
*/ |
||||
|
||||
#include <assert.h> |
||||
#include <errno.h> |
||||
#include <limits.h> |
||||
#include <stdarg.h> |
||||
#include <stdbool.h> |
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
#include <string.h> |
||||
#include <unistd.h> |
||||
|
||||
#include <ftw.h> |
||||
#include <getopt.h> |
||||
|
||||
#include <magic.h> |
||||
#include <simdb.h> |
||||
|
||||
#include "filelist.h" |
||||
#include "group.h" |
||||
|
||||
/* opts */ |
||||
enum msglevel { normal = 0, verbose, debug } msglevel = normal; |
||||
const char *root = NULL; |
||||
|
||||
/* vars */ |
||||
magic_t mcookie; |
||||
filelist_t *flist; |
||||
|
||||
/* funcs */ |
||||
/*
 * Write the command-line synopsis to stderr and terminate the process.
 *
 * exitcode -- status handed to exit(); this function does not return.
 */
static void
usage(int exitcode) {
  static const char help[] =
    "Usage: simdb-fdupes [path]\n"
    " -h This help\n"
    " -v Verbose messages\n";

  fputs(help, stderr);
  exit(exitcode);
}
||||
|
||||
static void |
||||
log_msg(enum msglevel l, const char *fmt, ...) { |
||||
va_list ap; |
||||
|
||||
if (l > msglevel) |
||||
return; |
||||
|
||||
va_start(ap, fmt); |
||||
vfprintf(stderr, fmt, ap); |
||||
va_end(ap); |
||||
} |
||||
|
||||
static int |
||||
progressbar(const char *prefix, int width, int num, int total, int lastpct) { |
||||
char bar[width + 1]; |
||||
int pct = 0, fill = 0; |
||||
|
||||
if (width <= 0 || total <= 0) |
||||
return 0; |
||||
|
||||
pct = 100 * ((float) num / total); |
||||
if (pct == lastpct) |
||||
return lastpct; |
||||
|
||||
if (pct == 100) { |
||||
lastpct = pct; |
||||
} |
||||
|
||||
fill = (int) ((pct * width) / 100); |
||||
memset(bar, ' ', sizeof(char) * width); |
||||
memset(bar, '=', sizeof(char) * fill); |
||||
bar[fill] = '>'; |
||||
bar[width] = '\0'; |
||||
log_msg(verbose, "\r* %s: % 3d%% [%s]", prefix, pct, bar); |
||||
|
||||
return pct; |
||||
} |
||||
|
||||
static int |
||||
ftw_handler(const char *path, const struct stat *sb, int typeflag) { |
||||
const char *mime = NULL; |
||||
(void)(sb); /* unused */ |
||||
if (typeflag == FTW_D) |
||||
return 0; |
||||
if (typeflag == FTW_DNR) { |
||||
fprintf(stderr, "! can't read: %s\n", path); |
||||
return 0; |
||||
} |
||||
if (typeflag == FTW_NS) { |
||||
fprintf(stderr, "! can't stat: %s\n", path); |
||||
return 0; |
||||
} |
||||
mime = magic_file(mcookie, path); |
||||
if (mime == NULL) { |
||||
fprintf(stderr, "! can't detect mimetype of file %s\n", path); |
||||
return 0; |
||||
} |
||||
if (strncmp(mime, "image/", 6) != 0) |
||||
return 0; /* not an image */ |
||||
log_msg(debug, "~ found image file: %s\n", path); |
||||
if (!filelist_append(flist, path)) { |
||||
fprintf(stderr, "! can't add file to queue: out of memory"); |
||||
return -1; /* stop ftw() */ |
||||
} |
||||
return 0; |
||||
} |
||||
|
||||
static void |
||||
make_samples(filelist_t *list, simdb_t *simdb) { |
||||
const char *path; |
||||
int ret = 0, pct = 0; |
||||
|
||||
assert(list != NULL); |
||||
assert(simdb != NULL); |
||||
|
||||
for (int num = 1; num <= list->size; num++) { |
||||
path = filelist_get(list, num); |
||||
pct = progressbar("making samples", 50, num, list->size, pct); |
||||
ret = simdb_record_add(simdb, num, path, 0); |
||||
if (ret < 0) { |
||||
fprintf(stderr, "\r! can't add file #%d '%s' -- %s\n", num, path, simdb_error(ret)); |
||||
simdb_record_del(simdb, num); |
||||
filelist_del(flist, num); |
||||
} |
||||
} |
||||
log_msg(verbose, "\n"); /* force newline after progress messages */ |
||||
|
||||
return; |
||||
} |
||||
|
||||
static group_t * |
||||
make_groups(filelist_t *list, simdb_t *simdb) { |
||||
simdb_search_t search; |
||||
group_t *groups = NULL, *group, **map = NULL; |
||||
int pct = 0, inum, gnum = 1; /* next group number */ |
||||
|
||||
assert(list != NULL); |
||||
assert(simdb != NULL); |
||||
|
||||
if ((map = calloc(flist->size + 1, sizeof(group_t *))) == NULL) { |
||||
fprintf(stderr, "! can't allocate groups map: out-of-memory\n"); |
||||
return NULL; |
||||
} |
||||
|
||||
simdb_search_init(&search); |
||||
for (int num = 1; num < list->size; num++) { |
||||
if (!filelist_get(flist, num)) |
||||
continue; /* file was not sampled */ |
||||
if (map[num]) |
||||
continue; /* this image already in some group */ |
||||
pct = progressbar("grouping images", 50, num, list->size, pct); |
||||
simdb_search_byid(simdb, &search, num); |
||||
if (search.found <= 0) |
||||
continue; /* nothing similar found in database */ |
||||
group = NULL; |
||||
/* try to find existing group */ |
||||
for (int i = 0; i < search.found; i++) { |
||||
inum = search.matches[i].num; |
||||
if (map[inum] == NULL) |
||||
continue; |
||||
/* found some group */ |
||||
group = map[inum]; |
||||
break; |
||||
} |
||||
/* create new group if not found any */ |
||||
if (!group) { |
||||
if ((group = group_create(gnum++, 0)) == NULL) { |
||||
fprintf(stderr, "\n! can't create new image group: out-of-memory\n"); |
||||
break; |
||||
} |
||||
group->next = groups; |
||||
groups = group; |
||||
} |
||||
group_append(group, num); |
||||
/* place in map pointer to group for each found image */ |
||||
for (int i = 0; i < search.found; i++) { |
||||
inum = search.matches[i].num; |
||||
group_append(group, inum); |
||||
map[inum] = group; |
||||
} |
||||
} |
||||
log_msg(verbose, "\n"); /* force newline after progress messages */ |
||||
|
||||
simdb_search_free(&search); |
||||
free(map); |
||||
|
||||
return groups; |
||||
} |
||||
|
||||
static void |
||||
print_groups(filelist_t *list, group_t *groups) { |
||||
int inum = 0; |
||||
|
||||
assert(list != NULL); |
||||
|
||||
for (group_t *group = groups; group != NULL; group = group->next) { |
||||
for (int i = 0; i < group->size; i++) { |
||||
inum = group->ids[i]; |
||||
puts(filelist_get(list, inum)); |
||||
} |
||||
puts(""); /* force newline after group */ |
||||
} |
||||
} |
||||
|
||||
static void |
||||
free_groups(group_t *groups) { |
||||
group_t *group = groups, *next = NULL; |
||||
|
||||
while (group != NULL) { |
||||
next = group->next; |
||||
group_free(group); |
||||
free(group); |
||||
group = next; |
||||
} |
||||
} |
||||
|
||||
int main(int argc, char **argv) { |
||||
simdb_t *simdb = NULL; |
||||
group_t *groups = NULL; |
||||
char tempdb[] = "/tmp/simdb-XXXXXX"; |
||||
char path[PATH_MAX] = ""; |
||||
int opt = -1, ret = 0; |
||||
|
||||
if (argc <= 1) |
||||
usage(EXIT_FAILURE); |
||||
|
||||
while ((opt = getopt(argc, argv, "hv")) != -1) { |
||||
switch (opt) { |
||||
case 'v': |
||||
if (msglevel < debug) |
||||
msglevel++; |
||||
break; |
||||
case 'h': |
||||
usage(EXIT_SUCCESS); |
||||
break; |
||||
default : |
||||
usage(EXIT_FAILURE); |
||||
break; |
||||
} |
||||
} |
||||
|
||||
if (optind < argc) { |
||||
root = argv[optind]; |
||||
} else { |
||||
usage(EXIT_FAILURE); |
||||
} |
||||
|
||||
/* resolve root path */ |
||||
if (realpath(root, path) == NULL) { |
||||
perror("Can't resolve given path"); |
||||
exit(EXIT_FAILURE); |
||||
} |
||||
|
||||
/* load magic database */ |
||||
if ((mcookie = magic_open(MAGIC_MIME_TYPE)) == NULL) { |
||||
perror("can't open magic database"); |
||||
return EXIT_FAILURE; |
||||
} |
||||
if (magic_load(mcookie, NULL) < 0) { |
||||
fprintf(stderr, "! can't load magic database: %s\n", magic_error(mcookie)); |
||||
return EXIT_FAILURE; |
||||
} |
||||
|
||||
/* make images filelist */ |
||||
if ((flist = filelist_create(1000)) == NULL) { |
||||
fprintf(stderr, "! can't create filelist struct: out-of-memory?\n"); |
||||
return EXIT_FAILURE; |
||||
} |
||||
log_msg(verbose, "* scanning for images\n"); |
||||
if (ftw(path, &ftw_handler, 20) < 0) { |
||||
fprintf(stderr, "! ftw() error, aborting\n"); |
||||
return EXIT_FAILURE; |
||||
} else { |
||||
log_msg(verbose, "* found %d images after initial scan\n", flist->size); |
||||
} |
||||
magic_close(mcookie); |
||||
|
||||
mkstemp(tempdb); |
||||
unlink(tempdb); |
||||
|
||||
if (!simdb_create(tempdb)) { |
||||
fprintf(stderr, "! can't create temporary simdb\n"); |
||||
return EXIT_FAILURE; |
||||
} |
||||
|
||||
if ((simdb = simdb_open(tempdb, SIMDB_FLAG_WRITE | SIMDB_FLAG_LOCKNB, &ret)) == NULL) { |
||||
fprintf(stderr, "! can't open temporary simdb: %s\n", simdb_error(ret)); |
||||
return EXIT_FAILURE; |
||||
} |
||||
|
||||
make_samples(flist, simdb); |
||||
|
||||
groups = make_groups(flist, simdb); |
||||
|
||||
if (groups) { |
||||
log_msg(verbose, "* found image groups:\n"); |
||||
print_groups(flist, groups); |
||||
free_groups(groups); |
||||
} |
||||
|
||||
simdb_close(simdb); |
||||
unlink(tempdb); |
||||
|
||||
filelist_free(flist); |
||||
free(flist); |
||||
|
||||
return EXIT_SUCCESS; |
||||
} |
Loading…
Reference in new issue