Browse Source

+ main.c

master
Alex 'AdUser' Z 7 years ago
parent
commit
aec57b7730
  1. 322
      src/main.c

322
src/main.c

@ -0,0 +1,322 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor Boston, MA 02110-1301, USA
*/
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ftw.h>
#include <getopt.h>
#include <magic.h>
#include <simdb.h>
#include "filelist.h"
#include "group.h"
/* opts */
/* Current verbosity; log_msg() drops messages whose level is greater than this.
 * Raised by one step (up to debug) for every -v on the command line. */
enum msglevel { normal = 0, verbose, debug } msglevel = normal;
/* Path given on the command line (first non-option argument); main() resolves
 * it with realpath() before scanning. */
const char *root = NULL;
/* vars */
/* libmagic handle: opened in main() with MAGIC_MIME_TYPE, used by
 * ftw_handler() to detect the MIME type of every visited file. */
magic_t mcookie;
/* List of image paths collected by ftw_handler(); created and freed in main(). */
filelist_t *flist;
/* funcs */
/*
 * Print the command-line synopsis to stderr and terminate the process.
 *
 * exitcode: status handed to exit(); this function does not return.
 */
static void
usage(int exitcode) {
  static const char help[] =
    "Usage: simdb-fdupes [path]\n"
    " -h This help\n"
    " -v Verbose messages\n";

  fputs(help, stderr);
  exit(exitcode);
}
/*
 * Write a printf-style message to stderr if its level is enabled.
 *
 * l:   level of this message; the message is dropped when l is greater
 *      than the global msglevel.
 * fmt: printf-style format string, followed by its arguments.
 */
static void
log_msg(enum msglevel l, const char *fmt, ...) {
  if (l <= msglevel) {
    va_list args;

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
  }
}
/*
 * Redraw a one-line text progress bar through log_msg() at verbose level.
 *
 * prefix:  label printed in front of the percentage.
 * width:   number of character cells between the brackets.
 * num:     number of the item currently being processed.
 * total:   total number of items.
 * lastpct: value returned by the previous call; nothing is printed while
 *          the percentage has not changed.
 *
 * Returns the percentage that is now displayed (clamped to 0..100), lastpct
 * when nothing had to be redrawn, or 0 when width or total is not positive.
 */
static int
progressbar(const char *prefix, int width, int num, int total, int lastpct) {
  int pct = 0, fill = 0;

  if (width <= 0 || total <= 0)
    return 0;

  pct = 100 * ((float) num / total);
  /* clamp, so an out-of-range num can never push fill outside the bar */
  if (pct < 0)
    pct = 0;
  if (pct > 100)
    pct = 100;

  if (pct == lastpct)
    return lastpct;

  fill = (pct * width) / 100;

  /* declared only after width was validated: a VLA must have a positive size */
  char bar[width + 1];

  memset(bar, ' ', (size_t) width);
  memset(bar, '=', (size_t) fill);
  bar[fill] = '>';      /* at 100% this cell is the terminator written below */
  bar[width] = '\0';

  log_msg(verbose, "\r* %s: % 3d%% [%s]", prefix, pct, bar);

  return pct;
}
/*
 * ftw() callback: append every visited file whose MIME type starts with
 * "image/" to the global file list (flist).
 *
 * path:     name of the visited entry, as supplied by ftw().
 * sb:       stat data supplied by ftw(); not used.
 * typeflag: FTW_* classification of the entry.
 *
 * Directories are skipped silently; unreadable directories, entries that
 * could not be stat()ed and files whose type cannot be detected are reported
 * on stderr and skipped.
 *
 * Returns 0 to continue the walk, or -1 to stop it when the file could not
 * be added to the list.
 */
static int
ftw_handler(const char *path, const struct stat *sb, int typeflag) {
  const char *mime = NULL;

  (void)(sb); /* unused */

  if (typeflag == FTW_D)
    return 0;

  if (typeflag == FTW_DNR) {
    fprintf(stderr, "! can't read: %s\n", path);
    return 0;
  }

  if (typeflag == FTW_NS) {
    fprintf(stderr, "! can't stat: %s\n", path);
    return 0;
  }

  mime = magic_file(mcookie, path);
  if (mime == NULL) {
    fprintf(stderr, "! can't detect mimetype of file %s\n", path);
    return 0;
  }

  if (strncmp(mime, "image/", 6) != 0)
    return 0; /* not an image */

  log_msg(debug, "~ found image file: %s\n", path);

  if (!filelist_append(flist, path)) {
    /* newline-terminated like every other diagnostic in this file */
    fprintf(stderr, "! can't add file to queue: out of memory\n");
    return -1; /* stop ftw() */
  }

  return 0;
}
/*
 * Add every file of the list to the database, using the file's number in
 * the list (1..list->size) as its record number.
 *
 * list:  files to sample; must not be NULL.
 * simdb: opened database that receives the records; must not be NULL.
 *
 * A file that cannot be added is reported on stderr, and its number is
 * removed from both the database and the list.
 */
static void
make_samples(filelist_t *list, simdb_t *simdb) {
  const char *path;
  int ret = 0, pct = 0;

  assert(list != NULL);
  assert(simdb != NULL);

  for (int num = 1; num <= list->size; num++) {
    path = filelist_get(list, num);
    pct = progressbar("making samples", 50, num, list->size, pct);
    ret = simdb_record_add(simdb, num, path, 0);
    if (ret < 0) {
      fprintf(stderr, "\r! can't add file #%d '%s' -- %s\n", num, path, simdb_error(ret));
      simdb_record_del(simdb, num);
      /* operate on the list that was passed in, not on the global */
      filelist_del(list, num);
    }
  }

  log_msg(verbose, "\n"); /* force newline after progress messages */
}
/*
 * Search the database for similar images and collect them into groups.
 *
 * list:  sampled files, numbered 1..list->size; must not be NULL.
 * simdb: database holding one record per file number; must not be NULL.
 *
 * For every file that is still in the list and not yet grouped, the database
 * is searched by that file's number. The file and its matches join the first
 * group any of the matches already belongs to, or a newly created group.
 *
 * Returns the head of a list of groups linked through ->next (newest group
 * first), or NULL when no group was made or the lookup table could not be
 * allocated. The caller releases the result (see free_groups()).
 */
static group_t *
make_groups(filelist_t *list, simdb_t *simdb) {
  simdb_search_t search;
  group_t *groups = NULL, *group, **map = NULL;
  int pct = 0, inum, gnum = 1; /* next group number */

  assert(list != NULL);
  assert(simdb != NULL);

  /* map[n] is the group image #n belongs to; slot 0 is unused (1-based) */
  if ((map = calloc(list->size + 1, sizeof(*map))) == NULL) {
    fprintf(stderr, "! can't allocate groups map: out-of-memory\n");
    return NULL;
  }

  simdb_search_init(&search);
  /* images are numbered 1..size, so the last one has to be visited as well */
  for (int num = 1; num <= list->size; num++) {
    if (!filelist_get(list, num))
      continue; /* file was not sampled */
    if (map[num])
      continue; /* this image already in some group */

    pct = progressbar("grouping images", 50, num, list->size, pct);
    simdb_search_byid(simdb, &search, num);
    if (search.found <= 0)
      continue; /* nothing similar found in database */

    group = NULL;
    /* try to find existing group */
    for (int i = 0; i < search.found; i++) {
      inum = search.matches[i].num;
      if (map[inum] == NULL)
        continue;
      /* found some group */
      group = map[inum];
      break;
    }

    /* create new group if not found any */
    if (!group) {
      if ((group = group_create(gnum++, 0)) == NULL) {
        fprintf(stderr, "\n! can't create new image group: out-of-memory\n");
        break;
      }
      group->next = groups;
      groups = group;
    }

    group_append(group, num);
    /* remember the membership, so a later match can't put it in another group */
    map[num] = group;

    /* place in map pointer to group for each found image */
    for (int i = 0; i < search.found; i++) {
      inum = search.matches[i].num;
      if (map[inum] == group)
        continue; /* already a member, don't append it a second time */
      group_append(group, inum);
      map[inum] = group;
    }
  }

  log_msg(verbose, "\n"); /* force newline after progress messages */
  simdb_search_free(&search);
  free(map);

  return groups;
}
/*
 * Print the paths of all grouped images to stdout: one path per line, with
 * an empty line after each group.
 *
 * list:   file list used to translate image numbers to paths; must not be NULL.
 * groups: head of the group list (linked through ->next); may be NULL.
 */
static void
print_groups(filelist_t *list, group_t *groups) {
  const char *path = NULL;

  assert(list != NULL);

  for (group_t *group = groups; group != NULL; group = group->next) {
    for (int i = 0; i < group->size; i++) {
      path = filelist_get(list, group->ids[i]);
      /* filelist_get() may yield NULL (make_groups() tests for it);
       * puts(NULL) would be undefined behaviour */
      if (path != NULL)
        puts(path);
    }
    puts(""); /* force newline after group */
  }
}
/*
 * Release every group of a list: group_free() is called on each node and
 * then the node itself is freed.
 *
 * groups: head of the list (linked through ->next); may be NULL.
 */
static void
free_groups(group_t *groups) {
  group_t *following = NULL;

  for (group_t *cur = groups; cur != NULL; cur = following) {
    following = cur->next; /* fetch the link before the node goes away */
    group_free(cur);
    free(cur);
  }
}
/*
 * Entry point: scan the given path for images, sample them into a temporary
 * simdb database, group similar images and print the groups to stdout.
 *
 * Options: -h prints the usage text, -v raises verbosity (may be repeated).
 * The first non-option argument is the path to scan.
 *
 * Returns EXIT_SUCCESS after a complete run, EXIT_FAILURE on any error.
 */
int main(int argc, char **argv) {
  simdb_t *simdb = NULL;
  group_t *groups = NULL;
  char tempdb[] = "/tmp/simdb-XXXXXX";
  char path[PATH_MAX] = "";
  int opt = -1, ret = 0, tempfd = -1;

  if (argc <= 1)
    usage(EXIT_FAILURE);

  while ((opt = getopt(argc, argv, "hv")) != -1) {
    switch (opt) {
      case 'v':
        if (msglevel < debug)
          msglevel++;
        break;
      case 'h':
        usage(EXIT_SUCCESS);
        break;
      default :
        usage(EXIT_FAILURE);
        break;
    }
  }

  if (optind < argc) {
    root = argv[optind];
  } else {
    usage(EXIT_FAILURE);
  }

  /* resolve root path */
  if (realpath(root, path) == NULL) {
    perror("Can't resolve given path");
    exit(EXIT_FAILURE);
  }

  /* load magic database */
  if ((mcookie = magic_open(MAGIC_MIME_TYPE)) == NULL) {
    perror("can't open magic database");
    return EXIT_FAILURE;
  }
  if (magic_load(mcookie, NULL) < 0) {
    fprintf(stderr, "! can't load magic database: %s\n", magic_error(mcookie));
    return EXIT_FAILURE;
  }

  /* make images filelist */
  if ((flist = filelist_create(1000)) == NULL) {
    fprintf(stderr, "! can't create filelist struct: out-of-memory?\n");
    return EXIT_FAILURE;
  }

  log_msg(verbose, "* scanning for images\n");
  if (ftw(path, &ftw_handler, 20) < 0) {
    fprintf(stderr, "! ftw() error, aborting\n");
    return EXIT_FAILURE;
  } else {
    log_msg(verbose, "* found %d images after initial scan\n", flist->size);
  }
  magic_close(mcookie);

  /* pick a unique name for the temporary database */
  if ((tempfd = mkstemp(tempdb)) < 0) {
    perror("can't create temporary file");
    return EXIT_FAILURE;
  }
  close(tempfd); /* only the name is needed, don't leak the descriptor */
  /* NOTE(review): the file is removed again before simdb_create(), presumably
   * because it expects a not yet existing path -- confirm against simdb */
  unlink(tempdb);

  if (!simdb_create(tempdb)) {
    fprintf(stderr, "! can't create temporary simdb\n");
    unlink(tempdb); /* don't leave a partially created file behind */
    return EXIT_FAILURE;
  }
  if ((simdb = simdb_open(tempdb, SIMDB_FLAG_WRITE | SIMDB_FLAG_LOCKNB, &ret)) == NULL) {
    fprintf(stderr, "! can't open temporary simdb: %s\n", simdb_error(ret));
    unlink(tempdb); /* the database file was already created above */
    return EXIT_FAILURE;
  }

  make_samples(flist, simdb);
  groups = make_groups(flist, simdb);
  if (groups) {
    log_msg(verbose, "* found image groups:\n");
    print_groups(flist, groups);
    free_groups(groups);
  }

  simdb_close(simdb);
  unlink(tempdb);
  filelist_free(flist);
  free(flist);

  return EXIT_SUCCESS;
}
Loading…
Cancel
Save