Browse Source

* split simdb_search into two functions that share the same code:

* simdb_search_byid(db, search, num)
  * simdb_search_file(db, search, path)
* move pointer to found matches inside simdb_search_t
* fix search without limit (possible OOM)
master
Alex 'AdUser' Z 8 years ago
parent
commit
05fbfde344
  1. 110
      src/database.c
  2. 15
      src/simdb-query.c
  3. 40
      src/simdb.h

110
src/database.c

@ -323,46 +323,42 @@ simdb_record_ratio(simdb_urec_t *r) {
} }
int int
simdb_search(simdb_t * const db, int num, simdb_search(simdb_t *db, simdb_search_t *search, simdb_urec_t *sample) {
simdb_search_t * const search, simdb_match_t *matches;
simdb_match_t **matches)
{
simdb_match_t match; simdb_match_t match;
simdb_urec_t *data = NULL; simdb_urec_t *rec, *data = NULL;
simdb_urec_t *rec, sample;
const int blksize = 4096; const int blksize = 4096;
uint64_t found = 0;
float ratio_s = 0.0; /* source */ float ratio_s = 0.0; /* source */
float ratio_t = 0.0; /* tested */ float ratio_t = 0.0; /* tested */
int ret = 0; int ret = 0, found = 0, capacity = 16;
assert(db != NULL); assert(db != NULL);
assert(search != NULL); assert(search != NULL);
assert(matches != NULL);
assert(search->maxdiff_ratio >= 0.0 && search->maxdiff_ratio <= 1.0);
assert(search->maxdiff_bitmap >= 0.0 && search->maxdiff_bitmap <= 1.0);
memset(&match, 0x0, sizeof(simdb_match_t));
if ((ret = simdb_read(db, num, 1, &rec)) < 1) if (search->maxdiff_ratio < 0.0 && search->maxdiff_ratio > 1.0)
return ret; return SIMDB_ERR_USAGE;
if (search->maxdiff_bitmap < 0.0 && search->maxdiff_bitmap > 1.0)
return SIMDB_ERR_USAGE;
memcpy(&sample, rec, sizeof(sample)); memset(&match, 0x0, sizeof(simdb_match_t));
FREE(rec);
if (search->limit == 0) if (search->limit == 0)
search->limit = -1; /* unsigned -> max */ search->limit = INT_MAX;
if (search->maxdiff_ratio > 0.0) if (search->maxdiff_ratio > 0.0)
ratio_s = simdb_record_ratio(&sample); ratio_s = simdb_record_ratio(sample);
if ((*matches = calloc(search->limit, sizeof(simdb_match_t))) == NULL) if ((matches = calloc(capacity, sizeof(simdb_match_t))) == NULL)
return SIMDB_ERR_OOM; return SIMDB_ERR_OOM;
for (num = 1; ; num += blksize) { for (int num = 1; ; num += blksize) {
ret = simdb_read(db, num, blksize, &data); ret = simdb_read(db, num, blksize, &data);
if (ret < 0) if (ret == 0)
return ret; break; /* end of records */
if (ret < 0) {
FREE(matches);
return ret; /* error */
}
rec = data; rec = data;
for (int i = 0; i < ret; i++, rec++) { for (int i = 0; i < ret; i++, rec++) {
if (!rec->used) if (!rec->used)
@ -381,15 +377,26 @@ simdb_search(simdb_t * const db, int num,
} else { } else {
/* either source or target ratio not set, can't compare, skip test */ /* either source or target ratio not set, can't compare, skip test */
} }
/* - compare bitmap - more expensive */ /* - compare bitmap - more expensive */
match.diff_bitmap = simdb_bitmap_compare(rec->bitmap, sample.bitmap) / SIMDB_BITMAP_BITS; match.diff_bitmap = simdb_bitmap_compare(rec->bitmap, sample->bitmap) / SIMDB_BITMAP_BITS;
if (match.diff_bitmap > search->maxdiff_bitmap) if (match.diff_bitmap > search->maxdiff_bitmap)
continue; continue;
/* whoa! a match found */
/* create match record */ /* allocate more memory for results array if needed */
if (found == capacity) {
simdb_match_t *tmp = NULL;
capacity *= 2;
if ((tmp = realloc(matches, capacity)) == NULL) {
/* fuck! */
FREE(matches);
FREE(data);
return SIMDB_ERR_OOM;
}
matches = tmp; /* successfully relocated */
}
/* copy match to results array */
match.num = num + i; match.num = num + i;
memcpy(&(*matches)[found], &match, sizeof(simdb_match_t)); memcpy(&matches[found], &match, sizeof(simdb_match_t));
found++; found++;
if (found >= search->limit) if (found >= search->limit)
break; break;
@ -399,9 +406,56 @@ simdb_search(simdb_t * const db, int num,
break; break;
} }
if (found) {
search->found = found;
search->matches = matches;
} else {
FREE(matches);
}
return found; return found;
} }
/*
 * Search the database using an already stored record as the sample.
 *
 * db     - database handle, must not be NULL (asserted)
 * search - search parameters / result holder, must not be NULL (asserted)
 * num    - number of the record to use as the sample, must be positive
 *
 * Returns SIMDB_ERR_USAGE when num is not positive, the unchanged result of
 * simdb_read() when it yields less than one record, and otherwise whatever
 * simdb_search() returns for the loaded sample.
 */
int
simdb_search_byid(simdb_t *db, simdb_search_t *search, int num) {
  simdb_urec_t *sample;
  int rc;

  assert(db != NULL);
  assert(search != NULL);

  if (num < 1)
    return SIMDB_ERR_USAGE;

  /* load exactly one record: the sample itself */
  rc = simdb_read(db, num, 1, &sample);
  if (rc < 1)
    return rc; /* zero or negative: no sample to compare against */

  rc = simdb_search(db, search, sample);

  /* the sample buffer came from simdb_read() and is no longer needed */
  FREE(sample);

  return rc;
}
/*
 * Search the database using a record built from a file as the sample.
 *
 * db     - database handle, must not be NULL (asserted)
 * search - search parameters / result holder, must not be NULL (asserted)
 * path   - path handed to simdb_record_create() to build the sample
 *
 * Returns SIMDB_ERR_USAGE when path is NULL, SIMDB_ERR_SAMPLER when
 * simdb_record_create() returns NULL, and otherwise whatever simdb_search()
 * returns for the created sample.
 */
int
simdb_search_file(simdb_t *db, simdb_search_t *search, const char *path) {
  simdb_urec_t *sample = NULL;
  int rc = 0;

  assert(db != NULL);
  assert(search != NULL);

  if (path == NULL)
    return SIMDB_ERR_USAGE;

  sample = simdb_record_create(path);
  if (sample == NULL)
    return SIMDB_ERR_SAMPLER;

  rc = simdb_search(db, search, sample);

  /* the sample record is only needed for the duration of the search */
  FREE(sample);

  return rc;
}
int int
simdb_usage_map(simdb_t * const db, char ** const map) { simdb_usage_map(simdb_t * const db, char ** const map) {
const int blksize = 4096; const int blksize = 4096;

15
src/simdb-query.c

@ -44,26 +44,27 @@ void usage(int exitcode) {
int search_similar(simdb_t *db, int num, float maxdiff) { int search_similar(simdb_t *db, int num, float maxdiff) {
int ret = 0, i = 0; int ret = 0, i = 0;
simdb_match_t *matches = NULL;
simdb_search_t search; simdb_search_t search;
memset(&search, 0x0, sizeof(simdb_search_t)); memset(&search, 0x0, sizeof(simdb_search_t));
search.maxdiff_ratio = 0.2; /* 20% */ search.maxdiff_ratio = 0.2; /* 20% */
search.maxdiff_bitmap = maxdiff; search.maxdiff_bitmap = maxdiff;
if ((ret = simdb_search(db, num, &search, &matches)) < 0) { if ((ret = simdb_search_byid(db, &search, num)) < 0) {
fprintf(stderr, "%s\n", simdb_error(ret)); fprintf(stderr, "%s\n", simdb_error(ret));
return 1; return 1;
} }
for (i = 0; i < ret; i++) { for (i = 0; i < search.found; i++) {
printf("%llu -- %.1f (bitmap), %.1f (ratio)\n", printf("%llu -- %.1f (bitmap), %.1f (ratio)\n",
matches[i].num, search.matches[i].num,
matches[i].diff_bitmap * 100, search.matches[i].diff_bitmap * 100,
matches[i].diff_ratio * 100); search.matches[i].diff_ratio * 100);
} }
FREE(matches); if (search.found > 0)
FREE(search.matches);
return 0; return 0;
} }

40
src/simdb.h

@ -48,16 +48,6 @@
/** opaque database handler */ /** opaque database handler */
typedef struct _simdb_t simdb_t; typedef struct _simdb_t simdb_t;
/**
* search parameters
* maxdiff_* fields should have value from 0.0 to 1.0 (0% - 100%)
*/
typedef struct {
uint8_t limit; /**< max results */
float maxdiff_bitmap; /**< max difference of luma bitmaps */
float maxdiff_ratio; /**< max difference of ratios, default - 7% */
} simdb_search_t;
/** /**
* search matches * search matches
*/ */
@ -67,6 +57,18 @@ typedef struct {
float diff_bitmap; /**< difference of bitmap */ float diff_bitmap; /**< difference of bitmap */
} simdb_match_t; } simdb_match_t;
/**
* search parameters and results
* maxdiff_* fields should have value from 0.0 to 1.0 (0% - 100%)
* 'found' and 'matches' are written by the search only when at least one
* match was found; otherwise the search leaves both fields untouched
*/
typedef struct {
float maxdiff_bitmap; /**< max difference of luma bitmaps */
float maxdiff_ratio; /**< max difference of ratios, default - 7% */
int limit; /**< max results; 0 is replaced with INT_MAX by the search */
int found; /**< count of found results */
simdb_match_t *matches; /**< array of found matches, allocated by the search; the caller in simdb-query.c releases it with FREE() */
} simdb_search_t;
/** /**
* @brief Creates empty database at given path * @brief Creates empty database at given path
* @param path Path to database * @param path Path to database
@ -99,7 +101,7 @@ void simdb_close(simdb_t *db);
const char * simdb_error(int code); const char * simdb_error(int code);
/** /**
* @brief Search compare given record in database to other images * @brief Compare given record in database to other records
* @param db Database handle * @param db Database handle
* @param num Record sample number * @param num Record sample number
* @param search Search parameters * @param search Search parameters
@ -108,9 +110,19 @@ const char * simdb_error(int code);
* @retval 0 if nothing found * @retval 0 if nothing found
* @retval <0 on error * @retval <0 on error
*/ */
int simdb_search(simdb_t * const db, int num, int simdb_search_byid(simdb_t *db, simdb_search_t *search, int num);
simdb_search_t * const search,
simdb_match_t ** matches); /**
* @brief Compare given file against other records in database
* @param db Database handle
* @param file Path to file to compare against database
* @param search Search parameters
* @note Found matches are stored in search->matches (allocated, the caller must free them)
* @retval >0 if found some matches
* @retval 0 if nothing found
* @retval <0 on error
*/
int simdb_search_file(simdb_t *db, simdb_search_t *search, const char *file);
/** /**
* @brief Checks is record with given number is used * @brief Checks is record with given number is used

Loading…
Cancel
Save