From 05fbfde34432dc131e7fc3df4101c38ba69c6561 Mon Sep 17 00:00:00 2001
From: Alex 'AdUser' Z
Date: Tue, 10 Jan 2017 22:58:44 +1000
Subject: [PATCH] * split simdb_search by two functions, sharing the same code:
 * simdb_search_byid(db, search, num)
 * simdb_search_file(db, search, path)
 * move pointer to found matches inside simdb_search_t
 * fix search without limit (possible OOM)

---
 src/database.c    | 110 ++++++++++++++++++++++++++++++++++------------
 src/simdb-query.c |  15 ++++---
 src/simdb.h       |  40 +++++++++++------
 3 files changed, 116 insertions(+), 49 deletions(-)

diff --git a/src/database.c b/src/database.c
index dbc0c6c..22e36da 100644
--- a/src/database.c
+++ b/src/database.c
@@ -323,46 +323,42 @@ simdb_record_ratio(simdb_urec_t *r) {
 }
 
 int
-simdb_search(simdb_t * const db, int num,
-             simdb_search_t * const search,
-             simdb_match_t **matches)
-{
+simdb_search(simdb_t *db, simdb_search_t *search, simdb_urec_t *sample) {
+  simdb_match_t *matches; /* growable results array, handed over to search->matches */
   simdb_match_t match;
-  simdb_urec_t *data = NULL;
-  simdb_urec_t *rec, sample;
+  simdb_urec_t *rec, *data = NULL;
   const int blksize = 4096;
-  uint64_t found = 0;
   float ratio_s = 0.0; /* source */
   float ratio_t = 0.0; /* tested */
-  int ret = 0;
+  int ret = 0, found = 0, capacity = 16; /* capacity: slots allocated in matches[] */
 
   assert(db != NULL);
   assert(search != NULL);
-  assert(matches != NULL);
-  assert(search->maxdiff_ratio >= 0.0 && search->maxdiff_ratio <= 1.0);
-  assert(search->maxdiff_bitmap >= 0.0 && search->maxdiff_bitmap <= 1.0);
-
-  memset(&match, 0x0, sizeof(simdb_match_t));
 
-  if ((ret = simdb_read(db, num, 1, &rec)) < 1)
-    return ret;
+  if (search->maxdiff_ratio < 0.0 || search->maxdiff_ratio > 1.0)
+    return SIMDB_ERR_USAGE; /* ratio threshold must be within 0.0 .. 1.0 */
+  if (search->maxdiff_bitmap < 0.0 || search->maxdiff_bitmap > 1.0)
+    return SIMDB_ERR_USAGE; /* bitmap threshold must be within 0.0 .. 1.0 */
 
-  memcpy(&sample, rec, sizeof(sample));
-  FREE(rec);
+  memset(&match, 0x0, sizeof(simdb_match_t));
 
   if (search->limit == 0)
-    search->limit = -1; /* unsigned -> max */
+    search->limit = INT_MAX; /* zero means "no limit" */
 
   if (search->maxdiff_ratio > 0.0)
-    ratio_s = simdb_record_ratio(&sample);
+    ratio_s = simdb_record_ratio(sample);
 
-  if ((*matches = calloc(search->limit, sizeof(simdb_match_t))) == NULL)
+  if ((matches = calloc(capacity, sizeof(simdb_match_t))) == NULL)
     return SIMDB_ERR_OOM;
 
-  for (num = 1; ; num += blksize) {
+  for (int num = 1; ; num += blksize) {
     ret = simdb_read(db, num, blksize, &data);
-    if (ret < 0)
-      return ret;
+    if (ret == 0)
+      break; /* end of records */
+    if (ret < 0) {
+      FREE(matches);
+      return ret; /* error */
+    }
     rec = data;
     for (int i = 0; i < ret; i++, rec++) {
       if (!rec->used)
@@ -381,15 +377,26 @@ simdb_search(simdb_t * const db, int num,
       } else {
         /* either source or target ratio not set, can't compare, skip test */
       }
-
       /* - compare bitmap - more expensive */
-      match.diff_bitmap = simdb_bitmap_compare(rec->bitmap, sample.bitmap) / SIMDB_BITMAP_BITS;
+      match.diff_bitmap = simdb_bitmap_compare(rec->bitmap, sample->bitmap) / SIMDB_BITMAP_BITS;
       if (match.diff_bitmap > search->maxdiff_bitmap)
         continue;
-
-      /* create match record */
+      /* record passed every enabled filter: it is a match */
+      /* allocate more memory for results array if needed */
+      if (found == capacity) {
+        simdb_match_t *tmp = NULL;
+        capacity *= 2;
+        if ((tmp = realloc(matches, capacity * sizeof(simdb_match_t))) == NULL) {
+          /* out of memory: the old array is still valid, release it and the block */
+          FREE(matches);
+          FREE(data);
+          return SIMDB_ERR_OOM;
+        }
+        matches = tmp; /* successfully relocated */
+      }
+      /* copy match to results array */
       match.num = num + i;
-      memcpy(&(*matches)[found], &match, sizeof(simdb_match_t));
+      memcpy(&matches[found], &match, sizeof(simdb_match_t));
       found++;
       if (found >= search->limit)
         break;
@@ -399,9 +406,56 @@ simdb_search(simdb_t * const db, int num,
       break;
   }
 
+  if (found == 0) {
+    FREE(matches);
+    matches = NULL; /* never hand a dangling pointer to the caller */
+  }
+  search->found = found; /* always refreshed, so a reused search holds no stale data */
+  search->matches = matches;
+
   return found;
 }
 
+int
+simdb_search_byid(simdb_t *db, simdb_search_t *search, int num) {
+  simdb_urec_t *sample; /* record 'num', read from the database */
+  int ret = 0;
+
+  assert(db != NULL);
+  assert(search != NULL);
+
+  if (num <= 0)
+    return SIMDB_ERR_USAGE;
+
+  if ((ret = simdb_read(db, num, 1, &sample)) < 1)
+    return ret;
+
+  ret = simdb_search(db, search, sample);
+  FREE(sample);
+
+  return ret;
+}
+
+int
+simdb_search_file(simdb_t *db, simdb_search_t *search, const char *path) {
+  simdb_urec_t *sample = NULL; /* record built from the file at 'path' */
+  int ret = 0;
+
+  assert(db != NULL);
+  assert(search != NULL);
+
+  if (path == NULL)
+    return SIMDB_ERR_USAGE;
+
+  if ((sample = simdb_record_create(path)) == NULL)
+    return SIMDB_ERR_SAMPLER;
+
+  ret = simdb_search(db, search, sample);
+  FREE(sample);
+
+  return ret;
+}
+
 int
 simdb_usage_map(simdb_t * const db, char ** const map) {
   const int blksize = 4096;
diff --git a/src/simdb-query.c b/src/simdb-query.c
index 9d1610c..44644b5 100644
--- a/src/simdb-query.c
+++ b/src/simdb-query.c
@@ -44,26 +44,27 @@ void usage(int exitcode) {
 
 int search_similar(simdb_t *db, int num, float maxdiff) {
   int ret = 0, i = 0;
-  simdb_match_t *matches = NULL;
   simdb_search_t search;
 
   memset(&search, 0x0, sizeof(simdb_search_t));
 
+  search.maxdiff_ratio = 0.2; /* 20% */
   search.maxdiff_bitmap = maxdiff;
 
-  if ((ret = simdb_search(db, num, &search, &matches)) < 0) {
+  if ((ret = simdb_search_byid(db, &search, num)) < 0) {
     fprintf(stderr, "%s\n", simdb_error(ret));
     return 1;
   }
 
-  for (i = 0; i < ret; i++) {
+  for (i = 0; i < search.found; i++) {
     printf("%llu -- %.1f (bitmap), %.1f (ratio)\n",
-           matches[i].num,
-           matches[i].diff_bitmap * 100,
-           matches[i].diff_ratio * 100);
+           search.matches[i].num,
+           search.matches[i].diff_bitmap * 100,
+           search.matches[i].diff_ratio * 100);
   }
 
-  FREE(matches);
+  if (search.found > 0)
+    FREE(search.matches);
 
   return 0;
 }
diff --git a/src/simdb.h b/src/simdb.h
index 792f32c..e6d6b7a 100644
--- a/src/simdb.h
+++ b/src/simdb.h
@@ -48,16 +48,6 @@
 /** opaque database handler */
 typedef struct _simdb_t simdb_t;
 
-/**
- * search parameters
- * maxdiff_* fields should have value from 0.0 to 1.0 (0% - 100%)
- */
-typedef struct {
-  uint8_t limit; /**< max results */
-  float maxdiff_bitmap; /**< max difference of luma bitmaps */
-  float maxdiff_ratio; /**< max difference of ratios, default - 7% */
-} simdb_search_t;
-
 /**
  * search matches
  */
@@ -67,6 +57,18 @@ typedef struct {
   float diff_bitmap; /**< difference of bitmap */
 } simdb_match_t;
 
+/**
+ * search parameters
+ * maxdiff_* fields should have value from 0.0 to 1.0 (0% - 100%)
+ */
+typedef struct {
+  float maxdiff_bitmap; /**< max difference of luma bitmaps */
+  float maxdiff_ratio; /**< max difference of ratios, default - 7% */
+  int limit; /**< max results, 0 - no limit */
+  int found; /**< count of found results */
+  simdb_match_t *matches; /**< found results, allocated by search, freed by caller */
+} simdb_search_t;
+
 /**
  * @brief Creates empty database at given path
  * @param path Path to database
@@ -99,7 +101,7 @@ void simdb_close(simdb_t *db);
 const char * simdb_error(int code);
 
 /**
- * @brief Search compare given record in database to other images
+ * @brief Compare given record in database to other records
  * @param db Database handle
  * @param num Record sample number
  * @param search Search parameters
@@ -108,9 +110,19 @@ const char * simdb_error(int code);
  * @retval 0 if nothing found
  * @retval <0 on error
  */
-int simdb_search(simdb_t * const db, int num,
-                 simdb_search_t * const search,
-                 simdb_match_t ** matches);
+int simdb_search_byid(simdb_t *db, simdb_search_t *search, int num);
+
+/**
+ * @brief Compare given file against other records in database
+ * @param db Database handle
+ * @param search Search parameters, 'found' and 'matches' are filled in on return
+ * @param file Path to file to compare against database
+ * @note Caller releases search->matches when search->found > 0
+ * @retval >0 if found some matches
+ * @retval 0 if nothing found
+ * @retval <0 on error
+ */
+int simdb_search_file(simdb_t *db, simdb_search_t *search, const char *file);
 
 /**
  * @brief Checks is record with given number is used