libffuzzy  2.1
Fast ssdeep comparison library
 All Data Structures Files Functions Variables Macros
ffuzzy.h
Go to the documentation of this file.
1 /*
2 
3  libffuzzy : Fast ssdeep comparison library
4 
5  ffuzzy.h
6  Public API for libffuzzy
7 
8 
9  CREDITS OF ORIGINAL VERSION OF SSDEEP
10 
11  Copyright (C) 2002 Andrew Tridgell <tridge@samba.org>
12  Copyright (C) 2006 ManTech International Corporation
13  Copyright (C) 2013 Helmut Grohne <helmut@subdivi.de>
14 
15  This program is free software; you can redistribute it and/or modify
16  it under the terms of the GNU General Public License as published by
17  the Free Software Foundation; either version 2 of the License, or
18  (at your option) any later version.
19 
20  This program is distributed in the hope that it will be useful,
21  but WITHOUT ANY WARRANTY; without even the implied warranty of
22  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23  GNU General Public License for more details.
24 
25  You should have received a copy of the GNU General Public License
26  along with this program; if not, write to the Free Software
27  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
28 
29 
30  CREDIT OF MODIFIED PORTIONS
31 
32  Copyright (C) 2014 Tsukasa OI <li@livegrid.org>
33 
34 */
35 #ifndef FFUZZY_FFUZZY_H
36 #define FFUZZY_FFUZZY_H
37 
55 #ifndef __cplusplus
56 #include <stdbool.h>
57 #endif
58 #include <stddef.h>
59 
/* Maximum length for the digest block. */
61 #define FFUZZY_SPAMSUM_LENGTH 64
62 
/* NOTE(review): presumably the smallest block size the library accepts -- confirm against the implementation. */
64 #define FFUZZY_MIN_BLOCKSIZE 3
65 
/* NOTE(review): presumably the minimum common-substring length required for a non-zero score -- confirm. */
70 #define FFUZZY_MIN_MATCH 7
71 
72 
73 #ifdef __cplusplus
74 extern "C" {
75 #endif
76 
/*
 * The type to store ssdeep digest after parsing.
 */
113 typedef struct
114 {
 /* len2: digest length for second block of the digest.
    len1: presumably the same for the first block -- TODO confirm. */
115  size_t len1, len2;
 /* Block size of the ssdeep digest. */
116  unsigned long block_size;
 /* Character storage sized for two blocks of FFUZZY_SPAMSUM_LENGTH each.
    NOTE(review): exact layout of the two blocks inside the buffer is not
    visible here -- confirm against the implementation. */
117  char digest[FFUZZY_SPAMSUM_LENGTH * 2];
118 } ffuzzy_digest;
119 
120 
121 
/* Read ssdeep digest from the string `s` into `digest`.
   NOTE(review): the bool result presumably reports parse success -- confirm. */
136 bool ffuzzy_read_digest(ffuzzy_digest *digest, const char *s);
137 
/* Compare two fuzzy hashes and compute similarity score. */
145 int ffuzzy_compare_digest(const ffuzzy_digest *d1, const ffuzzy_digest *d2);
146 
/* Compute similarity score for given ssdeep hash strings. */
154 int ffuzzy_compare(const char *str1, const char *str2);
155 
/* Compare two fuzzy hashes assuming two block sizes of given hashes are "near". */
179 int ffuzzy_compare_digest_near(const ffuzzy_digest *d1, const ffuzzy_digest *d2);
180 
192 
204 
/* Determines whether given block size is valid to use in libffuzzy. */
230 bool ffuzzy_blocksize_is_valid(unsigned long block_size);
231 
/* Determines whether given block size is "natural". */
247 bool ffuzzy_blocksize_is_natural(unsigned long block_size);
248 
/* Determines whether given block sizes are "near". */
263 bool ffuzzy_blocksize_is_near(unsigned long block_size1, unsigned long block_size2);
264 
/* Determines whether the given ordered block sizes are "far" enough.
   NOTE(review): the "_le" suffix suggests block_size1 <= block_size2 is expected -- confirm. */
283 bool ffuzzy_blocksize_is_far_le(unsigned long block_size1, unsigned long block_size2);
284 
301 
/* Determines whether digest blocks are valid. */
318 bool ffuzzy_digest_is_valid_buffer(const ffuzzy_digest *digest);
319 
344 
/* Determines whether given digest is valid. */
351 bool ffuzzy_digest_is_valid(const ffuzzy_digest *digest);
352 
/* Determines whether given digest is valid and "natural". */
359 bool ffuzzy_digest_is_natural(const ffuzzy_digest *digest);
360 
/* Compare two ffuzzy_digest values.
   NOTE(review): the int result is presumably a negative/zero/positive ordering -- confirm. */
378 int ffuzzy_digestcmp(const ffuzzy_digest *d1, const ffuzzy_digest *d2);
379 
/* Compare two ffuzzy_digest values by block sizes. */
390 int ffuzzy_digestcmp_blocksize(const ffuzzy_digest *d1, const ffuzzy_digest *d2);
391 
410 
/* Convert ffuzzy_digest to the string, written to `buf` (capacity `buflen`).
   NOTE(review): the bool result presumably reports whether the output fit -- confirm. */
419 bool ffuzzy_pretty_digest(char *buf, size_t buflen, const ffuzzy_digest *digest);
420 
/*
 * The type to store unnormalized ssdeep digest after parsing.
 *
 * The closing line of this typedef (listing line 467) is restored here;
 * without it the struct is unterminated and the ffuzzy_udigest name used
 * by the prototypes that follow is never declared.
 */
462 typedef struct
463 {
 /* len2: digest length for second block of the digest.
    len1: presumably the same for the first block -- TODO confirm. */
464  size_t len1, len2;
 /* Block size of the ssdeep digest. */
465  unsigned long block_size;
 /* Character storage sized for two blocks of FFUZZY_SPAMSUM_LENGTH each.
    NOTE(review): exact layout of the two blocks inside the buffer is not
    visible here -- confirm against the implementation. */
466  char digest[FFUZZY_SPAMSUM_LENGTH * 2];
467 } ffuzzy_udigest;
468 
469 
/* Read unnormalized ssdeep digest from the string `s` into `udigest`.
   NOTE(review): the bool result presumably reports parse success -- confirm. */
480 bool ffuzzy_read_udigest(ffuzzy_udigest *udigest, const char *s);
481 
489 
507 
/* Determines whether given digest is valid. */
514 bool ffuzzy_udigest_is_valid(const ffuzzy_udigest *udigest);
515 
/* Determines whether given digest is valid and "natural". */
522 bool ffuzzy_udigest_is_natural(const ffuzzy_udigest *udigest);
523 
/* Compare two ffuzzy_udigest values.
   NOTE(review): the int result is presumably a negative/zero/positive ordering -- confirm. */
541 int ffuzzy_udigestcmp(const ffuzzy_udigest *d1, const ffuzzy_udigest *d2);
542 
554 
573 
/* Convert ffuzzy_udigest to the string, written to `buf` (capacity `buflen`).
   NOTE(review): the bool result presumably reports whether the output fit -- confirm. */
582 bool ffuzzy_pretty_udigest(char *buf, size_t buflen, const ffuzzy_udigest *udigest);
583 
584 
592 
600 
/* Retrieve score cap for given block lengths and the block size. */
626 int ffuzzy_score_cap(int s1len, int s2len, unsigned long block_size);
627 
/* Retrieve score cap for given block length and size.
   NOTE(review): `minslen` is presumably the smaller of the two block lengths -- confirm. */
643 int ffuzzy_score_cap_1(int minslen, unsigned long block_size);
644 
/*
 * Compute partial similarity score for given two block strings and block size.
 *
 * s1, s1len:  first block string and its length.
 * s2, s2len:  second block string and its length.
 * block_size: block size to score against.
 *
 * The opening line of this prototype (listing line 659) is restored here;
 * without it the parameter list below is a dangling fragment.
 */
659 int ffuzzy_score_strings(
660  const char *s1, size_t s1len,
661  const char *s2, size_t s2len,
662  unsigned long block_size
663 );
664 
669 #ifdef __cplusplus
670 }
671 #endif
672 
673 #endif
int ffuzzy_compare_digest_near_lt(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two fuzzy hashes assuming the second block size is double the first one.
int ffuzzy_digestcmp(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two ffuzzy_digest values.
int ffuzzy_compare_digest(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two fuzzy hashes and compute similarity score.
bool ffuzzy_blocksize_is_near(unsigned long block_size1, unsigned long block_size2)
Determines whether given block sizes are "near".
int ffuzzy_score_strings(const char *s1, size_t s1len, const char *s2, size_t s2len, unsigned long block_size)
Compute partial similarity score for given two block strings and block size.
void ffuzzy_convert_digest_to_udigest(ffuzzy_udigest *udigest, const ffuzzy_digest *digest)
Convert ffuzzy_digest to ffuzzy_udigest.
#define FFUZZY_SPAMSUM_LENGTH
Maximum length for the digest block.
Definition: ffuzzy.h:61
int ffuzzy_score_cap(int s1len, int s2len, unsigned long block_size)
Retrieve score cap for given block lengths and the block size.
int ffuzzy_digestcmp_blocksize_n(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two ffuzzy_digest values by whether block sizes are "natural" and block size values...
int ffuzzy_compare(const char *str1, const char *str2)
Compute similarity score for given ssdeep hash strings.
bool ffuzzy_read_udigest(ffuzzy_udigest *udigest, const char *s)
Read unnormalized ssdeep digest from the string.
The type to store unnormalized ssdeep digest after parsing.
Definition: ffuzzy.h:462
unsigned long block_size
Block size of the ssdeep digest.
Definition: ffuzzy.h:465
int ffuzzy_score_cap_1(int minslen, unsigned long block_size)
Retrieve score cap for given block length and size.
bool ffuzzy_blocksize_is_far_le(unsigned long block_size1, unsigned long block_size2)
Determines whether the given ordered block sizes are "far" enough.
int ffuzzy_compare_digest_near(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two fuzzy hashes assuming two block sizes of given hashes are "near".
bool ffuzzy_blocksize_is_valid(unsigned long block_size)
Determines whether given block size is valid to use in libffuzzy.
bool ffuzzy_pretty_digest(char *buf, size_t buflen, const ffuzzy_digest *digest)
Convert ffuzzy_digest to the string.
The type to store ssdeep digest after parsing.
Definition: ffuzzy.h:113
bool ffuzzy_udigest_is_valid_lengths(const ffuzzy_udigest *udigest)
Determines whether block lengths of given digest are valid.
bool ffuzzy_udigest_is_natural_buffer(const ffuzzy_udigest *udigest)
Determines whether digest blocks are "natural".
size_t len2
Digest length for second block of the digest.
Definition: ffuzzy.h:464
bool ffuzzy_pretty_udigest(char *buf, size_t buflen, const ffuzzy_udigest *udigest)
Convert ffuzzy_udigest to the string.
bool ffuzzy_digest_is_natural_buffer(const ffuzzy_digest *digest)
Determines whether digest blocks are valid and "natural".
void ffuzzy_convert_udigest_to_digest(ffuzzy_digest *digest, const ffuzzy_udigest *udigest)
Convert ffuzzy_udigest to ffuzzy_digest.
int ffuzzy_udigestcmp(const ffuzzy_udigest *d1, const ffuzzy_udigest *d2)
Compare two ffuzzy_udigest values.
bool ffuzzy_digest_is_valid_lengths(const ffuzzy_digest *digest)
Determines whether block lengths of given digest are valid.
size_t len2
Digest length for second block of the digest.
Definition: ffuzzy.h:115
int ffuzzy_udigestcmp_blocksize_n(const ffuzzy_udigest *d1, const ffuzzy_udigest *d2)
Compare two ffuzzy_udigest values by whether block sizes are "natural" and block size values...
bool ffuzzy_udigest_is_valid(const ffuzzy_udigest *udigest)
Determines whether given digest is valid.
bool ffuzzy_digest_is_valid_buffer(const ffuzzy_digest *digest)
Determines whether digest blocks are valid.
bool ffuzzy_udigest_is_natural(const ffuzzy_udigest *udigest)
Determines whether given digest is valid and "natural".
int ffuzzy_compare_digest_near_eq(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two fuzzy hashes assuming the two block sizes are the same.
unsigned long block_size
Block size of the ssdeep digest.
Definition: ffuzzy.h:116
bool ffuzzy_read_digest(ffuzzy_digest *digest, const char *s)
Read ssdeep digest from the string.
int ffuzzy_digestcmp_blocksize(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two ffuzzy_digest values by block sizes.
bool ffuzzy_blocksize_is_natural(unsigned long block_size)
Determines whether given block size is "natural".
int ffuzzy_udigestcmp_blocksize(const ffuzzy_udigest *d1, const ffuzzy_udigest *d2)
Compare two ffuzzy_udigest values by block sizes.
bool ffuzzy_digest_is_natural(const ffuzzy_digest *digest)
Determines whether given digest is valid and "natural".
bool ffuzzy_digest_is_valid(const ffuzzy_digest *digest)
Determines whether given digest is valid.