libffuzzy  2.1.4
Fast ssdeep comparison library
 All Data Structures Files Functions Variables Macros
ffuzzy.h
Go to the documentation of this file.
1 /*
2 
3  libffuzzy : Fast ssdeep comparison library
4 
5  ffuzzy.h
6  Public API for libffuzzy
7 
8 
9  CREDITS OF ORIGINAL VERSION OF SSDEEP
10 
11  Copyright (C) 2002 Andrew Tridgell <tridge@samba.org>
12  Copyright (C) 2006 ManTech International Corporation
13  Copyright (C) 2013 Helmut Grohne <helmut@subdivi.de>
14 
15  This program is free software; you can redistribute it and/or modify
16  it under the terms of the GNU General Public License as published by
17  the Free Software Foundation; either version 2 of the License, or
18  (at your option) any later version.
19 
20  This program is distributed in the hope that it will be useful,
21  but WITHOUT ANY WARRANTY; without even the implied warranty of
22  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23  GNU General Public License for more details.
24 
25  You should have received a copy of the GNU General Public License
26  along with this program; if not, write to the Free Software
27  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
28 
29 
30  CREDIT OF MODIFIED PORTIONS
31 
32  Copyright (C) 2014 Tsukasa OI <li@livegrid.org>
33 
34 */
35 #ifndef FFUZZY_FFUZZY_H
36 #define FFUZZY_FFUZZY_H
37 
55 #ifndef __cplusplus
56 #include <stdbool.h>
57 #endif
58 #include <stddef.h>
59 
61 #define FFUZZY_SPAMSUM_LENGTH 64 /* Maximum length for the digest block. */
62 
64 #define FFUZZY_MIN_BLOCKSIZE 3ul /* Base value that FFUZZY_MAX_BLOCKSIZE is shifted from; presumably the smallest valid block size (TODO confirm). */
65 
67 #define FFUZZY_NUM_BLOCKHASHES 31 /* Determines the shift count (this value minus one) used to derive FFUZZY_MAX_BLOCKSIZE. */
68 
70 #define FFUZZY_MAX_BLOCKSIZE (FFUZZY_MIN_BLOCKSIZE << (FFUZZY_NUM_BLOCKHASHES-1)) /* FFUZZY_MIN_BLOCKSIZE doubled (FFUZZY_NUM_BLOCKHASHES-1) times, i.e. 3 * 2^30 as unsigned long. */
71 
87 #define FFUZZY_PRETTY_LEN 141 /* NOTE(review): presumably the buffer size sufficient for ffuzzy_pretty_digest output -- confirm. */
88 
93 #define FFUZZY_MIN_MATCH 7 /* NOTE(review): presumably the minimum common-substring length needed for a non-zero score -- confirm. */
94 
95 
96 #ifdef __cplusplus
97 extern "C" {
98 #endif
99 
136 typedef struct /* The type to store ssdeep digest after parsing. */
137 {
138  size_t len1, len2; /* len2: digest length for second block of the digest; len1: presumably the same for the first block (TODO confirm). */
139  unsigned long block_size; /* Block size of the ssdeep digest. */
140  char digest[FFUZZY_SPAMSUM_LENGTH * 2]; /* Room for two blocks of at most FFUZZY_SPAMSUM_LENGTH chars each; presumably both blocks are stored back to back (TODO confirm layout). */
141 } ffuzzy_digest;
142 
143 
144 
159 bool ffuzzy_read_digest(ffuzzy_digest *digest, const char *s); /* Read ssdeep digest from the string s into *digest. */
160 
168 int ffuzzy_compare_digest(const ffuzzy_digest *d1, const ffuzzy_digest *d2); /* Compare two fuzzy hashes and compute similarity score. */
169 
177 int ffuzzy_compare(const char *str1, const char *str2); /* Compute similarity score for given ssdeep hash strings. */
178 
202 int ffuzzy_compare_digest_near(const ffuzzy_digest *d1, const ffuzzy_digest *d2); /* Compare two fuzzy hashes assuming the block sizes of both hashes are "near". */
203 
215 
227 
246 bool ffuzzy_blocksize_is_valid(unsigned long block_size); /* Determines whether given block size is valid to use in libffuzzy. */
247 
263 bool ffuzzy_blocksize_is_natural(unsigned long block_size); /* Determines whether given block size is "natural". */
264 
279 bool ffuzzy_blocksize_is_near(unsigned long block_size1, unsigned long block_size2); /* Determines whether given block sizes are "near". */
280 
299 bool ffuzzy_blocksize_is_far_le(unsigned long block_size1, unsigned long block_size2); /* Determines whether the given ordered block sizes are "far" enough; presumably expects block_size1 <= block_size2 (TODO confirm). */
300 
317 
334 bool ffuzzy_digest_is_valid_buffer(const ffuzzy_digest *digest); /* Determines whether digest blocks are valid. */
335 
360 
367 bool ffuzzy_digest_is_valid(const ffuzzy_digest *digest); /* Determines whether given digest is valid. */
368 
375 bool ffuzzy_digest_is_natural(const ffuzzy_digest *digest); /* Determines whether given digest is valid and "natural". */
376 
394 int ffuzzy_digestcmp(const ffuzzy_digest *d1, const ffuzzy_digest *d2); /* Compare two ffuzzy_digest values. */
395 
406 int ffuzzy_digestcmp_blocksize(const ffuzzy_digest *d1, const ffuzzy_digest *d2); /* Compare two ffuzzy_digest values by block sizes. */
407 
426 
435 bool ffuzzy_pretty_digest(char *buf, size_t buflen, const ffuzzy_digest *digest); /* Convert ffuzzy_digest to the string, written to buf (capacity buflen). */
436 
478 typedef struct /* The type to store unnormalized ssdeep digest after parsing. */
479 {
480  size_t len1, len2; /* len2: digest length for second block of the digest; len1: presumably the same for the first block (TODO confirm). */
481  unsigned long block_size; /* Block size of the ssdeep digest. */
482  char digest[FFUZZY_SPAMSUM_LENGTH * 2]; /* Room for two blocks of at most FFUZZY_SPAMSUM_LENGTH chars each. */
483 } ffuzzy_udigest; /* Closing line was missing from the listing; restored so the typedef is terminated and named. */
484 
485 
496 bool ffuzzy_read_udigest(ffuzzy_udigest *udigest, const char *s); /* Read unnormalized ssdeep digest from the string s into *udigest. */
497 
505 
523 
530 bool ffuzzy_udigest_is_valid(const ffuzzy_udigest *udigest); /* Determines whether given digest is valid. */
531 
538 bool ffuzzy_udigest_is_natural(const ffuzzy_udigest *udigest); /* Determines whether given digest is valid and "natural". */
539 
557 int ffuzzy_udigestcmp(const ffuzzy_udigest *d1, const ffuzzy_udigest *d2); /* Compare two ffuzzy_udigest values. */
558 
570 
589 
598 bool ffuzzy_pretty_udigest(char *buf, size_t buflen, const ffuzzy_udigest *udigest); /* Convert ffuzzy_udigest to the string, written to buf (capacity buflen). */
599 
600 
608 
616 
642 int ffuzzy_score_cap(int s1len, int s2len, unsigned long block_size); /* Retrieve score cap for given block lengths and the block size. */
643 
659 int ffuzzy_score_cap_1(int minslen, unsigned long block_size); /* Retrieve score cap for given block length and size; minslen is presumably the smaller of the two block lengths (TODO confirm). */
660 
675 int ffuzzy_score_strings( /* Compute partial similarity score for given two block strings and block size. Opening line was missing from the listing; restored. */
676  const char *s1, size_t s1len,
677  const char *s2, size_t s2len,
678  unsigned long block_size
679 );
680 
685 #ifdef __cplusplus
686 }
687 #endif
688 
689 #endif
int ffuzzy_compare_digest_near_lt(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two fuzzy hashes assuming the second block size is double the first one.
int ffuzzy_digestcmp(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two ffuzzy_digest values.
int ffuzzy_compare_digest(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two fuzzy hashes and compute similarity score.
bool ffuzzy_blocksize_is_near(unsigned long block_size1, unsigned long block_size2)
Determines whether given block sizes are "near".
int ffuzzy_score_strings(const char *s1, size_t s1len, const char *s2, size_t s2len, unsigned long block_size)
Compute partial similarity score for given two block strings and block size.
void ffuzzy_convert_digest_to_udigest(ffuzzy_udigest *udigest, const ffuzzy_digest *digest)
Convert ffuzzy_digest to ffuzzy_udigest.
#define FFUZZY_SPAMSUM_LENGTH
Maximum length for the digest block.
Definition: ffuzzy.h:61
int ffuzzy_score_cap(int s1len, int s2len, unsigned long block_size)
Retrieve score cap for given block lengths and the block size.
int ffuzzy_digestcmp_blocksize_n(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two ffuzzy_digest values by whether block sizes are "natural" and block size values...
int ffuzzy_compare(const char *str1, const char *str2)
Compute similarity score for given ssdeep hash strings.
bool ffuzzy_read_udigest(ffuzzy_udigest *udigest, const char *s)
Read unnormalized ssdeep digest from the string.
The type to store unnormalized ssdeep digest after parsing.
Definition: ffuzzy.h:478
unsigned long block_size
Block size of the ssdeep digest.
Definition: ffuzzy.h:481
int ffuzzy_score_cap_1(int minslen, unsigned long block_size)
Retrieve score cap for given block length and size.
bool ffuzzy_blocksize_is_far_le(unsigned long block_size1, unsigned long block_size2)
Determines whether given ordered block sizes are "far" enough.
int ffuzzy_compare_digest_near(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two fuzzy hashes assuming two block sizes of given hashes are "near".
bool ffuzzy_blocksize_is_valid(unsigned long block_size)
Determines whether given block size is valid to use in libffuzzy.
bool ffuzzy_pretty_digest(char *buf, size_t buflen, const ffuzzy_digest *digest)
Convert ffuzzy_digest to the string.
The type to store ssdeep digest after parsing.
Definition: ffuzzy.h:136
bool ffuzzy_udigest_is_valid_lengths(const ffuzzy_udigest *udigest)
Determines whether block lengths of given digest are valid.
bool ffuzzy_udigest_is_natural_buffer(const ffuzzy_udigest *udigest)
Determines whether digest blocks are "natural".
size_t len2
Digest length for second block of the digest.
Definition: ffuzzy.h:480
bool ffuzzy_pretty_udigest(char *buf, size_t buflen, const ffuzzy_udigest *udigest)
Convert ffuzzy_udigest to the string.
bool ffuzzy_digest_is_natural_buffer(const ffuzzy_digest *digest)
Determines whether digest blocks are valid and "natural".
void ffuzzy_convert_udigest_to_digest(ffuzzy_digest *digest, const ffuzzy_udigest *udigest)
Convert ffuzzy_udigest to ffuzzy_digest.
int ffuzzy_udigestcmp(const ffuzzy_udigest *d1, const ffuzzy_udigest *d2)
Compare two ffuzzy_udigest values.
bool ffuzzy_digest_is_valid_lengths(const ffuzzy_digest *digest)
Determines whether block lengths of given digest are valid.
size_t len2
Digest length for second block of the digest.
Definition: ffuzzy.h:138
int ffuzzy_udigestcmp_blocksize_n(const ffuzzy_udigest *d1, const ffuzzy_udigest *d2)
Compare two ffuzzy_udigest values by whether block sizes are "natural" and block size values...
bool ffuzzy_udigest_is_valid(const ffuzzy_udigest *udigest)
Determines whether given digest is valid.
bool ffuzzy_digest_is_valid_buffer(const ffuzzy_digest *digest)
Determines whether digest blocks are valid.
bool ffuzzy_udigest_is_natural(const ffuzzy_udigest *udigest)
Determines whether given digest is valid and "natural".
int ffuzzy_compare_digest_near_eq(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two fuzzy hashes assuming two block sizes are same.
unsigned long block_size
Block size of the ssdeep digest.
Definition: ffuzzy.h:139
bool ffuzzy_read_digest(ffuzzy_digest *digest, const char *s)
Read ssdeep digest from the string.
int ffuzzy_digestcmp_blocksize(const ffuzzy_digest *d1, const ffuzzy_digest *d2)
Compare two ffuzzy_digest values by block sizes.
bool ffuzzy_blocksize_is_natural(unsigned long block_size)
Determines whether given block size is "natural".
int ffuzzy_udigestcmp_blocksize(const ffuzzy_udigest *d1, const ffuzzy_udigest *d2)
Compare two ffuzzy_udigest values by block sizes.
bool ffuzzy_digest_is_natural(const ffuzzy_digest *digest)
Determines whether given digest is valid and "natural".
bool ffuzzy_digest_is_valid(const ffuzzy_digest *digest)
Determines whether given digest is valid.