-
-
Notifications
You must be signed in to change notification settings - Fork 59
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
$mol_bloom - Bloom filter with automatic optimal parameters.
- Loading branch information
jin
committed
Dec 29, 2024
1 parent
16487a5
commit d1b57ba
Showing
3 changed files
with
105 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
namespace $ {
	$mol_test({

		/**
		 * Checks `has_str` answers on a fresh filter, after adding one phrase,
		 * and after adding the two separate words that make up that phrase.
		 */
		'Add and check strings'() {

			const filter = new $mol_bloom( 5 )

			/** Asserts the `has_str` answer for every listed word, in the listed order. */
			const expect = ( answers: [ string, number ][] ) => {
				for( const [ word, found ] of answers ) {
					$mol_assert_equal( filter.has_str( word ), found )
				}
			}

			// A fresh filter contains nothing.
			expect([
				[ 'Hello', 0 ],
				[ 'World', 0 ],
				[ 'Hello World', 0 ],
				[ 'hello', 0 ],
				[ 'world', 0 ],
			])

			// Adding the phrase must not make its parts or other casings visible.
			filter.add_str( 'Hello World' )
			expect([
				[ 'Hello', 0 ],
				[ 'World', 0 ],
				[ 'Hello World', 1 ],
				[ 'hello', 0 ],
				[ 'world', 0 ],
			])

			// Adding the separate words makes exactly those words visible.
			filter.add_str( 'Hello' )
			filter.add_str( 'World' )
			expect([
				[ 'Hello', 1 ],
				[ 'World', 1 ],
				[ 'hello', 0 ],
				[ 'world', 0 ],
			])

		},

	})
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
namespace $ {

	/**
	 * Bloom filter with automatic optimal parameters.
	 * False negative is impossible.
	 * False positive is controlled.
	 */
	export class $mol_bloom extends Object {

		/** Bit storage, 32 bits per element. Length is a power of two and never less than 1. */
		bitmap: Uint32Array

		/** Count of bit positions probed for every value. Never less than 1. */
		hashes: number

		/**
		 * @throws RangeError when `risk` is zero, negative or NaN:
		 * such a risk gives an unbounded hash count and probing would never finish.
		 */
		constructor(
			/** Max count of stored values. */
			count: number,
			/** Chance of false positive. 1e-6 by default */
			public risk = 1e-6,
		) {
			super()

			if( !( risk > 0 ) ) {
				throw new RangeError( `Bloom filter risk must be a positive number, got ${ risk }` )
			}

			// 1.44 ≈ 1 / ln 2, so this is log2( 1 / risk ) rounded up.
			// Clamped because risk >= 1 would give zero or negative count.
			this.hashes = Math.max( 1, Math.ceil( -1.44 * Math.log( risk ) ) )

			// .065 ≈ 1 / ( 32 * ln² 2 ): required bits expressed in 32-bit words.
			const length = Math.ceil( -.065 * count * Math.log( risk ) )

			// Rounded up to a power of two. At least one word is always allocated:
			// with an empty bitmap `index % 0` is NaN, writes are dropped
			// and every added value would be reported as absent.
			const words = length >= 1 ? 2 ** Math.ceil( Math.log2( length ) ) : 1
			this.bitmap = new Uint32Array( words )
		}

		/** Adds a string to the filter. */
		add_str( word: string ) {
			this.add_bin( $mol_charset_encode( word ) )
		}

		/** Returns 1 when the string is possibly stored, 0 when it is definitely not. */
		has_str( word: string ) {
			return this.has_bin( $mol_charset_encode( word ) )
		}

		/** Adds a binary value to the filter by setting all of its bits. */
		add_bin( bin: Uint8Array ) {
			for( const index of this.hash( bin ) ) {
				this.add_bit( index )
			}
		}

		/** Returns 1 when all bits of the binary value are set, 0 otherwise. */
		has_bin( bin: Uint8Array ): number {
			for( const index of this.hash( bin ) ) {
				// A single missing bit is enough to prove absence.
				if( !this.has_bit( index ) ) return 0
			}
			return 1
		}

		/**
		 * Produces `this.hashes` 32-bit indexes for the data.
		 * The data is hashed repeatedly (hash of hash of ...) until enough words are collected.
		 */
		hash( data: Uint8Array ) {
			const res = [] as number[]
			fill: while( true ) {
				data = $mol_crypto_hash( data )
				// NOTE(review): reads the whole underlying buffer, so this assumes $mol_crypto_hash
				// returns a view that covers its entire buffer and holds whole 32-bit words - confirm.
				for( const index of new Uint32Array( data.buffer ) ) {
					res.push( index )
					if( res.length >= this.hashes ) break fill
				}
			}
			return res
		}

		/** Sets the bit that corresponds to the 32-bit index. */
		add_bit( index: number ) {
			// Word is picked by rounding up; kept as is, because changing it would alter the bit layout.
			const int = Math.ceil( index / 32 ) % this.bitmap.length
			const bit = index & 0b11111
			this.bitmap[ int ] |= 1 << bit
		}

		/** Returns 1 when the bit that corresponds to the 32-bit index is set, 0 otherwise. */
		has_bit( index: number ): number {
			// Must use the same word and bit selection as `add_bit`.
			const int = Math.ceil( index / 32 ) % this.bitmap.length
			const bit = index & 0b11111
			return 1&( this.bitmap[ int ] >> bit )
		}

	}

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters