Skip to content

Commit

Permalink
$mol_bloom - Bloom filter with automatic optimal parameters.
Browse files Browse the repository at this point in the history
  • Loading branch information
jin committed Dec 29, 2024
1 parent 16487a5 commit d1b57ba
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 1 deletion.
30 changes: 30 additions & 0 deletions bloom/bloom.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
namespace $ {
	$mol_test({

		/** Membership answers must follow exactly the strings that were added, including letter case. */
		'Add and check strings'() {

			const filter = new $mol_bloom( 5 )

			/** Asserts the `has_str` answer for every listed word, in the listed order. */
			const expect = ( answers: [ string, number ][] ) => {
				for( const [ word, answer ] of answers ) {
					$mol_assert_equal( filter.has_str( word ), answer )
				}
			}

			// A fresh filter reports nothing as present.
			expect([
				[ 'Hello', 0 ],
				[ 'World', 0 ],
				[ 'Hello World', 0 ],
				[ 'hello', 0 ],
				[ 'world', 0 ],
			])

			// Only the whole phrase is known after adding it; its parts are not.
			filter.add_str( 'Hello World' )
			expect([
				[ 'Hello', 0 ],
				[ 'World', 0 ],
				[ 'Hello World', 1 ],
				[ 'hello', 0 ],
				[ 'world', 0 ],
			])

			// Adding the parts makes them known, while lower-case variants stay unknown.
			for( const word of [ 'Hello', 'World' ] ) {
				filter.add_str( word )
			}
			expect([
				[ 'Hello', 1 ],
				[ 'World', 1 ],
				[ 'hello', 0 ],
				[ 'world', 0 ],
			])

		},

	})
}
74 changes: 74 additions & 0 deletions bloom/bloom.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
namespace $ {

	/**
	 * Bloom filter with automatic optimal parameters.
	 * False negative is impossible.
	 * False positive is controlled.
	 */
	export class $mol_bloom extends Object {

		/** Bit storage packed into 32-bit words. Always holds at least one word. */
		bitmap: Uint32Array

		/** Number of bit positions derived for every stored value. Always at least 1. */
		hashes: number

		/**
		 * @throws RangeError when `risk` is not a positive number
		 * (such a value would make `hash()` loop forever).
		 */
		constructor(
			/** Max count of stored values. */
			count: number,
			/** Chance of false positive. 1e-6 by default */
			public risk = 1e-6,
		) {
			super()

			// Also rejects NaN, because every comparison with NaN is false.
			if( !( risk > 0 ) ) {
				throw new RangeError( `$mol_bloom: risk must be a positive number, got ${ risk }` )
			}

			// Optimal hash count is -ln( risk ) / ln 2, and 1.44 ≈ 1 / ln 2.
			// Clamped to 1 so that `risk >= 1` still tests one bit per value.
			this.hashes = Math.max( 1, Math.ceil( -1.44 * Math.log( risk ) ) )

			// Optimal size is -count * ln( risk ) / ln² 2 bits; .065 ≈ 1 / ( 32 * ln² 2 ) converts it to 32-bit words.
			const length = Math.ceil( -.065 * count * Math.log( risk ) )

			// Round up to a power of two. Degenerate inputs (count <= 0, NaN, risk >= 1) fall back to
			// a single word: an empty bitmap would silently drop every added value.
			const words = length >= 1 ? 2 ** Math.ceil( Math.log2( length ) ) : 1

			this.bitmap = new Uint32Array( words )
		}

		/** Adds a string, encoded to bytes by `$mol_charset_encode`. */
		add_str( word: string ) {
			this.add_bin( $mol_charset_encode( word ) )
		}

		/** Returns 1 when the string may have been added, 0 when it definitely was not. */
		has_str( word: string ) {
			return this.has_bin( $mol_charset_encode( word ) )
		}

		/** Adds a binary value by setting every bit selected by `hash()`. */
		add_bin( bin: Uint8Array ) {
			for( const index of this.hash( bin ) ) {
				this.add_bit( index )
			}
		}

		/** Returns 1 when every bit selected by `hash()` is set, otherwise 0. */
		has_bin( bin: Uint8Array ): number {
			for( const index of this.hash( bin ) ) {
				// One missing bit is enough to prove the value was never added.
				if( !this.has_bit( index ) ) return 0
			}
			return 1
		}

		/**
		 * Derives `this.hashes` 32-bit indexes for the given bytes.
		 * Each round hashes the previous digest with `$mol_crypto_hash`, so any count of indexes can be produced.
		 */
		hash( data: Uint8Array ) {
			const indexes = [] as number[]
			let digest = data
			fill: while( true ) {
				digest = $mol_crypto_hash( digest )
				// NOTE(review): reads the digest's whole underlying buffer as 32-bit words;
				// assumes the digest is not a view into a larger buffer — confirm against $mol_crypto_hash.
				for( const index of new Uint32Array( digest.buffer ) ) {
					indexes.push( index )
					if( indexes.length >= this.hashes ) break fill
				}
			}
			return indexes
		}

		/** Sets the bit addressed by a 32-bit index. */
		add_bit( index: number ) {
			// The low 5 bits select the bit inside a word, the rest selects the word (wrapped to the bitmap size).
			// `Math.ceil` is kept as is: `has_bit` uses the same mapping, and changing it would alter the bit layout.
			const int = Math.ceil( index / 32 ) % this.bitmap.length
			const bit = index & 0b11111
			this.bitmap[ int ] |= 1 << bit
		}

		/** Returns 1 when the bit addressed by a 32-bit index is set, otherwise 0. */
		has_bit( index: number ) {
			const int = Math.ceil( index / 32 ) % this.bitmap.length
			const bit = index & 0b11111
			return 1&( this.bitmap[ int ] >> bit )
		}

	}

}
2 changes: 1 addition & 1 deletion crypto/hash/hash.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ namespace $ {

let sponge = new Uint32Array(80)

/** Fast small sync SHA-1 */
/** Fast small sync SHA-1 (20 bytes, 160 bits) */
export function $mol_crypto_hash( data: Uint8Array ) {

const bits = data.byteLength << 3
Expand Down

0 comments on commit d1b57ba

Please sign in to comment.