Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

read index from redis #6

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@ Menjalankan
1. Clone atau download repository ini ke folder web server (htdocs atau /var/www/html)
2. Langsung kunjungi di browser

Tidak ada setup khusus karena aplikasi ini tidak menggunakan database.
##### Optional
Anda bisa mengaktifkan index menggunakan Redis dengan langkah-langkah berikut:
1. Ganti nilai `use_redis` pada *web/search.php* menjadi **true**
2. Pastikan service Redis telah aktif pada host server
3. Jalankan *redis-indexer.php* dari repository https://github.com/lafzi/lafzi-indexer pada host server

Disarankan menggunakan sistem operasi Linux karena sistem cache mengandalkan command di Linux.

Lisensi
Expand Down
1 change: 1 addition & 0 deletions lib/predis
Submodule predis added at 98ec0c
79 changes: 58 additions & 21 deletions search/search_ff.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,21 @@
include_once '../lib/trigram.php';
include_once '../lib/array_utility.php';
include_once '../lib/doc_class.php';
include_once '../lib/predis/autoload.php';

// fungsi pencari
// param : $query_final yang siap cari (sudah melalui pengodean fonetik)
// $term_list_filename nama file term list
// $post_list_filename nama file posting list
// $score_order true jika ingin menghitung keterurutan kemunculan term
// return : array of found_doc object
function search($query_final, $term_list_filename, $post_list_filename, $score_order = true, $filtered = true, $filter_threshold = 0.8) {
function search($query_final, $vocal, $post_list_filename, $score_order = true, $filtered = true, $filter_threshold = 0.8, $use_redis = false) {

$term_list_filename = $vocal ? "../data/index_termlist_vokal.txt" : "../data/index_termlist_nonvokal.txt";
$key_prefix = $vocal ? "vocal-" : "nonvocal-";

Predis\Autoloader::register();
$redis = new Predis\Client();

// baca seluruh term list simpan dalam hashmap
$term_hashmap = array();
Expand Down Expand Up @@ -44,29 +51,59 @@ function search($query_final, $term_list_filename, $post_list_filename, $score_o
foreach ($query_trigrams as $query_trigram => $qtfp) {
list($qt_freq, $qt_pos) = $qtfp;

if (isset($term_hashmap[$query_trigram])) {
// ambil posting list yang sesuai untuk trigram ini
$post_list_file->fseek($term_hashmap[$query_trigram]);
$matched_posting_lists = explode(';', trim($post_list_file->current()));

// untuk setiap posting list untuk trigram ini
foreach ($matched_posting_lists as $data) {
list ($doc_id, $term_freq, $term_pos) = explode(':', $data);
$term_pos = explode(',', $term_pos);
//$term_pos = reset(explode(',', $term_pos));

// hitung jumlah kemunculan dll
if (isset($matched_docs[$doc_id])) {
$matched_docs[$doc_id]->matched_trigrams_count += ($qt_freq < $term_freq) ? $qt_freq : $term_freq;
} else {
$matched_docs[$doc_id] = new found_doc();
$matched_docs[$doc_id]->matched_trigrams_count = 1;
$matched_docs[$doc_id]->id = $doc_id;
}
if ($use_redis) {

$key = $key_prefix.$query_trigram;

// index dari redis
if ($redis->exists($key)){
//ambil posting list yang sesuai untuk trigram ini
$matched_posting_lists = explode(';',trim($redis->get($key)));

// untuk setiap posting list untuk trigram ini
foreach ($matched_posting_lists as $data) {
list ($doc_id, $term_freq, $term_pos) = explode(':', $data);
$term_pos = explode(',', $term_pos);

// hitung jumlah kemunculan dll
if (isset($matched_docs[$doc_id])) {
$matched_docs[$doc_id]->matched_trigrams_count += ($qt_freq < $term_freq) ? $qt_freq : $term_freq;
} else {
$matched_docs[$doc_id] = new found_doc();
$matched_docs[$doc_id]->matched_trigrams_count = 1;
$matched_docs[$doc_id]->id = $doc_id;
}

$matched_docs[$doc_id]->matched_terms[$query_trigram] = $term_pos; // $term_pos is an array
$matched_docs[$doc_id]->matched_terms[$query_trigram] = $term_pos; // $term_pos is an array
}
}

} else {

// index dari file
if (isset($term_hashmap[$query_trigram])) {
// ambil posting list yang sesuai untuk trigram ini
$post_list_file->fseek($term_hashmap[$query_trigram]);
$matched_posting_lists = explode(';', trim($post_list_file->current()));

// untuk setiap posting list untuk trigram ini
foreach ($matched_posting_lists as $data) {
list ($doc_id, $term_freq, $term_pos) = explode(':', $data);
$term_pos = explode(',', $term_pos);
//$term_pos = reset(explode(',', $term_pos));

// hitung jumlah kemunculan dll
if (isset($matched_docs[$doc_id])) {
$matched_docs[$doc_id]->matched_trigrams_count += ($qt_freq < $term_freq) ? $qt_freq : $term_freq;
} else {
$matched_docs[$doc_id] = new found_doc();
$matched_docs[$doc_id]->matched_trigrams_count = 1;
$matched_docs[$doc_id]->id = $doc_id;
}

$matched_docs[$doc_id]->matched_terms[$query_trigram] = $term_pos; // $term_pos is an array
}
}
}
}

Expand Down
12 changes: 7 additions & 5 deletions web/search.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
// fwrite($lf, $ls);
// fclose($lf);

$configs = (object) array(
'use_redis' => false
);

if (isset($_GET['q']) && $_GET['q'] != "") {

//if (isset($_GET['order'])) {
Expand Down Expand Up @@ -39,10 +43,8 @@
$query_trigrams_count = strlen($query_final) - 2;

if ($vowel) {
$term_list_filename = "../data/index_termlist_vokal.txt";
$post_list_filename = "../data/index_postlist_vokal.txt";
} else {
$term_list_filename = "../data/index_termlist_nonvokal.txt";
$post_list_filename = "../data/index_postlist_nonvokal.txt";
}

Expand Down Expand Up @@ -83,18 +85,18 @@

// pertama dengan threshold 0.95
$th = 0.95; //0.8;
$matched_docs = search($query_final, $term_list_filename, $post_list_filename, $order, $filtered, $th);
$matched_docs = search($query_final, $vowel, $post_list_filename, $order, $filtered, $th, $configs->use_redis);

// jika ternyata tanpa hasil, turunkan threshold jadi 0.8
if(count($matched_docs) == 0) {
$th = 0.8; //0.7;
$matched_docs = search($query_final, $term_list_filename, $post_list_filename, $order, $filtered, $th);
$matched_docs = search($query_final, $vowel, $post_list_filename, $order, $filtered, $th, $configs->use_redis);
}

// jika ternyata tanpa hasil, turunkan threshold jadi 0.7
if(count($matched_docs) == 0) {
$th = 0.7; //0.6;
$matched_docs = search($query_final, $term_list_filename, $post_list_filename, $order, $filtered, $th);
$matched_docs = search($query_final, $vowel, $post_list_filename, $order, $filtered, $th, $configs->use_redis);
}

// jika masih tanpa hasil, ya sudah
Expand Down