From 72cb6ee2ef9947b0a99122d7144482e97fd5698a Mon Sep 17 00:00:00 2001 From: Alfat Saputra Harun Date: Mon, 10 Apr 2017 21:10:41 +0700 Subject: [PATCH] read index from redis --- README.md | 7 +++- lib/predis | 1 + search/search_ff.php | 79 ++++++++++++++++++++++++++++++++------------ web/search.php | 12 ++++--- 4 files changed, 72 insertions(+), 27 deletions(-) create mode 160000 lib/predis diff --git a/README.md b/README.md index 5ffb17d..80a89e7 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,12 @@ Menjalankan 1. Clone atau download repository ini ke folder web server (htdocs atau /var/www/html) 2. Langsung kunjungi di browser -Tidak ada setup khusus karena aplikasi ini tidak menggunakan database. +##### Optional +Anda bisa mengaktifkan index menggunakan Redis dengan langkah : +1. Mengganti value `use_redis` pada *web/search.php* dengan **true** +2. Pastikan service Redis telah aktif pada host server +3. Jalankan *redis-indexer.php* dari repository https://github.com/lafzi/lafzi-indexer pada host server + Disarankan menggunakan sistem operasi Linux karena sistem cache mengandalkan command di Linux. Lisensi diff --git a/lib/predis b/lib/predis new file mode 160000 index 0000000..98ec0cc --- /dev/null +++ b/lib/predis @@ -0,0 +1 @@ +Subproject commit 98ec0cc27efc1efab32b369962f97e29ecc0464a diff --git a/search/search_ff.php b/search/search_ff.php index d149145..bc7923c 100644 --- a/search/search_ff.php +++ b/search/search_ff.php @@ -5,6 +5,7 @@ include_once '../lib/trigram.php'; include_once '../lib/array_utility.php'; include_once '../lib/doc_class.php'; +include_once '../lib/predis/autoload.php'; // fungsi pencari // param : $query_final yang siap cari (sudah melalui pengodean fonetik) @@ -12,7 +13,13 @@ // $post_list_filename nama file posting list // $score_order true jika ingin menghitung keterurutan kemunculan term // return : array of found_doc object -function search($query_final, $term_list_filename, $post_list_filename, $score_order = true, $filtered = true, $filter_threshold = 0.8) { +function search($query_final, $vocal, $post_list_filename, $score_order = true, $filtered = true, $filter_threshold = 0.8, $use_redis = false) { + + $term_list_filename = $vocal ? "../data/index_termlist_vokal.txt" : "../data/index_termlist_nonvokal.txt"; + $key_prefix = $vocal ? "vocal-" : "nonvocal-"; + + Predis\Autoloader::register(); + $redis = new Predis\Client(); // baca seluruh term list simpan dalam hashmap $term_hashmap = array(); @@ -44,29 +51,59 @@ function search($query_final, $term_list_filename, $post_list_filename, $score_o foreach ($query_trigrams as $query_trigram => $qtfp) { list($qt_freq, $qt_pos) = $qtfp; - if (isset($term_hashmap[$query_trigram])) { - // ambil posting list yang sesuai untuk trigram ini - $post_list_file->fseek($term_hashmap[$query_trigram]); - $matched_posting_lists = explode(';', trim($post_list_file->current())); - - // untuk setiap posting list untuk trigram ini - foreach ($matched_posting_lists as $data) { - list ($doc_id, $term_freq, $term_pos) = explode(':', $data); - $term_pos = explode(',', $term_pos); - //$term_pos = reset(explode(',', $term_pos)); - - // hitung jumlah kemunculan dll - if (isset($matched_docs[$doc_id])) { - $matched_docs[$doc_id]->matched_trigrams_count += ($qt_freq < $term_freq) ? $qt_freq : $term_freq; - } else { - $matched_docs[$doc_id] = new found_doc(); - $matched_docs[$doc_id]->matched_trigrams_count = 1; - $matched_docs[$doc_id]->id = $doc_id; - } + if ($use_redis) { + + $key = $key_prefix.$query_trigram; + + // index dari redis + if ($redis->exists($key)){ + //ambil posting list yang sesuai untuk trigram ini + $matched_posting_lists = explode(';',trim($redis->get($key))); + + // untuk setiap posting list untuk trigram ini + foreach ($matched_posting_lists as $data) { + list ($doc_id, $term_freq, $term_pos) = explode(':', $data); + $term_pos = explode(',', $term_pos); + + // hitung jumlah kemunculan dll + if (isset($matched_docs[$doc_id])) { + $matched_docs[$doc_id]->matched_trigrams_count += ($qt_freq < $term_freq) ? $qt_freq : $term_freq; + } else { + $matched_docs[$doc_id] = new found_doc(); + $matched_docs[$doc_id]->matched_trigrams_count = 1; + $matched_docs[$doc_id]->id = $doc_id; + } - $matched_docs[$doc_id]->matched_terms[$query_trigram] = $term_pos; // $term_pos is an array + $matched_docs[$doc_id]->matched_terms[$query_trigram] = $term_pos; // $term_pos is an array + } } + } else { + + // index dari file + if (isset($term_hashmap[$query_trigram])) { + // ambil posting list yang sesuai untuk trigram ini + $post_list_file->fseek($term_hashmap[$query_trigram]); + $matched_posting_lists = explode(';', trim($post_list_file->current())); + + // untuk setiap posting list untuk trigram ini + foreach ($matched_posting_lists as $data) { + list ($doc_id, $term_freq, $term_pos) = explode(':', $data); + $term_pos = explode(',', $term_pos); + //$term_pos = reset(explode(',', $term_pos)); + + // hitung jumlah kemunculan dll + if (isset($matched_docs[$doc_id])) { + $matched_docs[$doc_id]->matched_trigrams_count += ($qt_freq < $term_freq) ? $qt_freq : $term_freq; + } else { + $matched_docs[$doc_id] = new found_doc(); + $matched_docs[$doc_id]->matched_trigrams_count = 1; + $matched_docs[$doc_id]->id = $doc_id; + } + + $matched_docs[$doc_id]->matched_terms[$query_trigram] = $term_pos; // $term_pos is an array + } + } } } diff --git a/web/search.php b/web/search.php index 6842392..26f930b 100644 --- a/web/search.php +++ b/web/search.php @@ -6,6 +6,10 @@ // fwrite($lf, $ls); // fclose($lf); +$configs = (object) array( + 'use_redis' => false + ); + if (isset($_GET['q']) && $_GET['q'] != "") { //if (isset($_GET['order'])) { @@ -39,10 +43,8 @@ $query_trigrams_count = strlen($query_final) - 2; if ($vowel) { - $term_list_filename = "../data/index_termlist_vokal.txt"; $post_list_filename = "../data/index_postlist_vokal.txt"; } else { - $term_list_filename = "../data/index_termlist_nonvokal.txt"; $post_list_filename = "../data/index_postlist_nonvokal.txt"; } @@ -83,18 +85,18 @@ // pertama dengan threshold 0.8 $th = 0.95; //0.8; - $matched_docs = search($query_final, $term_list_filename, $post_list_filename, $order, $filtered, $th); + $matched_docs = search($query_final, $vowel, $post_list_filename, $order, $filtered, $th, $configs->use_redis); // jika ternyata tanpa hasil, turunkan threshold jadi 0.7 if(count($matched_docs) == 0) { $th = 0.8; //0.7; - $matched_docs = search($query_final, $term_list_filename, $post_list_filename, $order, $filtered, $th); + $matched_docs = search($query_final, $vowel, $post_list_filename, $order, $filtered, $th, $configs->use_redis); } // jika ternyata tanpa hasil, turunkan threshold jadi 0.6 if(count($matched_docs) == 0) { $th = 0.7; //0.6; - $matched_docs = search($query_final, $term_list_filename, $post_list_filename, $order, $filtered, $th); + $matched_docs = search($query_final, $vowel, $post_list_filename, $order, $filtered, $th, $configs->use_redis); } // jika masih tanpa hasil, ya sudah