Commit

Merge pull request #37 from georgbuechner/feat/35-fast-open
feat: fast open
georgbuechner authored Oct 21, 2023
2 parents c96919f + 78554a2 commit a1dd892
Showing 6 changed files with 141 additions and 17 deletions.
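In short: search hits now get a running result number, the numbered hits are cached in last_results.json in litt's home directory, and invoking litt with a bare number as its first argument re-opens that hit in zathura at the stored page (-P) with a forward search for the first query term (-f), falling back to the system PDF viewer when zathura is unavailable. A rough sketch of the dispatch this adds to litt/src/main.rs (condensed from the diff below; the name try_fast_open is made up here, and error handling plus the fallback viewer are omitted):

use std::collections::HashMap;

// Sketch only, condensed from the main.rs diff below; the real code inlines this
// logic in main() and adds error handling plus a non-zathura fallback.
fn try_fast_open(args: &[String], fast_results: &HashMap<u32, (String, u32, String)>) -> bool {
    // A bare number as the first CLI argument refers to a hit from the previous search.
    let Some(n) = args.get(1).and_then(|a| a.trim().parse::<u32>().ok()) else {
        return false;
    };
    let Some((path, page, term)) = fast_results.get(&n) else {
        return false;
    };
    // Open the stored PDF at the stored page and forward-search the first query term.
    std::process::Command::new("zathura")
        .arg(path)
        .arg("-P")
        .arg(page.to_string())
        .arg("-f")
        .arg(term)
        .status()
        .map(|status| status.success())
        .unwrap_or(false)
}

So once a search has printed results labelled [1], [2], ..., running litt 2 jumps straight back into hit [2].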
2 changes: 0 additions & 2 deletions Cargo.toml
@@ -9,8 +9,6 @@ members = [

[workspace.dependencies]
tantivy = "0.19.2"
mockall_double = "0.3.0"
mockall = "0.11.4"
lopdf = "0.29.0"
clap = "4.2.2"
serde = "1.0.160"
12 changes: 11 additions & 1 deletion index/src/index.rs
@@ -74,13 +74,24 @@ impl Index {
pub fn add_all_pdf_documents(&mut self) -> Result<()> {
let mut checksum_map = self.open_or_create_checksum_map()?;
for path in self.get_pdf_dir_entries() {
let relative_path = path
.path()
.strip_prefix(&self.documents_path)
.map_err(|e| CreationError(e.to_string()))?;

let str_path = &path.path().to_string_lossy().to_string();
if !self
.compare_checksum(str_path, &checksum_map)
.unwrap_or(false)
{
println!("Adding document: {}", relative_path.to_string_lossy());
self.add_pdf_document_pages(&path)?;
self.update_checksum(str_path, &mut checksum_map)?;
} else {
println!(
"Skipped (already exists): {}",
relative_path.to_string_lossy()
);
}
}
self.store_checksum_map(&checksum_map)?;
@@ -271,7 +282,6 @@ impl Index {
path: &str,
checksum_map: &HashMap<String, (u64, SystemTime)>,
) -> Result<bool> {
println!("Checking of {} exists", path);
let file = std::fs::File::open(path).map_err(|e| CreationError(e.to_string()))?;
let metadata = file.metadata().map_err(|e| CreationError(e.to_string()))?;
let modified = metadata
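The add/skip decision above rests on compare_checksum; judging from the map type HashMap<String, (u64, SystemTime)> and the metadata calls in the truncated hunk, it compares a file's size and modification time with the stored pair. A minimal sketch of that check (an assumption about the internals, not the commit's exact code):

use std::collections::HashMap;
use std::time::SystemTime;

// Sketch: a document counts as unchanged when its current size and mtime match the stored entry.
fn is_unchanged(path: &str, checksum_map: &HashMap<String, (u64, SystemTime)>) -> std::io::Result<bool> {
    let metadata = std::fs::metadata(path)?;
    let modified = metadata.modified()?;
    Ok(checksum_map
        .get(path)
        .map(|(size, time)| *size == metadata.len() && *time == modified)
        .unwrap_or(false))
}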
100 changes: 93 additions & 7 deletions litt/src/main.rs
@@ -1,3 +1,5 @@
use std::collections::HashMap;
use std::env;
use std::fmt;
use std::fmt::Formatter;
use std::fs;
@@ -31,14 +33,78 @@ impl fmt::Display for LittError {
}
}

fn main() -> Result<(), LittError> {
let cli = Cli::parse();
fn get_first_term(query: &str) -> String {
let parts = query.split(' ').collect::<Vec<_>>();
if let Some(first_str) = parts.first() {
if let Some(stripped) = first_str.strip_prefix('\"') {
return stripped.to_string();
}
first_str.to_string()
} else {
"".to_string()
}
}

fn main() -> Result<(), LittError> {
let mut index_tracker = match IndexTracker::create(".litt".into()) {
Ok(index_tracker) => index_tracker,
Err(e) => return Err(LittError(e.to_string())),
};

// Check for fast last-number access
let args: Vec<String> = env::args().collect();
let first_arg_option = args.get(1);
if let Some(first_arg) = first_arg_option {
if let Ok(last_result) = &first_arg.trim().parse::<u32>() {
let fast_results = match index_tracker.load_fast_results() {
Ok(fast_results) => fast_results,
Err(e) => return Err(LittError(e.to_string())),
};
let path = fast_results
.get(last_result)
.expect("Number not in last results");
println!("Got path: {}", path.0);
let mut cmd = std::process::Command::new("zathura");
cmd.arg(&path.0)
.arg("-P")
.arg(&path.1.to_string())
.arg("-f")
.arg(&path.2);

let zathura_was_successful = match cmd.status() {
Ok(status) => match status.code() {
None => false,
Some(code) => code == 0,
},
Err(_) => false,
};

if !zathura_was_successful {
println!(
"Consider installing zathura so we can open the PDF on the correct page for you.\n\
Using standard system PDF viewer..."
);
#[cfg(unix)]
std::process::Command::new("open")
.arg(&path.0)
.spawn()
.map_err(|e| LittError(e.to_string()))?;

#[cfg(windows)]
std::process::Command::new("cmd")
.arg("/c")
.arg("start")
.arg(&path.0)
.spawn()
.map_err(|e| LittError(e.to_string()))?;
}

return Ok(());
}
}

let cli = Cli::parse();

// everything that does not require litt index

// Print existing litt indices
@@ -127,28 +193,32 @@ fn main() -> Result<(), LittError> {
// update existing index
if cli.update {
println!("Updating index \"{}\".", index_name);
let old_num_docs = index.searcher().num_docs();
let start = Instant::now();
if let Err(e) = index.add_all_pdf_documents() {
return Err(LittError(e.to_string()));
}
println!(
"Update done. Now: {} document pages in {:?}",
"Update done. Successfully indexed {} new document pages in {:?}. Now {} document pages.",
index.searcher().num_docs()-old_num_docs,
start.elapsed(),
index.searcher().num_docs(),
start.elapsed()
);
return Ok(());
}
// reload existing index
if cli.reload {
println!("Reloading index \"{}\".", index_name);
let old_num_docs = index.searcher().num_docs();
let start = Instant::now();
if let Err(e) = index.reload() {
return Err(LittError(e.to_string()));
}
println!(
"Reload done. Successfully indexed {} document pages in {:?}",
"Reload done. Successfully indexed {} new document pages in {:?}. Now {} document pages.",
index.searcher().num_docs()-old_num_docs,
start.elapsed(),
index.searcher().num_docs(),
start.elapsed()
);
return Ok(());
}
@@ -168,7 +238,10 @@ fn main() -> Result<(), LittError> {
Err(e) => return Err(LittError(e.to_string())),
};
println!("Found results in {} document(s):", results.len());
let mut fast_store_results: HashMap<u32, (String, u32, String)> = HashMap::new();
let first_query_term = get_first_term(&cli.term);
let mut counter = 0;
let mut res_counter = 1;
for (title, pages) in &results {
counter += 1;
let title_name = Path::new(title)
@@ -179,18 +252,31 @@ fn main() -> Result<(), LittError> {
let index_path = index_path.join(title);
println!(" ({})", index_path.to_string_lossy().italic());
for page in pages {
fast_store_results.insert(
res_counter,
(
index_path.to_string_lossy().to_string(),
page.page,
first_query_term.clone(),
),
);
let preview = match search.get_preview(page, &cli.term) {
Ok(preview) => preview,
Err(e) => return Err(LittError(e.to_string())),
};
println!(
" - p.{}: \"{}\", (score: {})",
" - [{}] p.{}: \"{}\", (score: {})",
res_counter,
page.page,
preview.italic(),
page.score
);
res_counter += 1;
}
}
if let Err(e) = index_tracker.store_fast_results(fast_store_results) {
return Err(LittError(e.to_string()));
}
println!(
"{} results from {} pages in {:?}.",
results.values().fold(0, |acc, list| acc + list.len()),
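The [n] labels printed above are what the fast-open path in main() looks up later, and get_first_term supplies the term that zathura forward-searches. Its contract — return the first whitespace-separated token, stripping a leading double quote from phrase queries — can be pinned down with a small test (a sketch, not part of the commit):

#[cfg(test)]
mod get_first_term_tests {
    use super::get_first_term;

    #[test]
    fn returns_first_token_and_strips_leading_quote() {
        assert_eq!(get_first_term("fast open"), "fast");
        assert_eq!(get_first_term("\"fast open\""), "fast");
        assert_eq!(get_first_term(""), "");
    }
}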
41 changes: 35 additions & 6 deletions litt/src/tracker.rs
@@ -6,6 +6,7 @@ use std::{fmt, fs};
use litt_shared::LITT_DIRECTORY_NAME;

const INDICIES_FILENAME: &str = "indices.json";
const FAST_RESULTS_FILENAME: &str = "last_results.json";

#[derive(Debug)]
pub enum LittIndexTrackerError {
@@ -40,7 +41,7 @@ impl IndexTracker {
pub fn create(_path: String) -> Result<Self> {
let base_path = PathBuf::new().join("~/").join(LITT_DIRECTORY_NAME);
let litt_root = shellexpand::tilde(&base_path.to_string_lossy().to_string()).to_string();
let litt_json = shellexpand::tilde(
let json_path = shellexpand::tilde(
&base_path
.join(INDICIES_FILENAME)
.to_string_lossy()
@@ -49,9 +50,9 @@ impl IndexTracker {
.to_string();

// Check if stored litt indices json already exists
if Path::new(&litt_json).exists() {
if Path::new(&json_path).exists() {
// load json
let data = fs::read_to_string(litt_json)
let data = fs::read_to_string(json_path)
.map_err(|e| LittIndexTrackerError::UnknownError(e.to_string()))?;
let indices: HashMap<String, PathBuf> = serde_json::from_str(&data)
.map_err(|e| LittIndexTrackerError::UnknownError(e.to_string()))?;
@@ -109,15 +110,43 @@ impl IndexTracker {
Ok(self.indices.clone())
}

pub fn store_fast_results(
&self,
fast_results: HashMap<u32, (String, u32, String)>,
) -> Result<()> {
let base_path = PathBuf::new()
.join("~/")
.join(LITT_DIRECTORY_NAME)
.join(FAST_RESULTS_FILENAME);
let json_path = shellexpand::tilde(&base_path.to_string_lossy().to_string()).to_string();
let json_str = serde_json::to_string(&fast_results)
.map_err(|e| LittIndexTrackerError::SaveError(e.to_string()))?;
std::fs::write(json_path, json_str)
.map_err(|e| LittIndexTrackerError::SaveError(e.to_string()))
}

pub fn load_fast_results(&self) -> Result<HashMap<u32, (String, u32, String)>> {
let base_path = PathBuf::new()
.join("~/")
.join(LITT_DIRECTORY_NAME)
.join(FAST_RESULTS_FILENAME);
let json_path = shellexpand::tilde(&base_path.to_string_lossy().to_string()).to_string();
let data = fs::read_to_string(json_path)
.map_err(|e| LittIndexTrackerError::UnknownError(e.to_string()))?;
let fast_results: HashMap<u32, (String, u32, String)> = serde_json::from_str(&data)
.map_err(|e| LittIndexTrackerError::UnknownError(e.to_string()))?;
Ok(fast_results)
}

fn store_indicies(&self) -> Result<()> {
let base_path = PathBuf::new()
.join("~/")
.join(LITT_DIRECTORY_NAME)
.join(INDICIES_FILENAME);
let litt_json = shellexpand::tilde(&base_path.to_string_lossy().to_string()).to_string();
let indices_str = serde_json::to_string(&self.indices)
let json_path = shellexpand::tilde(&base_path.to_string_lossy().to_string()).to_string();
let json_str = serde_json::to_string(&self.indices)
.map_err(|e| LittIndexTrackerError::SaveError(e.to_string()))?;
std::fs::write(litt_json, indices_str)
std::fs::write(json_path, json_str)
.map_err(|e| LittIndexTrackerError::SaveError(e.to_string()))
}
}
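store_fast_results and load_fast_results round-trip the numbered hits through last_results.json next to indices.json. With serde_json, a HashMap<u32, (String, u32, String)> serializes as an object keyed by the result number (as a string) whose values are [path, page, first_query_term] arrays — a small illustration (a sketch; the path is made up):

use std::collections::HashMap;

fn main() -> Result<(), serde_json::Error> {
    let mut fast_results: HashMap<u32, (String, u32, String)> = HashMap::new();
    fast_results.insert(1, ("~/books/example.pdf".into(), 12, "fast".into()));
    // Prints something like: {"1":["~/books/example.pdf",12,"fast"]}
    println!("{}", serde_json::to_string(&fast_results)?);
    Ok(())
}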
2 changes: 2 additions & 0 deletions makefile
@@ -0,0 +1,2 @@
install:
cp -f target/debug/litt /usr/local/bin/
1 change: 0 additions & 1 deletion search/src/search.rs
@@ -129,7 +129,6 @@ impl Search {
"Fatal: Field \"path\" could not be read as text!",
)))?;
let text = fs::read_to_string(path).map_err(|e| SearchError(e.to_string()))?;
// println!("get_preview: got body: {}", text);

// Generate snippet
let snippet = snippet_generator.snippet(&text);
