From 1998fff8f44697f201747ab3671a5960b4a76253 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=20M=C3=BCller?= <22560219+rmuller-ml@users.noreply.github.com>
Date: Mon, 8 Jan 2024 09:27:01 -0500
Subject: [PATCH] Additional chunks based on references and definitions in
 code and proc tools (#1180)

* saving work

symbol tool
add metadata to code tool chunks
showing metadata
add github to repo
filter empty symbols
fixing some bugs, dedup
fmt
adding aliases
add code to symbol tool
new prompt
add chunk to exchanges
adding symbol ids
save work
now working
adding filter
save work
saving work
remove function call
get ref def outside of exchange
impl agent refdef to chunks
clean chunk generation
filter_chunks
filter_chunks in new file
fix repo_ref
add to proc
delete symbol tool
warnings
adding constants
relative_path field
revert to text user query
simplified repo_ref to accept github and local
remove filter action, make a function for llm call
better naming and adding comments
fmt
adding some error handling

* fix error handling

* clippy

* fmt

* wip: refactor changes

* using snippet from file_symbol

* adding repo_ref, reverting empty filter, adding filter comments

* tokeninfo response not pub anymore

* simplifying how we update path in a chunk

* symbol alias instead of chunk alias

* moving prompt to prompts.rs

* reverting filter

* fix unwrap in get_token_info

* adding some comments

* typo

* better doc

* make start_byte/end_byte usize and move function call def into prompts.rs

* log selected symbol

* unwrap_or_default

* revert Cargo.lock

* latency

* use all hoverable symbols, tokeninfo only for the selected one

* no function_calling

* filter out local symbols

* removing unused functions

* clippy filter_map

* prompt improved

* fix explanation and add a const

* reformat prompt and inline llm call

* remove dbg! calls

---------

Co-authored-by: Gabriel Gordon-Hall
Co-authored-by: Akshay
---
 server/bleep/src/agent.rs                      |   1 +
 server/bleep/src/agent/exchange.rs             |   4 +-
 server/bleep/src/agent/prompts.rs              |  19 +-
 server/bleep/src/agent/symbol.rs               | 323 ++++++++++++++++++
 server/bleep/src/agent/tools/answer.rs         |   4 +
 server/bleep/src/agent/tools/code.rs           |   9 +-
 server/bleep/src/agent/tools/proc.rs           |   6 +
 .../bleep/src/intelligence/code_navigation.rs |  30 +-
 server/bleep/src/webserver.rs                  |   4 +-
 server/bleep/src/webserver/answer.rs           |   2 +
 server/bleep/src/webserver/hoverable.rs        |   6 +-
 server/bleep/src/webserver/intelligence.rs     | 113 +++---
 12 files changed, 462 insertions(+), 59 deletions(-)
 create mode 100644 server/bleep/src/agent/symbol.rs

diff --git a/server/bleep/src/agent.rs b/server/bleep/src/agent.rs
index 93f903f3f1..09a43e61ca 100644
--- a/server/bleep/src/agent.rs
+++ b/server/bleep/src/agent.rs
@@ -27,6 +27,7 @@ const MAX_STEPS: usize = 10;
 pub mod exchange;
 pub mod model;
 pub mod prompts;
+pub mod symbol;
 pub mod transcoder;
 
 /// A collection of modules that each add methods to `Agent`.
diff --git a/server/bleep/src/agent/exchange.rs b/server/bleep/src/agent/exchange.rs
index f7963fa08b..50b079b8ba 100644
--- a/server/bleep/src/agent/exchange.rs
+++ b/server/bleep/src/agent/exchange.rs
@@ -149,14 +149,14 @@ impl SearchStep {
 #[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
 pub struct CodeChunk {
     pub path: String,
-    #[serde(rename = "alias")]
     pub alias: usize,
-    #[serde(rename = "snippet")]
     pub snippet: String,
     #[serde(rename = "start")]
     pub start_line: usize,
     #[serde(rename = "end")]
     pub end_line: usize,
+    pub start_byte: Option<usize>,
+    pub end_byte: Option<usize>,
 }
 
 impl CodeChunk {
diff --git a/server/bleep/src/agent/prompts.rs b/server/bleep/src/agent/prompts.rs
index e6b3dd2e1d..0659931284 100644
--- a/server/bleep/src/agent/prompts.rs
+++ b/server/bleep/src/agent/prompts.rs
@@ -99,7 +99,7 @@ pub fn system<'a>(paths: impl IntoIterator<Item = &'a str>) -> String {
 - DO NOT call a function that you've used before with the same arguments
 - DO NOT assume the structure of the codebase, or the existence of files or folders
 - Your queries to functions.code or functions.path should be significantly different to previous queries
-- Call functions.none with paths that you are confident will help answer the user's query
+- Call functions.none with paths that you are confident will help answer the user's query; include paths containing the information needed for a complete answer, including definitions and references
 - If the user query is general (e.g. 'What does this do?', 'What is this repo?') look for READMEs, documentation and entry points in the code (main files, index files, api files etc.)
 - If the user is referring to, or asking for, information that is in your history, call functions.none
 - If after attempting to gather information you are still unsure how to answer the query, call functions.none
@@ -386,6 +386,23 @@ Here is the full context for reference:
     )
 }
 
+pub fn symbol_classification_prompt(snippets: &str) -> String {
+    format!(
+        r#"{snippets}
+
+Above are code chunks and non-local symbols that have been extracted from the chunks. Each chunk is followed by an enumerated list of symbols that it contains. Given a user query, select the symbol which is most relevant to it, e.g. the references or definition of this symbol would help somebody answer the query. Symbols which are language builtins or which come from third party libraries are unlikely to be helpful.
+
+Do not answer with the symbol name, use the symbol index.
+
+### Examples ###
+Q: how does ranking work?
+23
+
+Q: which function makes an api call
+3"#
+    )
+}
+
 pub fn hypothetical_document_prompt(query: &str) -> String {
     format!(
         r#"Write a code snippet that could hypothetically be returned by a code search engine as the answer to the query: {query}
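The prompt above fixes a simple contract: the model sees every chunk followed by globally enumerated symbols, and must reply with a bare index. A minimal, self-contained sketch of that contract (illustrative symbol names, not bleep's real types; the real rendering and parsing live in symbol.rs below):

    fn main() {
        // Each chunk is rendered as its path plus snippet, followed by
        // "index: name" lines; indices are unique across all chunks.
        let symbols = ["get_token_info", "Snipper", "search_nav"];
        for (i, name) in symbols.iter().enumerate() {
            println!("{i}: {name}");
        }

        // The model must answer with a bare index such as "1", never a name.
        let response = "1";
        // Mirrors filter_symbols' fallback: an unparsable reply selects index 0.
        let selected = response.trim().parse::<usize>().unwrap_or(0);
        match symbols.get(selected) {
            // An in-range index resolves to the chosen symbol.
            Some(name) => println!("selected: {name}"),
            // filter_symbols instead surfaces SymbolError::OutOfBounds here.
            None => eprintln!("index out of bounds"),
        }
    }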
diff --git a/server/bleep/src/agent/symbol.rs b/server/bleep/src/agent/symbol.rs
new file mode 100644
index 0000000000..f279724e82
--- /dev/null
+++ b/server/bleep/src/agent/symbol.rs
@@ -0,0 +1,323 @@
+use crate::agent::{exchange::CodeChunk, Agent};
+use crate::intelligence::{code_navigation::FileSymbols, Language, TSLanguage};
+use crate::llm_gateway;
+use crate::webserver::intelligence::{get_token_info, TokenInfoRequest};
+use anyhow::{Context, Result};
+use tracing::log::{debug, info, warn};
+
+use super::prompts::symbol_classification_prompt;
+
+pub struct ChunkWithHoverableSymbols {
+    pub chunk: CodeChunk,
+    pub symbols: Vec<HoverableSymbol>,
+}
+
+/// This helps the code and proc tools return related chunks based on references and definitions.
+/// `get_related_chunks` receives a list of chunks from code or proc search and returns `MAX_CHUNKS` related chunks.
+/// For each input chunk, we extract all symbols (variables, function names, structs...).
+/// Then we disregard symbols that are defined in the same file, using the scope graph.
+/// We then pick ONE symbol with a classifier (`filter_symbols`), which has access to the user query, the original chunks and the filtered list of symbols.
+/// The selected symbol may be present in many files, one or more times in each.
+/// We extract the surrounding code (up to `NUMBER_CHUNK_LINES` lines) for each occurrence and pick `MAX_CHUNKS` occurrences/chunks.
+
+impl Agent {
+    pub async fn extract_hoverable_symbols(
+        &self,
+        chunk: CodeChunk,
+    ) -> Result<ChunkWithHoverableSymbols> {
+        // get hoverable elements
+        let document = self
+            .app
+            .indexes
+            .file
+            .by_path(&self.repo_ref, &chunk.path, None)
+            .await?
+            .with_context(|| format!("failed to read path: {}", &chunk.path))?;
+
+        let graph = document
+            .symbol_locations
+            .scope_graph()
+            .with_context(|| format!("no scope graph for file: {}", &chunk.path))?;
+
+        let hoverable_ranges = document
+            .hoverable_ranges()
+            .ok_or_else(|| anyhow::anyhow!("no hoverable ranges"))?;
+
+        let mut symbols = hoverable_ranges
+            .into_iter()
+            .filter(|range| {
+                (range.start.byte >= chunk.start_byte.unwrap_or_default())
+                    && (range.start.byte < chunk.end_byte.unwrap_or_default())
+            })
+            .filter(|range| {
+                // if this node can be resolved locally in the scope-graph, omit it
+                if let Some(node_by_range) = graph.node_by_range(range.start.byte, range.end.byte) {
+                    if graph.is_reference(node_by_range) || graph.is_definition(node_by_range) {
+                        return false;
+                    }
+                }
+                true
+            })
+            .map(|range| HoverableSymbol {
+                name: chunk.snippet[(range.start.byte - chunk.start_byte.unwrap_or_default())
+                    ..(range.end.byte - chunk.start_byte.unwrap_or_default())]
+                    .to_string(),
+                token_info_request: TokenInfoRequest {
+                    relative_path: chunk.path.clone(),
+                    repo_ref: self.repo_ref.display_name(),
+                    branch: None,
+                    start: range.start.byte,
+                    end: range.end.byte,
+                },
+                path: chunk.path.clone(),
+            })
+            .collect::<Vec<_>>();
+
+        symbols.sort_by(|a, b| a.name.cmp(&b.name));
+        symbols.dedup_by(|a, b| a.name == b.name);
+
+        debug!(
+            "Attached {} symbols: {:?}",
+            symbols.len(),
+            symbols.iter().map(|s| s.name.as_str()).collect::<Vec<_>>()
+        );
+
+        Ok(ChunkWithHoverableSymbols {
+            chunk: chunk.clone(),
+            symbols,
+        })
+    }
+
+    pub async fn expand_symbol_into_chunks(&self, symbol: Symbol) -> Vec<CodeChunk> {
+        // each symbol may be in multiple files and have multiple occurrences in each file
+        symbol
+            .related_symbols
+            .iter()
+            .flat_map(|file_symbols| {
+                let filename = file_symbols.file.clone();
+
+                file_symbols
+                    .data
+                    .iter()
+                    .map(|occurrence| CodeChunk {
+                        path: filename.clone(),
+                        alias: 0,
+                        snippet: occurrence.snippet.data.clone(),
+                        start_line: occurrence.snippet.line_range.start,
+                        end_line: occurrence.snippet.line_range.end,
+                        start_byte: None,
+                        end_byte: None,
+                    })
+                    .collect::<Vec<_>>()
+            })
+            .collect::<Vec<_>>()
+    }
+
+    pub async fn filter_symbols(
+        &self,
+        query: &str,
+        chunks_with_symbols: Vec<ChunkWithHoverableSymbols>,
+    ) -> Result<Symbol, SymbolError> {
+        if chunks_with_symbols.is_empty() {
+            return Err(SymbolError::ListEmpty);
+        }
+
+        const NUMBER_CHUNK_LINES: usize = 10;
+
+        // we have multiple chunks and each chunk may have multiple symbols;
+        // assign a unique alias (i) per symbol
+        let mut i: i32 = -1;
+        let symbols = chunks_with_symbols
+            .into_iter()
+            .map(|chunk_with_symbol| {
+                (
+                    chunk_with_symbol.chunk,
+                    chunk_with_symbol
+                        .symbols
+                        .into_iter()
+                        .map(|symbol| {
+                            i += 1;
+                            (i, symbol)
+                        })
+                        .collect::<Vec<_>>(),
+                )
+            })
+            .collect::<Vec<_>>();
+
+        // Classifier
+
+        // context
+        let chunks_string = symbols
+            .iter()
+            .filter(|(_, s)| !s.is_empty())
+            .map(|(c, s)| {
+                let symbols_string = s
+                    .iter()
+                    .map(|(i, refdef)| format!("{}: {}", i, refdef.name))
+                    .collect::<Vec<_>>()
+                    .join("\n");
+
+                format!(
+                    "```{}\n{}```\n\n{}",
+                    c.path.clone(),
+                    c.snippet.clone(),
+                    symbols_string
+                )
+            })
+            .collect::<Vec<_>>()
+            .join("\n\n");
+
+        // instruction
+        let messages = vec![
+            llm_gateway::api::Message::system(&symbol_classification_prompt(&chunks_string)),
+            llm_gateway::api::Message::user(query),
+        ];
+
+        let response = match self
+            .llm_gateway
+            .clone()
+            .model("gpt-4-0613")
+            .temperature(0.0)
+            .chat(&messages, None)
+            .await
+        {
+            Ok(response) => response,
+            Err(e) => {
+                warn!(
+                    "Symbol classifier llm call failed, picking the first symbol: {}",
+                    e
+                );
+                "0".into()
+            }
+        };
+
+        let selected_symbol = match response.as_str().parse::<i32>() {
+            Ok(symbol) => symbol,
+            Err(e) => {
+                warn!("Parsing to integer failed, picking the first symbol: {}", e);
+                0
+            }
+        };
+
+        // finding symbol metadata
+        match symbols
+            .into_iter()
+            .flat_map(|(_, symbol_with_alias)| symbol_with_alias)
+            .find(|(alias, _)| *alias == selected_symbol)
+        {
+            Some((_alias, symbol_metadata)) => Ok(Symbol {
+                name: symbol_metadata.name,
+                related_symbols: {
+                    let document = self
+                        .app
+                        .indexes
+                        .file
+                        .by_path(&self.repo_ref, &symbol_metadata.path, None)
+                        .await
+                        .unwrap()
+                        .unwrap();
+
+                    let all_docs = {
+                        let associated_langs =
+                            match document.lang.as_deref().map(TSLanguage::from_id) {
+                                Some(Language::Supported(config)) => config.language_ids,
+                                _ => &[],
+                            };
+                        self.app
+                            .indexes
+                            .file
+                            .by_repo(&self.repo_ref, associated_langs.iter(), None)
+                            .await
+                    };
+
+                    get_token_info(
+                        symbol_metadata.token_info_request,
+                        &self.repo_ref,
+                        self.app.indexes.clone(),
+                        &document,
+                        &all_docs,
+                        Some(0),
+                        Some(NUMBER_CHUNK_LINES),
+                    )
+                    .await
+                    .unwrap()
+                    .into_iter()
+                    .filter(|file_symbol| file_symbol.file != symbol_metadata.path)
+                    .collect::<Vec<_>>()
+                },
+            }),
+            _ => Err(SymbolError::OutOfBounds),
+        }
+    }
+
+    pub async fn get_related_chunks(&mut self, chunks: Vec<CodeChunk>) -> Vec<CodeChunk> {
+        const MAX_CHUNKS: usize = 3;
+
+        // get symbols with ref/defs for each chunk
+        let chunks_with_symbols = futures::future::join_all(
+            chunks
+                .iter()
+                .filter(|c| !c.is_empty())
+                .map(|c| self.extract_hoverable_symbols(c.clone())), // TODO: Log failure
+        )
+        .await
+        .into_iter()
+        .filter_map(Result::ok)
+        .collect();
+
+        // get original user query
+        let user_query = self.last_exchange().query.target().unwrap();
+
+        // select one symbol
+        let selected_symbol = match self.filter_symbols(&user_query, chunks_with_symbols).await {
+            Ok(selected_symbol) => {
+                info!("Selected symbol: {}", selected_symbol.name);
+                selected_symbol
+            }
+            Err(e) => {
+                info!("Returning no extra chunks: {}", e);
+                return Vec::new();
+            }
+        };
+
+        // take MAX_CHUNKS chunks, update path aliases, update exchange chunks
+        let extra_chunks = self
+            .expand_symbol_into_chunks(selected_symbol)
+            .await
+            .iter()
+            .take(MAX_CHUNKS)
+            .map(|c| {
+                let chunk = CodeChunk {
+                    alias: self.get_path_alias(c.path.as_str()),
+                    ..c.clone()
+                };
+                self.exchanges
+                    .last_mut()
+                    .unwrap()
+                    .code_chunks
+                    .push(chunk.clone());
+                chunk
+            })
+            .collect::<Vec<_>>();
+
+        extra_chunks
+    }
+}
+
+pub struct HoverableSymbol {
+    pub name: String,
+    pub token_info_request: TokenInfoRequest,
+    pub path: String,
+}
+
+pub struct Symbol {
+    pub name: String,
+    pub related_symbols: Vec<FileSymbols>,
+}
+
+#[derive(thiserror::Error, Debug)]
+pub enum SymbolError {
+    #[error("No symbol retrieved in the provided chunks")]
+    ListEmpty,
+    #[error("Selected symbol out of bounds")]
+    OutOfBounds,
+}
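extract_hoverable_symbols above slices each symbol name out of the chunk snippet by rebasing file-absolute byte offsets onto the chunk. A toy demonstration of that arithmetic (hypothetical strings, not the real index types):

    fn main() {
        // A file and a chunk of it, addressed by absolute byte offsets.
        let file = "fn main() { run(); }";
        let (chunk_start, chunk_end) = (12, 18); // snippet "run();"
        let snippet = &file[chunk_start..chunk_end];

        // A hoverable range is also absolute, so both ends are rebased by
        // chunk_start before indexing into the snippet: exactly the
        // range.start.byte - chunk.start_byte arithmetic in the patch.
        let (sym_start, sym_end) = (12, 15); // the identifier "run"
        let name = &snippet[sym_start - chunk_start..sym_end - chunk_start];
        assert_eq!(name, "run");
        println!("extracted symbol: {name}");
    }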
diff --git a/server/bleep/src/agent/tools/answer.rs b/server/bleep/src/agent/tools/answer.rs
index 7c36b475c7..0f0eb4b46b 100644
--- a/server/bleep/src/agent/tools/answer.rs
+++ b/server/bleep/src/agent/tools/answer.rs
@@ -367,6 +367,8 @@ impl Agent {
                     snippet,
                     start_line: span.start,
                     end_line: span.end,
+                    start_byte: None,
+                    end_byte: None,
                 }
             })
             .collect::<Vec<_>>();
@@ -383,6 +385,8 @@ impl Agent {
                 snippet: trimmed_snippet.to_string(),
                 start_line: chunk.start_line,
                 end_line: (chunk.start_line + num_trimmed_lines).saturating_sub(1),
+                start_byte: chunk.start_byte,
+                end_byte: chunk.end_byte,
             }]
         } else {
             code_chunks
diff --git a/server/bleep/src/agent/tools/code.rs b/server/bleep/src/agent/tools/code.rs
index d4ad4e5e94..16caef7c5a 100644
--- a/server/bleep/src/agent/tools/code.rs
+++ b/server/bleep/src/agent/tools/code.rs
@@ -76,6 +76,8 @@ impl Agent {
                 snippet: chunk.text,
                 start_line: chunk.start_line as usize,
                 end_line: chunk.end_line as usize,
+                start_byte: Some(chunk.start_byte as usize),
+                end_byte: Some(chunk.end_byte as usize),
             }
         })
         .collect::<Vec<_>>();
@@ -90,8 +92,13 @@ impl Agent {
             .push(chunk.clone())
         }
 
+        let extra_chunks = self.get_related_chunks(chunks.clone()).await;
+
+        chunks.extend(extra_chunks);
+
         let response = chunks
-            .iter()
+            .clone()
+            .into_iter()
             .filter(|c| !c.is_empty())
             .map(|c| c.to_string())
             .collect::<Vec<_>>()
diff --git a/server/bleep/src/agent/tools/proc.rs b/server/bleep/src/agent/tools/proc.rs
index e55a701482..18410de830 100644
--- a/server/bleep/src/agent/tools/proc.rs
+++ b/server/bleep/src/agent/tools/proc.rs
@@ -55,6 +55,8 @@ impl Agent {
                 snippet: chunk.text,
                 start_line: chunk.start_line as usize,
                 end_line: chunk.end_line as usize,
+                start_byte: Some(chunk.start_byte as usize),
+                end_byte: Some(chunk.end_byte as usize),
             }
         })
         .collect::<Vec<_>>();
@@ -69,6 +71,10 @@ impl Agent {
             .push(chunk.clone())
         }
 
+        let extra_chunks = self.get_related_chunks(chunks.clone()).await;
+
+        chunks.extend(extra_chunks);
+
         let response = chunks
             .iter()
             .filter(|c| !c.is_empty())
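After this change both tools share the same gather, extend, render shape. A condensed sketch of that flow (stand-in types; the real work happens in Agent::get_related_chunks):

    #[derive(Clone)]
    struct Chunk {
        path: String,
        snippet: String,
    }

    impl Chunk {
        fn is_empty(&self) -> bool {
            self.snippet.trim().is_empty()
        }
    }

    // Stand-in for get_related_chunks: at most MAX_CHUNKS extra chunks derived
    // from references/definitions of one classifier-selected symbol.
    fn related_chunks(chunks: &[Chunk]) -> Vec<Chunk> {
        const MAX_CHUNKS: usize = 3;
        chunks
            .iter()
            .take(MAX_CHUNKS)
            .map(|c| Chunk {
                path: c.path.clone(),
                snippet: format!("// occurrence related to {}", c.path),
            })
            .collect()
    }

    fn main() {
        let mut chunks = vec![Chunk {
            path: "src/lib.rs".into(),
            snippet: "fn run() {}".into(),
        }];

        // The two new lines in code.rs/proc.rs: fetch related chunks, append them.
        let extra_chunks = related_chunks(&chunks);
        chunks.extend(extra_chunks);

        // Response rendering is unchanged: skip empties, join with blank lines.
        let response = chunks
            .iter()
            .filter(|c| !c.is_empty())
            .map(|c| format!("{}\n{}", c.path, c.snippet))
            .collect::<Vec<_>>()
            .join("\n\n");
        println!("{response}");
    }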
diff --git a/server/bleep/src/intelligence/code_navigation.rs b/server/bleep/src/intelligence/code_navigation.rs
index b3e8a878b5..2284d1ea5c 100644
--- a/server/bleep/src/intelligence/code_navigation.rs
+++ b/server/bleep/src/intelligence/code_navigation.rs
@@ -50,6 +50,7 @@ pub struct CodeNavigationContext<'a, 'b> {
     pub token: Token<'a>,
     pub all_docs: &'b [ContentDocument],
     pub source_document_idx: usize,
+    pub snipper: Option<Snipper>,
 }
 
 impl<'a, 'b> CodeNavigationContext<'a, 'b> {
@@ -134,6 +135,7 @@ impl<'a, 'b> CodeNavigationContext<'a, 'b> {
                         start_byte: source_sg.graph[idx].range().start.byte,
                         end_byte: source_sg.graph[idx].range().end.byte,
                     },
+                    snipper: None,
                 }
                 .local_definitions()
                 .is_none()
@@ -179,6 +181,7 @@ impl<'a, 'b> CodeNavigationContext<'a, 'b> {
             all_docs: std::slice::from_ref(source_document),
             source_document_idx: 0,
             token,
+            snipper: None,
         }
     }
 
@@ -303,7 +306,11 @@ impl<'a, 'b> CodeNavigationContext<'a, 'b> {
             .map(|idx| Occurrence {
                 kind: OccurrenceKind::Definition,
                 range: scope_graph.graph[idx].range(),
-                snippet: to_occurrence(self.source_document(), scope_graph.graph[idx].range()),
+                snippet: to_occurrence(
+                    self.source_document(),
+                    scope_graph.graph[idx].range(),
+                    self.snipper,
+                ),
             })
             .collect::<Vec<_>>();
 
@@ -335,7 +342,7 @@ impl<'a, 'b> CodeNavigationContext<'a, 'b> {
             .map(|idx| Occurrence {
                 kind: OccurrenceKind::Definition,
                 range: scope_graph.graph[idx].range(),
-                snippet: to_occurrence(doc, scope_graph.graph[idx].range()),
+                snippet: to_occurrence(doc, scope_graph.graph[idx].range(), self.snipper),
             })
             .collect::<Vec<_>>();
 
@@ -360,7 +367,11 @@ impl<'a, 'b> CodeNavigationContext<'a, 'b> {
             .map(|idx| Occurrence {
                 kind: OccurrenceKind::Reference,
                 range: scope_graph.graph[idx].range(),
-                snippet: to_occurrence(self.source_document(), scope_graph.graph[idx].range()),
+                snippet: to_occurrence(
+                    self.source_document(),
+                    scope_graph.graph[idx].range(),
+                    self.snipper,
+                ),
             })
             .collect::<Vec<_>>();
 
@@ -396,7 +407,7 @@ impl<'a, 'b> CodeNavigationContext<'a, 'b> {
             .map(|idx| Occurrence {
                 kind: OccurrenceKind::Reference,
                 range: scope_graph.graph[idx].range(),
-                snippet: to_occurrence(doc, scope_graph.graph[idx].range()),
+                snippet: to_occurrence(doc, scope_graph.graph[idx].range(), self.snipper),
             })
             .collect::<Vec<_>>();
 
@@ -418,7 +429,11 @@ impl<'a, 'b> CodeNavigationContext<'a, 'b> {
             .map(|idx| Occurrence {
                 kind: OccurrenceKind::Definition,
                 range: scope_graph.graph[idx].range(),
-                snippet: to_occurrence(self.source_document(), scope_graph.graph[idx].range()),
+                snippet: to_occurrence(
+                    self.source_document(),
+                    scope_graph.graph[idx].range(),
+                    self.snipper,
+                ),
             })
             .collect::<Vec<_>>();
 
@@ -437,11 +452,12 @@ pub struct Token<'a> {
     pub end_byte: usize,
 }
 
-fn to_occurrence(doc: &ContentDocument, range: TextRange) -> Snippet {
+fn to_occurrence(doc: &ContentDocument, range: TextRange, snipper: Option<Snipper>) -> Snippet {
     let src = &doc.content;
     let line_end_indices = &doc.line_end_indices;
     let highlight = range.start.byte..range.end.byte;
-    Snipper::default()
+    snipper
+        .unwrap_or_default()
         .expand(highlight, src, line_end_indices)
         .reify(src, &[])
 }
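The Option<Snipper> threaded through to_occurrence is what lets get_token_info widen each occurrence: Snipper::default().context(0, NUMBER_CHUNK_LINES) reifies up to ten trailing lines instead of just the highlighted one. A toy model of that line expansion (this is not bleep's real Snipper; only the context(before, after) behaviour is imitated):

    struct ToySnipper {
        before: usize,
        after: usize,
    }

    impl ToySnipper {
        fn context(mut self, before: usize, after: usize) -> Self {
            self.before = before;
            self.after = after;
            self
        }

        // expand a byte highlight to whole lines, plus before/after extra lines
        fn expand<'a>(&self, highlight: std::ops::Range<usize>, src: &'a str) -> &'a str {
            let starts: Vec<usize> = std::iter::once(0)
                .chain(src.match_indices('\n').map(|(i, _)| i + 1))
                .collect();
            let line_of = |byte: usize| starts.iter().rposition(|&s| s <= byte).unwrap_or(0);
            let first = line_of(highlight.start).saturating_sub(self.before);
            let last = (line_of(highlight.end) + self.after).min(starts.len() - 1);
            let end = starts.get(last + 1).copied().unwrap_or(src.len());
            &src[starts[first]..end]
        }
    }

    fn main() {
        let src = "fn a() {}\nfn b() { a() }\nfn c() {}\n";
        // Highlight the `a` of the call on line 2; with one line of trailing
        // context the snippet spans lines 2-3, as with Snipper::context(0, 1).
        let snipper = ToySnipper { before: 0, after: 0 }.context(0, 1);
        print!("{}", snipper.expand(19..20, src));
    }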
 #[derive(Serialize)]
-pub(super) struct HoverableResponse {
-    ranges: Vec<TextRange>,
+pub struct HoverableResponse {
+    pub ranges: Vec<TextRange>,
 }
 
 impl super::ApiResponse for HoverableResponse {}
diff --git a/server/bleep/src/webserver/intelligence.rs b/server/bleep/src/webserver/intelligence.rs
index a68ba9b007..c46747a5bf 100644
--- a/server/bleep/src/webserver/intelligence.rs
+++ b/server/bleep/src/webserver/intelligence.rs
@@ -19,19 +19,19 @@ use serde::{Deserialize, Serialize};
 
 /// The request made to the `local-intel` endpoint.
 #[derive(Debug, Deserialize)]
-pub(super) struct TokenInfoRequest {
+pub struct TokenInfoRequest {
     /// The repo_ref of the file of interest
-    repo_ref: String,
+    pub repo_ref: String,
 
     /// The path to the file of interest, relative to the repo root
-    relative_path: String,
+    pub relative_path: String,
 
     /// Branch name to use for the lookup,
-    branch: Option<String>,
+    pub branch: Option<String>,
 
     /// The byte range to look for
-    start: usize,
-    end: usize,
+    pub start: usize,
+    pub end: usize,
 }
 
 /// The response from the `local-intel` endpoint.
@@ -54,19 +54,13 @@ pub(super) async fn handle(
 ) -> Result<impl IntoResponse> {
     let repo_ref = payload.repo_ref.parse::<RepoRef>().map_err(Error::user)?;
 
-    let token = Token {
-        relative_path: payload.relative_path.as_str(),
-        start_byte: payload.start,
-        end_byte: payload.end,
-    };
-
-    let source_document = indexes
+    let source_doc = indexes
         .file
         .by_path(&repo_ref, &payload.relative_path, payload.branch.as_deref())
         .await
         .map_err(Error::user)?
         .ok_or_else(|| Error::user("path not found").with_status(StatusCode::NOT_FOUND))?;
 
-    let lang = source_document.lang.as_deref();
+    let lang = source_doc.lang.as_deref();
     let all_docs = {
         let associated_langs = match lang.map(TSLanguage::from_id) {
             Some(Language::Supported(config)) => config.language_ids,
@@ -82,33 +76,19 @@ pub(super) async fn handle(
             .await
     };
 
-    let source_document_idx = all_docs
-        .iter()
-        .position(|doc| doc.relative_path == payload.relative_path)
-        .ok_or(Error::internal("invalid language"))?;
-
-    let ctx = CodeNavigationContext {
-        token,
-        all_docs: &all_docs,
-        source_document_idx,
-    };
-
-    let data = ctx.token_info();
-    if data.is_empty() {
-        search_nav(
-            Arc::clone(&indexes),
-            &repo_ref,
-            ctx.active_token_text(),
-            ctx.active_token_range(),
-            payload.branch.as_deref(),
-            &source_document,
-        )
-        .await
-        .map(TokenInfoResponse::new)
-        .map(json)
-    } else {
-        Ok(json(TokenInfoResponse { data }))
-    }
+    let symbols = get_token_info(
+        payload,
+        &repo_ref,
+        indexes,
+        &source_doc,
+        &all_docs,
+        None,
+        None,
+    )
+    .await
+    .map_err(Error::internal)?;
+
+    Ok(json(TokenInfoResponse::new(symbols)))
 }
 
 /// The request made to the `related-files` endpoint.
@@ -335,6 +315,51 @@ pub(super) async fn token_value(
     Ok(json(TokenValueResponse { range, content }))
 }
 
+pub async fn get_token_info(
+    params: TokenInfoRequest,
+    repo_ref: &RepoRef,
+    indexes: Arc<Indexes>,
+    source_doc: &ContentDocument,
+    all_docs: &Vec<ContentDocument>,
+    context_before: Option<usize>,
+    context_after: Option<usize>,
+) -> anyhow::Result<Vec<FileSymbols>> {
+    let source_document_idx = all_docs
+        .iter()
+        .position(|doc| doc.relative_path == source_doc.relative_path)
+        .ok_or(anyhow::anyhow!("invalid language"))?;
+
+    let snipper =
+        Some(Snipper::default().context(context_before.unwrap_or(0), context_after.unwrap_or(0)));
+
+    let ctx: CodeNavigationContext<'_, '_> = CodeNavigationContext {
+        token: Token {
+            relative_path: params.relative_path.as_str(),
+            start_byte: params.start,
+            end_byte: params.end,
+        },
+        all_docs,
+        source_document_idx,
+        snipper,
+    };
+
+    let data = ctx.token_info();
+    if data.is_empty() {
+        search_nav(
+            Arc::clone(&indexes),
+            repo_ref,
+            ctx.active_token_text(),
+            ctx.active_token_range(),
+            params.branch.as_deref(),
+            source_doc,
+            snipper,
+        )
+        .await
+    } else {
+        Ok(data)
+    }
+}
+
 async fn search_nav(
     indexes: Arc<Indexes>,
     repo_ref: &RepoRef,
@@ -342,7 +367,8 @@ async fn search_nav(
     payload_range: std::ops::Range<usize>,
     branch: Option<&str>,
     source_document: &ContentDocument,
-) -> Result<Vec<FileSymbols>> {
+    snipper: Option<Snipper>,
+) -> anyhow::Result<Vec<FileSymbols>> {
     use crate::{
         indexes::{reader::ContentReader, DocumentRead},
         query::compiler::trigrams,
@@ -468,7 +494,8 @@ async fn search_nav(
         })
         .unwrap_or_default();
         let highlight = start_byte..end_byte;
-        let snippet = Snipper::default()
+        let snippet = snipper
+            .unwrap_or_default()
             .expand(highlight, &doc.content, &doc.line_end_indices)
             .reify(&doc.content, &[]);
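get_token_info now centralizes a two-tier lookup: precise scope-graph navigation first, trigram text search only when the graph finds nothing, with both paths sharing the same optional Snipper. The control flow in miniature (plain strings standing in for the real FileSymbols values):

    fn token_info(graph_hits: Vec<String>, search_nav: impl FnOnce() -> Vec<String>) -> Vec<String> {
        if graph_hits.is_empty() {
            // fallback: the text-search path, taken only on an empty result
            search_nav()
        } else {
            graph_hits
        }
    }

    fn main() {
        let hits = token_info(Vec::new(), || vec!["search-based occurrence".into()]);
        println!("{hits:?}");
    }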