Skip to content

Commit

Permalink
feat: add query, getSemanticTokens api
Browse files Browse the repository at this point in the history
  • Loading branch information
Myriad-Dreamin committed Nov 3, 2023
1 parent ac13b9e commit 3af05ab
Show file tree
Hide file tree
Showing 12 changed files with 593 additions and 7 deletions.
28 changes: 26 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions compiler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ sha2.workspace = true
flate2.workspace = true
ecow.workspace = true
instant.workspace = true
strum = { version = "0.25.0", features = ["derive"] }

serde.workspace = true
serde_json.workspace = true
Expand Down
9 changes: 9 additions & 0 deletions compiler/src/parser/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
mod modifier_set;
mod semantic_tokens;
// mod token_encode;
mod typst_tokens;

use typst::{diag::FileResult, syntax::Source};

use typst_ts_core::TypstFileId;

pub use semantic_tokens::{
get_semantic_tokens_full, get_semantic_tokens_legend, SemanticToken, SemanticTokensLegend,
};

pub fn reparse(source_id: TypstFileId, prev: Option<Source>, next: String) -> FileResult<Source> {
use dissimilar::Chunk;
match prev {
Expand Down
33 changes: 33 additions & 0 deletions compiler/src/parser/modifier_set.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use std::ops;

use super::typst_tokens::Modifier;

/// A compact set of [`Modifier`]s, stored as a bitmask in a `u32`.
///
/// Every modifier contributes the bits returned by [`Modifier::bitmask`]; the
/// default value has no bits set.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct ModifierSet(u32);

impl ModifierSet {
    /// Creates a set that contains no modifiers (all bits cleared).
    pub fn empty() -> Self {
        Self::default()
    }

    /// Creates a set containing every modifier listed in `modifiers`.
    ///
    /// Masks are combined with bitwise OR, so duplicates are harmless and an
    /// empty slice yields the empty set.
    pub fn new(modifiers: &[Modifier]) -> Self {
        let bits = modifiers
            .iter()
            .copied()
            .map(Modifier::bitmask)
            .fold(0, |bits, mask| bits | mask);
        Self(bits)
    }

    /// Returns the raw bitmask backing this set.
    pub fn bitset(self) -> u32 {
        self.0
    }
}

impl ops::BitOr for ModifierSet {
type Output = Self;

fn bitor(self, rhs: Self) -> Self::Output {
Self(self.0 | rhs.0)
}
}
226 changes: 226 additions & 0 deletions compiler/src/parser/semantic_tokens.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
//! From <https://github.com/nvarner/typst-lsp/blob/cc7bad9bd9764bfea783f2fab415cb3061fd8bff/src/server/semantic_tokens/mod.rs>
use strum::IntoEnumIterator;
use typst::syntax::{ast, LinkedNode, Source, SyntaxKind};

use super::modifier_set::ModifierSet;
use super::typst_tokens::{Modifier, TokenType};

/// Names of the token types and token modifiers used by the semantic tokens
/// in this module.
///
/// Serialized with camelCase keys: `tokenTypes` and `tokenModifiers`.
#[derive(serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SemanticTokensLegend {
    /// Names of the token types.
    pub token_types: Vec<String>,
    /// Names of the token modifiers.
    pub token_modifiers: Vec<String>,
}

/// Builds the legend for the semantic tokens produced by this module.
///
/// The legend lists the static name of every [`TokenType`] and every
/// [`Modifier`], in the order their `iter()` yields them.
pub fn get_semantic_tokens_legend() -> SemanticTokensLegend {
    let token_types: Vec<String> = TokenType::iter()
        .map(|kind| Into::<&'static str>::into(kind).to_owned())
        .collect();

    let token_modifiers: Vec<String> = Modifier::iter()
        .map(|modifier| Into::<&'static str>::into(modifier).to_owned())
        .collect();

    SemanticTokensLegend {
        token_types,
        token_modifiers,
    }
}

/// Tokenizes the whole `source` and returns its tokens with relative
/// positions.
///
/// `tokenize_tree` yields tokens carrying absolute `(line, character)`
/// positions. This function rewrites them in place: `delta_line` becomes the
/// distance to the previous token's line, and `delta_start_character` becomes
/// the distance to the previous token's start when both are on the same line
/// (otherwise it stays the absolute column). The first token is measured from
/// `(0, 0)`.
pub fn get_semantic_tokens_full(source: &Source) -> Vec<SemanticToken> {
    let root = LinkedNode::new(source.root());
    let mut tokens = tokenize_tree(source, &root, ModifierSet::empty());

    // Absolute position of the previously visited token.
    let (mut prev_line, mut prev_column) = (0, 0);
    for token in &mut tokens {
        // Remember the absolute position before it is overwritten.
        let (line, column) = (token.delta_line, token.delta_start_character);

        token.delta_line = line - prev_line;
        if line == prev_line {
            token.delta_start_character = column - prev_column;
        }

        prev_line = line;
        prev_column = column;
    }

    tokens
}

/// Produces the token for `node` itself, without looking at its descendants.
///
/// The token type comes from `token_from_node` when it returns one. If it
/// returns nothing, a node without children is tokenized as
/// [`TokenType::Text`], while a node with children produces no token.
fn tokenize_single_node(
    ctx: &Source,
    node: &LinkedNode,
    modifiers: ModifierSet,
) -> Option<SemanticToken> {
    let token_type = match token_from_node(node) {
        Some(kind) => kind,
        // Leaves without a more specific type default to plain text.
        None if node.children().next().is_none() => TokenType::Text,
        None => return None,
    };

    Some(SemanticToken::new(ctx, token_type, modifiers, node))
}

/// Tokenizes `root` and all of its descendants in pre-order.
///
/// The modifiers of `root` are merged with `parent_modifiers` and passed down
/// to every descendant. The token for `root` (if any) comes first, followed by
/// the tokens of each child subtree in order. Children are visited whether or
/// not `root` itself produced a token.
fn tokenize_tree(
    ctx: &Source,
    root: &LinkedNode<'_>,
    parent_modifiers: ModifierSet,
) -> Vec<SemanticToken> {
    let modifiers = parent_modifiers | modifiers_from_node(root);

    let mut tokens = Vec::new();
    tokens.extend(tokenize_single_node(ctx, root, modifiers));
    for child in root.children() {
        tokens.extend(tokenize_tree(ctx, &child, modifiers));
    }
    tokens
}

/// A single semantic token described by five `u32` values.
///
/// Right after [`SemanticToken::new`], `delta_line` and
/// `delta_start_character` hold the absolute position of the token;
/// `get_semantic_tokens_full` later rewrites them relative to the preceding
/// token.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SemanticToken {
    /// Line of the token (relative to the previous token once delta-encoded).
    pub delta_line: u32,
    /// Start column of the token in UTF-16 code units (relative to the
    /// previous token's start when both are on the same line, once
    /// delta-encoded).
    pub delta_start_character: u32,
    /// Length of the token text in UTF-16 code units.
    pub length: u32,
    /// The token's `TokenType`, cast to `u32`.
    pub token_type: u32,
    /// Bitmask of the modifiers applied to the token.
    pub token_modifiers: u32,
}

impl SemanticToken {
    /// Builds the token for `node` with an absolute position.
    ///
    /// `delta_line` and `delta_start_character` are set to the line and
    /// UTF-16 column of the node's start offset within `ctx`. `length` is the
    /// number of UTF-16 code units in the node's full text, including the
    /// text of its children.
    pub fn new(
        ctx: &Source,
        token_type: TokenType,
        modifiers: ModifierSet,
        node: &LinkedNode,
    ) -> Self {
        // `into_text` consumes the node, hence the clone.
        let text = node.get().clone().into_text();
        let (line, column) = offset_to_position(node.offset(), ctx);
        let utf16_len = text.encode_utf16().count();

        Self {
            delta_line: line,
            delta_start_character: column,
            length: utf16_len as u32,
            token_type: token_type as u32,
            token_modifiers: modifiers.bitset(),
        }
    }
}

/// Returns the [`Modifier`]s that `node` introduces for itself and its
/// descendants.
///
/// Only the node's own kind is inspected; modifiers inherited from ancestors
/// are not included, so callers must combine the result with the parent's set.
fn modifiers_from_node(node: &LinkedNode) -> ModifierSet {
    let modifier = match node.kind() {
        SyntaxKind::Emph => Modifier::Emph,
        SyntaxKind::Strong => Modifier::Strong,
        SyntaxKind::Math | SyntaxKind::Equation => Modifier::Math,
        _ => return ModifierSet::empty(),
    };

    ModifierSet::new(&[modifier])
}

/// Maps a node to the [`TokenType`] that best describes it, based on its
/// syntax kind and, for a few kinds, its parent's kind.
///
/// Returns `None` when no token type more specific than `Text` applies.
///
/// In this file, `tokenize_single_node` falls back to `Text` for childless
/// nodes when this returns `None`, and `tokenize_tree` descends into a node's
/// children regardless of what is returned here.
fn token_from_node(node: &LinkedNode) -> Option<TokenType> {
    use SyntaxKind::*;

    let token_type = match node.kind() {
        // The meaning of `Star` and `Underscore` depends on the parent node.
        Star => match node.parent_kind() {
            Some(Strong) => TokenType::Punctuation,
            Some(ModuleImport) => TokenType::Operator,
            _ => return Option::None,
        },
        Underscore => match node.parent_kind() {
            Some(Emph) => TokenType::Punctuation,
            Some(MathAttach) => TokenType::Operator,
            _ => return Option::None,
        },

        MathIdent | Ident => token_from_ident(node),
        // A hash takes the token type of the expression that follows it.
        Hash => return token_from_hashtag(node),

        LeftBrace | RightBrace | LeftBracket | RightBracket | LeftParen | RightParen | Comma
        | Semicolon | Colon => TokenType::Punctuation,
        Linebreak | Escape | Shorthand => TokenType::Escape,
        Link => TokenType::Link,
        Raw => TokenType::Raw,
        Label => TokenType::Label,
        RefMarker => TokenType::Ref,
        Heading | HeadingMarker => TokenType::Heading,
        ListMarker | EnumMarker | TermMarker => TokenType::ListMarker,
        MathAlignPoint | Plus | Minus | Slash | Hat | Dot | Eq | EqEq | ExclEq | Lt | LtEq | Gt
        | GtEq | PlusEq | HyphEq | StarEq | SlashEq | Dots | Arrow | Not | And | Or => {
            TokenType::Operator
        }
        Dollar => TokenType::Delimiter,
        // Here `None` is `SyntaxKind::None` because of the glob import above.
        None | Auto | Let | Show | If | Else | For | In | While | Break | Continue | Return
        | Import | Include | As | Set => TokenType::Keyword,
        Bool => TokenType::Bool,
        Int | Float | Numeric => TokenType::Number,
        Str => TokenType::String,
        LineComment | BlockComment => TokenType::Comment,
        Error => TokenType::Error,

        // Spelled out to disambiguate from `SyntaxKind::None`.
        _ => return Option::None,
    };

    Some(token_type)
}

// TODO: differentiate also using tokens in scope, not just context
/// Decides from the surrounding syntax whether `ident` should be treated as a
/// function name.
///
/// This is the case when the leaf following `ident` is either a left
/// parenthesis whose parent is `Args` or `Params`, or a left bracket whose
/// parent is a `ContentBlock`. Returns `false` when there is no next leaf.
fn is_function_ident(ident: &LinkedNode) -> bool {
    let Some(following) = ident.next_leaf() else {
        return false;
    };

    match (following.kind(), following.parent_kind()) {
        (SyntaxKind::LeftParen, Some(SyntaxKind::Args | SyntaxKind::Params)) => true,
        (SyntaxKind::LeftBracket, Some(SyntaxKind::ContentBlock)) => true,
        _ => false,
    }
}

/// Classifies an identifier node: [`TokenType::Function`] when
/// `is_function_ident` accepts it, [`TokenType::Interpolated`] otherwise.
fn token_from_ident(ident: &LinkedNode) -> TokenType {
    match is_function_ident(ident) {
        true => TokenType::Function,
        false => TokenType::Interpolated,
    }
}

/// Finds the leftmost leaf of the expression that follows `hashtag`.
///
/// Returns `None` when `hashtag` has no next sibling, when that sibling is not
/// an [`ast::Expr`] for which `hash()` is true, or when it has no leftmost
/// leaf.
fn get_expr_following_hashtag<'a>(hashtag: &LinkedNode<'a>) -> Option<LinkedNode<'a>> {
    let sibling = hashtag.next_sibling()?;

    let is_hash_expr = sibling
        .cast::<ast::Expr>()
        .map_or(false, |expr| expr.hash());
    if !is_hash_expr {
        return None;
    }

    sibling.leftmost_leaf()
}

fn token_from_hashtag(hashtag: &LinkedNode) -> Option<TokenType> {
get_expr_following_hashtag(hashtag)
.as_ref()
.and_then(token_from_node)
}

/// Converts a byte offset in `typst_source` into a `(line, column)` pair,
/// where the column is measured in UTF-16 code units from the start of the
/// line.
///
/// # Panics
///
/// Panics if any of the `Source` lookups fails for `typst_offset`.
fn offset_to_position(typst_offset: usize, typst_source: &Source) -> (u32, u32) {
    // NOTE(review): the original comment pointed to
    // `lsp_to_typst::position_to_offset` for background; that function is not
    // in this file (presumably in the typst-lsp code this was ported from).

    // TODO: Typst's `Source` could easily provide an implementation of the
    // method we need here. Submit a PR to `typst` to add it, then update
    // this if/when merged.

    let line = typst_source.byte_to_line(typst_offset).unwrap();

    // The column is the UTF-16 distance between the offset and its line start.
    let offset_utf16 = typst_source.byte_to_utf16(typst_offset).unwrap();
    let line_start = typst_source.line_to_byte(line).unwrap();
    let line_start_utf16 = typst_source.byte_to_utf16(line_start).unwrap();
    let column = offset_utf16 - line_start_utf16;

    (line as u32, column as u32)
}
Loading

0 comments on commit 3af05ab

Please sign in to comment.