Skip to content

Commit

Permalink
feat(Range): first stage of supporting single-position ranges (#168)
Browse files Browse the repository at this point in the history
  • Loading branch information
favonia authored Oct 27, 2024
1 parent f473cd3 commit 957b161
Showing 22 changed files with 420 additions and 350 deletions.
6 changes: 1 addition & 5 deletions src-lsp/LspShims.ml
Original file line number Diff line number Diff line change
@@ -10,11 +10,7 @@ struct
let lsp_range_of_range (r : Asai.Range.t option) =
match r with
| Some r ->
let (start , stop) =
match Asai.Range.view r with
| `Range (start, stop) -> start, stop
| `End_of_file pos -> pos, pos
in
let (start, stop) = Asai.Range.split r in
L.Range.create
~start:(lsp_pos_of_pos start)
~end_:(lsp_pos_of_pos stop)
19 changes: 14 additions & 5 deletions src/Explication.ml
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
include ExplicationData

let dump_seg dump_tag = Utils.dump_pair (Utils.dump_option dump_tag) Utils.dump_string
let dump_marker dump_tag fmt =
function
| RangeBegin tag -> Format.fprintf fmt {|@[<2>RangeBegin@ @[%a@]@]|} dump_tag tag
| RangeEnd tag -> Format.fprintf fmt {|@[<2>RangeEnd@ @[%a@]@]|} dump_tag tag
| Point tag -> Format.fprintf fmt {|@[<2>Point@ @[%a@]@]|} dump_tag tag
let dump_line dump_tag fmt {tags; segments} =
Format.fprintf fmt {|@[<1>{@[<2>tags=@,@[%a@]@];@ @[<2>segments=@ @[%a@]@]}@]|}
(Utils.dump_list dump_tag) tags
(Utils.dump_list (dump_seg dump_tag)) segments
let dump_token dump_tag fmt =
function
| String str -> Format.fprintf fmt {|@[<2>String@ "%s"@]|} (String.escaped str)
| Marker m -> Format.fprintf fmt {|@[<2>Marker@ @[<1>(%a)@]@]|} (dump_marker dump_tag) m
let dump_line dump_tag fmt {markers; tokens} =
Format.fprintf fmt {|@[<1>{@[<2>markers=@,@[%a@]@];@ @[<2>tokens=@ @[%a@]@]}@]|}
(Utils.dump_list dump_tag) markers
(Utils.dump_list (dump_token dump_tag)) tokens
let dump_block dump_tag fmt {begin_line_num; end_line_num; lines} =
Format.fprintf fmt {|@[<1>{begin_line_num=%d;@ end_line_num=%d;@ @[<2>lines=@ @[%a@]@]}@]|}
5 changes: 4 additions & 1 deletion src/Explication.mli
Original file line number Diff line number Diff line change
@@ -5,5 +5,8 @@ include module type of ExplicationData

(** {1 Debugging} *)

(** Ugly printer for debugging *)
(** Ugly printer for {!type:marker} *)
val dump_marker : (Format.formatter -> 'tag -> unit) -> Format.formatter -> 'tag marker -> unit

(** Ugly printer for {!type:t} *)
val dump : (Format.formatter -> 'tag -> unit) -> Format.formatter -> 'tag t -> unit
16 changes: 12 additions & 4 deletions src/ExplicationData.ml
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
(** A segment is an optionally tagged string from the user content. (Note the use of [option].) *)
type 'tag segment = 'tag option * string
(** A marker is a delimiter of a range or a specific point. *)
type 'tag marker =
| RangeBegin of 'tag
| RangeEnd of 'tag
| Point of 'tag

(** A token is either a string or a marker. *)
type 'tag token =
| String of string
| Marker of 'tag marker

(** A line is a list of {!type:segment}s along with tags. *)
type 'tag line =
{ tags : 'tag list
; segments : 'tag segment list
{ markers : 'tag list (** All tags in this line *)
; tokens : 'tag token list
}

(** A block is a collection of consecutive lines. *)
113 changes: 53 additions & 60 deletions src/Explicator.ml
Original file line number Diff line number Diff line change
@@ -30,9 +30,6 @@ let print_invalid_range fmt : UserContent.invalid_range -> unit =
Format.fprintf fmt "its@ beginning@ position@ is@ invalid;@ %a" print_invalid_position r
| `End r ->
Format.fprintf fmt "its@ ending@ position@ is@ invalid;@ %a" print_invalid_position r
| `Not_end_of_file (l, l') ->
Format.fprintf fmt "its@ offset@ %d@ is@ not@ the@ end@ of@ file@ (%d)." l l'
| `End_of_file r -> print_invalid_position fmt r

let () = Printexc.register_printer @@
function
@@ -45,12 +42,11 @@ let () = Printexc.register_printer @@
| _ -> None

let to_start_of_line (pos : Range.position) = {pos with offset = pos.start_of_line}
let default_blend ~(priority : _ -> int) t1 t2 = if priority t2 <= priority t1 then t2 else t1

module Make (Tag : Tag) = struct
type position = Range.position

(** Skip the newline sequence, assuming that [shift] is not zero. (Otherwise, it means we already reached eof.) *)
(** Skip the newline sequence, assuming that [shift] is not zero. (Otherwise, it means we already reached EOF.) *)
let eol_to_next_line shift (pos : position) : position =
assert (shift <> 0);
{ source = pos.source;
@@ -65,9 +61,8 @@ module Make (Tag : Tag) = struct

type explicator_state =
{ lines : Tag.t line bwd
; segments : Tag.t segment bwd
; remaining_tagged_lines : (Tag.t * int) list
; current_tag : Tag.t option
; tokens : Tag.t token bwd
; remaining_line_markers : (int * Tag.t) list
; cursor : Range.position
; eol : int
; eol_shift : int option
@@ -76,105 +71,103 @@ module Make (Tag : Tag) = struct

module F = Flattener.Make(Tag)

let explicate_block ~line_breaks (b : Tag.t Flattener.block) : Tag.t block =
match b.tagged_positions with
| [] -> invalid_arg "explicate_block: empty block"
| ((_, ploc) :: _) as ps ->
let source = SourceReader.load ploc.source in
let explicate_block ~line_breaks source (b : Tag.t Flattener.block) : Tag.t block =
match b.markers with
| [] -> invalid_arg "explicate_block: empty block; should be impossible"
| ((first_loc, _) :: _) as markers ->
let source = SourceReader.load source in
let eof = SourceReader.length source in
let find_eol i = UserContent.find_eol ~line_breaks (SourceReader.unsafe_get source) (i, eof) in
let rec go state : (Tag.t option * Range.position) list -> _ =
let rec go state : (Range.position * Tag.t marker) list -> _ =
function
| (ptag, ploc) :: ps when state.cursor.line_num = ploc.line_num ->
if ploc.offset > eof then invalid_arg "Asai.Explicator.explicate: beyond eof; use the debug mode";
if ploc.offset > state.eol then invalid_arg "Asai.Explicator.explicate: unexpected newline; use the debug mode";
if ploc.offset = state.cursor.offset then
go {state with cursor = ploc; current_tag = ptag} ps
else
(* Still on the same line *)
let segments =
state.segments <:
(state.current_tag, read_between ~source state.cursor.offset ploc.offset)
in
go { state with segments; cursor = ploc; current_tag = ptag } ps
| ps ->
| (loc, marker) :: markers when state.cursor.line_num = loc.line_num (* on the same line *) ->
if loc.offset > eof then invalid_arg "Asai.Explicator.explicate: position beyond EOF; use the debug mode";
if loc.offset > state.eol then invalid_arg "Asai.Explicator.explicate: unexpected newline; use the debug mode";
let tokens =
if loc.offset = state.cursor.offset then
state.tokens <: Marker marker
else
state.tokens <: String (read_between ~source state.cursor.offset loc.offset) <: Marker marker
in
go { state with tokens; cursor = loc } markers
| markers ->
(* Shifting to the next line *)
let lines, remaining_tagged_lines =
let segments =
let lines, remaining_line_markers =
let tokens =
if state.cursor.offset < state.eol then
state.segments
<: (state.current_tag, read_between ~source state.cursor.offset state.eol)
else if Option.is_none state.eol_shift && Option.is_some state.current_tag then
state.segments
<: (state.current_tag, "‹EOF›")
state.tokens <: String (read_between ~source state.cursor.offset state.eol)
else
state.segments
state.tokens
in
let line_markers, remaining_line_markers =
Utils.span (fun (line_num, _) -> line_num = state.line_num) state.remaining_line_markers
in
let tagged_lines, remaining_tagged_lines = Utils.span (fun (_, i) -> i = state.line_num) state.remaining_tagged_lines in
(state.lines <: {segments = Bwd.to_list segments; tags = List.map fst tagged_lines}), remaining_tagged_lines
(state.lines <:
{ tokens = Bwd.to_list tokens
; markers = List.map snd line_markers
}),
remaining_line_markers
in
(* Continue the process if [ps] is not empty. *)
match ps, state.eol_shift with
(* Continue the process if [markers] is not empty. *)
match markers, state.eol_shift with
| [], _ ->
assert (state.line_num = b.end_line_num);
lines
| _ :: _, None -> invalid_arg "Asai.Explicator.explicate: beyond eof; use the debug mode"
| (_, ploc) :: _, Some eol_shift ->
if ploc.offset > eof then invalid_arg "Asai.Explicator.explicate: beyond eof; use the debug mode";
if ploc.offset <= state.eol then invalid_arg "Asai.Explicator.explicate: expected newline missing; use the debug mode";
if ploc.offset < state.eol + eol_shift then invalid_arg "Asai.Explicator.explicate: offset within newline; use the debug mode";
| _ :: _, None -> invalid_arg "Asai.Explicator.explicate: position beyond EOF; use the debug mode"
| (loc, _) :: _, Some eol_shift ->
if loc.offset > eof then invalid_arg "Asai.Explicator.explicate: position beyond EOF; use the debug mode";
if loc.offset <= state.eol then invalid_arg "Asai.Explicator.explicate: expected newline missing; use the debug mode";
if loc.offset < state.eol + eol_shift then invalid_arg "Asai.Explicator.explicate: offset within newline; use the debug mode";
(* Okay, p is really on the next line *)
let cursor = eol_to_next_line eol_shift {state.cursor with offset = state.eol} in
let eol, eol_shift = find_eol (state.eol + eol_shift) in
go
{ lines
; segments = Emp
; remaining_tagged_lines
; current_tag = state.current_tag
; tokens = Emp
; remaining_line_markers
; cursor
; eol
; eol_shift
; line_num = state.line_num + 1
}
ps
markers
in
let begin_pos = to_start_of_line ploc in
let eol, eol_shift = find_eol ploc.offset in
let lines =
let begin_pos = to_start_of_line first_loc in
let eol, eol_shift = find_eol first_loc.offset in
go
{ lines = Emp
; segments = Emp
; remaining_tagged_lines = b.tagged_lines
; current_tag = None
; tokens = Emp
; remaining_line_markers = b.line_markers
; cursor = begin_pos
; eol
; eol_shift
; line_num = b.begin_line_num
}
ps
markers
in
{ begin_line_num = b.begin_line_num
; end_line_num = b.end_line_num
; lines = Bwd.to_list @@ lines
}

let[@inline] explicate_blocks ~line_breaks = List.map (explicate_block ~line_breaks)
let[@inline] explicate_blocks ~line_breaks source ranges =
List.map (explicate_block ~line_breaks source) ranges

let[@inline] explicate_part ~line_breaks (source, bs) : Tag.t part =
{ source; blocks = explicate_blocks ~line_breaks bs }
{ source; blocks = explicate_blocks ~line_breaks source bs }

let check_ranges ~line_breaks ranges =
List.iter
(fun (_, range) ->
(fun (range, _) ->
let source = SourceReader.load @@ Range.source range in
let read = SourceReader.unsafe_get source in
let eof = SourceReader.length source in
try UserContent.check_range ~line_breaks ~eof read range
with UserContent.Invalid_range reason -> raise @@ Invalid_range (range, reason))
ranges

let explicate ?(line_breaks=`Traditional) ?(block_splitting_threshold=5)
?(blend=default_blend ~priority:Tag.priority) ?(debug=false) ranges =
let explicate ?(line_breaks=`Traditional) ?(block_splitting_threshold=5) ?(debug=false) ranges =
if debug then check_ranges ~line_breaks ranges;
List.map (explicate_part ~line_breaks) @@ F.flatten ~block_splitting_threshold ~blend ranges
List.map (explicate_part ~line_breaks) @@ F.flatten ~block_splitting_threshold ranges
end
3 changes: 0 additions & 3 deletions src/Explicator.mli
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
include module type of ExplicatorSigs

(** The default tag blending algorithm that chooses the more important tag based on priority. *)
val default_blend : priority:('tag -> int) -> 'tag -> 'tag -> 'tag

(** Making an explicator. *)
module Make : functor (Tag : Tag) -> S with module Tag := Tag
7 changes: 4 additions & 3 deletions src/ExplicatorSigs.ml
Original file line number Diff line number Diff line change
@@ -8,7 +8,9 @@ module type Tag = sig
(** The abstract type of tags. *)
type t

(** Get the priority number of a tag. We followed the UNIX convention here---a {i smaller} priority number represents higher priority. The convention works well with {!val:List.sort}, which sorts numbers in ascending order. (The more important things go first.) *)
(** Get the priority number of a tag. A {i smaller} priority number represents higher priority.
The convention works well with {!val:List.sort}, which sorts numbers in ascending order: the more important things go first. *)
val priority : t -> int

(** Ugly printer for debugging *)
@@ -19,12 +21,11 @@ end
module type S = sig
module Tag : Tag

val explicate : ?line_breaks:[`Unicode | `Traditional] -> ?block_splitting_threshold:int -> ?blend:(Tag.t -> Tag.t -> Tag.t) -> ?debug:bool -> (Tag.t * Range.t) list -> Tag.t t
val explicate : ?line_breaks:[`Unicode | `Traditional] -> ?block_splitting_threshold:int -> ?debug:bool -> (Range.t * Tag.t) list -> Tag.t t
(** Explicate a list of ranges using content from a data reader. This function must be run under [SourceReader.run].
@param line_breaks The set of character sequences that are recognized as (hard) line breaks. The [`Unicode] set contains all Unicode character sequences in {{:https://www.unicode.org/versions/Unicode15.0.0/ch05.pdf#G41643}Unicode 15.0.0 Table 5-1.} The [`Traditional] set only contains [U+000A (LF)], [U+000D (CR)], and [U+000D U+000A (CRLF)] as line breaks. The default is the [`Traditional] set.
@param block_splitting_threshold The maximum number of consecutive, non-highlighted lines allowed in a block. The function will try to minimize the number of blocks, as long as no block has too many consecutive, non-highlighted lines. A higher threshold will lead to fewer blocks. When the threshold is zero, it means no block can contain any non-highlighted line. The default value is zero.
@param blend The algorithm to blend two tags on a visual range. The default algorithm chooses the more important tag based on priority.
@param debug Whether to enable the debug mode that performs expensive extra checking. The default is [false].
@raise Invalid_range See {!exception:Invalid_range}.
Loading

0 comments on commit 957b161

Please sign in to comment.