Skip to content

Commit

Permalink
use tuples to pass the pattern as enum
Browse files (browse the repository at this point in the history)
  • Loading branch information
mruoss committed Oct 8, 2023
1 parent b3bfd26 commit 3f663c4
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 10 deletions.
8 changes: 4 additions & 4 deletions lib/tokenizers/pre_tokenizer.ex
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,15 @@ defmodule Tokenizers.PreTokenizer do
* `:invert` - whether to invert the split or not. Defaults to `false`
"""
@spec split(String.t() | Regex.t(), split_delimiter_behaviour(), keyword()) :: t()
@spec split(String.t() | {:string, String.t()}| {:regex, String.t()} , split_delimiter_behaviour(), keyword()) :: t()
def split(pattern, behavior, opts \\ [])

def split(pattern, behavior, opts) when is_binary(pattern) do
Tokenizers.Native.pre_tokenizers_split(pattern, behavior, opts)
split({:string, pattern}, behavior, opts)
end

def split(%Regex{} = pattern, behavior, opts) do
split(Regex.source(pattern), behavior, Keyword.put(opts, :use_regex, true))
def split(pattern, behavior, opts) do
Tokenizers.Native.pre_tokenizers_split(pattern, behavior, opts)
end


Expand Down
17 changes: 12 additions & 5 deletions native/ex_tokenizers/src/pre_tokenizers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,25 +240,32 @@ impl From<SplitDelimiterBehavior> for tokenizers::SplitDelimiterBehavior {
#[derive(NifTaggedEnum)]
pub enum SplitOption {
Invert(bool),
UseRegex(bool)
}

#[derive(NifTaggedEnum)]
pub enum LocalSplitPattern {
String(String),
Regex(String)
}

#[rustler::nif]
pub fn pre_tokenizers_split(
pattern: String,
pattern: LocalSplitPattern,
behavior: SplitDelimiterBehavior,
options: Vec<SplitOption>,
) -> Result<ExTokenizersPreTokenizer, rustler::Error> {
struct Opts {
invert: bool,
}
let mut opts = Opts { invert: false };
let mut final_pattern = SplitPattern::String(String::from(""));
let final_pattern = match pattern {
LocalSplitPattern::String(pattern) => SplitPattern::String(pattern),
LocalSplitPattern::Regex(pattern) => SplitPattern::Regex(pattern),
};

for option in options {
match option {
SplitOption::Invert(invert) => opts.invert = invert,
SplitOption::UseRegex(true) => final_pattern = SplitPattern::Regex(pattern.to_owned()),
SplitOption::UseRegex(false) => final_pattern = SplitPattern::String(pattern.to_owned()),
}
}

Expand Down
2 changes: 1 addition & 1 deletion test/tokenizers/pre_tokenizer_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ defmodule Tokenizers.PreTokenizerTest do

test "accepts regular expressions" do
assert %Tokenizers.PreTokenizer{} =
Tokenizers.PreTokenizer.split(~r/.*/, :removed)
Tokenizers.PreTokenizer.split({:regex, ~S/.*/}, :removed)
end
end

Expand Down

0 comments on commit 3f663c4

Please sign in to comment.