Skip to content

Commit

Permalink
[GR-18163] Fix rb_enc_left_char_head() (#3267)
Browse files Browse the repository at this point in the history
PullRequest: truffleruby/4013
  • Loading branch information
eregon committed Sep 20, 2023
2 parents 9f8ff77 + 4d17ba4 commit c77f8bb
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ New features:

Bug fixes:

* Fix `rb_enc_left_char_head()` so that it no longer always raises `ArgumentError` (#3267, @eregon).

Compatibility:

Expand Down
2 changes: 1 addition & 1 deletion lib/cext/ABI_check.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
4
5
16 changes: 16 additions & 0 deletions spec/ruby/optional/capi/encoding_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,22 @@
end
end

describe "rb_enc_left_char_head" do
  it 'returns the head position of a character' do
    # Each row is [string, offset handed to the C helper, expected offset of the character head].
    cases = [
      ["é", 1, 0],
      ["éééé", 7, 6],

      ["a", 0, 0],

      # unclear if this is intended to work
      ["a", 1, 1],

      # Works because for single-byte encodings rb_enc_left_char_head() just returns the pointer
      ["a".force_encoding(Encoding::US_ASCII), 88, 88],
      ["a".b, 88, 88],
    ]

    cases.each do |str, offset, head|
      @s.rb_enc_left_char_head(str, offset).should == head
    end
  end
end

describe "ONIGENC_MBC_CASE_FOLD" do
it "returns the correct case fold for the given string" do
@s.ONIGENC_MBC_CASE_FOLD("lower").should == ["l", 1]
Expand Down
7 changes: 7 additions & 0 deletions spec/ruby/optional/capi/ext/encoding_spec.c
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,12 @@ static VALUE encoding_spec_rb_enc_strlen(VALUE self, VALUE str, VALUE length, VA
return LONG2FIX(rb_enc_strlen(p, e, rb_to_encoding(encoding)));
}

/*
 * Spec wrapper around rb_enc_left_char_head().
 *
 * str    - Ruby String whose buffer and encoding are passed to the C function.
 * offset - Integer added to the start of the buffer to form the probe pointer.
 *
 * Returns the distance from the start of the buffer to the pointer that
 * rb_enc_left_char_head() reports, as a Ruby Integer.
 */
static VALUE encoding_spec_rb_enc_left_char_head(VALUE self, VALUE str, VALUE offset) {
  char *start = RSTRING_PTR(str);
  char *probe = start + NUM2INT(offset);
  char *limit = RSTRING_END(str);
  char *head = rb_enc_left_char_head(start, probe, limit, rb_enc_get(str));

  /* Report the result relative to the buffer start so the Ruby side can compare offsets. */
  return LONG2NUM(head - start);
}

void Init_encoding_spec(void) {
VALUE cls;
native_rb_encoding_pointer = (rb_encoding**) malloc(sizeof(rb_encoding*));
Expand Down Expand Up @@ -364,6 +370,7 @@ void Init_encoding_spec(void) {
rb_define_method(cls, "rb_enc_str_asciionly_p", encoding_spec_rb_enc_str_asciionly_p, 1);
rb_define_method(cls, "rb_uv_to_utf8", encoding_spec_rb_uv_to_utf8, 2);
rb_define_method(cls, "ONIGENC_MBC_CASE_FOLD", encoding_spec_ONIGENC_MBC_CASE_FOLD, 1);
rb_define_method(cls, "rb_enc_left_char_head", encoding_spec_rb_enc_left_char_head, 2);
}

#ifdef __cplusplus
Expand Down
5 changes: 4 additions & 1 deletion src/main/c/cext/encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,10 @@ int rb_enc_get_index(VALUE obj) {
}

char* rb_enc_left_char_head(const char *start, const char *p, const char *end, rb_encoding *enc) {
int length = start - end;
if (p <= start || p >= end) {
return p;
}
int length = end - start;
int position = polyglot_as_i32(polyglot_invoke(RUBY_CEXT, "rb_enc_left_char_head",
rb_tr_unwrap(rb_enc_from_encoding(enc)),
rb_tr_unwrap(rb_str_new(start, length)),
Expand Down

0 comments on commit c77f8bb

Please sign in to comment.