Skip to content

Commit

Permalink
Search: Add plain text match (#13)
Browse files Browse the repository at this point in the history
  • Loading branch information
Chaoses-Ib committed Nov 7, 2021
1 parent fcd67e9 commit 78705b5
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 6 deletions.
50 changes: 45 additions & 5 deletions Hijacker/match.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include "pch.h"
#include "match.hpp"
#include <functional>
#include <IbWinCppLib/WinCppLib.hpp>

char32_t read_char32(const char8_t* str, int* length) {
char c = str[0];
Expand All @@ -15,31 +14,72 @@ char32_t read_char32(const char8_t* str, int* length) {

// Builds a heap-allocated Pattern from a null-terminated UTF-8 string.
//
// The single allocation holds, in order: the Pattern header, the pattern
// decoded to char32_t (including the terminating 0), and a raw copy of the
// UTF-8 bytes (without a terminator).
//
// pattern:      UTF-8 input, consumed until read_char32 yields 0.
// flags:        stored in the Pattern unchanged.
// pinyin_flags: the pointer itself is stored; the vector is not copied.
// Returns the newly allocated Pattern.
//
// NOTE(review): this text looks like a rendered diff without +/- markers, so a
// few adjacent statements below appear to be removed/added pairs — confirm
// against the real file before relying on the exact statement list.
// NOTE(review): how the returned block is released is not shown here —
// presumably HeapFree on the process heap; confirm at the call sites.
Pattern* compile(const char8_t* pattern, PatternFlag::Value flags, std::vector<pinyin::PinyinFlagValue>* pinyin_flags) {
size_t length = 1; // code point count; starts at 1 to reserve the terminating '\0'
size_t length_u8 = 0; // UTF-8 byte count, terminator excluded
{
// First pass: measure the input so the allocation can be sized exactly.
const char8_t* p = pattern;
int char_len;
// NOTE(review): the next two loop headers look like the old/new pair from the
// diff (the second one adds the braces needed for the length_u8 update).
for (char32_t c = read_char32(p, &char_len); c; c = read_char32(p += char_len, &char_len))
for (char32_t c = read_char32(p, &char_len); c; c = read_char32(p += char_len, &char_len)) {
length++;
length_u8 += char_len;
}
}
//Pattern* pat = ib::Addr(new ib::Byte[sizeof Pattern + length * sizeof(char32_t)]);
// NOTE(review): the next two declarations look like the old/new pair from the
// diff; the second also reserves length_u8 bytes for the UTF-8 copy.
// NOTE(review): the HeapAlloc result is used below without a null check.
Pattern* pat = ib::Addr(HeapAlloc(GetProcessHeap(), 0, sizeof Pattern + length * sizeof(char32_t)));
Pattern* pat = ib::Addr(HeapAlloc(GetProcessHeap(), 0, sizeof Pattern + length * sizeof(char32_t) + length_u8 * sizeof(char8_t)));

pat->flags = flags;
pat->pinyin_flags = pinyin_flags;

// pattern_len excludes the terminating 0 that `length` accounts for.
pat->pattern_len = length - 1;
pat->pattern_u8_len = length_u8;

// Second pass: decode the input into the char32_t storage. The loop runs
// `length` times, so the terminating 0 code point is stored as well.
const char8_t* p = pattern;
int char_len;
for (size_t i = 0; i < length; i++) {
// NOTE(review): the next two stores look like the old/new pair from the diff
// (flexible array member vs. the pattern() accessor).
pat->pattern[i] = read_char32(p, &char_len);
pat->pattern()[i] = read_char32(p, &char_len);
p += char_len;
}

// Copy exactly length_u8 bytes: the UTF-8 copy carries no terminator, so its
// extent is given only by pattern_u8_len.
memcpy(pat->pattern_u8(), pattern, length_u8);

return pat;
}

int exec(Pattern* pattern, const char8_t* subject, int length, size_t nmatch, int pmatch[], PatternFlag::Value flags)
{
const char8_t* subject_end = subject + length;

// plain text match
bool plain = true;
{
const char8_t* s = subject;
int char_len;
for (char32_t c = read_char32(s, &char_len); s != subject_end; c = read_char32(s += char_len, &char_len)) {
if (c >= 0x3007) {
plain = false;
break;
}
}
}
if (plain) {
std::u8string_view sv(subject, length);
std::u8string_view pt = pattern->pattern_u8_sv();
auto it = std::search(sv.begin(), sv.end(), pt.begin(), pt.end(),
[](char8_t c1, char8_t c2) {
return std::toupper(c1) == std::toupper(c2);
});

if (it == sv.end()) {
return -1;
} else {
if (nmatch) {
pmatch[0] = it - sv.begin();
pmatch[1] = it - sv.begin() + pt.size();
return 1;
} else {
return 0;
}
}
}

// DFA?
auto char_match = [pattern](char32_t c, const char32_t* pat) -> std::vector<size_t> {
Expand Down Expand Up @@ -85,7 +125,7 @@ int exec(Pattern* pattern, const char8_t* subject, int length, size_t nmatch, in
const char8_t* sub = subject;
int char_len;
while (sub != subject_end) {
if (const char8_t* s = subject_match(sub, pattern->pattern)) {
if (const char8_t* s = subject_match(sub, pattern->pattern())) {
if (nmatch) {
pmatch[0] = sub - subject;
pmatch[1] = s - subject;
Expand Down
16 changes: 15 additions & 1 deletion Hijacker/match.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once
#include <vector>
#include <IbWinCppLib/WinCppLib.hpp>
#define IB_PINYIN_ENCODING 32
#include <IbPinyinLib/Pinyin.hpp>

Expand All @@ -14,7 +15,20 @@ struct PatternFlag {
// Header of a compiled pattern. The pattern data is not held in declared
// members: the accessors below compute addresses past the end of the struct,
// so a Pattern is only usable when it sits at the start of a larger allocation
// that provides that trailing storage (compile() in match.cpp allocates one).
//
// Layout expected by the accessors, starting at `this`:
//   [Pattern][(pattern_len + 1) x char32_t][pattern_u8_len x char8_t]
//
// NOTE(review): this text looks like a rendered diff without +/- markers; the
// `char32_t pattern[];` member below appears to be the removed line (the same
// declaration is commented out further down) — confirm against the real file.
struct Pattern {
PatternFlag::Value flags;
// Stored as a pointer; this struct does not copy the vector.
std::vector<pinyin::PinyinFlagValue>* pinyin_flags;
char32_t pattern[];
// Number of char32_t code points, not counting the terminating 0.
unsigned int pattern_len;
// Number of UTF-8 bytes in the trailing byte copy.
unsigned int pattern_u8_len;
//char32_t pattern[];
//char8_t pattern_u8[];

// Start of the char32_t pattern: the address sizeof(Pattern) past `this`.
// NOTE(review): relies on ib::Addr offsetting in bytes and converting to
// char32_t* — presumably so; confirm in IbWinCppLib.
char32_t* pattern() {
return ib::Addr(this) + sizeof(Pattern);
}
// Start of the UTF-8 bytes: placed after the pattern_len + 1 char32_t values
// (the + 1 skips the terminating 0).
char8_t* pattern_u8() {
return ib::Addr(this) + sizeof(Pattern) + (pattern_len + 1) * sizeof(char32_t);
}
// View over the pattern_u8_len UTF-8 bytes; it refers to this object's
// trailing storage and does not own it.
std::u8string_view pattern_u8_sv() {
return { pattern_u8(), pattern_u8_len };
}
};

Pattern* compile(const char8_t* pattern, PatternFlag::Value flags, std::vector<pinyin::PinyinFlagValue>* pinyin_flags);
Expand Down

0 comments on commit 78705b5

Please sign in to comment.