diff --git a/assets/js/match_query.js b/assets/js/match_query.js deleted file mode 100644 index 6fd0733fb..000000000 --- a/assets/js/match_query.js +++ /dev/null @@ -1,877 +0,0 @@ -/** -* booru.match_query: A port and modification of the search_parser library for -* performing client-side filtering. -*/ - -const tokenList = [ - ['fuzz', /^~(?:\d+(\.\d+)?|\.\d+)/], - ['boost', /^\^[-+]?\d+(\.\d+)?/], - ['quoted_lit', /^\s*"(?:[^"]|\\")+"/], - ['lparen', /^\s*\(\s*/], - ['rparen', /^\s*\)\s*/], - ['and_op', /^\s*(?:&&|AND)\s+/], - ['and_op', /^\s*,\s*/], - ['or_op', /^\s*(?:\|\||OR)\s+/], - ['not_op', /^\s*NOT(?:\s+|(?=\())/], - ['not_op', /^\s*[!-]\s*/], - ['space', /^\s+/], - ['word', /^(?:\\[\s,()^~]|[^\s,()^~])+/], - ['word', /^(?:\\[\s,()]|[^\s,()])+/] - ], - numberFields = ['id', 'width', 'height', 'aspect_ratio', - 'comment_count', 'score', 'upvotes', 'downvotes', - 'faves', 'tag_count'], - dateFields = ['created_at'], - literalFields = ['tags', 'orig_sha512_hash', 'sha512_hash', - 'score', 'uploader', 'source_url', 'description'], - termSpaceToImageField = { - tags: 'data-image-tag-aliases', - score: 'data-score', - upvotes: 'data-upvotes', - downvotes: 'data-downvotes', - uploader: 'data-uploader', - // Yeah, I don't think this is reasonably supportable. 
- // faved_by: 'data-faved-by', - id: 'data-image-id', - width: 'data-width', - height: 'data-height', - /* eslint-disable camelcase */ - aspect_ratio: 'data-aspect-ratio', - comment_count: 'data-comment-count', - tag_count: 'data-tag-count', - source_url: 'data-source-url', - faves: 'data-faves', - sha512_hash: 'data-sha512', - orig_sha512_hash: 'data-orig-sha512', - created_at: 'data-created-at' - /* eslint-enable camelcase */ - }; - - -function SearchTerm(termStr) { - this.term = termStr.trim(); - this.parsed = false; -} - -SearchTerm.prototype.append = function(substr) { - this.term += substr; - this.parsed = false; -}; - -SearchTerm.prototype.parseRangeField = function(field) { - if (numberFields.indexOf(field) !== -1) { - return [field, 'eq', 'number']; - } - - if (dateFields.indexOf(field) !== -1) { - return [field, 'eq', 'date']; - } - - const qual = /^(\w+)\.([lg]te?|eq)$/.exec(field); - - if (qual) { - if (numberFields.indexOf(qual[1]) !== -1) { - return [qual[1], qual[2], 'number']; - } - - if (dateFields.indexOf(qual[1]) !== -1) { - return [qual[1], qual[2], 'date']; - } - } - - return null; -}; - -SearchTerm.prototype.parseRelativeDate = function(dateVal, qual) { - const match = /(\d+) (second|minute|hour|day|week|month|year)s? 
ago/.exec(dateVal); - const bounds = { - second: 1000, - minute: 60000, - hour: 3600000, - day: 86400000, - week: 604800000, - month: 2592000000, - year: 31536000000 - }; - - if (match) { - const amount = parseInt(match[1], 10); - const scale = bounds[match[2]]; - - const now = new Date().getTime(); - const bottomDate = new Date(now - (amount * scale)); - const topDate = new Date(now - ((amount - 1) * scale)); - - switch (qual) { - case 'lte': - return [bottomDate, 'lt']; - case 'gte': - return [bottomDate, 'gte']; - case 'lt': - return [bottomDate, 'lt']; - case 'gt': - return [bottomDate, 'gte']; - default: - return [[bottomDate, topDate], 'eq']; - } - } - else { - throw new Error(`Cannot parse date string: ${dateVal}`); - } -}; - -SearchTerm.prototype.parseAbsoluteDate = function(dateVal, qual) { - const parseRes = [ - /^(\d{4})/, - /^-(\d{2})/, - /^-(\d{2})/, - /^(?:\s+|T|t)(\d{2})/, - /^:(\d{2})/, - /^:(\d{2})/ - ], - timeZoneOffset = [0, 0], - timeData = [0, 0, 1, 0, 0, 0], - origDateVal = dateVal; - let topDate = null, - i, - match, - bottomDate = null, - localDateVal = origDateVal; - - match = /([+-])(\d{2}):(\d{2})$/.exec(localDateVal); - if (match) { - timeZoneOffset[0] = parseInt(match[2], 10); - timeZoneOffset[1] = parseInt(match[3], 10); - if (match[1] === '-') { - timeZoneOffset[0] *= -1; - timeZoneOffset[1] *= -1; - } - localDateVal = localDateVal.substr(0, localDateVal.length - 6); - } - else { - localDateVal = localDateVal.replace(/[Zz]$/, ''); - } - - for (i = 0; i < parseRes.length; i += 1) { - if (localDateVal.length === 0) { - break; - } - - match = parseRes[i].exec(localDateVal); - if (match) { - if (i === 1) { - timeData[i] = parseInt(match[1], 10) - 1; - } - else { - timeData[i] = parseInt(match[1], 10); - } - localDateVal = localDateVal.substr( - match[0].length, localDateVal.length - match[0].length - ); - } - else { - throw new Error(`Cannot parse date string: ${origDateVal}`); - } - } - - if (localDateVal.length > 0) { - throw new 
Error(`Cannot parse date string: ${origDateVal}`); - } - - // Apply the user-specified time zone offset. The JS Date constructor - // is very flexible here. - timeData[3] -= timeZoneOffset[0]; - timeData[4] -= timeZoneOffset[1]; - - switch (qual) { - case 'lte': - timeData[i - 1] += 1; - return [Date.UTC.apply(Date, timeData), 'lt']; - case 'gte': - return [Date.UTC.apply(Date, timeData), 'gte']; - case 'lt': - return [Date.UTC.apply(Date, timeData), 'lt']; - case 'gt': - timeData[i - 1] += 1; - return [Date.UTC.apply(Date, timeData), 'gte']; - default: - bottomDate = Date.UTC.apply(Date, timeData); - timeData[i - 1] += 1; - topDate = Date.UTC.apply(Date, timeData); - return [[bottomDate, topDate], 'eq']; - } -}; - -SearchTerm.prototype.parseDate = function(dateVal, qual) { - try { - return this.parseAbsoluteDate(dateVal, qual); - } - catch (_) { - return this.parseRelativeDate(dateVal, qual); - } -}; - -SearchTerm.prototype.parse = function() { - let rangeParsing, - candidateTermSpace, - termCandidate; - - this.wildcardable = !this.fuzz && !/^"([^"]|\\")+"$/.test(this.term); - - if (!this.wildcardable && !this.fuzz) { - this.term = this.term.substr(1, this.term.length - 2); - } - - this.term = this._normalizeTerm(); - - // N.B.: For the purposes of this parser, boosting effects are ignored. - - // Default. 
- this.termSpace = 'tags'; - this.termType = 'literal'; - - const matchArr = this.term.split(':'); - - if (matchArr.length > 1) { - candidateTermSpace = matchArr[0]; - termCandidate = matchArr.slice(1).join(':'); - rangeParsing = this.parseRangeField(candidateTermSpace); - - if (rangeParsing) { - this.termSpace = rangeParsing[0]; - this.termType = rangeParsing[2]; - - if (this.termType === 'date') { - rangeParsing = this.parseDate(termCandidate, rangeParsing[1]); - this.term = rangeParsing[0]; - this.compare = rangeParsing[1]; - } - else { - this.term = parseFloat(termCandidate); - this.compare = rangeParsing[1]; - } - - this.wildcardable = false; - } - else if (literalFields.indexOf(candidateTermSpace) !== -1) { - this.termType = 'literal'; - this.term = termCandidate; - this.termSpace = candidateTermSpace; - } - else if (candidateTermSpace === 'my') { - this.termType = 'my'; - this.termSpace = termCandidate; - } - } - - if (this.wildcardable) { - // Transforms wildcard match into regular expression. - // A custom NFA with caching may be more sophisticated but not - // likely to be faster. - this.term = new RegExp( - `^${ - this.term.replace(/([.+^$[\]\\(){}|-])/g, '\\$1') - .replace(/([^\\]|[^\\](?:\\\\)+)\*/g, '$1.*') - .replace(/^(?:\\\\)*\*/g, '.*') - .replace(/([^\\]|[^\\](?:\\\\)+)\?/g, '$1.?') - .replace(/^(?:\\\\)*\?/g, '.?') - }$`, 'i' - ); - } - - // Update parse status flag to indicate the new properties are ready. - this.parsed = true; -}; - -SearchTerm.prototype._normalizeTerm = function() { - if (!this.wildcardable) { - return this.term.replace('"', '"'); - } - return this.term.replace(/\\([^*?])/g, '$1'); -}; - -SearchTerm.prototype.fuzzyMatch = function(targetStr) { - let targetDistance, - i, - j, - // Work vectors, representing the last three populated - // rows of the dynamic programming matrix of the iterative - // optimal string alignment calculation. 
- v0 = [], - v1 = [], - v2 = [], - temp; - - if (this.fuzz < 1.0) { - targetDistance = targetStr.length * (1.0 - this.fuzz); - } - else { - targetDistance = this.fuzz; - } - - const targetStrLower = targetStr.toLowerCase(); - - for (i = 0; i <= targetStrLower.length; i += 1) { - v1.push(i); - } - - for (i = 0; i < this.term.length; i += 1) { - v2[0] = i; - for (j = 0; j < targetStrLower.length; j += 1) { - const cost = this.term[i] === targetStrLower[j] ? 0 : 1; - v2[j + 1] = Math.min( - // Deletion. - v1[j + 1] + 1, - // Insertion. - v2[j] + 1, - // Substitution or No Change. - v1[j] + cost - ); - if (i > 1 && j > 1 && this.term[i] === targetStrLower[j - 1] && - targetStrLower[i - 1] === targetStrLower[j]) { - v2[j + 1] = Math.min(v2[j], v0[j - 1] + cost); - } - } - // Rotate dem vec pointers bra. - temp = v0; - v0 = v1; - v1 = v2; - v2 = temp; - } - - return v1[targetStrLower.length] <= targetDistance; -}; - -SearchTerm.prototype.exactMatch = function(targetStr) { - return this.term.toLowerCase() === targetStr.toLowerCase(); -}; - -SearchTerm.prototype.wildcardMatch = function(targetStr) { - return this.term.test(targetStr); -}; - -SearchTerm.prototype.interactionMatch = function(imageID, type, interaction, interactions) { - let ret = false; - - interactions.forEach(v => { - if (v.image_id === imageID && v.interaction_type === type && (interaction === null || v.value === interaction)) { - ret = true; - } - }); - - return ret; -}; - -SearchTerm.prototype.match = function(target) { - // eslint-disable-next-line @typescript-eslint/no-this-alias,consistent-this - const ohffs = this; - let ret = false, - compFunc, - numbuh, - date; - - if (!this.parsed) { - this.parse(); - } - - if (this.termType === 'literal') { - // Literal matching. 
- if (this.fuzz) { - compFunc = this.fuzzyMatch; - } - else if (this.wildcardable) { - compFunc = this.wildcardMatch; - } - else { - compFunc = this.exactMatch; - } - - if (this.termSpace === 'tags') { - target.getAttribute('data-image-tag-aliases').split(', ').every( - str => { - if (compFunc.call(ohffs, str)) { - ret = true; - return false; - } - return true; - } - ); - } - else { - ret = compFunc.call( - this, target.getAttribute(termSpaceToImageField[this.termSpace]) - ); - } - } - else if (this.termType === 'my' && window.booru.interactions.length > 0) { - // Should work with most my:conditions except watched. - switch (this.termSpace) { - case 'faves': - ret = this.interactionMatch(Number(target.getAttribute('data-image-id')), 'faved', null, window.booru.interactions); - - break; - case 'upvotes': - ret = this.interactionMatch(Number(target.getAttribute('data-image-id')), 'voted', 'up', window.booru.interactions); - - break; - case 'downvotes': - ret = this.interactionMatch(Number(target.getAttribute('data-image-id')), 'voted', 'down', window.booru.interactions); - - break; - default: - ret = false; // Other my: interactions aren't supported, return false to prevent them from triggering spoiler. - - break; - } - } - else if (this.termType === 'date') { - // Date matching. - date = new Date( - target.getAttribute(termSpaceToImageField[this.termSpace]) - ).getTime(); - - switch (this.compare) { - // The open-left, closed-right date range specified by the - // date/time format limits the types of comparisons that are - // done compared to numeric ranges. - case 'lt': - ret = this.term > date; - break; - case 'gte': - ret = this.term <= date; - break; - default: - ret = this.term[0] <= date && this.term[1] > date; - } - } - else { - // Range matching. 
- numbuh = parseFloat( - target.getAttribute(termSpaceToImageField[this.termSpace]) - ); - - if (isNaN(this.term)) { - ret = false; - } - else if (this.fuzz) { - ret = this.term <= numbuh + this.fuzz && - this.term + this.fuzz >= numbuh; - } - else { - switch (this.compare) { - case 'lt': - ret = this.term > numbuh; - break; - case 'gt': - ret = this.term < numbuh; - break; - case 'lte': - ret = this.term >= numbuh; - break; - case 'gte': - ret = this.term <= numbuh; - break; - default: - ret = this.term === numbuh; - } - } - } - - return ret; -}; - -function generateLexArray(searchStr) { - const opQueue = [], - groupNegate = [], - tokenStack = []; - let searchTerm = null, - boost = null, - fuzz = null, - lparenCtr = 0, - negate = false, - boostFuzzStr = '', - localSearchStr = searchStr; - - while (localSearchStr.length > 0) { - // eslint-disable-next-line no-loop-func - tokenList.every(tokenArr => { - const tokenName = tokenArr[0], - tokenRE = tokenArr[1]; - let match = tokenRE.exec(localSearchStr), - op; - - if (match) { - match = match[0]; - - if (Boolean(searchTerm) && ( - ['and_op', 'or_op'].indexOf(tokenName) !== -1 || - tokenName === 'rparen' && lparenCtr === 0)) { - // Set options. - searchTerm.boost = boost; - searchTerm.fuzz = fuzz; - // Push to stack. - tokenStack.push(searchTerm); - // Reset term and options data. - searchTerm = fuzz = boost = null; - boostFuzzStr = ''; - lparenCtr = 0; - - if (negate) { - tokenStack.push('not_op'); - negate = false; - } - } - - switch (tokenName) { - case 'and_op': - while (opQueue[0] === 'and_op') { - tokenStack.push(opQueue.shift()); - } - opQueue.unshift('and_op'); - break; - case 'or_op': - while (opQueue[0] === 'and_op' || opQueue[0] === 'or_op') { - tokenStack.push(opQueue.shift()); - } - opQueue.unshift('or_op'); - break; - case 'not_op': - if (searchTerm) { - // We're already inside a search term, so it does - // not apply, obv. 
- searchTerm.append(match); - } - else { - negate = !negate; - } - break; - case 'lparen': - if (searchTerm) { - // If we are inside the search term, do not error - // out just yet; instead, consider it as part of - // the search term, as a user convenience. - searchTerm.append(match); - lparenCtr += 1; - } - else { - opQueue.unshift('lparen'); - groupNegate.push(negate); - negate = false; - } - break; - case 'rparen': - if (lparenCtr > 0) { - if (searchTerm) { - searchTerm.append(match); - } - else { - searchTerm = new SearchTerm(match); - } - lparenCtr -= 1; - } - else { - while (opQueue.length) { - op = opQueue.shift(); - if (op === 'lparen') { - break; - } - tokenStack.push(op); - } - if (groupNegate.length > 0 && groupNegate.pop()) { - tokenStack.push('not_op'); - } - } - break; - case 'fuzz': - if (searchTerm) { - // For this and boost operations, we store the - // current match so far to a temporary string in - // case this is actually inside the term. - fuzz = parseFloat(match.substr(1)); - boostFuzzStr += match; - } - else { - searchTerm = new SearchTerm(match); - } - break; - case 'boost': - if (searchTerm) { - boost = match.substr(1); - boostFuzzStr += match; - } - else { - searchTerm = new SearchTerm(match); - } - break; - case 'quoted_lit': - if (searchTerm) { - searchTerm.append(match); - } - else { - searchTerm = new SearchTerm(match); - } - break; - case 'word': - if (searchTerm) { - if (fuzz || boost) { - boost = fuzz = null; - searchTerm.append(boostFuzzStr); - boostFuzzStr = ''; - } - searchTerm.append(match); - } - else { - searchTerm = new SearchTerm(match); - } - break; - default: - // Append extra spaces within search terms. - if (searchTerm) { - searchTerm.append(match); - } - } - - // Truncate string and restart the token tests. - localSearchStr = localSearchStr.substr( - match.length, localSearchStr.length - match.length - ); - - // Break since we have found a match. 
- return false; - } - - return true; - }); - } - - // Append final tokens to the stack, starting with the search term. - if (searchTerm) { - searchTerm.boost = boost; - searchTerm.fuzz = fuzz; - tokenStack.push(searchTerm); - } - if (negate) { - tokenStack.push('not_op'); - } - - if (opQueue.indexOf('rparen') !== -1 || - opQueue.indexOf('lparen') !== -1) { - throw new Error('Mismatched parentheses.'); - } - - // Memory-efficient concatenation of remaining operators queue to the - // token stack. - tokenStack.push.apply(tokenStack, opQueue); - - return tokenStack; -} - -function parseTokens(lexicalArray) { - const operandStack = []; - let negate, op1, op2; - lexicalArray.forEach((token, i) => { - if (token !== 'not_op') { - negate = lexicalArray[i + 1] === 'not_op'; - - if (typeof token === 'string') { - op2 = operandStack.pop(); - op1 = operandStack.pop(); - - if (typeof op1 === 'undefined' || typeof op2 === 'undefined') { - throw new Error('Missing operand.'); - } - - operandStack.push(new SearchAST(token, negate, op1, op2)); - } - else { - if (negate) { - operandStack.push(new SearchAST(null, true, token)); - } - else { - operandStack.push(token); - } - } - } - }); - - if (operandStack.length > 1) { - throw new Error('Missing operator.'); - } - - op1 = operandStack.pop(); - - if (typeof op1 === 'undefined') { - return new SearchAST(); - } - - if (isTerminal(op1)) { - return new SearchAST(null, false, op1); - } - - return op1; -} - -function parseSearch(searchStr) { - return parseTokens(generateLexArray(searchStr)); -} - -function isTerminal(operand) { - // Whether operand is a terminal SearchTerm. 
- return typeof operand.term !== 'undefined'; -} - -function SearchAST(op, negate, leftOperand, rightOperand) { - this.negate = Boolean(negate); - this.leftOperand = leftOperand || null; - this.op = op || null; - this.rightOperand = rightOperand || null; -} - -function combineOperands(ast1, ast2, parentAST) { - let localAst1; - if (parentAST.op === 'and_op') { - localAst1 = ast1 && ast2; - } - else { - localAst1 = ast1 || ast2; - } - - if (parentAST.negate) { - return !localAst1; - } - - return localAst1; -} - -// Evaluation of the AST in regard to a target image -SearchAST.prototype.hitsImage = function(image) { - const treeStack = []; - // Left side node. - // eslint-disable-next-line @typescript-eslint/no-this-alias,consistent-this - let ast1 = this, - // Right side node. - ast2, - // Parent node of the current subtree. - parentAST; - - // Build the initial tree node traversal stack, of the "far left" side. - // The general idea is to accumulate from the bottom and make stacks - // of right-hand subtrees that themselves accumulate upward. The left - // side node, ast1, will always be a Boolean representing the left-side - // evaluated value, up to the current subtree (parentAST). - while (!isTerminal(ast1)) { - treeStack.push(ast1); - ast1 = ast1.leftOperand; - - if (!ast1) { - // Empty tree. - return false; - } - } - - ast1 = ast1.match(image); - treeStack.push(ast1); - - while (treeStack.length > 0) { - parentAST = treeStack.pop(); - - if (parentAST === null) { - // We are at the end of a virtual stack for a right node - // subtree. We switch the result of this stack from left - // (ast1) to right (ast2), pop the original left node, - // and finally pop the parent subtree itself. See near the - // end of this function to view how this is populated. - ast2 = ast1; - ast1 = treeStack.pop(); - parentAST = treeStack.pop(); - } - else { - // First, check to see if we can do a short-circuit - // evaluation to skip evaluating the right side entirely. 
- if (!ast1 && parentAST.op === 'and_op') { - ast1 = parentAST.negate; - continue; - } - - if (ast1 && parentAST.op === 'or_op') { - ast1 = !parentAST.negate; - continue; - } - - // If we are not at the end of a stack, grab the right - // node. The left node (ast1) is currently a terminal Boolean. - ast2 = parentAST.rightOperand; - } - - if (typeof ast2 === 'boolean') { - ast1 = combineOperands(ast1, ast2, parentAST); - } - else if (!ast2) { - // A subtree with a single node. This is generally the case - // for negated tokens. - if (parentAST.negate) { - ast1 = !ast1; - } - } - else if (isTerminal(ast2)) { - // We are finally at a leaf and can evaluate. - ast2 = ast2.match(image); - ast1 = combineOperands(ast1, ast2, parentAST); - } - else { - // We are at a node whose right side is a new subtree. - // We will build a new "virtual" stack, but instead of - // building a new Array, we can insert a null object as a - // marker. - treeStack.push(parentAST, ast1, null); - - do { - treeStack.push(ast2); - ast2 = ast2.leftOperand; - } while (!isTerminal(ast2)); - - ast1 = ast2.match(image); - } - } - - return ast1; -}; - -SearchAST.prototype.dumpTree = function() { - // Dumps to string a simple diagram of the syntax tree structure - // (starting with this object as the root) for debugging purposes. 
- const retStrArr = [], - treeQueue = [['', this]]; - let treeArr, - prefix, - tree; - - while (treeQueue.length > 0) { - treeArr = treeQueue.shift(); - prefix = treeArr[0]; - tree = treeArr[1]; - - if (isTerminal(tree)) { - retStrArr.push(`${prefix}-> ${tree.term}`); - } - else { - if (tree.negate) { - retStrArr.push(`${prefix}+ NOT_OP`); - prefix += '\t'; - } - if (tree.op) { - retStrArr.push(`${prefix}+ ${tree.op.toUpperCase()}`); - prefix += '\t'; - treeQueue.unshift([prefix, tree.rightOperand]); - treeQueue.unshift([prefix, tree.leftOperand]); - } - else { - treeQueue.unshift([prefix, tree.leftOperand]); - } - } - } - - return retStrArr.join('\n'); -}; - -export default parseSearch; diff --git a/assets/js/match_query.ts b/assets/js/match_query.ts new file mode 100644 index 000000000..dd5071885 --- /dev/null +++ b/assets/js/match_query.ts @@ -0,0 +1,15 @@ +import { defaultMatcher } from './query/factory'; +import { generateLexArray } from './query/lex'; +import { parseTokens } from './query/parse'; +import { parseTerm } from './query/term'; + +function parseWithDefaultMatcher(term: string, fuzz: number) { + return parseTerm(term, fuzz, defaultMatcher); +} + +function parseSearch(query: string) { + const tokens = generateLexArray(query, parseWithDefaultMatcher); + return parseTokens(tokens); +} + +export default parseSearch; diff --git a/assets/js/query/__tests__/date.spec.ts b/assets/js/query/__tests__/date.spec.ts new file mode 100644 index 000000000..0c205d4d0 --- /dev/null +++ b/assets/js/query/__tests__/date.spec.ts @@ -0,0 +1,106 @@ +import { makeDateMatcher } from '../date'; + +function daysAgo(days: number) { + return new Date(Date.now() - days * 86400000).toISOString(); +} + +describe('Date parsing', () => { + it('should match relative dates (upper bound)', () => { + const matcher = makeDateMatcher('3 days ago', 'lte'); + + expect(matcher(daysAgo(4), 'created_at', 0)).toBe(true); + expect(matcher(daysAgo(2), 'created_at', 0)).toBe(false); + }); + + 
it('should match relative dates (lower bound)', () => { + const matcher = makeDateMatcher('3 days ago', 'gte'); + + expect(matcher(daysAgo(4), 'created_at', 0)).toBe(false); + expect(matcher(daysAgo(2), 'created_at', 0)).toBe(true); + }); + + it('should match absolute date ranges', () => { + const ltMatcher = makeDateMatcher('2025', 'lt'); + const gtMatcher = makeDateMatcher('2023', 'gt'); + + expect(ltMatcher(new Date(Date.UTC(2025, 5, 21)).toISOString(), 'created_at', 0)).toBe(false); + expect(ltMatcher(new Date(Date.UTC(2024, 5, 21)).toISOString(), 'created_at', 0)).toBe(true); + expect(ltMatcher(new Date(Date.UTC(2023, 5, 21)).toISOString(), 'created_at', 0)).toBe(true); + + expect(gtMatcher(new Date(Date.UTC(2025, 5, 21)).toISOString(), 'created_at', 0)).toBe(true); + expect(gtMatcher(new Date(Date.UTC(2024, 5, 21)).toISOString(), 'created_at', 0)).toBe(true); + expect(gtMatcher(new Date(Date.UTC(2023, 5, 21)).toISOString(), 'created_at', 0)).toBe(false); + }); + + it('should match absolute dates through years', () => { + const matcher = makeDateMatcher('2024', 'eq'); + + expect(matcher(new Date(Date.UTC(2025, 5, 21)).toISOString(), 'created_at', 0)).toBe(false); + expect(matcher(new Date(Date.UTC(2024, 5, 21)).toISOString(), 'created_at', 0)).toBe(true); + expect(matcher(new Date(Date.UTC(2023, 5, 21)).toISOString(), 'created_at', 0)).toBe(false); + }); + + it('should match absolute dates through months', () => { + const matcher = makeDateMatcher('2024-06', 'eq'); + + expect(matcher(new Date(Date.UTC(2024, 6, 21)).toISOString(), 'created_at', 0)).toBe(false); + expect(matcher(new Date(Date.UTC(2024, 5, 21)).toISOString(), 'created_at', 0)).toBe(true); + expect(matcher(new Date(Date.UTC(2024, 4, 21)).toISOString(), 'created_at', 0)).toBe(false); + }); + + it('should match absolute dates through days', () => { + const matcher = makeDateMatcher('2024-06-21', 'eq'); + + expect(matcher(new Date(Date.UTC(2024, 5, 22)).toISOString(), 'created_at', 0)).toBe(false); + 
expect(matcher(new Date(Date.UTC(2024, 5, 21)).toISOString(), 'created_at', 0)).toBe(true); + expect(matcher(new Date(Date.UTC(2024, 5, 20)).toISOString(), 'created_at', 0)).toBe(false); + }); + + it('should match absolute dates through hours', () => { + const matcher = makeDateMatcher('2024-06-21T06', 'eq'); + + expect(matcher(new Date(Date.UTC(2024, 5, 21, 7)).toISOString(), 'created_at', 0)).toBe(false); + expect(matcher(new Date(Date.UTC(2024, 5, 21, 6)).toISOString(), 'created_at', 0)).toBe(true); + expect(matcher(new Date(Date.UTC(2024, 5, 21, 5)).toISOString(), 'created_at', 0)).toBe(false); + }); + + it('should match absolute dates through minutes', () => { + const matcher = makeDateMatcher('2024-06-21T06:21', 'eq'); + + expect(matcher(new Date(Date.UTC(2024, 5, 21, 6, 22)).toISOString(), 'created_at', 0)).toBe(false); + expect(matcher(new Date(Date.UTC(2024, 5, 21, 6, 21)).toISOString(), 'created_at', 0)).toBe(true); + expect(matcher(new Date(Date.UTC(2024, 5, 21, 6, 20)).toISOString(), 'created_at', 0)).toBe(false); + }); + + it('should match absolute dates through seconds', () => { + const matcher = makeDateMatcher('2024-06-21T06:21:30Z', 'eq'); + + expect(matcher(new Date(Date.UTC(2024, 5, 21, 6, 21, 31)).toISOString(), 'created_at', 0)).toBe(false); + expect(matcher(new Date(Date.UTC(2024, 5, 21, 6, 21, 30)).toISOString(), 'created_at', 0)).toBe(true); + expect(matcher(new Date(Date.UTC(2024, 5, 21, 6, 21, 29)).toISOString(), 'created_at', 0)).toBe(false); + }); + + it('should match absolute dates through seconds with positive timezone offset', () => { + const matcher = makeDateMatcher('2024-06-21T06:21:30+01:30', 'eq'); + + expect(matcher(new Date(Date.UTC(2024, 5, 21, 4, 51, 31)).toISOString(), 'created_at', 0)).toBe(false); + expect(matcher(new Date(Date.UTC(2024, 5, 21, 4, 51, 30)).toISOString(), 'created_at', 0)).toBe(true); + expect(matcher(new Date(Date.UTC(2024, 5, 21, 4, 51, 29)).toISOString(), 'created_at', 0)).toBe(false); + }); + + 
it('should match absolute dates through seconds with negative timezone offset', () => { + const matcher = makeDateMatcher('2024-06-21T06:21:30-01:30', 'eq'); + + expect(matcher(new Date(Date.UTC(2024, 5, 21, 7, 51, 31)).toISOString(), 'created_at', 0)).toBe(false); + expect(matcher(new Date(Date.UTC(2024, 5, 21, 7, 51, 30)).toISOString(), 'created_at', 0)).toBe(true); + expect(matcher(new Date(Date.UTC(2024, 5, 21, 7, 51, 29)).toISOString(), 'created_at', 0)).toBe(false); + }); + + it('should not match malformed absolute date expressions', () => { + expect(() => makeDateMatcher('2024-06-21T06:21:30+01:3020', 'eq')).toThrow('Cannot parse date string: 2024-06-21T06:21:30+01:3020'); + }); + + it('should not match malformed relative date expressions', () => { + expect(() => makeDateMatcher('3 test failures ago', 'eq')).toThrow('Cannot parse date string: 3 test failures ago'); + }); +}); diff --git a/assets/js/query/__tests__/literal.spec.ts b/assets/js/query/__tests__/literal.spec.ts new file mode 100644 index 000000000..e5ea804bc --- /dev/null +++ b/assets/js/query/__tests__/literal.spec.ts @@ -0,0 +1,36 @@ +import { makeLiteralMatcher } from '../literal'; + +describe('Literal field parsing', () => { + it('should handle exact matching in arrayed fields', () => { + const matcher = makeLiteralMatcher('safe', 0, false); + expect(matcher('safe, solo', 'tags', 0)).toBe(true); + expect(matcher('solo', 'tags', 0)).toBe(false); + }); + + it('should handle exact matching in non-arrayed fields', () => { + const matcher = makeLiteralMatcher('safe', 0, false); + expect(matcher('safe, solo', 'description', 0)).toBe(false); + expect(matcher('safe', 'description', 0)).toBe(true); + expect(matcher('solo', 'description', 0)).toBe(false); + }); + + it('should handle fuzzy matching based on normalized edit distance', () => { + const matcher = makeLiteralMatcher('fluttersho', 0.8, false); + expect(matcher('fluttershy', 'tags', 0)).toBe(true); + expect(matcher('rarity', 'tags', 
0)).toBe(false); + }); + + it('should handle fuzzy matching based on raw edit distance', () => { + const matcher = makeLiteralMatcher('fluttersho', 1, false); + expect(matcher('fluttershy', 'tags', 0)).toBe(true); + expect(matcher('rarity', 'tags', 0)).toBe(false); + }); + + it('should handle wildcard matching', () => { + const matcher = makeLiteralMatcher('fl?tter*', 0, true); + expect(matcher('fluttershy', 'tags', 0)).toBe(true); + expect(matcher('flitter', 'tags', 0)).toBe(true); + expect(matcher('rainbow dash', 'tags', 0)).toBe(false); + expect(matcher('gentle flutter', 'tags', 0)).toBe(false); + }); +}); diff --git a/assets/js/query/__tests__/number.spec.ts b/assets/js/query/__tests__/number.spec.ts new file mode 100644 index 000000000..da6e127bf --- /dev/null +++ b/assets/js/query/__tests__/number.spec.ts @@ -0,0 +1,53 @@ +import { makeNumberMatcher } from '../number'; + +describe('Number parsing', () => { + it('should match numbers directly', () => { + const intMatch = makeNumberMatcher(2067, 0, 'eq'); + + expect(intMatch('2066', 'value', 0)).toBe(false); + expect(intMatch('2067', 'value', 0)).toBe(true); + expect(intMatch('2068', 'value', 0)).toBe(false); + expect(intMatch('20677', 'value', 0)).toBe(false); + }); + + it('should match number ranges', () => { + const ltMatch = makeNumberMatcher(2067, 0, 'lt'); + const lteMatch = makeNumberMatcher(2067, 0, 'lte'); + const gtMatch = makeNumberMatcher(2067, 0, 'gt'); + const gteMatch = makeNumberMatcher(2067, 0, 'gte'); + + expect(ltMatch('2066', 'value', 0)).toBe(true); + expect(ltMatch('2067', 'value', 0)).toBe(false); + expect(ltMatch('2068', 'value', 0)).toBe(false); + expect(lteMatch('2066', 'value', 0)).toBe(true); + expect(lteMatch('2067', 'value', 0)).toBe(true); + expect(lteMatch('2068', 'value', 0)).toBe(false); + expect(gtMatch('2066', 'value', 0)).toBe(false); + expect(gtMatch('2067', 'value', 0)).toBe(false); + expect(gtMatch('2068', 'value', 0)).toBe(true); + expect(gteMatch('2066', 'value', 
0)).toBe(false); + expect(gteMatch('2067', 'value', 0)).toBe(true); + expect(gteMatch('2068', 'value', 0)).toBe(true); + }); + + it('should not match unparsed values', () => { + const matcher = makeNumberMatcher(2067, 0, 'eq'); + + expect(matcher('NaN', 'value', 0)).toBe(false); + expect(matcher('test', 'value', 0)).toBe(false); + }); + + it('should interpret fuzz as an inclusive range around the value', () => { + const matcher = makeNumberMatcher(2067, 3, 'eq'); + + expect(matcher('2063', 'value', 0)).toBe(false); + expect(matcher('2064', 'value', 0)).toBe(true); + expect(matcher('2065', 'value', 0)).toBe(true); + expect(matcher('2066', 'value', 0)).toBe(true); + expect(matcher('2067', 'value', 0)).toBe(true); + expect(matcher('2068', 'value', 0)).toBe(true); + expect(matcher('2069', 'value', 0)).toBe(true); + expect(matcher('2070', 'value', 0)).toBe(true); + expect(matcher('2071', 'value', 0)).toBe(false); + }); +}); diff --git a/assets/js/query/__tests__/user.spec.ts b/assets/js/query/__tests__/user.spec.ts new file mode 100644 index 000000000..52545d0c8 --- /dev/null +++ b/assets/js/query/__tests__/user.spec.ts @@ -0,0 +1,50 @@ +import { makeUserMatcher } from '../user'; + +describe('User field parsing', () => { + beforeEach(() => { + /* eslint-disable camelcase */ + window.booru.interactions = [ + {image_id: 0, user_id: 0, interaction_type: 'faved', value: null}, + {image_id: 0, user_id: 0, interaction_type: 'voted', value: 'up'}, + {image_id: 1, user_id: 0, interaction_type: 'voted', value: 'down'}, + {image_id: 2, user_id: 0, interaction_type: 'hidden', value: null}, + ]; + /* eslint-enable camelcase */ + }); + + it('should parse my:faves', () => { + const matcher = makeUserMatcher('faves'); + + expect(matcher('', 'my', 0)).toBe(true); + expect(matcher('', 'my', 1)).toBe(false); + expect(matcher('', 'my', 2)).toBe(false); + }); + + it('should parse my:upvotes', () => { + const matcher = makeUserMatcher('upvotes'); + + expect(matcher('', 'my', 0)).toBe(true); 
/**
 * Combines matchers with logical OR.
 *
 * The returned matcher accepts an element as soon as one of `matchers`
 * accepts it (later matchers are not evaluated). With no matchers it
 * rejects every element.
 */
export function matchAny(...matchers: AstMatcher[]): AstMatcher {
  return (e: HTMLElement) => matchers.some(candidate => candidate(e));
}

/**
 * Combines matchers with logical AND.
 *
 * The returned matcher rejects an element as soon as one of `matchers`
 * rejects it (later matchers are not evaluated). With no matchers it
 * accepts every element.
 */
export function matchAll(...matchers: AstMatcher[]): AstMatcher {
  return (e: HTMLElement) => matchers.every(candidate => candidate(e));
}

/**
 * Inverts a matcher: the result accepts exactly the elements that
 * `matcher` rejects.
 */
export function matchNot(matcher: AstMatcher): AstMatcher {
  return (e: HTMLElement) => !matcher(e);
}

/**
 * Produces a matcher that rejects every element.
 */
export function matchNone(): AstMatcher {
  return () => false;
}
type Year = number;
type Month = number;
type Day = number;
type Hours = number;
type Minutes = number;
type Seconds = number;
type AbsoluteDate = [Year, Month, Day, Hours, Minutes, Seconds];
type TimeZoneOffset = [Hours, Minutes];
type PosixTimeMs = number;

/**
 * Builds a field matcher comparing a date-valued field against the
 * half-open interval [bottomDate, topDate).
 *
 * The field value is parsed with `new Date(v)`; an unparseable value
 * yields NaN, for which every comparison below is false.
 *
 * @param bottomDate inclusive lower bound, in POSIX milliseconds
 * @param topDate exclusive upper bound, in POSIX milliseconds
 * @param qual which side of the interval the field value must fall on
 */
function makeMatcher(bottomDate: PosixTimeMs, topDate: PosixTimeMs, qual: RangeEqualQualifier): FieldMatcher {
  // The open-left, closed-right date range specified by the
  // date/time format limits the types of comparisons that are
  // done compared to numeric ranges.
  switch (qual) {
    case 'lte':
      return v => new Date(v).getTime() < topDate;
    case 'gte':
      return v => new Date(v).getTime() >= bottomDate;
    case 'lt':
      return v => new Date(v).getTime() < bottomDate;
    case 'gt':
      return v => new Date(v).getTime() >= topDate;
    case 'eq':
    default:
      return v => {
        const t = new Date(v).getTime();
        return t >= bottomDate && t < topDate;
      };
  }
}

/**
 * Builds a matcher for a relative date such as "3 days ago".
 *
 * The matched interval is one unit wide: it starts `amount` units before
 * the current time and ends `amount - 1` units before it. Months and
 * years use fixed lengths of 30 and 365 days.
 *
 * @throws Error when `dateVal` does not contain "<n> <unit>[s] ago"
 */
function makeRelativeDateMatcher(dateVal: string, qual: RangeEqualQualifier): FieldMatcher {
  const match = /(\d+) (second|minute|hour|day|week|month|year)s? ago/.exec(dateVal);
  // Duration of each supported unit in milliseconds.
  const bounds: Record<string, number> = {
    second: 1000,
    minute: 60000,
    hour: 3600000,
    day: 86400000,
    week: 604800000,
    month: 2592000000,
    year: 31536000000
  };

  if (match) {
    const amount = parseInt(match[1], 10);
    const scale = bounds[match[2]];

    const now = new Date().getTime();
    const bottomDate = new Date(now - amount * scale).getTime();
    const topDate = new Date(now - (amount - 1) * scale).getTime();

    return makeMatcher(bottomDate, topDate, qual);
  }

  throw new Error(`Cannot parse date string: ${dateVal}`);
}

/**
 * Builds a matcher for an absolute, possibly truncated, ISO-like date:
 * `YYYY[-MM[-DD[(T|t|space)hh[:mm[:ss]]]]]` with an optional trailing
 * `Z` or `±hh:mm` offset.
 *
 * The matched interval covers the least significant component given,
 * e.g. "2020-01" covers the whole of January 2020 (UTC unless an offset
 * is supplied).
 *
 * @throws Error when a component fails to parse or trailing text remains
 */
function makeAbsoluteDateMatcher(dateVal: string, qual: RangeEqualQualifier): FieldMatcher {
  // One pattern per date component, in AbsoluteDate order.
  const parseRes: RegExp[] = [
    /^(\d{4})/,
    /^-(\d{2})/,
    /^-(\d{2})/,
    /^(?:\s+|T|t)(\d{2})/,
    /^:(\d{2})/,
    /^:(\d{2})/
  ];
  const timeZoneOffset: TimeZoneOffset = [0, 0];
  const timeData: AbsoluteDate = [0, 0, 1, 0, 0, 0];

  const origDateVal: string = dateVal;
  let localDateVal = origDateVal;

  const offsetMatch = /([+-])(\d{2}):(\d{2})$/.exec(localDateVal);
  if (offsetMatch) {
    timeZoneOffset[0] = parseInt(offsetMatch[2], 10);
    timeZoneOffset[1] = parseInt(offsetMatch[3], 10);
    if (offsetMatch[1] === '-') {
      timeZoneOffset[0] *= -1;
      timeZoneOffset[1] *= -1;
    }
    // Drop the six-character "+hh:mm" suffix.
    localDateVal = localDateVal.slice(0, -6);
  }
  else {
    localDateVal = localDateVal.replace(/[Zz]$/, '');
  }

  let matchIndex = 0;
  for (; matchIndex < parseRes.length; matchIndex += 1) {
    if (localDateVal.length === 0) {
      break;
    }

    const componentMatch = parseRes[matchIndex].exec(localDateVal);
    if (!componentMatch) {
      throw new Error(`Cannot parse date string: ${origDateVal}`);
    }

    // Months are offset by 1; all other components are not offset.
    const parsed = parseInt(componentMatch[1], 10);
    timeData[matchIndex] = matchIndex === 1 ? parsed - 1 : parsed;

    // Consume the matched component.
    localDateVal = localDateVal.slice(componentMatch[0].length);
  }

  if (localDateVal.length > 0) {
    throw new Error(`Cannot parse date string: ${origDateVal}`);
  }

  // Apply the user-specified time zone offset. The JS Date constructor
  // is very flexible here.
  timeData[3] -= timeZoneOffset[0];
  timeData[4] -= timeZoneOffset[1];

  const asPosix = (data: AbsoluteDate) => {
    return new Date(Date.UTC(...data)).getTime();
  };

  const bottomDate = asPosix(timeData);
  // Advance the last component that was supplied to get the upper bound.
  timeData[matchIndex - 1] += 1;
  const topDate = asPosix(timeData);

  return makeMatcher(bottomDate, topDate, qual);
}

/**
 * Builds a date matcher from user input, trying the absolute format
 * first and falling back to the relative ("N units ago") format.
 *
 * @throws Error when `dateVal` matches neither format
 */
export function makeDateMatcher(dateVal: string, qual: RangeEqualQualifier): FieldMatcher {
  try {
    return makeAbsoluteDateMatcher(dateVal, qual);
  }
  catch (_) {
    // Not an absolute date; the relative parser reports the final error.
    return makeRelativeDateMatcher(dateVal, qual);
  }
}
type AttributeName = string;

// Fields whose terms are compared numerically (and accept range qualifiers).
export const numberFields: FieldName[] =
  ['id', 'width', 'height', 'aspect_ratio',
   'comment_count', 'score', 'upvotes', 'downvotes',
   'faves', 'tag_count'];

// Fields whose terms are compared as dates.
export const dateFields: FieldName[] = ['created_at'];

// Fields whose terms are compared as strings.
export const literalFields =
  ['tags', 'orig_sha512_hash', 'sha512_hash',
   'score', 'uploader', 'source_url', 'description'];

// Maps a search field name to the element attribute holding its value.
export const termSpaceToImageField: Record<FieldName, AttributeName> = {
  tags: 'data-image-tag-aliases',
  score: 'data-score',
  upvotes: 'data-upvotes',
  downvotes: 'data-downvotes',
  uploader: 'data-uploader',
  // Yeah, I don't think this is reasonably supportable.
  // faved_by: 'data-faved-by',
  id: 'data-image-id',
  width: 'data-width',
  height: 'data-height',
  /* eslint-disable camelcase */
  aspect_ratio: 'data-aspect-ratio',
  comment_count: 'data-comment-count',
  tag_count: 'data-tag-count',
  source_url: 'data-source-url',
  faves: 'data-faves',
  sha512_hash: 'data-sha512',
  orig_sha512_hash: 'data-orig-sha512',
  created_at: 'data-created-at'
  /* eslint-enable camelcase */
};

// Field used when a term names no known field.
export const defaultField = 'tags';

type TokenName = string;
type Token = [TokenName, RegExp];

// Token patterns, tried in order against the start of the remaining input;
// the first one that matches wins.
const tokenList: Token[] = [
  ['fuzz', /^~(?:\d+(\.\d+)?|\.\d+)/],
  ['boost', /^\^[-+]?\d+(\.\d+)?/],
  ['quoted_lit', /^\s*"(?:[^"]|\\")+"/],
  ['lparen', /^\s*\(\s*/],
  ['rparen', /^\s*\)\s*/],
  ['and_op', /^\s*(?:&&|AND)\s+/],
  ['and_op', /^\s*,\s*/],
  ['or_op', /^\s*(?:\|\||OR)\s+/],
  ['not_op', /^\s*NOT(?:\s+|(?=\())/],
  ['not_op', /^\s*[!-]\s*/],
  ['space', /^\s+/],
  ['word', /^(?:\\[\s,()^~]|[^\s,()^~])+/],
  ['word', /^(?:\\[\s,()]|[^\s,()])+/]
];

export type ParseTerm = (term: string, fuzz: number, boost: number) => AstMatcher;

/**
 * Tokenizes a search string into a postfix (reverse Polish) token list.
 *
 * Each completed search term is converted with `parseTerm` and pushed as
 * an operand; operators are pushed as the strings 'and_op', 'or_op' and
 * 'not_op'. A 'not_op' follows the operand (or group) it negates.
 *
 * @param searchStr raw query text
 * @param parseTerm converts a term plus its fuzz and boost into a matcher
 * @returns operands and operator names in postfix order
 * @throws Error 'Mismatched parentheses.' when a group is left unclosed
 */
export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
  const opQueue: string[] = [];
  const groupNegate: boolean[] = [];
  const tokenStack: TokenList = [];

  let searchTerm: string | null = null;
  let boostFuzzStr = '';
  let localSearchStr: string = searchStr;
  let negate = false;
  let boost = 1;
  let fuzz = 0;
  let lparenCtr = 0;

  // Flushes the term being accumulated (if any) to the output, followed
  // by a pending negation (if any).
  const pushTerm = () => {
    if (searchTerm !== null) {
      // Push to stack.
      tokenStack.push(parseTerm(searchTerm, fuzz, boost));
      // Reset term and options data.
      boost = 1;
      fuzz = 0;
      searchTerm = null;
      boostFuzzStr = '';
      lparenCtr = 0;
    }

    if (negate) {
      tokenStack.push('not_op');
      negate = false;
    }
  };

  while (localSearchStr.length > 0) {
    for (const [tokenName, tokenRe] of tokenList) {
      const match = tokenRe.exec(localSearchStr);

      if (!match) {
        continue;
      }

      const token = match[0];

      // A binary operator, or a parenthesis closing a group rather than
      // one opened inside the term, ends the current term.
      const endsTerm = tokenName === 'and_op' || tokenName === 'or_op' ||
                       tokenName === 'rparen' && lparenCtr === 0;

      if (searchTerm !== null && endsTerm) {
        pushTerm();
      }

      switch (tokenName) {
        case 'and_op':
          while (opQueue[0] === 'and_op') {
            tokenStack.push(assertNotUndefined(opQueue.shift()));
          }
          opQueue.unshift('and_op');
          break;
        case 'or_op':
          while (opQueue[0] === 'and_op' || opQueue[0] === 'or_op') {
            tokenStack.push(assertNotUndefined(opQueue.shift()));
          }
          opQueue.unshift('or_op');
          break;
        case 'not_op':
          if (searchTerm) {
            // We're already inside a search term, so it does not apply, obv.
            searchTerm += token;
          }
          else {
            negate = !negate;
          }
          break;
        case 'lparen':
          if (searchTerm) {
            // If we are inside the search term, do not error out just yet;
            // instead, consider it as part of the search term, as a user convenience.
            searchTerm += token;
            lparenCtr += 1;
          }
          else {
            opQueue.unshift('lparen');
            groupNegate.push(negate);
            negate = false;
          }
          break;
        case 'rparen':
          if (lparenCtr > 0) {
            // Closes a parenthesis that was opened inside the term.
            if (searchTerm) {
              searchTerm += token;
            }
            else {
              searchTerm = token;
            }
            lparenCtr -= 1;
          }
          else {
            // Closes a group: emit its operators down to the opening paren.
            while (opQueue.length > 0) {
              const op = assertNotUndefined(opQueue.shift());
              if (op === 'lparen') {
                break;
              }
              tokenStack.push(op);
            }
            if (groupNegate.length > 0 && groupNegate.pop()) {
              tokenStack.push('not_op');
            }
          }
          break;
        case 'fuzz':
          if (searchTerm) {
            // For this and boost operations, we store the current match so far
            // to a temporary string in case this is actually inside the term.
            fuzz = parseFloat(token.slice(1));
            boostFuzzStr += token;
          }
          else {
            searchTerm = token;
          }
          break;
        case 'boost':
          if (searchTerm) {
            boost = parseFloat(token.slice(1));
            boostFuzzStr += token;
          }
          else {
            searchTerm = token;
          }
          break;
        case 'quoted_lit':
          if (searchTerm) {
            searchTerm += token;
          }
          else {
            searchTerm = token;
          }
          break;
        case 'word':
          if (searchTerm) {
            if (fuzz !== 0 || boost !== 1) {
              // More term text follows, so the earlier fuzz/boost text was
              // part of the term after all: restore it and reset the options.
              boost = 1;
              fuzz = 0;
              searchTerm += boostFuzzStr;
              boostFuzzStr = '';
            }
            searchTerm += token;
          }
          else {
            searchTerm = token;
          }
          break;
        default:
          // Append extra spaces within search terms.
          if (searchTerm) {
            searchTerm += token;
          }
      }

      // Truncate string and restart the token tests.
      localSearchStr = localSearchStr.slice(token.length);

      // Break since we have found a match.
      break;
    }
  }

  // Append final tokens to the stack.
  pushTerm();

  // Only 'lparen' markers are ever queued, so one left over means a group
  // was never closed.
  if (opQueue.indexOf('lparen') !== -1) {
    throw new Error('Mismatched parentheses.');
  }

  // Append the remaining queued operators to the token stack.
  tokenStack.push(...opQueue);

  return tokenStack;
}
/**
 * Splits a raw field value into the individual values to test.
 * The 'tags' field holds a ', '-separated list; every other field is
 * treated as a single value.
 */
function extractValues(v: string, name: string) {
  return name === 'tags' ? v.split(', ') : [v];
}

/**
 * Builds a matcher that accepts a field when any of its values equals
 * `term`, ignoring case.
 */
function makeExactMatcher(term: string): FieldMatcher {
  const termLower = term.toLowerCase();

  return (v, name) => {
    return extractValues(v, name).some(val => val.toLowerCase() === termLower);
  };
}

/**
 * Builds a matcher that accepts a field when any of its values matches
 * the wildcard pattern `term`, ignoring case. Unescaped `*` becomes `.*`
 * and unescaped `?` becomes `.?`.
 */
function makeWildcardMatcher(term: string): FieldMatcher {
  // Transforms wildcard match into regular expression.
  // A custom NFA with caching may be more sophisticated but not
  // likely to be faster.
  const pattern = term
    .replace(/([.+^$[\]\\(){}|-])/g, '\\$1')
    .replace(/([^\\]|[^\\](?:\\\\)+)\*/g, '$1.*')
    .replace(/^(?:\\\\)*\*/g, '.*')
    .replace(/([^\\]|[^\\](?:\\\\)+)\?/g, '$1.?')
    .replace(/^(?:\\\\)*\?/g, '.?');
  const wildcard = new RegExp(`^${pattern}$`, 'i');

  return (v, name) => {
    return extractValues(v, name).some(val => wildcard.test(val));
  };
}

/**
 * Tests whether `term` is within the allowed edit distance of `targetStr`,
 * ignoring case.
 *
 * The distance is the optimal string alignment distance: insertions,
 * deletions, substitutions and transpositions of adjacent characters
 * each cost 1.
 *
 * @param fuzz below 1.0 it is a similarity fraction and the allowed
 *   distance is `targetStr.length * (1 - fuzz)`; otherwise it is the
 *   allowed distance itself
 */
function fuzzyMatch(term: string, targetStr: string, fuzz: number): boolean {
  const targetDistance = fuzz < 1.0 ? targetStr.length * (1.0 - fuzz) : fuzz;
  const termLower = term.toLowerCase();
  const targetStrLower = targetStr.toLowerCase();

  // Work vectors, representing the last three populated
  // rows of the dynamic programming matrix of the iterative
  // optimal string alignment calculation.
  let v0: number[] = [];
  let v1: number[] = [];
  let v2: number[] = [];

  // Row for the empty term prefix: j insertions produce the first j
  // characters of the target.
  for (let j = 0; j <= targetStrLower.length; j += 1) {
    v1.push(j);
  }

  for (let i = 0; i < termLower.length; i += 1) {
    // Column for the empty target prefix: i + 1 deletions.
    v2[0] = i + 1;

    for (let j = 0; j < targetStrLower.length; j += 1) {
      const cost = termLower[i] === targetStrLower[j] ? 0 : 1;
      v2[j + 1] = Math.min(
        // Deletion.
        v1[j + 1] + 1,
        // Insertion.
        v2[j] + 1,
        // Substitution or No Change.
        v1[j] + cost
      );

      // Transposition of two adjacent characters.
      if (i > 0 && j > 0 &&
          termLower[i] === targetStrLower[j - 1] &&
          termLower[i - 1] === targetStrLower[j]) {
        v2[j + 1] = Math.min(v2[j + 1], v0[j - 1] + 1);
      }
    }

    // Rotate the rows; the oldest one is reused and fully overwritten
    // by the next iteration.
    const recycled = v0;
    v0 = v1;
    v1 = v2;
    v2 = recycled;
  }

  return v1[targetStrLower.length] <= targetDistance;
}

/**
 * Builds a matcher that accepts a field when any of its values is a
 * fuzzy match for `term` (see `fuzzyMatch` for the meaning of `fuzz`).
 */
function makeFuzzyMatcher(term: string, fuzz: number): FieldMatcher {
  return (v, name) => {
    return extractValues(v, name).some(val => fuzzyMatch(term, val, fuzz));
  };
}

/**
 * Selects the string matcher for a term: wildcard when `wildcardable`,
 * otherwise exact when `fuzz` is 0, otherwise fuzzy.
 */
export function makeLiteralMatcher(term: string, fuzz: number, wildcardable: boolean): FieldMatcher {
  if (wildcardable) {
    return makeWildcardMatcher(term);
  }

  return fuzz === 0 ? makeExactMatcher(term) : makeFuzzyMatcher(term, fuzz);
}

/**
 * Builds a matcher comparing a numeric field against `term`.
 *
 * The field value is parsed with `parseFloat`; values that do not parse
 * never match. A non-zero `fuzz` takes precedence over `qual` and accepts
 * the inclusive range [term - fuzz, term + fuzz].
 */
export function makeNumberMatcher(term: number, fuzz: number, qual: RangeEqualQualifier): FieldMatcher {
  // Range matching.
  return v => {
    const attrVal = parseFloat(v);

    if (isNaN(attrVal)) {
      return false;
    }

    if (fuzz !== 0) {
      return term - fuzz <= attrVal && term + fuzz >= attrVal;
    }

    switch (qual) {
      case 'lt':
        return attrVal < term;
      case 'gt':
        return attrVal > term;
      case 'lte':
        return attrVal <= term;
      case 'gte':
        return attrVal >= term;
      case 'eq':
      default:
        return attrVal === term;
    }
  };
}

/**
 * Evaluates a postfix token list into a single matcher.
 *
 * Matchers are operands; 'and_op' and 'or_op' combine the two most recent
 * operands; a 'not_op' negates the operand or operator result that
 * immediately precedes it.
 *
 * @returns the combined matcher, or a matcher that rejects everything
 *   when the list yields no operand
 * @throws Error 'Missing operand.' when an operator lacks two operands
 * @throws Error 'Missing operator.' when more than one operand remains
 */
export function parseTokens(lexicalArray: TokenList): AstMatcher {
  const operandStack: AstMatcher[] = [];

  lexicalArray.forEach((token, i) => {
    // Negations are applied when the preceding token is handled.
    if (token === 'not_op') {
      return;
    }

    let intermediate: AstMatcher;

    if (typeof token === 'string') {
      const op2 = operandStack.pop();
      const op1 = operandStack.pop();

      if (typeof op1 === 'undefined' || typeof op2 === 'undefined') {
        throw new Error('Missing operand.');
      }

      intermediate = token === 'and_op' ? matchAll(op1, op2) : matchAny(op1, op2);
    }
    else {
      intermediate = token;
    }

    const negated = lexicalArray[i + 1] === 'not_op';
    operandStack.push(negated ? matchNot(intermediate) : intermediate);
  });

  if (operandStack.length > 1) {
    throw new Error('Missing operator.');
  }

  const result = operandStack.pop();

  if (typeof result === 'undefined') {
    return matchNone();
  }

  return result;
}
type RangeInfo = [FieldName, RangeEqualQualifier, TermType];

/**
 * Removes escaping backslashes from a term.
 *
 * A term that cannot contain wildcards only has its `\"` sequences
 * unescaped. A wildcardable term has every backslash escape removed
 * except `\*` and `\?`, which the wildcard matcher interprets itself.
 */
function normalizeTerm(term: string, wildcard: boolean) {
  if (!wildcard) {
    // Unescape every escaped quote, not just the first occurrence.
    return term.replace(/\\"/g, '"');
  }
  return term.replace(/\\([^*?])/g, '$1');
}

/**
 * Recognizes a number or date field name with an optional range
 * qualifier suffix (`.lt`, `.lte`, `.gt`, `.gte`, `.eq`).
 *
 * @returns [field name, qualifier, field type], with the qualifier
 *   defaulting to 'eq'; null when `field` is not a number or date field
 */
function parseRangeField(field: string): RangeInfo | null {
  if (numberFields.indexOf(field) !== -1) {
    return [field, 'eq', 'number'];
  }

  if (dateFields.indexOf(field) !== -1) {
    return [field, 'eq', 'date'];
  }

  const qual = /^(\w+)\.([lg]te?|eq)$/.exec(field);

  if (qual) {
    const fieldName: FieldName = qual[1];
    const rangeQual = qual[2] as RangeEqualQualifier;

    if (numberFields.indexOf(fieldName) !== -1) {
      return [fieldName, rangeQual, 'number'];
    }

    if (dateFields.indexOf(fieldName) !== -1) {
      return [fieldName, rangeQual, 'date'];
    }
  }

  return null;
}

/**
 * Resolves a raw search term to the field it targets and a matcher for
 * that field's value.
 *
 * `field:value` terms are dispatched on the field: number and date fields
 * (with optional range qualifier), literal fields and the `my` namespace.
 * Any other term is matched in full against the default field.
 *
 * @param term raw term text, possibly wrapped in double quotes
 * @param fuzz fuzz amount parsed from the query (0 when absent)
 * @param factory supplies the concrete matcher implementations
 */
function makeTermMatcher(term: string, fuzz: number, factory: MatcherFactory): [FieldName, FieldMatcher] {
  let localTerm = term;
  // Quoted or fuzzy terms are matched literally, without wildcards.
  const wildcardable = fuzz === 0 && !/^"([^"]|\\")+"$/.test(localTerm);

  if (!wildcardable && !fuzz) {
    // Strip the surrounding quotes.
    localTerm = localTerm.slice(1, -1);
  }

  localTerm = normalizeTerm(localTerm, wildcardable);

  // N.B.: For the purposes of this parser, boosting effects are ignored.
  const matchArr = localTerm.split(':');

  if (matchArr.length > 1) {
    const candidateTermSpace = matchArr[0];
    const termCandidate = matchArr.slice(1).join(':');
    const rangeParsing = parseRangeField(candidateTermSpace);

    if (rangeParsing) {
      const [fieldName, rangeType, fieldType] = rangeParsing;

      if (fieldType === 'date') {
        return [fieldName, factory.makeDateMatcher(termCandidate, rangeType)];
      }

      return [fieldName, factory.makeNumberMatcher(parseFloat(termCandidate), fuzz, rangeType)];
    }

    if (literalFields.indexOf(candidateTermSpace) !== -1) {
      // Match the value part only; the field's attribute does not carry
      // the "field:" prefix.
      return [candidateTermSpace, factory.makeLiteralMatcher(termCandidate, fuzz, wildcardable)];
    }

    if (candidateTermSpace === 'my') {
      return [candidateTermSpace, factory.makeUserMatcher(termCandidate)];
    }
  }

  return [defaultField, factory.makeLiteralMatcher(localTerm, fuzz, wildcardable)];
}

/**
 * Converts a raw search term into a matcher over image elements.
 *
 * The returned matcher reads the attribute mapped to the term's field
 * (empty string when absent) and the image id attribute (0 when absent)
 * from the element and passes them to the field matcher.
 */
export function parseTerm(term: string, fuzz: number, factory: MatcherFactory): AstMatcher {
  const [fieldName, matcher] = makeTermMatcher(term, fuzz, factory);

  return (e: HTMLElement) => {
    const value = e.getAttribute(termSpaceToImageField[fieldName]) || '';
    const documentId = parseInt(e.getAttribute(termSpaceToImageField.id) || '0', 10);
    return matcher(value, fieldName, documentId);
  };
}

export type TermType = 'number' | 'date' | 'literal' | 'my';
export type RangeQualifier = 'gt' | 'gte' | 'lt' | 'lte';
export type RangeEqualQualifier = RangeQualifier | 'eq';

export type FieldValue = string;
export type FieldName = string;
// Tests one field value of the document with the given id.
export type FieldMatcher = (value: FieldValue, name: FieldName, documentId: number) => boolean;

// Tests a whole image element.
export type AstMatcher = (e: HTMLElement) => boolean;
// Postfix token list: matchers are operands, strings are operator names.
export type TokenList = (string | AstMatcher)[];
/**
 * Reports whether `interactions` contains an entry for the given image
 * and interaction type. When `value` is null any entry value is accepted;
 * otherwise the entry value must equal `value`.
 */
function interactionMatch(imageId: number, type: InteractionType, value: InteractionValue, interactions: Interaction[]): boolean {
  return interactions.some(entry => {
    if (entry.image_id !== imageId || entry.interaction_type !== type) {
      return false;
    }

    return value === null || entry.value === value;
  });
}

/**
 * Builds a matcher for `my:` terms, checked against
 * `window.booru.interactions` each time the matcher runs.
 *
 * Supported terms are 'faves', 'upvotes' and 'downvotes'; every other
 * term yields a matcher that never matches.
 */
export function makeUserMatcher(term: string): FieldMatcher {
  // Should work with most my:conditions except watched.
  return (value, field, documentId) => {
    if (term === 'faves') {
      return interactionMatch(documentId, 'faved', null, window.booru.interactions);
    }

    if (term === 'upvotes') {
      return interactionMatch(documentId, 'voted', 'up', window.booru.interactions);
    }

    if (term === 'downvotes') {
      return interactionMatch(documentId, 'voted', 'down', window.booru.interactions);
    }

    // Other my: interactions (including 'watched' and 'hidden') aren't
    // supported, return false to prevent them from triggering spoiler.
    return false;
  };
}
/**
 * Returns `value` unchanged after checking that it is not null,
 * narrowing its type from `T | null` to `T`.
 *
 * @throws Error 'Expected non-null value' when `value` is null
 */
export function assertNotNull<T>(value: T | null): T {
  if (value === null) {
    throw new Error('Expected non-null value');
  }

  return value;
}

/**
 * Returns `value` unchanged after checking that it is not undefined,
 * narrowing its type from `T | undefined` to `T`.
 *
 * @throws Error 'Expected non-undefined value' when `value` is undefined
 */
export function assertNotUndefined<T>(value: T | undefined): T {
  // eslint-disable-next-line no-undefined
  if (value === undefined) {
    throw new Error('Expected non-undefined value');
  }

  return value;
}
window.booru = { csrfToken: 'mockCsrfToken', @@ -18,7 +12,8 @@ window.booru = { userCanEditFilter: false, userIsSignedIn: false, watchedTagList: [], - hiddenFilter: blankFilter, - spoileredFilter: blankFilter, + hiddenFilter: matchNone(), + spoileredFilter: matchNone(), + interactions: [], tagsVersion: 5 }; diff --git a/assets/types/booru-object.d.ts b/assets/types/booru-object.d.ts index 979080b70..b4aead083 100644 --- a/assets/types/booru-object.d.ts +++ b/assets/types/booru-object.d.ts @@ -1,5 +1,17 @@ +import { AstMatcher } from 'query/types'; + type SpoilerType = 'click' | 'hover' | 'static' | 'off'; +type InteractionType = 'voted' | 'faved' | 'hidden'; +type InteractionValue = 'up' | 'down' | null; + +interface Interaction { + image_id: number; + user_id: number; + interaction_type: InteractionType; + value: 'up' | 'down' | null; +} + interface BooruObject { csrfToken: string; /** @@ -36,24 +48,20 @@ interface BooruObject { */ userCanEditFilter: boolean; /** - * SearchAST instance for hidden tags, converted from raw AST data in {@see import('../js/booru.js')} + * AST matcher instance for filter hidden query * - * TODO Properly type after TypeScript migration - * - * @type {import('../js/match_query.js').SearchAST} */ - hiddenFilter: unknown; + hiddenFilter: AstMatcher; /** - * SearchAST instance for spoilered tags, converted from raw AST data in {@see import('../js/booru.js')} - * - * TODO Properly type after TypeScript migration - * - * @type {import('../js/match_query.js').SearchAST} + * AST matcher instance for filter spoilered query */ - spoileredFilter: unknown; + spoileredFilter: AstMatcher; tagsVersion: number; + interactions: Interaction[]; } -interface Window { - booru: BooruObject; +declare global { + interface Window { + booru: BooruObject; + } }