Skip to content

Commit

Permalink
Add support for must clauses to custom query columns, closes #4
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmezzetti committed Aug 4, 2020
1 parent 2e2f68a commit 783cfb4
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion src/python/paperai/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,23 @@ def __call__(self, uid, queue):
# Build question-context pairs
names, questions, contexts, snippets = [], [], [], []
for name, query, question, snippet in queue:
# Get list of required tokens
must = [token.strip("+") for token in query.split() if token.startswith("+")]

# Tokenize search query
query = Tokenizer.tokenize(query)

# List of matches
matches = []

scores = self.embeddings.similarity(query, tokenlist)
for x, score in enumerate(scores):
matches.append(sections[x] + (score,))
# Get section text
text = sections[x][1]

# Add result if all required tokens are present or there are no required tokens
if not must or all([token.lower() in text.lower() for token in must]):
matches.append(sections[x] + (score,))

# Build context using top n best matching sections
topn = sorted(matches, key=lambda x: x[2], reverse=True)[:3]
Expand Down

0 comments on commit 783cfb4

Please sign in to comment.