-
Notifications
You must be signed in to change notification settings - Fork 9
/
gca-filter
executable file
·60 lines (47 loc) · 2.09 KB
/
gca-filter
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import unicode_literals
from gca.core import Abstract
from gca.util import simplify_str
import sys
import argparse
import codecs
def make_filter(string, exact=False):
    """Build a predicate from a ``field=value`` filter expression.

    The expression is split at the first ``=`` only, so the value part may
    itself contain ``=`` characters. Surrounding whitespace is stripped from
    both the field name and the value.

    Args:
        string: Filter expression of the form ``field=value``.
        exact: If true, the selected field is compared to the value as-is.
            Otherwise both sides are passed through ``simplify_str`` before
            they are compared.

    Returns:
        A one-argument callable that takes an abstract and returns whether
        ``abstract.select_field(field, fold=True)`` equals the value.

    Raises:
        ValueError: If ``string`` contains no ``=`` separator.
    """
    field, sep, value = string.partition('=')
    if not sep:
        # Raise explicitly rather than assert: assertions are removed under
        # ``python -O`` and a bad filter would then fail much later.
        raise ValueError("filter must have the form 'field=value': %r" % (string,))
    field = field.strip()
    value = value.strip()
    if exact:
        return lambda abstract: abstract.select_field(field, fold=True) == value
    # Simplify the reference value once, not on every predicate call.
    value = simplify_str(value)
    return lambda abstract: simplify_str(abstract.select_field(field, fold=True)) == value
if __name__ == '__main__':
    # Command-line entry point: read abstracts, keep those matching the given
    # ``field=value`` filters, optionally take a window of the result and
    # write it to standard output as JSON.
    parser = argparse.ArgumentParser(description='GCA Filter - filter list of abstract by files')
    parser.add_argument('file', type=str, default='-')
    parser.add_argument('filter', type=str, nargs='+', help='filter to apply to each abstract')
    parser.add_argument('-e', '--exact', default=False, action='store_true', help='Do use non-modified strings for comparisons. Default is to compare lower case and simplified strings.')
    parser.add_argument('--start-at', dest='P', default=None, type=int, help='start at abstract number')
    parser.add_argument('--max-num', dest='N', default=None, type=int, help='maximum number of abstracts to take')
    parser.add_argument('--or', dest='ft_or', default=False, action='store_true', help='use OR instead of AND')
    args = parser.parse_args()

    # '-' selects standard input. Real files are decoded as UTF-8 and are
    # closed even when reading fails; stdin is left open for the interpreter.
    if args.file == '-':
        raw = sys.stdin.read()
    else:
        with codecs.open(args.file, 'r', encoding='utf-8') as fd:
            raw = fd.read()
    abstracts = Abstract.from_data(raw)

    filter_list = [make_filter(f, args.exact) for f in args.filter]

    if args.ft_or:
        # OR: gather the matches of every filter, then keep one abstract per
        # uuid so an abstract matching several filters appears only once.
        matched = []
        for f in filter_list:
            matched.extend(a for a in abstracts if f(a))
        de_dup = {a.uuid: a for a in matched}
        abstracts = list(de_dup.values())
    else:
        # AND: narrow the list one filter at a time. Build a real list each
        # round; on Python 3 the builtin filter() returns a lazy iterator
        # that cannot be sliced by --start-at / --max-num below.
        for f in filter_list:
            abstracts = [a for a in abstracts if f(a)]

    if args.P is not None:
        # --start-at is 1-based. Clamp at 0 so that 0 or a negative number
        # does not become a slice counted from the end of the list.
        abstracts = abstracts[max(args.P - 1, 0):]
    if args.N is not None:
        abstracts = abstracts[:args.N]

    data = Abstract.to_json(abstracts)
    sys.stdout.write(data)