diff --git a/lokar/alma.py b/lokar/alma.py index fdb8e68..10e6e22 100644 --- a/lokar/alma.py +++ b/lokar/alma.py @@ -21,7 +21,7 @@ def bibs(self, mms_id): bib = Bib(self, response.text) if bib.mms_id != mms_id: raise RuntimeError('Response does not contain the requested MMS ID. %s != %s' - % (doc.mms_id, mms_id)) + % (bib.mms_id, mms_id)) return bib def get(self, url, *args, **kwargs): diff --git a/lokar/bib.py b/lokar/bib.py index 2113632..9cc2d1f 100644 --- a/lokar/bib.py +++ b/lokar/bib.py @@ -39,5 +39,5 @@ def save(self, diff=False, dry_run=False): def dump(self, filename): # Dump record to file - with open(filename, 'wb') as f: - f.write(etree.tostring(self.doc, pretty_print=True)) + with open(filename, 'wb') as file: + file.write(etree.tostring(self.doc, pretty_print=True)) diff --git a/lokar/concept.py b/lokar/concept.py index daac4a0..7f2c681 100644 --- a/lokar/concept.py +++ b/lokar/concept.py @@ -1,6 +1,7 @@ +import logging from copy import copy, deepcopy + from future.utils import python_2_unicode_compatible -import logging log = logging.getLogger(__name__) @@ -27,33 +28,32 @@ def __init__(self, term, vocabulary, tag='650'): raise RuntimeError('Strings with more than two components are not supported') def __copy__(self): - c = Concept(self.term, self.vocabulary, self.tag) - c.sf = copy(self.sf) # to include $0 and any other subfields not part of the term - return c + concept = Concept(self.term, self.vocabulary, self.tag) + concept.sf = copy(self.sf) # to include $0 and any other subfields not part of the term + return concept def __deepcopy__(self, memodict): - c = Concept(self.term, deepcopy(self.vocabulary), self.tag) - c.sf = copy(self.sf) # to include $0 and any other subfields not part of the term - return c + concept = Concept(self.term, deepcopy(self.vocabulary), self.tag) + concept.sf = copy(self.sf) # to include $0 and any other subfields not part of the term + return concept @property def components(self): - return [v for k, v in self.sf.items() if k in ['a', 'b', 'x', 'y', 'z'] and v is not None] + return [value for key, value in self.sf.items() if key in ['a', 'b', 'x', 'y', 'z'] and value is not None] @property def term(self): return ' : '.join(self.components) def __str__(self): - c = ['${} {}'.format(x, self.sf[x]) for x in ['a', 'x', '0'] if self.sf[x] is not None] - return ' '.join(c) + return ' '.join(['${} {}'.format(key, self.sf[key]) for key in ['a', 'x', '0'] if self.sf[key] is not None]) def authorize(self, skosmos): - c = skosmos.authorize_term(self.term, self.tag) - if c is not None: - cid = c['localname'].strip('c') + concept = skosmos.authorize_term(self.term, self.tag) + if concept is not None: + cid = concept['localname'].strip('c') self.sf['0'] = self.vocabulary.marc_prefix + cid log.info('Authorized %s %s', self.tag, self) def field(self): - return {'tag': self.tag, 'sf': self.sf} \ No newline at end of file + return {'tag': self.tag, 'sf': self.sf} diff --git a/lokar/job.py b/lokar/job.py index 1c494fd..7f199d0 100644 --- a/lokar/job.py +++ b/lokar/job.py @@ -26,7 +26,8 @@ class Job(object): - def __init__(self, action, source_concept, target_concepts=None, sru=None, alma=None, mailer=None, list_options=None): + def __init__(self, action, source_concept, target_concepts=None, sru=None, alma=None, mailer=None, + list_options=None): self.dry_run = False self.interactive = True self.show_progress = True @@ -82,9 +83,7 @@ def generate_steps(self): self.steps.append(InteractiveReplaceTask(self.source_concept, self.target_concepts)) elif self.action == 'list': - task = ListTask(self.source_concept) - task.set_options(**self.list_options) - self.steps.append(task) + self.steps.append(ListTask(self.source_concept, **self.list_options)) elif self.action == 'rename': src = self.source_concept @@ -156,8 +155,8 @@ def start(self): if self.alma.name is not None: log.info('Alma environment: %s', self.alma.name) - for n, step in enumerate(self.steps): - log.info('Step %d of %d: %s', n + 1, len(self.steps), step) + for i, step in enumerate(self.steps): + log.info('Step %d of %d: %s', i + 1, len(self.steps), step) if self.dry_run: log.info('Dry run: No catalog records will be touched!') @@ -215,9 +214,9 @@ def start(self): # Del 2: Nå har vi en liste over MMS-IDer for bibliografiske poster vi vil endre. # Vi går gjennom dem én for én, henter ut posten med Bib-apiet, endrer og poster tilbake. - for n, mms_id in enumerate(valid_records): + for i, mms_id in enumerate(valid_records): if self.action != 'list': - print(' %3d/%d: %s' % (n + 1, len(valid_records), mms_id)) + print(' %3d/%d: %s' % (i + 1, len(valid_records), mms_id)) bib = self.alma.bibs(mms_id) self.update_record(bib) diff --git a/lokar/lokar.py b/lokar/lokar.py index 3869311..7990e0a 100644 --- a/lokar/lokar.py +++ b/lokar/lokar.py @@ -2,42 +2,39 @@ from __future__ import unicode_literals import argparse +import getpass import logging.handlers -from io import open -import sys import re -import getpass - -from raven import Client - -from email.mime.text import MIMEText +import sys from email.header import Header +from email.mime.text import MIMEText +from io import open # pylint: disable=redefined-builtin from subprocess import Popen, PIPE import requests import yaml +from raven import Client from six import binary_type from . import __version__ -from .job import Job -from .concept import Concept from .alma import Alma +from .concept import Concept +from .job import Job from .sru import SruClient - -logger = logging.getLogger() -logger.setLevel(logging.INFO) +log = logging.getLogger() +log.setLevel(logging.INFO) logging.getLogger('requests').setLevel(logging.WARNING) -formatter = logging.Formatter('[%(asctime)s %(levelname)s] %(message)s', datefmt='%Y-%m-%d %H:%I:%S') - +formatter = logging.Formatter('[%(asctime)s %(levelname)s] %(message)s', + datefmt='%Y-%m-%d %H:%I:%S') console_handler = logging.StreamHandler() console_handler.setFormatter(formatter) -logger.addHandler(console_handler) +log.addHandler(console_handler) -supported_tags = ['084', '648', '650', '651', '655'] +SUPPORTED_TAGS = ['084', '648', '650', '651', '655'] -class Vocabulary(object): +class Vocabulary(object): # pylint: disable=too-few-public-methods marc_code = '' skosmos_code = '' @@ -68,8 +65,8 @@ def send_using_sendmail(self, subject, body): msg['From'] = self.config.get('sender') msg['To'] = self.config.get('recipient') msg['Subject'] = Header(subject, 'utf-8') - p = Popen(['sendmail', '-t'], stdin=PIPE) - p.communicate(msg.as_string()) + process = Popen(['sendmail', '-t'], stdin=PIPE) + process.communicate(msg.as_string()) def send_using_mailgun(self, subject, body): request_url = 'https://api.mailgun.net/v2/{0}/messages'.format(self.config['domain']) @@ -86,7 +83,7 @@ def parse_args(args, default_env=None): parser = argparse.ArgumentParser(prog='lokar', description=''' Edit or remove subject fields in Alma catalog records. Supported fields: {} - '''.format(', '.join(supported_tags))) + '''.format(', '.join(SUPPORTED_TAGS))) parser.add_argument('--version', action='version', version='%(prog)s ' + __version__) parser.add_argument('-e', '--env', dest='env', nargs='?', @@ -150,10 +147,10 @@ def parse_args(args, default_env=None): if args.new_term2 != '': args.new_terms.append(args.new_term2) - def normalize_arg(x): - if type(x) == binary_type: - return x.decode('utf-8') - return x + def normalize_arg(arg): + if isinstance(arg, binary_type): + return arg.decode('utf-8') + return arg args.term = normalize_arg(args.term) args.env = normalize_arg(args.env) @@ -163,15 +160,15 @@ def normalize_arg(x): def get_concept(term, vocabulary, default_tag='650', default_term=None): - m = re.match('^({})$'.format('|'.join(supported_tags)), term) - if m: + match = re.match('^({})$'.format('|'.join(SUPPORTED_TAGS)), term) + if match: if default_term is None: raise RuntimeError('No source term specified') - return Concept(default_term, vocabulary, m.group(1)) + return Concept(default_term, vocabulary, match.group(1)) - m = re.match('^({}) (.+)$'.format('|'.join(supported_tags)), term) - if m: - return Concept(m.group(2), vocabulary, m.group(1)) + match = re.match('^({}) (.+)$'.format('|'.join(SUPPORTED_TAGS)), term) + if match: + return Concept(match.group(2), vocabulary, match.group(1)) return Concept(term, vocabulary, default_tag) @@ -214,14 +211,14 @@ def job_args(config=None, args=None): def main(config=None, args=None): try: - with config or open('lokar.yml') as f: - config = yaml.load(f) + with config or open('lokar.yml') as file: + config = yaml.load(file) except IOError: - logger.error('Fant ikke lokar.yml. Se README.md for mer info.') + log.error('Fant ikke lokar.yml. Se README.md for mer info.') return username = getpass.getuser() - logger.info('Running as %s', username) + log.info('Running as %s', username) try: if config.get('sentry') is not None: raven = Client(config['sentry']['dsn']) @@ -233,13 +230,13 @@ def main(config=None, args=None): jargs = job_args(config, args) if args.verbose: - logger.setLevel(logging.DEBUG) + log.setLevel(logging.DEBUG) if not args.dry_run: file_handler = logging.FileHandler('lokar.log') file_handler.setFormatter(formatter) file_handler.setLevel(logging.INFO) - logger.addHandler(file_handler) + log.addHandler(file_handler) if args.env is None: raise RuntimeError('No environment specified in config file') @@ -257,12 +254,12 @@ def main(config=None, args=None): job.show_diffs = args.show_diffs job.start() - logger.info('{:=^70}'.format(' Job complete ')) + log.info('Job complete') - except Exception: + except Exception: # # pylint: disable=broad-except if config.get('sentry') is not None: raven.captureException() - logger.exception('Uncaught exception:') + log.exception('Uncaught exception:') if __name__ == '__main__': diff --git a/lokar/marc.py b/lokar/marc.py index 32fe497..a186ebf 100644 --- a/lokar/marc.py +++ b/lokar/marc.py @@ -1,11 +1,13 @@ # coding=utf-8 from __future__ import unicode_literals -from future.utils import python_2_unicode_compatible import logging from copy import deepcopy + +from colorama import Fore, Style +from future.utils import python_2_unicode_compatible + from .util import term_match, parse_xml -from colorama import Fore, Back, Style log = logging.getLogger(__name__) @@ -18,12 +20,11 @@ def __init__(self, node): self.node = node def __str__(self): - txt = [self.node.get('tag'), - self.node.attrib['ind1'].replace(' ', '#') + self.node.attrib['ind2'].replace(' ', '#')] - for sf in self.node: - txt.append('$%s %s' % (sf.attrib['code'], sf.text)) - txt = ' %s' % ' '.join(txt) - return txt + items = [self.node.get('tag'), + self.node.attrib['ind1'].replace(' ', '#') + self.node.attrib['ind2'].replace(' ', '#')] + for subfield in self.node: + items.append('$%s %s' % (subfield.attrib['code'], subfield.text)) + return ' %s' % ' '.join(items) def subfield_text(self, code): return self.node.findtext('subfield[@code="{}"]'.format(code)) @@ -55,23 +56,23 @@ def update(self, subfield_query): modified = 0 if self.match(subfield_query): - for ch in self.node.findall('subfield'): - code = ch.attrib.get('code') - if code in subfield_query.keys() and term_match(subfield_query[code]['search'], ch.text): - x = subfield_query[code] + for node in self.node.findall('subfield'): + code = node.attrib.get('code') + if code in subfield_query.keys() and term_match(subfield_query[code]['search'], node.text): + query_sf = subfield_query[code] del subfield_query[code] - if 'replace' not in x: + if 'replace' not in query_sf: # Just used for search, value should not be updated continue - elif x['replace'] is None: + elif query_sf['replace'] is None: # Subfield should be removed - log.debug('Removing component $%s %s', code, ch.text) - self.node.remove(ch) + log.debug('Removing component $%s %s', code, node.text) + self.node.remove(node) modified += 1 else: # Subfield value should be updated - log.debug('Changing $%s from "%s" tot "%s"', code, ch.text, x['replace']) - ch.text = x['replace'] + log.debug('Changing $%s from "%s" tot "%s"', code, node.text, query_sf['replace']) + node.text = query_sf['replace'] modified += 1 # Add new subfields for code, value in subfield_query.items(): @@ -97,7 +98,7 @@ def id(self): return self.el.findtext('./controlfield[@tag="001"]') def fields(self, tags, subfield_query): - if type(tags) != list: + if isinstance(tags, list) is False: tags = [tags] fields = [] for tag in tags: @@ -117,30 +118,30 @@ def remove_field(self, field): def title(self): - x = self.el.find('./datafield[@tag="245"]/subfield[@code="a"]').text + out = self.el.find('./datafield[@tag="245"]/subfield[@code="a"]').text - sf = self.el.find('./datafield[@tag="245"]/subfield[@code="b"]') - if sf is not None: - x = x.rstrip(' :') + ' : ' + sf.text + node = self.el.find('./datafield[@tag="245"]/subfield[@code="b"]') + if node is not None: + out = out.rstrip(' :') + ' : ' + node.text - sf = self.el.find('./datafield[@tag="245"]/subfield[@code="p"]') - if sf is not None: - x = x.rstrip(' /:.') + '. ' + sf.text + node = self.el.find('./datafield[@tag="245"]/subfield[@code="p"]') + if node is not None: + out = out.rstrip(' /:.') + '. ' + node.text - sf = self.el.find('./datafield[@tag="245"]/subfield[@code="n"]') - if sf is not None: - x = x.rstrip(' /:.') + '. ' + sf.text + node = self.el.find('./datafield[@tag="245"]/subfield[@code="n"]') + if node is not None: + out = out.rstrip(' /:.') + '. ' + node.text - sf = self.el.find('./datafield[@tag="245"]/subfield[@code="c"]') - if sf is not None: - x = x.rstrip(' /') + ' / ' + sf.text + node = self.el.find('./datafield[@tag="245"]/subfield[@code="c"]') + if node is not None: + out = out.rstrip(' /') + ' / ' + node.text - x = x.rstrip('.') + '.' + out = out.rstrip('.') + '.' - sf = self.el.find('./datafield[@tag="264"]/subfield[@code="c"]') - if sf is None: - sf = self.el.find('./datafield[@tag="260"]/subfield[@code="c"]') - if sf is not None: - x += ' ' + sf.text + node = self.el.find('./datafield[@tag="264"]/subfield[@code="c"]') + if node is None: + node = self.el.find('./datafield[@tag="260"]/subfield[@code="c"]') + if node is not None: + out += ' ' + node.text - return x + return out diff --git a/lokar/sru.py b/lokar/sru.py index b452df8..0518d18 100644 --- a/lokar/sru.py +++ b/lokar/sru.py @@ -1,14 +1,16 @@ # coding=utf-8 from __future__ import unicode_literals -import requests + import logging +import requests + from .marc import Record from .util import parse_xml log = logging.getLogger(__name__) -nsmap = { +NSMAP = { 'e20': 'http://explain.z3950.org/dtd/2.0/', 'e21': 'http://explain.z3950.org/dtd/2.1/', 'srw': 'http://www.loc.gov/zing/srw/', @@ -46,19 +48,19 @@ def search(self, query): }) root = parse_xml(response.text) # Takes ~ 4 seconds for 50 records! - for diagnostic in root.findall('srw:diagnostics/diag:diagnostic', namespaces=nsmap): - raise SruErrorResponse(diagnostic.findtext('diag:message', namespaces=nsmap)) + for diagnostic in root.findall('srw:diagnostics/diag:diagnostic', namespaces=NSMAP): + raise SruErrorResponse(diagnostic.findtext('diag:message', namespaces=NSMAP)) - self.num_records = int(root.findtext('srw:numberOfRecords', namespaces=nsmap)) + self.num_records = int(root.findtext('srw:numberOfRecords', namespaces=NSMAP)) if self.num_records > 10000: raise TooManyResults() - for record in root.iterfind('srw:records/srw:record', namespaces=nsmap): - self.record_no = int(record.findtext('srw:recordPosition', namespaces=nsmap)) + for record in root.iterfind('srw:records/srw:record', namespaces=NSMAP): + self.record_no = int(record.findtext('srw:recordPosition', namespaces=NSMAP)) - yield Record(record.find('srw:recordData/record', namespaces=nsmap)) + yield Record(record.find('srw:recordData/record', namespaces=NSMAP)) - nrp = root.find('srw:nextRecordPosition', namespaces=nsmap) + nrp = root.find('srw:nextRecordPosition', namespaces=NSMAP) if nrp is not None: start_record = nrp.text else: diff --git a/lokar/task.py b/lokar/task.py index 2c3b22f..32e8493 100644 --- a/lokar/task.py +++ b/lokar/task.py @@ -1,11 +1,13 @@ # coding=utf-8 from __future__ import unicode_literals, print_function -from future.utils import python_2_unicode_compatible -from collections import OrderedDict + import logging +from collections import OrderedDict + import six +from colorama import Fore, Style +from future.utils import python_2_unicode_compatible from lxml import etree -from colorama import Fore, Back, Style from .concept import Concept from .util import parse_xml, ANY_VALUE, normalize_term @@ -18,19 +20,19 @@ def pick(options): print() print('Options:') - for n, op in enumerate(options): - print(' [{}] {}'.format(n + 1, op)) - valid.append(str(n + 1)) + for i, option in enumerate(options): + print(' [{}] {}'.format(i + 1, option)) + valid.append(str(i + 1)) while True: print() - q = six.moves.input('Choice: ') - if q in valid: + answer = six.moves.input('Choice: ') + if answer in valid: break print('Please choose from the following: {}'.format(', '.join(valid))) print('To exit, press Ctrl-C') - return options[int(q) - 1] + return options[int(answer) - 1] class Task(object): @@ -41,7 +43,7 @@ class Task(object): def __init__(self, source, targets=None): targets = targets or [] assert isinstance(source, Concept) - assert type(targets) == list + assert isinstance(targets, list) for target in targets: assert isinstance(target, Concept) self.source = source @@ -51,10 +53,10 @@ def make_query(self, target=None): query = OrderedDict([ ('2', {'search': self.source.sf['2']}) ]) - for n in ['a', 'b', 'x', 'y', 'z']: - query[n] = {'search': self.source.sf.get(n)} + for code in ['a', 'b', 'x', 'y', 'z']: + query[code] = {'search': self.source.sf.get(code)} if target is not None: - query[n]['replace'] = target.sf.get(n) + query[code]['replace'] = target.sf.get(code) if target is not None and target.sf.get('0') is not None: query['0'] = {'search': ANY_VALUE, 'replace': target.sf.get('0')} @@ -81,15 +83,15 @@ def remove_duplicates(self, marc_record, tag, query): keys = [] query2 = {} - for k, v in query.items(): - if v is None: - query2[k] = None - elif six.text_type(v) == v: - query2[k] = v - elif 'replace' in v: - query2[k] = v['replace'] + for key, value in query.items(): + if value is None: + query2[key] = None + elif six.text_type(value) == value: + query2[key] = value + elif 'replace' in value: + query2[key] = value['replace'] else: - query2[k] = v['search'] + query2[key] = value['search'] for field in marc_record.fields(tag, query2): key = self.get_simple_subject_repr(field) @@ -122,26 +124,26 @@ def make_component_query(self, target=None): query = OrderedDict([ ('2', {'search': self.source.sf.get('2')}) ]) - for n in ['a', 'b', 'x', 'y', 'z']: - if self.source.sf.get(n) is not None: - query[n] = {'search': self.source.sf.get(n)} + for code in ['a', 'b', 'x', 'y', 'z']: + if self.source.sf.get(code) is not None: + query[code] = {'search': self.source.sf.get(code)} if target is not None: - query[n]['replace'] = target.sf.get(n) + query[code]['replace'] = target.sf.get(code) return query def __str__(self): - s = [] - t = [] - for k, v in self.make_query(self.targets[0]).items(): - if k != '2': - if v.get('search') is not None: - s.append('${} {}'.format(k, v['search'])) - if v.get('replace') is not None: - t.append('${} {}'.format(k, v['replace'])) - - return 'Replace {} with {} in {} $2 {}'.format(' '.join(s), - ' '.join(t), + sources = [] + targets = [] + for key, value in self.make_query(self.targets[0]).items(): + if key != '2': + if value.get('search') is not None: + sources.append('${} {}'.format(key, value['search'])) + if value.get('replace') is not None: + targets.append('${} {}'.format(key, value['replace'])) + + return 'Replace {} with {} in {} $2 {}'.format(' '.join(sources), + ' '.join(targets), self.source.tag, self.source.sf.get('2')) @@ -222,16 +224,15 @@ class ListTask(Task): field "$a Fish $x Behaviour". """ - def __str__(self): - return 'List titles having {} {} $2 {}'.format(self.source.tag, self.source, self.source.sf['2']) - - def set_options(self, show_titles=False, show_subjects=False): + def __init__(self, source, show_titles=False, show_subjects=False): + super(ListTask, self).__init__(source) self.show_titles = show_titles self.show_subjects = show_subjects - def run(self, marc_record): - modified = 0 + def __str__(self): + return 'List titles having {} {} $2 {}'.format(self.source.tag, self.source, self.source.sf['2']) + def run(self, marc_record): if self.show_titles: print('%s\t%s' % (marc_record.id, marc_record.title())) else: @@ -328,7 +329,7 @@ def __str__(self): term = '$a {}'.format(self.source.sf.get('a')) if self.source.sf.get('x') is not None: term += ' $x {}'.format(self.source.sf.get('x')) - return 'Move {} {} $2 {} to {}'.format(self.tag, term, self.source.sf.get('2'), self.dest_tag) + return 'Move {} {} $2 {} to {}'.format(self.source.tag, term, self.source.sf.get('2'), self.dest_tag) def run(self, marc_record): query = self.make_query() diff --git a/lokar/util.py b/lokar/util.py index 3829da7..1b821cf 100644 --- a/lokar/util.py +++ b/lokar/util.py @@ -1,11 +1,11 @@ -from six import text_type, binary_type +# coding=utf-8 import difflib -import vkbeautify import sys -from colorama import Fore, Back, Style, init -# coding=utf-8 +import vkbeautify +from colorama import Fore from lxml import etree +from six import text_type ANY_VALUE = '___any_value___' diff --git a/pylintrc b/pylintrc new file mode 100644 index 0000000..61aae23 --- /dev/null +++ b/pylintrc @@ -0,0 +1,5 @@ +[BASIC] +good-names=log,console_handler,formatter,el,id,log_capture_string,capture_handler,i + +[FORMAT] +max-line-length=120 diff --git a/setup.cfg b/setup.cfg index eeeca7b..31ad82b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,2 @@ [aliases] -test = pytest \ No newline at end of file +test = pytest diff --git a/tests/test_lokar.py b/tests/test_lokar.py index 432227d..76fa382 100644 --- a/tests/test_lokar.py +++ b/tests/test_lokar.py @@ -16,7 +16,7 @@ from lokar.bib import Bib from lokar.lokar import main, job_args, parse_args, Vocabulary, Mailer -from lokar.sru import SruClient, SruErrorResponse, TooManyResults, nsmap +from lokar.sru import SruClient, SruErrorResponse, TooManyResults, NSMAP from lokar.alma import Alma from lokar.job import Job from lokar.concept import Concept @@ -558,7 +558,7 @@ def __init__(self, **kwargs): def setup_sru_mock(xml_response_file, mock=None): mock = mock or Mock(spec=SruClient) - recs = get_sample(xml_response_file, True).findall('srw:records/srw:record/srw:recordData/record', nsmap) + recs = get_sample(xml_response_file, True).findall('srw:records/srw:record/srw:recordData/record', NSMAP) recs = [Record(rec) for rec in recs] def search(arg): @@ -697,7 +697,7 @@ def conf(): @staticmethod def sru_search_mock(*args, **kwargs): - recs = get_sample('sru_sample_response_1.xml', True).findall('srw:records/srw:record/srw:recordData/record', nsmap) + recs = get_sample('sru_sample_response_1.xml', True).findall('srw:records/srw:record/srw:recordData/record', NSMAP) for n, rec in enumerate(recs): yield rec