diff --git a/sippers/adapters/cnmc_v2.py b/sippers/adapters/cnmc_v2.py new file mode 100644 index 0000000..089a496 --- /dev/null +++ b/sippers/adapters/cnmc_v2.py @@ -0,0 +1,71 @@ +from sippers.adapters import SipsAdapter, MeasuresAdapter +from sippers.models.cnmc_v2 import CnmcV2SipsSchema, CnmcV2MeasuresSchema +from marshmallow import pre_load, fields + + +class CnmcV2SipsAdapter(SipsAdapter, CnmcV2SipsSchema): + '''A self.fields tenim els camps per defecte, els de SipsSchema base''' + + @pre_load + def add_distri_description(self, data): + cod_distri = data.get('codigoEmpresaDistribuidora') + if cod_distri == '059': + data['nombreEmpresaDistribuidora'] = 'GRUPO ELECTRIFICACION RURAL BINEFAR' + return data + + @pre_load + def parse_selections(self, data): + selection_fields = [ + 'codigoTarifaATREnVigor', + 'codigoTipoSuministro', + 'codigoPropiedadICP', + 'codigoDHEquipoDeMedida', + 'codigoTensionMedida', + 'tipoIdTitular', + 'codigoClasificacionPS', + 'codigoTelegestion', + 'codigoTensionV', + 'codigoTipoContrato', + 'codigoPeriodicidadFacturacion', + 'codigoPropiedadEquipoMedida' + ] + for select_field in selection_fields: + if data.get(select_field) == '': + data[select_field] = None + return data + + @pre_load + def fix_dates(self, data): + for attr, field in self.fields.iteritems(): + if isinstance(field, fields.DateTime): + # si ve alguna data, assumim que ve correcta + if data[attr] == u'': + data[attr] = None + else: + data[attr] += 'T00:00:00' + return data + +class CnmcV2MeasuresAdapter(MeasuresAdapter, CnmcV2MeasuresSchema): + + @pre_load + def parse_selections(self, data): + selection_fields = [ + 'codigoTarifaATR', + 'codigoDHEquipoDeMedida', + 'codigoTipoLectura' + ] + for select_field in selection_fields: + if data.get(select_field) == '': + data[select_field] = None + return data + + @pre_load + def fix_dates(self, data): + for attr, field in self.fields.iteritems(): + if isinstance(field, fields.DateTime): + # si ve alguna data, assumim que ve correcta + if data[attr] == u'': + data[attr] = None + else: + data[attr] += 'T00:00:00' + return data \ No newline at end of file diff --git a/sippers/backends/mongodb.py b/sippers/backends/mongodb.py index 93e7968..fe78b7a 100644 --- a/sippers/backends/mongodb.py +++ b/sippers/backends/mongodb.py @@ -21,32 +21,76 @@ def __init__(self, uri=None): self.db[self.measures_collection].ensure_index( "name", background=True, ) + self.db['cnmc_sips'].ensure_index( + "cups", unique=True, background=True + ) + self.db['cnmc_sips_consums'].ensure_index( + [ + ("cups", pymongo.ASCENDING), + ("fechaFinMesConsumo", pymongo.DESCENDING) + ], background=True, + ) def insert(self, document): + batch_insert = False + docuemnt_list = document + if isinstance(document, list): + document = document[0] + batch_insert = True ps = document.get('ps') if ps: - ps.backend = self - self.insert_ps(ps.backend_data) + if batch_insert: + ps_data = [] + for x in docuemnt_list: + x.get('ps').backend = self + ps_data.append(x.get('ps').backend_data) + else: + ps.backend = self + ps_data = ps.backend_data + self.insert_ps(ps_data, collection=document.get('collection')) measures = document.get('measures') post_measures = [] measure_cnmc = document.get('measure_cnmc') if measures: + if batch_insert: + raise Exception("Batch insert not implemented for measures") for measure in measures: measure.backend = self post_measures.append(measure.backend_data) self.insert_measures(post_measures) elif measure_cnmc: - measure_cnmc.backend = self - self.insert_cnmc_measure(measure_cnmc.backend_data) + if batch_insert: + measure_list = [] + for doc in docuemnt_list: + doc.get('measure_cnmc').backend = self + measure_list.append(doc.get('measure_cnmc').backend_data) + else: + measure_cnmc.backend = self + measure_list = measure_cnmc.backend_data + self.insert_cnmc_measure(measure_list, collection=document.get('collection')) def get(self, collection, filters, fields=None): return [x for x in self.db[collection].find(filters, fields=fields)] - def insert_ps(self, ps): - collection = self.ps_collection - oid = self.db[collection].update( - {'name': ps['name']}, ps, upsert=True - ) + def insert_ps(self, ps, collection=None): + if not collection: + collection = self.ps_collection + if collection == self.ps_collection: + key = 'name' + else: + key = 'cups' + + if isinstance(ps, list): + oid = False + for doc in ps: + oid = self.db[collection].update( + {key: doc[key]}, doc, upsert=True + ) + else: + oid = self.db[collection].update( + {key: ps[key]}, ps, upsert=True + ) + return oid def insert_measures(self, values): @@ -58,20 +102,16 @@ def insert_measures(self, values): oids.extend(self.db[collection].insert(values)) return oids - def insert_cnmc_measure(self, value): + def insert_cnmc_measure(self, value, collection=None): '''cnmc measures come a measure per line, cannot replace the whole block as in insert_measures''' - collection = self.measures_collection - self.db[collection].remove( - {"name" : value["name"], - "data_final": value["data_final"] - } - ) + oid = self.db[collection].insert(value) + return oid def disconnect(self): - self.connection.disconnect() + self.connection.close() def __enter__(self): return self diff --git a/sippers/models/cnmc_v2.py b/sippers/models/cnmc_v2.py new file mode 100644 index 0000000..5cd9eef --- /dev/null +++ b/sippers/models/cnmc_v2.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +from marshmallow import Schema, fields +from sippers.utils import TABLA_6, TABLA_9, TABLA_17, TABLA_30, TABLA_32, TABLA_35, TABLA_62, TABLA_64, TABLA_108, TABLA_111 +from marshmallow.validate import OneOf + + +class CnmcV2SipsSchema(Schema): + """Base model for SIPS + """ + CNAE = fields.String(allow_none=True) + aplicacionBonoSocial = fields.String(allow_none=True) + codigoAPM = fields.String(allow_none=True) + codigoAccesibilidadContador = fields.String(allow_none=True) + codigoAutoconsumo = fields.String(allow_none=True) + codigoBIE = fields.String(allow_none=True) + codigoClaseExpediente = fields.String(allow_none=True) + codigoClasificacionPS = fields.String(validate=OneOf(TABLA_30), allow_none=True) + codigoComercializadora = fields.String(allow_none=True) + codigoDHEquipoDeMedida = fields.String(validate=OneOf(TABLA_35), allow_none=True) + codigoDisponibilidadICP = fields.String(allow_none=True) + codigoEmpresaDistribuidora = fields.String(allow_none=True) + codigoFasesEquipoMedida = fields.String(allow_none=True) + codigoModoControlPotencia = fields.String(allow_none=True) + codigoMotivoExpediente = fields.String(allow_none=True) + codigoPSContratable = fields.String(allow_none=True) + codigoPeriodicidadFacturacion = fields.String(validate=OneOf(TABLA_108), allow_none=True) + codigoPostalPS = fields.String(allow_none=True) + codigoPropiedadEquipoMedida = fields.String(validate=OneOf(TABLA_32), allow_none=True) + codigoPropiedadICP = fields.String(validate=OneOf(TABLA_32), allow_none=True) + codigoProvinciaPS = fields.String(allow_none=True) + codigoTarifaATREnVigor = fields.String(validate=OneOf(TABLA_17), allow_none=True) + codigoTelegestion = fields.String(validate=OneOf(TABLA_111), allow_none=True) + codigoTensionMedida = fields.String(validate=OneOf(TABLA_64), allow_none=True) + codigoTensionV = fields.String(validate=OneOf(TABLA_64), allow_none=True) + codigoTipoContrato = fields.String(validate=OneOf(TABLA_9), allow_none=True) + codigoTipoSuministro = fields.String(validate=OneOf(TABLA_62), allow_none=True) + cups = fields.String(required=True, allow_none=False) + esViviendaHabitual = fields.String(allow_none=True) + fechaAltaSuministro = fields.DateTime(allow_none=True) + fechaCaducidadAPM = fields.DateTime(allow_none=True) + fechaCaducidadBIE = fields.DateTime(allow_none=True) + fechaEmisionAPM = fields.DateTime(allow_none=True) + fechaEmisionBIE = fields.DateTime(allow_none=True) + fechaLimiteDerechosReconocidos = fields.DateTime(allow_none=True) + fechaUltimaLectura = fields.DateTime(allow_none=True) + fechaUltimoCambioComercializador = fields.DateTime(allow_none=True) + fechaUltimoMovimientoContrato = fields.DateTime(allow_none=True) + importeDepositoGarantiaEuros = fields.String(allow_none=True) + informacionImpagos = fields.String(allow_none=True) + motivoEstadoNoContratable = fields.String(allow_none=True) + municipioPS = fields.String(allow_none=True) + nombreEmpresaDistribuidora = fields.String(allow_none=True) + potenciaCGPW = fields.String(allow_none=True) + potenciaMaximaAPMW = fields.String(allow_none=True) + potenciaMaximaBIEW = fields.String(allow_none=True) + potenciasContratadasEnWP1 = fields.String(allow_none=True) + potenciasContratadasEnWP2 = fields.String(allow_none=True) + potenciasContratadasEnWP3 = fields.String(allow_none=True) + potenciasContratadasEnWP4 = fields.String(allow_none=True) + potenciasContratadasEnWP5 = fields.String(allow_none=True) + potenciasContratadasEnWP6 = fields.String(allow_none=True) + relacionTransformacionIntensidad = fields.String(allow_none=True) + tipoIdTitular = fields.String(validate=OneOf(TABLA_6), allow_none=True) + tipoPerfilConsumo = fields.String(allow_none=True) + valorDerechosAccesoW = fields.String(allow_none=True) + valorDerechosExtensionW = fields.String(allow_none=True) + +class CnmcV2MeasuresSchema(Schema): + + cups = fields.String(position=0, required=True, allow_none=False) + codigoDHEquipoDeMedida = fields.String(position=22, allow_none=True) + codigoTarifaATR = fields.String(position=3, allow_none=True) + codigoTipoLectura = fields.String(position=23, allow_none=True) + consumoEnergiaActivaEnWhP1 = fields.String(position=4) + consumoEnergiaActivaEnWhP2 = fields.String(position=5) + consumoEnergiaActivaEnWhP3 = fields.String(position=6) + consumoEnergiaActivaEnWhP4 = fields.String(position=7) + consumoEnergiaActivaEnWhP5 = fields.String(position=8) + consumoEnergiaActivaEnWhP6 = fields.String(position=9) + consumoEnergiaReactivaEnVArhP1 = fields.String(position=10) + consumoEnergiaReactivaEnVArhP2 = fields.String(position=11) + consumoEnergiaReactivaEnVArhP3 = fields.String(position=12) + consumoEnergiaReactivaEnVArhP4 = fields.String(position=13) + consumoEnergiaReactivaEnVArhP5 = fields.String(position=14) + consumoEnergiaReactivaEnVArhP6 = fields.String(position=15) + fechaFinMesConsumo = fields.DateTime(required=True, position=2, allow_none=False) + fechaInicioMesConsumo = fields.DateTime(required=True, position=1, allow_none=False) + potenciaDemandadaEnWP1 = fields.String(position=16) + potenciaDemandadaEnWP2 = fields.String(position=17) + potenciaDemandadaEnWP3 = fields.String(position=18) + potenciaDemandadaEnWP4 = fields.String(position=19) + potenciaDemandadaEnWP5 = fields.String(position=20) + potenciaDemandadaEnWP6 = fields.String(position=21) diff --git a/sippers/parsers/__init__.py b/sippers/parsers/__init__.py index 0c747da..15d4ae3 100644 --- a/sippers/parsers/__init__.py +++ b/sippers/parsers/__init__.py @@ -1,4 +1,5 @@ from endesa import Endesa from iberdrola import Iberdrola from cnmc import Cnmc -from hidrocantabrico import Hidrocantabrico \ No newline at end of file +from hidrocantabrico import Hidrocantabrico +from cnmc_v2 import CnmcV2 \ No newline at end of file diff --git a/sippers/parsers/cnmc_v2.py b/sippers/parsers/cnmc_v2.py new file mode 100644 index 0000000..657fad4 --- /dev/null +++ b/sippers/parsers/cnmc_v2.py @@ -0,0 +1,148 @@ +from __future__ import absolute_import + +import csv +import StringIO + +from sippers import logger +from sippers.utils import build_dict +from sippers.parsers.parser import Parser, register +from sippers.adapters.cnmc_v2 import CnmcV2SipsAdapter, CnmcV2MeasuresAdapter +from sippers.models.cnmc_v2 import CnmcV2MeasuresSchema + +class CnmcV2(Parser): + + # En aquest cas els llegim amb el csv.DictReader en comptes de fer-ho amb + # un Schema de marshmallow, ja que el csv pot contenir comes dins d'un + # camp si van wrapped entre cometes i el marshmallow aixo no ho contempla. + + # amb csv.DictReader obtindrem un diccionari amb els headers_ps + # que li indiquem aqui, en comptes d'anar-los a buscar a l'Schema. + # El diccionari el podem utilitzar per passar-li al + # self.adapter.load igual que el resultat de l'esquema + + pattern = '[0-9]{6}_SIPS2_PS_ELECTRICIDAD_[a-z]*_?[a-z]*\.csv' + encoding = "UTF-8" + _collection = 'cnmc_sips' + + def __init__(self, strict=False): + + # l'ordre dels camps segons format cnmc + self.headers_ps = [ + 'codigoEmpresaDistribuidora', + 'cups', + 'nombreEmpresaDistribuidora', + 'codigoPostalPS', + 'municipioPS', + 'codigoProvinciaPS', + 'fechaAltaSuministro', + 'codigoTarifaATREnVigor', + 'codigoTensionV', + 'potenciaMaximaBIEW', + 'potenciaMaximaAPMW', + 'codigoClasificacionPS', + 'codigoDisponibilidadICP', + 'tipoPerfilConsumo', + 'valorDerechosExtensionW', + 'valorDerechosAccesoW', + 'codigoPropiedadEquipoMedida', + 'codigoPropiedadICP', + 'potenciasContratadasEnWP1', + 'potenciasContratadasEnWP2', + 'potenciasContratadasEnWP3', + 'potenciasContratadasEnWP4', + 'potenciasContratadasEnWP5', + 'potenciasContratadasEnWP6', + 'fechaUltimoMovimientoContrato', + 'fechaUltimoCambioComercializador', + 'fechaLimiteDerechosReconocidos', + 'fechaUltimaLectura', + 'informacionImpagos', + 'importeDepositoGarantiaEuros', + 'tipoIdTitular', + 'esViviendaHabitual', + 'codigoComercializadora', + 'codigoTelegestion', + 'codigoFasesEquipoMedida', + 'codigoAutoconsumo', + 'codigoTipoContrato', + 'codigoPeriodicidadFacturacion', + 'codigoBIE', + 'fechaEmisionBIE', + 'fechaCaducidadBIE', + 'codigoAPM', + 'fechaEmisionAPM', + 'fechaCaducidadAPM', + 'relacionTransformacionIntensidad', + 'CNAE', + 'codigoModoControlPotencia', + 'potenciaCGPW', + 'codigoDHEquipoDeMedida', + 'codigoAccesibilidadContador', + 'codigoPSContratable', + 'motivoEstadoNoContratable', + 'codigoTensionMedida', + 'codigoClaseExpediente', + 'codigoMotivoExpediente', + 'codigoTipoSuministro', + 'aplicacionBonoSocial' + ] + self.adapter = CnmcV2SipsAdapter(strict=strict) + + def parse_line(self, line): + + # passar previament la linia pel csv reader + # per que agafi be els camps tot i les comes dins del camp direccio + # per fer-ho cal passar-la a StringIO + l = StringIO.StringIO(line) + reader = csv.DictReader(l, fieldnames=self.headers_ps, delimiter=',') + linia = reader.next() # nomes n'hi ha una + + parsed = {'ps': {}, 'orig': line, 'collection': self._collection} + result, errors = self.adapter.load(linia) + + if errors: + logger.error(errors) + parsed['ps'] = result + return parsed, errors + +register(CnmcV2) + +class CnmcV2Cons(Parser): + + # En el cas de les mesures, usem Schema per mantenir el format i + # perque no hi trobarem mes comes que les delimiters + pattern = '[0-9]{6}_SIPS2_CONSUMOS_ELECTRICIDAD_[a-z]*_?[a-z]*\.csv' + encoding = "UTF-8" + _collection = 'cnmc_sips_consums' + + + def __init__(self, strict=False): + self.schema = CnmcV2MeasuresSchema(strict=strict) + self.adapter = CnmcV2MeasuresAdapter(strict=strict) + self.measures_adapter = self.adapter + self.fields = [] + self.headers = [] + for f in sorted(self.schema.fields, + key=lambda f: self.schema.fields[f].metadata['position']): + field = self.schema.fields[f] + self.fields.append((f, field.metadata)) + self.headers.append(f) + + def parse_line(self, line): + + l = StringIO.StringIO(line) + reader = csv.DictReader(l, fieldnames=self.headers, delimiter=',') + linia = reader.next() # nomes n'hi ha una + + parsed = {'ps': {}, 'measure_cnmc': [], 'orig': line, 'collection': self._collection} + + result, errors = self.adapter.load(linia) + + if errors: + logger.error(errors) + + parsed['measure_cnmc'] = result + + return parsed, errors + +register(CnmcV2Cons) diff --git a/sippers/utils.py b/sippers/utils.py index dea9f40..9b60625 100644 --- a/sippers/utils.py +++ b/sippers/utils.py @@ -56,3 +56,16 @@ def build_dict(headers, data): :param list data: List of data """ return dict(zip(headers, data)) + +### New sips table validations ### + +TABLA_6 = ['NI', 'NV', 'OT', 'PS', 'NE'] +TABLA_9 = ['01', '02', '03', '05', '07', '08', '09', '10', '11', '12'] +TABLA_17 = ['001','003','004','005','006','007','008','011','012','013','014','015','016','017','018','019','020','021','022','023','024','025'] +TABLA_30 = ['01','02','03','04','05'] +TABLA_32 = ['1', '2', '3', '4'] +TABLA_35 = ['1','2','3','4','6','8','9','A'] +TABLA_62 = ['AL', 'AP', 'AS', 'AT', 'BA', 'CM', 'EA', 'ES', 'FT', 'FV', 'GA', 'GB', 'HP', 'IN', 'IT', 'KC', 'LB', 'LC', 'OF', 'PC', 'RA', 'RT', 'SA', 'SC', 'SE', 'SG', 'SM', 'SO', 'TL', 'TR', 'UF', 'UV', 'VI', 'VE'] +TABLA_64 = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '99'] +TABLA_108 = ['01','02'] +TABLA_111 = ['01', '02', '03']