Skip to content

Commit

Permalink
Alter way to catch missing columns error #469
Browse files Browse the repository at this point in the history
Instead of parsing the missing column name out of the
error message, the missing columns are now determined by comparing
the columns of the existing database table with those of the new dataframe.
  • Loading branch information
FlorianK13 committed Oct 6, 2023
1 parent 9e58a89 commit 0be8d9f
Showing 1 changed file with 37 additions and 30 deletions.
67 changes: 37 additions & 30 deletions open_mastr/xml_download/utils_write_to_database.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import sqlite3
from shutil import Error
from zipfile import ZipFile

import lxml
import numpy as np
import pandas as pd
import sqlite3
import sqlalchemy
from sqlalchemy import select
from sqlalchemy.sql import text

from open_mastr.utils.config import setup_logger
from open_mastr.utils.helpers import data_to_include_tables
from open_mastr.utils.orm import tablename_mapping
from open_mastr.xml_download.utils_cleansing_bulk import cleanse_bulk_data
from open_mastr.utils.config import setup_logger


def write_mastr_xml_to_database(
Expand Down Expand Up @@ -98,7 +99,6 @@ def is_first_file(file_name: str) -> bool:


def cast_date_columns_to_datetime(xml_tablename: str, df: pd.DataFrame) -> pd.DataFrame:

sqlalchemy_columnlist = tablename_mapping[xml_tablename][
"__class__"
].__table__.columns.items()
Expand Down Expand Up @@ -186,7 +186,6 @@ def add_table_to_database(
if_exists: str,
engine: sqlalchemy.engine.Engine,
) -> None:

# get a dictionary for the data types

table_columns_list = list(
Expand All @@ -212,18 +211,18 @@ def add_table_to_database(
)
continueloop = False
except sqlalchemy.exc.OperationalError as err:
add_missing_column_to_table(err, engine, xml_tablename)
add_missing_column_to_table(err, engine, xml_tablename, df)

except sqlalchemy.exc.ProgrammingError as err:
add_missing_column_to_table(err, engine, xml_tablename)
add_missing_column_to_table(err, engine, xml_tablename, df)

except sqlite3.OperationalError as err:
add_missing_column_to_table(err, engine, xml_tablename)
add_missing_column_to_table(err, engine, xml_tablename, df)

except sqlalchemy.exc.DataError as err:
delete_wrong_xml_entry(err, df)

except sqlalchemy.exc.IntegrityError as err:
except sqlalchemy.exc.IntegrityError:
# error resulting from Unique constraint failed
df = write_single_entries_until_not_unique_comes_up(
df=df, xml_tablename=xml_tablename, engine=engine
Expand All @@ -232,7 +231,8 @@ def add_table_to_database(

def add_zero_as_first_character_for_too_short_string(df: pd.DataFrame) -> pd.DataFrame:
"""Some columns are read as integer even though they are actually strings starting with
a 0. This function converts those columns back to strings and adds a 0 as first character."""
a 0. This function converts those columns back to strings and adds a 0 as first character.
"""

dict_of_columns_and_string_length = {
"Gemeindeschluessel": 8,
Expand Down Expand Up @@ -302,44 +302,51 @@ def write_single_entries_until_not_unique_comes_up(


def add_missing_column_to_table(
    err: Error,
    engine: sqlalchemy.engine.Engine,
    xml_tablename: str,
    df: pd.DataFrame,
) -> None:
    """
    Add columns that exist in the dataframe but not in the database table.

    Some files introduce new columns for existing tables.
    If this happens, the error from writing entries into
    non-existing columns is caught and non-existing
    columns are created. The missing columns are found by comparing
    the column names of the existing database table with the column
    names of the dataframe, not by parsing the error message.

    Parameters
    ----------
    err
        The database error that was caught by the caller. It is not
        inspected here and is only kept for the existing call signature.
    engine
        SQLAlchemy engine connected to the target database.
    xml_tablename
        Key into ``tablename_mapping`` identifying the ORM class whose
        table should be extended.
    df
        Dataframe that was about to be written; its columns define which
        columns the table must have.

    Returns
    -------
    None
        The table is altered in place; nothing is altered when the
        dataframe has no columns that the table lacks.
    """
    log = setup_logger()

    # Get the column names from the existing database table.
    table_name = tablename_mapping[xml_tablename]["__class__"].__table__.name
    inspector = sqlalchemy.inspect(engine)
    column_names_from_database = {
        column["name"] for column in inspector.get_columns(table_name)
    }

    # Keep the dataframe's column order (and drop duplicate labels) so the
    # ALTER statements and log lines are emitted in a deterministic order.
    missing_columns = [
        column_name
        for column_name in dict.fromkeys(df.columns.tolist())
        if column_name not in column_names_from_database
    ]

    for column_name in missing_columns:
        alter_query = 'ALTER TABLE %s ADD "%s" VARCHAR NULL;' % (
            table_name,
            column_name,
        )
        # engine.begin() opens a connection and a transaction, committing on
        # success; this replaces the legacy "autocommit" execution option.
        with engine.begin() as con:
            con.execute(text(alter_query))
        log.info(
            "From the downloaded xml files following new attribute was "
            f"introduced: {table_name}.{column_name}"
        )


def delete_wrong_xml_entry(err: Error, df: pd.DataFrame) -> None:
Expand Down

0 comments on commit 0be8d9f

Please sign in to comment.