Skip to content

Commit

Permalink
update pipeline to reflect function changes
Browse files (browse the repository at this point in the history)
  • Loading branch information
c-tomlin committed Nov 20, 2023
1 parent a482742 commit 1a2ef09
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 2 deletions.
4 changes: 4 additions & 0 deletions pipeline/1_Stage_1/01_stage_1_person.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pes_match.parameters import (
CEN_CLEAN_DATA,
CHECKPOINT_PATH,
CLERICAL_PATH,
CLERICAL_VARIABLES,
PES_CLEAN_DATA,
cen_variable_types,
Expand All @@ -15,6 +16,8 @@

if not os.path.exists(CHECKPOINT_PATH):
os.makedirs(CHECKPOINT_PATH)
if not os.path.exists(CLERICAL_PATH):
os.makedirs(CLERICAL_PATH)

# Cleaned data
CEN = pd.read_csv(
Expand Down Expand Up @@ -64,6 +67,7 @@
id_column="puid",
suffix_1="_cen",
suffix_2="_pes",
output_folder=CLERICAL_PATH + "Stage_1_CROW_Files",
file_name="Stage_1_Matchkey_CROW_Conflicts",
no_of_files=1,
)
5 changes: 4 additions & 1 deletion pipeline/1_Stage_1/02_stage_1_associative.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pes_match.parameters import (
CEN_CLEAN_DATA,
CHECKPOINT_PATH,
CLERICAL_PATH,
CLERICAL_VARIABLES,
OUTPUT_VARIABLES,
PES_CLEAN_DATA,
Expand All @@ -28,7 +29,9 @@
)

# Combine matches made in CROW into single dataset
crow_matches = combine_crow_results(stage="Stage_1")
crow_matches = combine_crow_results(
stage="Stage_1", results_path=CLERICAL_PATH + "Stage_1_CROW_Files"
)

# Update format of matches and add flags
crow_matches = crow_output_updater(
Expand Down
2 changes: 2 additions & 0 deletions pipeline/2_Stage_2/01_stage_2_person.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pes_match.parameters import (
CEN_CLEAN_DATA,
CHECKPOINT_PATH,
CLERICAL_PATH,
CLERICAL_VARIABLES,
OUTPUT_PATH,
PES_CLEAN_DATA,
Expand Down Expand Up @@ -68,6 +69,7 @@
id_column="puid",
suffix_1="_cen",
suffix_2="_pes",
output_folder=CLERICAL_PATH + "Stage_2_CROW_Files",
file_name="Stage_2_Matchkey_CROW_Conflicts",
no_of_files=1,
)
5 changes: 4 additions & 1 deletion pipeline/2_Stage_2/02_stage_2_associative.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pes_match.parameters import (
CEN_CLEAN_DATA,
CHECKPOINT_PATH,
CLERICAL_PATH,
CLERICAL_VARIABLES,
OUTPUT_PATH,
OUTPUT_VARIABLES,
Expand All @@ -29,7 +30,9 @@
)

# Combine matches made in CROW into single dataset
crow_matches = combine_crow_results(stage="Stage_2")
crow_matches = combine_crow_results(
stage="Stage_2", results_path=CLERICAL_PATH + "Stage_2_CROW_Files"
)

# Update format of matches and add flags
crow_matches = crow_output_updater(
Expand Down

0 comments on commit 1a2ef09

Please sign in to comment.