Skip to content

Commit

Permalink
Merge pull request #19 from eastgenomics/TAT_standard_3_days
Browse files Browse the repository at this point in the history
Remove duplicates in final CSV (#19)
  • Loading branch information
jethror1 authored Aug 16, 2023
2 parents 00df7d8 + 8c1a05e commit a4b04c5
Showing 1 changed file with 44 additions and 6 deletions.
50 changes: 44 additions & 6 deletions TAT_audit/utils/TAT_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -2243,12 +2243,31 @@ def add_in_cancelled_runs(self, all_assays_df, cancelled_runs):
# Append the list of dicts as new rows
all_assays_df = all_assays_df.append(cancelled_runs, ignore_index=True)

# Sort assay types so match the report
# Remove duplicates if a failed run is still named as a '002' project
# otherwise both the failed 002 project and failed ticket would be
# returned
all_assays_df.drop_duplicates(
subset=['run_name'], keep='last', inplace=True
)

# Create new date column extracted from the run name
all_assays_df['date'] = all_assays_df['run_name'].str.split('_').str[0]
# Convert date column to datetime
all_assays_df['date'] = pd.to_datetime(
all_assays_df['date'], format="%y%m%d"
)

# Sort chronologically by date for each assay type
all_assays_df.sort_values(by=['assay_type', 'date'], inplace=True)
# Sort assay types so order matches the report
custom_dict = {'CEN': 0, 'MYE': 1, 'TSO500': 2, 'TWE': 3, 'SNP': 4}
all_assays_df = all_assays_df.sort_values(
by=['assay_type'], key=lambda x: x.map(custom_dict)
)

# Remove the date column
all_assays_df.drop(columns=['date'], inplace=True)

return all_assays_df


Expand Down Expand Up @@ -2341,6 +2360,28 @@ def create_upload_day_fig(self, assay_df, assay_type):
return html_fig


def write_to_csv(self, all_assays_df, audit_start, audit_end) -> None:
    """
    Save the dataframe of audit-period runs to a CSV file.

    The file is named 'audit_info_{audit_start}_{audit_end}.csv' and is
    written to a path relative to the current working directory. Floats
    are formatted to three decimal places and the dataframe index is
    not written.

    Parameters
    ----------
    all_assays_df : pd.DataFrame
        dataframe holding every run in the audit period together with
        its associated info
    audit_start : str
        the audit start date, used in the output file name
    audit_end : str
        the audit end date, used in the output file name
    """
    # Build the output name first so the write call stays on one line
    csv_name = f'audit_info_{audit_start}_{audit_end}.csv'
    all_assays_df.to_csv(csv_name, float_format='%.3f', index=False)


def main():
"""Main function to create html report"""
tatq = QueryPlotFunctions()
Expand Down Expand Up @@ -2400,11 +2441,8 @@ def main():
)

all_assays_df = tatq.add_in_cancelled_runs(all_assays_df, cancelled_runs)
all_assays_df.to_csv(
f'audit_info_{tatq.audit_start}_{tatq.audit_end}.csv',
float_format='%.3f',
index=False
)
tatq.write_to_csv(all_assays_df, tatq.audit_start, tatq.audit_end)

# Load Jinja2 template
# Add the charts, tables and issues into the template
environment = Environment(loader=FileSystemLoader(
Expand Down

0 comments on commit a4b04c5

Please sign in to comment.