Skip to content

Commit

Permalink
Merge pull request #48 from cgostic/master
Browse files: browse the repository at this point in the history
Final updates to visualizations, edits on report
  • Loading branch information
Keanna-K authored Feb 8, 2020
2 parents 8e20ffb + af679ce commit 84fc446
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 35 deletions.
24 changes: 12 additions & 12 deletions docs/05_generate_report.Rmd

Large diffs are not rendered by default.

30 changes: 15 additions & 15 deletions docs/05_generate_report.html

Large diffs are not rendered by default.

Binary file modified results/ngram_length_counts.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified results/train_val_error.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
12 changes: 6 additions & 6 deletions scripts/03_EDA.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -96,18 +96,18 @@ def main(file_path_raw, file_path_pro, accepted_plates_csv, rejected_plates_csv,
# Add column with length of n-gram
counts['ng_length'] = counts['ngrams'].str.len()

n_g_len_chart = (alt.Chart(counts.query('ng_length%2 == 0')).mark_bar().encode(
n_g_len_chart = (alt.Chart(counts.query('ng_length%2 == 0')).mark_bar(color = 'darkorange').encode(
x = alt.X('counts:O',
title = "Frequency of given ngram in training data"),
title = "N-gram freq. in training data", axis=alt.Axis(labelAngle=0)),
#scale=alt.Scale(domain = (0,89))),
y = alt.Y("count()", scale=alt.Scale(type='log', base=10), title = 'Ngrams with X Freq.'),
y = alt.Y("count()", scale=alt.Scale(type='log', base=10), title = 'N-grams with X freq.'),
facet = alt.Facet('ng_length:N', title = 'n-gram length')
).configure_axis(labelFontSize=15,titleFontSize=10
).configure_axis(labelFontSize=10,titleFontSize=12
).configure_header(labelFontSize=15
).configure_title(fontSize=20, anchor = 'middle'
).properties(title = "Counts of n-gram frequency by length",
width = 400,
height = 80,
width = 175,
height = 125,
# columns = 1,
background = 'white'))

Expand Down
9 changes: 7 additions & 2 deletions scripts/04_data_model.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -108,7 +108,7 @@ def main(file_path_read, filename_x_train, filename_x_validate, filename_x_test,
line = alt.Chart(tr_v_plot_df).mark_line().encode(
x = alt.X('Number of Features:Q'),
y = alt.Y('value:Q', title = 'Accuracy score'),
color = alt.Color('variable:N', legend = alt.Legend(title = "", orient = 'bottom'))
color = alt.Color('variable:N', legend = alt.Legend(title = ""))
)

point = alt.Chart(tr_v_plot_df).mark_point().encode(
Expand All @@ -127,8 +127,13 @@ def main(file_path_read, filename_x_train, filename_x_validate, filename_x_test,
(line + point + text).configure_axis(labelFontSize=15,titleFontSize=15
).configure_header(labelFontSize=15
).configure_title(fontSize=20, anchor = 'middle'
).configure_legend(
orient = 'none',
fillColor = 'white',
legendX = 475,
legendY = 250
).properties(width = 700,
background = 'white', title = 'Training and Validation Error by n-gram Range and Number of Features'
background = 'white', title = 'CV Training and Validation Error by Number of Features'
).save(file_path_write + 'train_val_error.png', scale_factor = 2)

# Train model with chosen n-gram length range (2,2)
Expand Down

0 comments on commit 84fc446

Please sign in to comment.