Skip to content

Commit

Permalink
Fix params. Add Advanced Settings expander.
Browse files Browse the repository at this point in the history
  • Loading branch information
BSalita committed Jan 26, 2024
1 parent 76251d3 commit 8cb3d99
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 41 deletions.
45 changes: 23 additions & 22 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -991,7 +991,7 @@ def app_info():
st.caption(f"Project lead is Robert Salita research@AiPolice.org. Code written in Python. UI written in Streamlit. AI API is OpenAI. Data engine is Pandas. Query engine is Duckdb. Chat UI uses streamlit-chat. Self hosted using Cloudflare Tunnel. Repo:https://github.com/BSalita/Bridge_Game_Postmortem_Chatbot Club data scraped from public ACBL webpages. Tournament data from ACBL API.")
# fastai:{fastai.__version__} pytorch:{fastai.__version__} sklearn:{sklearn.__version__} safetensors:{safetensors.__version__}
st.caption(
f"App:{st.session_state.app_datetime} Python:{'.'.join(map(str, sys.version_info[:3]))} Streamlit:{st.__version__} Pandas:{pd.__version__} duckdb:{duckdb.__version__} Default AI model:{DEFAULT_AI_MODEL} OpenAI client:{openai.__version__} Query Params:{st.query_params}")
f"App:{st.session_state.app_datetime} Python:{'.'.join(map(str, sys.version_info[:3]))} Streamlit:{st.__version__} Pandas:{pd.__version__} duckdb:{duckdb.__version__} Default AI model:{DEFAULT_AI_MODEL} OpenAI client:{openai.__version__} Query Params:{st.query_params.to_dict()}")


def create_sidebar():
Expand Down Expand Up @@ -1099,27 +1099,26 @@ def create_sidebar():
ups.append(up)
ask_questions_without_context(ups, st.session_state.ai_api)

st.sidebar.divider()
st.sidebar.write('Advanced Settings')
with st.sidebar.expander('Advanced Settings', False):

if st.session_state.debug_favorites is not None:
# favorite prompts selectboxes
st.session_state.debug_player_number_names = st.session_state.debug_favorites[
'SelectBoxes']['Player_Numbers']['options']
if len(st.session_state.debug_player_number_names):
st.sidebar.selectbox("Debug Player List", options=st.session_state.debug_player_number_names, placeholder=st.session_state.debug_favorites['SelectBoxes']['Player_Numbers']['placeholder'],
on_change=debug_player_number_names_change, key='debug_player_number_names_selectbox')
if st.session_state.debug_favorites is not None:
# favorite prompts selectboxes
st.session_state.debug_player_number_names = st.session_state.debug_favorites[
'SelectBoxes']['Player_Numbers']['options']
if len(st.session_state.debug_player_number_names):
st.selectbox("Debug Player List", options=st.session_state.debug_player_number_names, placeholder=st.session_state.debug_favorites['SelectBoxes']['Player_Numbers']['placeholder'],
on_change=debug_player_number_names_change, key='debug_player_number_names_selectbox')

st.sidebar.checkbox(
"Ninja Coder Mode (Show SQL Queries)", on_change=show_sql_query_change, key='sql_query_checkbox')
st.checkbox(
"Ninja Coder Mode (Show SQL Queries)", on_change=show_sql_query_change, key='sql_query_checkbox')

if len(st.session_state.ai_apis):
st.sidebar.selectbox("AI API Model Used for Prompts", index=st.session_state.ai_apis.index(st.session_state.ai_api),options=st.session_state.ai_apis,
on_change=ai_api_selectbox_change, key='ai_api_selectbox')
if len(st.session_state.ai_apis):
st.selectbox("AI API Model Used for Prompts", index=st.session_state.ai_apis.index(st.session_state.ai_api),options=st.session_state.ai_apis,
on_change=ai_api_selectbox_change, key='ai_api_selectbox')

# Not at all fast to calculate. approximately .25 seconds per unique pbn overhead is minimum + .05 seconds per observation per unique pbn. e.g. time for 24 boards = 24 * (.25 + num of observations * .05).
st.sidebar.number_input("Single Dummy Random Trials", min_value=1, max_value=100,
value=st.session_state.sd_observations, on_change=sd_observations_changed, key='sd_observations_number_input')
# Not at all fast to calculate. approximately .25 seconds per unique pbn overhead is minimum + .05 seconds per observation per unique pbn. e.g. time for 24 boards = 24 * (.25 + num of observations * .05).
st.number_input("Single Dummy Random Trials", min_value=1, max_value=100,
value=st.session_state.sd_observations, on_change=sd_observations_changed, key='sd_observations_number_input')


def create_tab_bar():
Expand Down Expand Up @@ -1397,10 +1396,12 @@ def main():
# obsolete? st.experimental_set_query_params(player_number=2663279)
# http://localhost:8501/?player_number=2663279
if 'player_number' in st.query_params:
player_number_l = st.query_params['player_number']
assert isinstance(player_number_l, list) and len(
player_number_l) == 1, player_number_l
player_number = player_number_l[0]
player_number = st.query_params['player_number']
if not isinstance(player_number, str):
st.stop()
#assert isinstance(player_number_l, list) and len(
# player_number_l) == 1, player_number_l
#player_number = player_number_l[0]
if not chat_initialize(player_number, None):
st.stop()

Expand Down
65 changes: 46 additions & 19 deletions mlBridgeLib/mlBridgeAi.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,42 @@

import pandas as pd
import os
from fastai.tabular.all import nn, load_learner, tabular_learner, cont_cat_split, TabularDataLoaders, RegressionBlock, Categorify, FillMissing, Normalize, EarlyStoppingCallback, RandomSplitter, range_of, MSELossFlat, rmse, accuracy
from fastai.tabular.all import nn, load_learner, tabular_learner, cont_cat_split, TabularDataLoaders, TabularPandas, CategoryBlock, RegressionBlock, Categorify, FillMissing, Normalize, EarlyStoppingCallback, RandomSplitter, range_of, MSELossFlat, rmse, accuracy

def train_classifier(df, y_names, cat_names, cont_names, procs=None, valid_pct=0.2, seed=42, bs=1024*5, layers=None, epochs=3, device='cuda'):
    """
    Train a fastai tabular classification model.

    Args:
        df: DataFrame holding the feature columns and the target column(s).
        y_names: name(s) of the target column(s) to classify.
        cat_names: categorical feature column names.
        cont_names: continuous feature column names.
        procs: fastai preprocessing steps (e.g. Categorify, FillMissing, Normalize).
        valid_pct: fraction of rows held out for validation.
        seed: random seed for the train/validation split (reproducibility).
        bs: batch size.
        layers: hidden-layer sizes for the tabular model; defaults to [512, 512, 512].
        epochs: number of one-cycle training epochs.
        device: 'cuda' or 'cpu'.

    Returns:
        Tuple (to, dls, learn): the TabularPandas object, its DataLoaders,
        and the trained Learner.
    """
    # Avoid a mutable default argument ([512,512,512] would be one shared
    # list object across all calls); resolve the default inside the body.
    if layers is None:
        layers = [512, 512, 512]

    # Reproducible random train/validation split over the row range.
    splits_ilocs = RandomSplitter(valid_pct=valid_pct, seed=seed)(range_of(df))
    to = TabularPandas(df, procs=procs,
                       cat_names=cat_names,
                       cont_names=cont_names,
                       y_names=y_names,
                       splits=splits_ilocs,
                       y_block=CategoryBlock())

    # Create the DataLoaders on the requested device. Note: `layers` is NOT a
    # DataLoaders option — previously it was passed here, where it had no
    # effect on the model architecture.
    dls = to.dataloaders(bs=bs, device=device)

    # `layers` belongs to the learner: it defines the model's hidden layers.
    learn = tabular_learner(dls, layers=layers, metrics=accuracy)

    # Train the model. 1 or 2 epochs is often enough for large datasets.
    learn.fit_one_cycle(epochs)
    return to, dls, learn

def load_data(df, y_names=None, cont_names=None, cat_names=None, procs=None, y_block=None, bs=None, valid_pct=None, max_card=None, device='cpu'):
def load_data(df, y_names=None, cont_names=None, cat_names=None, procs=None, y_block=None, bs=None, layers=[1024]*4, valid_pct=None, seed=42, max_card=None, device='cuda'):
"""
Load and preprocess data using FastAI.
"""

print(f"{y_names=} {cont_names=} {cat_names=} {bs=} {valid_pct=} {max_card=}")
# Determine number of CPU cores and set workers to cores-1
num_workers = os.cpu_count() - 1
print(f"{y_names=} {bs=} {valid_pct=} {num_workers=}")
if cont_names is not None:
print(f"{len(cont_names)=} {cont_names=}")
if cat_names is not None:
print(f"{len(cat_names)=} {cat_names=}")
assert df.select_dtypes(include=['object','string']).columns.size == 0, df.select_dtypes(include=['object','string']).columns
# doesn't work for Contract. assert df.select_dtypes(include=['object','string']).columns.size == 0, df.select_dtypes(include=['object','string']).columns
assert not df.isna().any().any()
assert y_names in df, y_names

Expand All @@ -31,37 +53,42 @@ def load_data(df, y_names=None, cont_names=None, cat_names=None, procs=None, y_b
assert df[cont_names].select_dtypes(include=['category']).columns.size == 0, df[cont_names].select_dtypes(include=['category']).columns

# Split the data into training and validation sets
splits = RandomSplitter(valid_pct=valid_pct)(range_of(df))
splits_ilocs = RandomSplitter(valid_pct=valid_pct, seed=seed)(range_of(df))
#display(df.iloc[splits_ilocs[0]])
#display(df.iloc[splits_ilocs[1]])

# Load data into FastAI's TabularDataLoaders
# todo: experiment with specifying a dict of Category types for cat_names: ordinal_var_dict = {'size': ['small', 'medium', 'large']}
# todo: accept default class of y_block. RegressionBlock for regression, CategoryBlock for classification.

dls = TabularDataLoaders.from_df(df, y_names=y_names,
cat_names=cat_names, cont_names=cont_names, procs=procs, y_block=y_block,
bs=bs, splits=splits, num_workers=num_workers, device=device)
to = TabularPandas(df, procs=procs,
cat_names=cat_names,
cont_names=cont_names,
y_names=y_names,
splits=splits_ilocs,
#num_workers=10,
y_block=y_block,
)

return dls
dls = to.dataloaders(bs=bs, layers=layers, device=device) # cpu or cuda

return dls # return to?

def train_classification(dls, epochs=20, layers=[1024]*4, monitor='accuracy', min_delta=0.001, patience=3):
def train_classification(dls, epochs=3, monitor='accuracy', min_delta=0.001, patience=3):
"""
Train a tabular model for classification.
"""
print(f"{epochs=} {layers=} {monitor=} {min_delta=} {patience=}")
# todo: check that y_names is category, not numeric.
print(f"{epochs=} {monitor=} {min_delta=} {patience=}")

learn = tabular_learner(dls, layers=layers, metrics=accuracy)
# Create a tabular learner
learn = tabular_learner(dls, metrics=accuracy)

# Use mixed precision training. slower and error.
# error: Can't get attribute 'AMPMode' on <module 'fastai.callback.fp16'
#learn.to_fp16() # to_fp32() or to_bf16()

# Use one cycle policy for training with early stopping
learn.fit_one_cycle(epochs, cbs=EarlyStoppingCallback(monitor=monitor, min_delta=min_delta, patience=patience)) # todo: experiment with using lr_max?
# Train the model
learn.fit_one_cycle(epochs) #, cbs=EarlyStoppingCallback(monitor=monitor, min_delta=min_delta, patience=patience)) # sometimes only a couple epochs is optimal

return learn

def train_regression(dls, epochs=20, layers=[200]*10, y_range=(0,1), monitor='valid_loss', min_delta=0.001, patience=3):
def train_regression(dls, epochs=20, layers=[200]*10, y_range=(0,1), monitor='valid_loss', min_delta=0.001, patience=3):
"""
Train a tabular model for regression.
"""
Expand Down

0 comments on commit 8cb3d99

Please sign in to comment.