Skip to content

Commit

Permalink
Working on autometrics
Browse files Browse the repository at this point in the history
  • Loading branch information
NotBioWaste905 committed Nov 8, 2024
1 parent 25da331 commit df82bf2
Show file tree
Hide file tree
Showing 8 changed files with 541 additions and 122 deletions.
6 changes: 5 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@ from chatsky_llm_autoconfig.autometrics.registry import AlgorithmRegistry
class DialogueSampler(DialogueGenerator):
```
2. Make sure that `input_type` and `output_type` match the signature of the `.invoke()` method.

3. Run
```bash
poetry run python dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/run_autometrics.py
```
4. If the metrics did not drop, everything is fine: add, commit and push as usual.

## How to Contribute
1. Make your changes and test your hypothesis in the `./experiments` folder, as described in the **Conducting experiments** section
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,3 @@ def __init__(self):

# Placeholder `invoke`: accepts an optional dialogue, an optional graph and a
# topic string, is annotated as returning a BaseGraph, and unconditionally
# raises NotImplementedError.
# NOTE(review): presumably concrete generators override this method — confirm
# against the enclosing class, whose header is outside this excerpt.
def invoke(self, dialogue: Dialogue = None, graph: BaseGraph = None, topic: str = "") -> BaseGraph:
raise NotImplementedError


class CycleGraphGenerator(GraphGenerator):
def invoke(self, dialogue: Dialogue = None, graph: BaseGraph = None, topic: str = "") -> BaseGraph:

Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from chatsky_llm_autoconfig.autometrics.registry import AlgorithmRegistry


@AlgorithmRegistry.register(input_type=BaseGraph, output_type=list[Dialogue])
@AlgorithmRegistry.register(input_type=BaseGraph, output_type=Dialogue)
class DialogueSampler(DialogueGenerator):

def invoke(self, graph: BaseGraph, start_node: int = 1, end_node: int = -1, topic="") -> list[Dialogue]:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,66 @@
{
"2024-11-06 01:03:16.054569": {
"2024-11-08 08:36:09.361006": {
"DialogueSampler": {
"all_paths_sampled": true,
"all_utterances_present": true
"all_paths_sampled": [
true,
true,
true
],
"all_utterances_present": [
true,
false,
false
],
"all_paths_sampled_avg": 1.0,
"all_utterances_present_avg": 0.3333333333333333
}
},
"2024-11-06 12:50:44.905469": {
"2024-11-08 08:46:12.796705": {
"DialogueSampler": {
"all_paths_sampled": true,
"all_utterances_present": true
"all_paths_sampled": [
true,
true,
true
],
"all_utterances_present": [
true,
true,
false
],
"all_paths_sampled_avg": 1.0,
"all_utterances_present_avg": 0.6666666666666666
}
},
"2024-11-06 14:24:21.022803": {
"2024-11-08 08:59:00.654385": {
"DialogueSampler": {
"all_paths_sampled": true,
"all_utterances_present": true
},
"DialogueSampler2": {
"all_paths_sampled": true,
"all_utterances_present": true
"all_paths_sampled": [
true,
true,
true
],
"all_utterances_present": [
true,
true,
false
],
"all_paths_sampled_avg": 1.0,
"all_utterances_present_avg": 0.6666666666666666
}
},
"2024-11-08 09:00:40.185875": {
"DialogueSampler": {
"all_paths_sampled": [
true,
true,
true
],
"all_utterances_present": [
true,
true,
false
],
"all_paths_sampled_avg": 1.0,
"all_utterances_present_avg": 0.6666666666666666
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from chatsky_llm_autoconfig.autometrics.registry import AlgorithmRegistry
import chatsky_llm_autoconfig.sample_dialogue
import chatsky_llm_autoconfig.algorithms.dialogue_generation
import chatsky_llm_autoconfig.algorithms.dialogue_augmentation
import chatsky_llm_autoconfig.algorithms.graph_generation

import json
from chatsky_llm_autoconfig.graph import Graph, BaseGraph
from chatsky_llm_autoconfig.dialogue import Dialogue
Expand All @@ -9,11 +12,8 @@

with open("dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/test_data/data.json") as f:
test_data = json.load(f)
test_dialogue = test_data["dialogue"]
test_dialogue = Dialogue(dialogue=test_dialogue)
test_graph = test_data["graph"]
test_graph = Graph(graph_dict=test_graph)

graph_to_dialogue = test_data["graph_to_dialogue"]
dialogue_to_dialogue = test_data["dialogue_to_dialogue"]

def run_all_algorithms():
# Get all registered classes
Expand All @@ -23,28 +23,86 @@ def run_all_algorithms():
for class_ in algorithms:
class_instance = algorithms[class_]["type"]()
metrics = {}
if algorithms[class_]["input_type"] is BaseGraph:
result = class_instance.invoke(test_graph)

if algorithms[class_]["input_type"] is BaseGraph and algorithms[class_]["output_type"] is Dialogue:
metrics = {
"all_paths_sampled": all_paths_sampled(test_graph, result[0]),
"all_utterances_present": all_utterances_present(test_graph, result),
"all_paths_sampled": [],
"all_utterances_present": [],
}
for case in graph_to_dialogue:
test_dialogue = Dialogue(dialogue=case["dialogue"])
test_graph = Graph(graph_dict=case["graph"])
result = class_instance.invoke(test_graph)

metrics["all_paths_sampled"].append(all_paths_sampled(test_graph, result[0]))
metrics["all_utterances_present"].append(all_utterances_present(test_graph, result))

metrics["all_paths_sampled_avg"] = sum(metrics["all_paths_sampled"])/len(metrics["all_paths_sampled"])
metrics["all_utterances_present_avg"] = sum(metrics["all_utterances_present"])/len(metrics["all_utterances_present"])

elif algorithms[class_]["input_type"] is Dialogue and algorithms[class_]["output_type"] is Dialogue:
metrics = {
"all_roles_correct": [],
"is_correct_lenght": [],
}
for case in dialogue_to_dialogue:
test_dialogue = Dialogue(dialogue=case["dialogue"])
result = class_instance.invoke(test_dialogue)

elif algorithms[class_]["input_type"] is list[Dialogue]:
metrics["all_roles_correct"].append(all_roles_correct(test_dialogue, result))
metrics["is_correct_lenght"].append(is_correct_lenght(test_dialogue, result))

metrics["all_roles_correct_avg"] = sum(metrics["all_roles_correct"])/len(metrics["all_roles_correct"])
metrics["is_correct_lenght_avg"] = sum(metrics["is_correct_lenght"])/len(metrics["is_correct_lenght"])

elif algorithms[class_]["input_type"] is str and algorithms[class_]["output_type"] is BaseGraph:
result = class_instance.invoke(test_dialogue)
metrics = {

}

total_metrics[class_] = metrics

return total_metrics

# Maps each key of the per-algorithm diff report to the averaged metric it is
# derived from. Note the report key is spelled "length" while the stored
# metric key is "is_correct_lenght_avg"; both spellings are runtime keys and
# are kept exactly as they are.
_DIFF_KEY_TO_METRIC = {
    "all_paths_sampled_diff": "all_paths_sampled_avg",
    "all_utterances_present_diff": "all_utterances_present_avg",
    "all_roles_correct_diff": "all_roles_correct_avg",
    "is_correct_length_diff": "is_correct_lenght_avg",
}


def _sum_tracked_metrics(metrics):
    """Add up the tracked averaged metrics of one algorithm (missing ones count as 0)."""
    total = 0
    # Plain left-to-right accumulation keeps the arithmetic identical to
    # writing the four terms out by hand.
    for metric_name in _DIFF_KEY_TO_METRIC.values():
        total += metrics.get(metric_name, 0)
    return total


def compare_results(date, old_data):
    """Compare the metrics of the run stored under ``date`` with the preceding run.

    Args:
        date: Key of the run to inspect inside ``old_data``.
        old_data: Mapping ``run key -> {algorithm name -> metrics dict}``.
            The preceding run is the entry located immediately before
            ``date`` in the mapping's iteration order.

    Returns:
        A dict ``{algorithm name -> report}`` for every algorithm present in
        both runs. Each report holds the per-metric differences
        (current minus previous) under the keys of ``_DIFF_KEY_TO_METRIC``
        plus ``"total_diff"``, the difference of the summed metrics.
        An empty dict is returned when ``date`` is unknown or when there is
        no earlier run to compare with (e.g. the very first run).
    """
    run_keys = list(old_data)
    if date not in run_keys:
        return {}

    position = run_keys.index(date)
    if position == 0:
        # No earlier run exists; previously this case raised IndexError.
        return {}

    current_metrics = old_data[date]
    previous_metrics = old_data[run_keys[position - 1]]

    differences = {}
    for algorithm, metrics in current_metrics.items():
        if algorithm not in previous_metrics:
            # Newly added algorithms have nothing to be compared against.
            continue
        previous = previous_metrics[algorithm]

        report = {
            diff_key: metrics.get(metric_name, 0) - previous.get(metric_name, 0)
            for diff_key, metric_name in _DIFF_KEY_TO_METRIC.items()
        }
        report["total_diff"] = _sum_tracked_metrics(metrics) - _sum_tracked_metrics(previous)
        differences[algorithm] = report

    return differences


if __name__ == "__main__":
with open("dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/results/results.json") as f:
old_data = json.load(f)

new_metrics = {str(datetime.datetime.now()): run_all_algorithms()}

date = str(datetime.datetime.now())
new_metrics = {date: run_all_algorithms()}
old_data.update(new_metrics)
print(compare_results(date, old_data))

with open("dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/results/results.json", "w") as f:
f.write(json.dumps(old_data, indent=2, ensure_ascii=False))
Loading

0 comments on commit df82bf2

Please sign in to comment.