From b145b9698715b345595d7a24507af4553f00626d Mon Sep 17 00:00:00 2001 From: TablewareBox <1700011741@pku.edu.cn> Date: Wed, 13 Mar 2024 06:00:40 +0800 Subject: [PATCH] remove error pr (tmp) --- .../data/00_scipaper_enzyme_km/samples.jsonl | 14 -------------- .../00_scipaper_enzyme_substrate/samples.jsonl | 14 -------------- evals/registry/data/05_biochart/samples.jsonl | 15 --------------- .../data/05_biochart/samples_single.jsonl | 15 --------------- .../data/05_biochart/samples_single_gemini.jsonl | 15 --------------- 5 files changed, 73 deletions(-) delete mode 100755 evals/registry/data/00_scipaper_enzyme_km/samples.jsonl delete mode 100755 evals/registry/data/00_scipaper_enzyme_substrate/samples.jsonl delete mode 100644 evals/registry/data/05_biochart/samples.jsonl delete mode 100644 evals/registry/data/05_biochart/samples_single.jsonl delete mode 100644 evals/registry/data/05_biochart/samples_single_gemini.jsonl diff --git a/evals/registry/data/00_scipaper_enzyme_km/samples.jsonl b/evals/registry/data/00_scipaper_enzyme_km/samples.jsonl deleted file mode 100755 index 8d6e2ce091..0000000000 --- a/evals/registry/data/00_scipaper_enzyme_km/samples.jsonl +++ /dev/null @@ -1,14 +0,0 @@ -{"file_name": "../uni-finder/enzyme/km/paper/10.1007_s00425-014-2102-6.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1007_s00425-014-2102-6.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/10.1007_s00425-014-2102-6.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1007_s00425-014-2102-6.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/km/paper/10.1007_s10725-019-00528-9.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1007_s10725-019-00528-9.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/10.1007_s10725-019-00528-9.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1007_s10725-019-00528-9.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/km/paper/10.1016_j.bbrep.2016.11.003.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1016_j.bbrep.2016.11.003.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/10.1016_j.bbrep.2016.11.003.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1016_j.bbrep.2016.11.003.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/km/paper/10.1016_s0005-2728__97__00090-x.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1016_s0005-2728__97__00090-x.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/10.1016_s0005-2728__97__00090-x.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1016_s0005-2728__97__00090-x.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/km/paper/10.1016_s0021-9258__18__96277-0.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1016_s0021-9258__18__96277-0.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/10.1016_s0021-9258__18__96277-0.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1016_s0021-9258__18__96277-0.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/km/paper/10.1016_s0021-9258__18__96427-6.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1016_s0021-9258__18__96427-6.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/10.1016_s0021-9258__18__96427-6.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1016_s0021-9258__18__96427-6.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/km/paper/10.1016_S0076-6879__75__41082-5.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1016_S0076-6879__75__41082-5.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/10.1016_S0076-6879__75__41082-5.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1016_S0076-6879__75__41082-5.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/km/paper/10.1016_s0141-8130__01__00188-x.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1016_s0141-8130__01__00188-x.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/10.1016_s0141-8130__01__00188-x.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1016_s0141-8130__01__00188-x.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/km/paper/10.1021_acs.biochem.6b00536.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1021_acs.biochem.6b00536.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/10.1021_acs.biochem.6b00536.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1021_acs.biochem.6b00536.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/km/paper/10.1080_09168451.2020.1751582.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1080_09168451.2020.1751582.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/10.1080_09168451.2020.1751582.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1080_09168451.2020.1751582.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/km/paper/10.1080_09168451.2020.1799749.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1080_09168451.2020.1799749.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/10.1080_09168451.2020.1799749.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1080_09168451.2020.1799749.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/km/paper/10.1104_pp.19.01225.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1104_pp.19.01225.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/10.1104_pp.19.01225.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1104_pp.19.01225.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/km/paper/10.1139_b07-081.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1139_b07-081.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/10.1139_b07-081.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/10.1139_b07-081.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/km/paper/j.1432-1033.1986.tb09548.x.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/j.1432-1033.1986.tb09548.x.pdf", "answerfile_name": "../uni-finder/enzyme/km/answer/j.1432-1033.1986.tb09548.x.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/j.1432-1033.1986.tb09548.x.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"} diff --git a/evals/registry/data/00_scipaper_enzyme_substrate/samples.jsonl b/evals/registry/data/00_scipaper_enzyme_substrate/samples.jsonl deleted file mode 100755 index ef5f199679..0000000000 --- a/evals/registry/data/00_scipaper_enzyme_substrate/samples.jsonl +++ /dev/null @@ -1,14 +0,0 @@ -{"file_name": "../uni-finder/enzyme/substrate/paper/s_10.1007_s00425-014-2102-6.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1007_s00425-014-2102-6.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_10.1007_s00425-014-2102-6.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1007_s00425-014-2102-6.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/substrate/paper/s_10.1007_s10725-019-00528-9.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1007_s10725-019-00528-9.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_10.1007_s10725-019-00528-9.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1007_s10725-019-00528-9.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/substrate/paper/s_10.1007_s11103-006-0040-9.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1007_s11103-006-0040-9.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_10.1007_s11103-006-0040-9.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1007_s11103-006-0040-9.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/substrate/paper/s_10.1016_j.bbrep.2016.11.003.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1016_j.bbrep.2016.11.003.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_10.1016_j.bbrep.2016.11.003.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1016_j.bbrep.2016.11.003.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/substrate/paper/s_10.1016_s0005-2728__97__00090-x.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1016_s0005-2728__97__00090-x.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_10.1016_s0005-2728__97__00090-x.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1016_s0005-2728__97__00090-x.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/substrate/paper/s_10.1016_s0021-9258__18__96277-0.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1016_s0021-9258__18__96277-0.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_10.1016_s0021-9258__18__96277-0.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1016_s0021-9258__18__96277-0.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/substrate/paper/s_10.1016_s0021-9258__18__96427-6.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1016_s0021-9258__18__96427-6.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_10.1016_s0021-9258__18__96427-6.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1016_s0021-9258__18__96427-6.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/substrate/paper/s_10.1016_S0076-6879__75__41082-5.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1016_S0076-6879__75__41082-5.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_10.1016_S0076-6879__75__41082-5.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1016_S0076-6879__75__41082-5.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/substrate/paper/s_10.1021_acs.biochem.6b00536.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1021_acs.biochem.6b00536.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_10.1021_acs.biochem.6b00536.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1021_acs.biochem.6b00536.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/substrate/paper/s_10.1080_09168451.2020.1751582.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1080_09168451.2020.1751582.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_10.1080_09168451.2020.1751582.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1080_09168451.2020.1751582.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/substrate/paper/s_10.1080_09168451.2020.1799749.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1080_09168451.2020.1799749.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_10.1080_09168451.2020.1799749.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1080_09168451.2020.1799749.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/substrate/paper/s_10.1104_pp.19.01225.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1104_pp.19.01225.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_10.1104_pp.19.01225.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1104_pp.19.01225.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/substrate/paper/s_10.1139_b07-081.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1139_b07-081.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_10.1139_b07-081.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_10.1139_b07-081.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} -{"file_name": "../uni-finder/enzyme/substrate/paper/s_j.1432-1033.1986.tb09548.x.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_j.1432-1033.1986.tb09548.x.pdf", "answerfile_name": "../uni-finder/enzyme/substrate/answer/s_j.1432-1033.1986.tb09548.x.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/s_j.1432-1033.1986.tb09548.x.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"} diff --git a/evals/registry/data/05_biochart/samples.jsonl b/evals/registry/data/05_biochart/samples.jsonl deleted file mode 100644 index 54089c2448..0000000000 --- a/evals/registry/data/05_biochart/samples.jsonl +++ /dev/null @@ -1,15 +0,0 @@ -{"file_name": "../bio_chart_data/10.48550_arXiv.2310.00926.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 3, which has a higher accurate score, with the graph encoder or without? \n\na) with graph encoder \nb) w/o graph encoder"}], "ideal": "a) with graph encoder"} -{"file_name": "../bio_chart_data/10.1093_bib_bbab147.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 2 part A, what contributes most to the gain in cell similarity? \n\na) Gene+KEGG \nb) Gene+Reactome \nc) Gene+de novo pathway \nd) Gene+Wikipathways"}], "ideal": "d) Gene+Wikipathways"} -{"file_name": "../bio_chart_data/10.1038_s41592-023-01938-4.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Fig. 4, in the PCA analysis, which group of cell lines is classified as intermediate? \n\na) MM001 \nb) MM031 \nc)MM057 \nd) M074"}], "ideal": "c)MM057"} -{"file_name": "../bio_chart_data/10.1371_journal. pcbi.1008379.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Fig. 3.d, which method performs the best (has the highest AUC)? \n\na) PCOR \nb) PROB \nc) LEAP \nd) CLR"}], "ideal": "b) PROB"} -{"file_name": "../bio_chart_data/10.1101_2022.08.20.504663.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Supp Fig 1, which number is closest to the number of cells that received two perturbations? \n\na) 10000 \nb) 40000 \nc) 50000 \nd) 60000"}], "ideal": "b) 40000"} -{"file_name": "../bio_chart_data/10.1016_j.crmeth.2023.100411.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 2 part A, what is the Average Rank (F1-score) of ForSyn? \n\na) 2 \nb) 4 \nc) 5 \nd) 10"}], "ideal": "a) 2"} -{"file_name": "../bio_chart_data/10.1101_gr.277488.122.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 3B, which network backbone gets the best AUPR for pre-mRNA? \n\na) Linear \nb) cycle \nc) bifurcating \nd) converging"}], "ideal": "d) converging"} -{"file_name": "../bio_chart_data/10.1038_s41467-023-37897-9.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 6 part A, what is the observed trend in the time-lapse response of the pooled malathion reporters with increasing concentrations of malathion as shown in part (a) of the figure? \n\na) increase \nb)decrease"}], "ideal": "a) increase"} -{"file_name": "../bio_chart_data/10.1101_gr.275870.121.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 2 part A, which method has a lower NLL value? \n\na) pi-CNN \nb) Epi-GraphReg"}], "ideal": "b) Epi-GraphReg"} -{"file_name": "../bio_chart_data/10.1016_j.cels.2020.11.013.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "Figure 3 part A: Under the single-to-combo condition, which method has the lowest model performance (correlation)? \n\na) NN \nb) Co-exp \nc) BP \nd) CellBox"}], "ideal": "c) BP"} -{"file_name": "../bio_chart_data/10.1101_2023.03.24.533888.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "What is the impact of increasing the Kh value on the predicted signaling strengths of the gene interactions as shown in Figure S1 part A? A) It significantly increases the predicted signaling strengths. B) It does not significantly change the predicted signaling strengths. C) It significantly decreases the predicted signaling strengths. D) It causes the predicted signaling strengths to fluctuate unpredictably."}], "ideal": "C) It significantly decreases the predicted signaling strengths."} -{"file_name": "../bio_chart_data/10.1093_bib_bbab366.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "For Figure 3 C in this paper, when the gene number is 8000, which one has the highest ARI score? \n\na) Raw \nb) SCORE \nc) CSN \nd) SCENIC"}], "ideal": "b) SCORE"} -{"file_name": "../bio_chart_data/10.15252_msb.202211176.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "For Figure 2 part A in this paper, what trend does Uy exhibit as Ux increases? \n\na) Decrease \nb) Increase"}], "ideal": "b) Increase"} -{"file_name": "../bio_chart_data/10.1101_2022.04.17.488600.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "For Figure 5 part d in this paper, which distance is greater, Alpha, Beta, or Delta? \n\na) Alpha \nb) Beta \nc) Delta"}], "ideal": "b) Beta"} -{"file_name": "../bio_chart_data/10.21203_rs.3.rs-2675584_v1.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Fig. SR1 of this paper, in the validation MSE (Mean Squared Error) results for the SIM350 system, which model had the lowest average MSE under high noise conditions? \n\na) PHOENIX \nb) Unregularized PHOENIX \nc) OOTB (tanh) \nd) OOTB (sigmoid) E. OOTB (ReLU)"}], "ideal": "a) PHOENIX"} diff --git a/evals/registry/data/05_biochart/samples_single.jsonl b/evals/registry/data/05_biochart/samples_single.jsonl deleted file mode 100644 index 5bbaea5267..0000000000 --- a/evals/registry/data/05_biochart/samples_single.jsonl +++ /dev/null @@ -1,15 +0,0 @@ -{"file_name": "../bio_chart_data/10.48550_arXiv.2310.00926_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 3, which has a higher accurate score, with the graph encoder or without? \n\na) with graph encoder \nb) w/o graph encoder"}], "ideal": "a) with graph encoder"} -{"file_name": "../bio_chart_data/10.1093_bib_bbab147_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 2 part A, what contributes most to the gain in cell similarity? \n\na) Gene+KEGG \nb) Gene+Reactome \nc) Gene+de novo pathway \nd) Gene+Wikipathways"}], "ideal": "d) Gene+Wikipathways"} -{"file_name": "../bio_chart_data/10.1038_s41592-023-01938-4_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Fig. 4, in the PCA analysis, which group of cell lines is classified as intermediate? \n\na) MM001 \nb) MM031 \nc)MM057 \nd) M074"}], "ideal": "c)MM057"} -{"file_name": "../bio_chart_data/10.1371_journal.pcbi.1008379_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Fig. 3.d, which method performs the best (has the highest AUC)? \n\na) PCOR \nb) PROB \nc) LEAP \nd) CLR"}], "ideal": "b) PROB"} -{"file_name": "../bio_chart_data/10.1101_2022.08.20.504663_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Supp Fig 1, which number is closest to the number of cells that received two perturbations? \n\na) 10000 \nb) 40000 \nc) 50000 \nd) 60000"}], "ideal": "b) 40000"} -{"file_name": "../bio_chart_data/10.1016_j.crmeth.2023.100411_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 2 part A, what is the Average Rank (F1-score) of ForSyn? \n\na) 2 \nb) 4 \nc) 5 \nd) 10"}], "ideal": "a) 2"} -{"file_name": "../bio_chart_data/10.1101_gr.277488.122_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 3B, which network backbone gets the best AUPR for pre-mRNA? \n\na) Linear \nb) cycle \nc) bifurcating \nd) converging"}], "ideal": "d) converging"} -{"file_name": "../bio_chart_data/10.1038_s41467-023-37897-9_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 6 part A, what is the observed trend in the time-lapse response of the pooled malathion reporters with increasing concentrations of malathion as shown in part (a) of the figure? \n\na) increase \nb)decrease"}], "ideal": "a) increase"} -{"file_name": "../bio_chart_data/10.1101_gr.275870.121_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 2 part A, which method has a lower NLL value? \n\na) pi-CNN \nb) Epi-GraphReg"}], "ideal": "b) Epi-GraphReg"} -{"file_name": "../bio_chart_data/10.1016_j.cels.2020.11.013_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "Figure 3 part A: Under the single-to-combo condition, which method has the lowest model performance (correlation)? \n\na) NN \nb) Co-exp \nc) BP \nd) CellBox"}], "ideal": "c) BP"} -{"file_name": "../bio_chart_data/10.1101_2023.03.24.533888_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "What is the impact of increasing the Kh value on the predicted signaling strengths of the gene interactions as shown in Figure S1 part A? A) It significantly increases the predicted signaling strengths. B) It does not significantly change the predicted signaling strengths. C) It significantly decreases the predicted signaling strengths. D) It causes the predicted signaling strengths to fluctuate unpredictably."}], "ideal": "C) It significantly decreases the predicted signaling strengths."} -{"file_name": "../bio_chart_data/10.1093_bib_bbab366_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "For Figure 3 C in this paper, when the gene number is 8000, which one has the highest ARI score? \n\na) Raw \nb) SCORE \nc) CSN \nd) SCENIC"}], "ideal": "b) SCORE"} -{"file_name": "../bio_chart_data/10.15252_msb.202211176_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "For Figure 2 part A in this paper, what trend does Uy exhibit as Ux increases? \n\na) Decrease \nb) Increase"}], "ideal": "b) Increase"} -{"file_name": "../bio_chart_data/10.1101_2022.04.17.488600_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "For Figure 5 part d in this paper, which distance is greater, Alpha, Beta, or Delta? \n\na) Alpha \nb) Beta \nc) Delta"}], "ideal": "b) Beta"} -{"file_name": "../bio_chart_data/10.21203_rs.3.rs-2675584_v1_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Fig. SR1 of this paper, in the validation MSE (Mean Squared Error) results for the SIM350 system, which model had the lowest average MSE under high noise conditions? \n\na) PHOENIX \nb) Unregularized PHOENIX \nc) OOTB (tanh) \nd) OOTB (sigmoid) E. OOTB (ReLU)"}], "ideal": "a) PHOENIX"} diff --git a/evals/registry/data/05_biochart/samples_single_gemini.jsonl b/evals/registry/data/05_biochart/samples_single_gemini.jsonl deleted file mode 100644 index d0ba1e2f55..0000000000 --- a/evals/registry/data/05_biochart/samples_single_gemini.jsonl +++ /dev/null @@ -1,15 +0,0 @@ -{"file_name": "../bio_chart_data/10.48550_arXiv.2310.00926_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 3, which has a higher accurate score, with the graph encoder or without? \n\na) with graph encoder \nb) w/o graph encoder"}], "ideal": "a) with graph encoder"} -{"file_name": "../bio_chart_data/10.1093_bib_bbab147_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 2 part A, what contributes most to the gain in cell similarity? \n\na) Gene+KEGG \nb) Gene+Reactome \nc) Gene+de novo pathway \nd) Gene+Wikipathways"}], "ideal": "d) Gene+Wikipathways"} -{"file_name": "../bio_chart_data/10.1038_s41592-023-01938-4_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Fig. 4, in the PCA analysis, which group of cell lines is classified as intermediate? \n\na) MM001 \nb) MM031 \nc) MM057 \nd) M074"}], "ideal": "c)MM057"} -{"file_name": "../bio_chart_data/10.1371_journal.pcbi.1008379_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Fig. 3.d, which method performs the best (has the highest AUC)? \n\na) PCOR \nb) PROB \nc) LEAP \nd) CLR"}], "ideal": "b) PROB"} -{"file_name": "../bio_chart_data/10.1101_2022.08.20.504663_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Supp Fig 1, which number is closest to the number of cells that received two perturbations? \n\na) 10000 \nb) 40000 \nc) 50000 \nd) 60000"}], "ideal": "b) 40000"} -{"file_name": "../bio_chart_data/10.1016_j.crmeth.2023.100411_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 2 part A, what is the Average Rank (F1-score) of ForSyn? \n\na) 2 \nb) 4 \nc) 5 \nd) 10"}], "ideal": "a) 2"} -{"file_name": "../bio_chart_data/10.1101_gr.277488.122_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 3B, which network backbone gets the best AUPR for pre-mRNA? \n\na) Linear \nb) cycle \nc) bifurcating \nd) converging"}], "ideal": "d) converging"} -{"file_name": "../bio_chart_data/10.1101_gr.275870.121_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Figure 2 part A, which method has a lower NLL value? \n\na) pi-CNN \nb) Epi-GraphReg"}], "ideal": "b) Epi-GraphReg"} -{"file_name": "../bio_chart_data/10.1016_j.cels.2020.11.013_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "Figure 3 part A: Under the single-to-combo condition, which method has the lowest model performance (correlation)? \n\na) NN \nb) Co-exp \nc) BP \nd) CellBox"}], "ideal": "c) BP"} -{"file_name": "../bio_chart_data/10.1101_2023.03.24.533888_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "What is the impact of increasing the Kh value on the predicted signaling strengths of the gene interactions as shown in Figure S1 part A? \na) It significantly increases the predicted signaling strengths. \nb) It does not significantly change the predicted signaling strengths. \nc) It significantly decreases the predicted signaling strengths. \nd) It causes the predicted signaling strengths to fluctuate unpredictably."}], "ideal": "c) It significantly decreases the predicted signaling strengths."} -{"file_name": "../bio_chart_data/10.1093_bib_bbab366_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "For Figure 3 C in this paper, when the gene number is 8000, which one has the highest ARI score? \n\na) Raw \nb) SCORE \nc) CSN \nd) SCENIC"}], "ideal": "b) SCORE"} -{"file_name": "../bio_chart_data/10.15252_msb.202211176_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "For Figure 2 part A in this paper, what trend does Uy exhibit as Ux increases? \n\na) Decrease \nb) Increase"}], "ideal": "b) Increase"} -{"file_name": "../bio_chart_data/10.1101_2022.04.17.488600_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "For Figure 5 part d in this paper, which distance is greater, Alpha, Beta, or Delta? \n\na) Alpha \nb) Beta \nc) Delta"}], "ideal": "b) Beta"} -{"file_name": "../bio_chart_data/10.21203_rs.3.rs-2675584_v1_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Fig. SR1 of this paper, in the validation MSE (Mean Squared Error) results for the SIM350 system, which model had the lowest average MSE under high noise conditions? \n\na) PHOENIX \nb) Unregularized PHOENIX \nc) OOTB (tanh) \nd) OOTB (sigmoid) \ne) OOTB (ReLU)"}], "ideal": "a) PHOENIX"} -{"file_name": "../bio_chart_data/10.1371_journal.pcbi.1007909_singlepage.pdf", "input": [{"role": "system", "content": "You are a highly intelligent biology scientist who answers the following multiple choice question correctly.\nOnly write the options and values down, such as 'b) 2045.1'."}, {"role": "user", "content": "In Fig 4 of this paper, Which of the following would AKTi activate, according to the signalling pathway shown in the image? \n\na) MEKi \nb) RAFi \nc) STAT6-pY641 \nd) c-JUN"}], "ideal": "c) STAT6-pY641"}