
Commit

new tasks
Damien Sileo committed Jun 21, 2024
1 parent 2b0ae93 commit 3c65429
Showing 3 changed files with 393 additions and 365 deletions.
20 changes: 17 additions & 3 deletions src/tasksource/tasks.py
@@ -772,7 +772,7 @@ def _preprocess_chatgpt_detection(ex):
     dataset_name="ColumbiaNLP/FLUTE")
 
 strategy_qa = Classification('question',labels='answer',
-    dataset_name="metaeval/strategy-qa",splits=['train',None,None])
+    dataset_name="tasksource/strategy-qa",splits=['train',None,None])
 
 summarize_from_feedback = MultipleChoice(get.info.post,
     choices_list=lambda x: [x['summaries'][0]['text'],x['summaries'][1]['text']],
@@ -951,6 +951,7 @@ def _udep_post_process(ds):
 #for CFG in "cognitive-bias", "fake-news", "gender-bias", "hate-speech", "linguistic-bias", "political-bias", "racial-bias", "text-level-bias":
 # print(f"mbib__{CFG.replace('-','_')} = Classification('text',labels=name('label',['not {CFG}','{CFG}']), dataset_name='mediabiasgroup/mbib-base', config_name='{CFG}')")
 
+"""
 mbib_cognitive_bias = Classification('text',labels=name('label',['not cognitive-bias','cognitive-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='cognitive-bias')
 mbib_fake_news = Classification('text',labels=name('label',['not fake-news','fake-news']), dataset_name='mediabiasgroup/mbib-base', config_name='fake-news')
 mbib_gender_bias = Classification('text',labels=name('label',['not gender-bias','gender-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='gender-bias')
@@ -959,6 +960,7 @@ def _udep_post_process(ds):
 mbib_political_bias = Classification('text',labels=name('label',['not political-bias','political-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='political-bias')
 mbib_racial_bias = Classification('text',labels=name('label',['not racial-bias','racial-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='racial-bias')
 mbib_text_level_bias = Classification('text',labels=name('label',['not text-level-bias','text-level-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='text-level-bias')
+"""
 
 robustLR = Classification("context","statement","label", dataset_name="tasksource/robustLR")
 
@@ -1021,7 +1023,7 @@ def _icl_rand(x):
 icl = Classification("inputs", lambda x: x['symbols'][_icl_rand(x)],
     labels=lambda x: str(x['symbols'][_icl_rand(x)]==x['targets']),
     dataset_name="tasksource/icl-symbol-tuning-instruct",
-    pre_process=lambda ds:ds.filter(lambda x:len(x['inputs'])<200*4), # 200 tokens of 4 char
+    pre_process=lambda ds:ds.filter(lambda x:len(x['inputs'])<500*4), # 500 tokens of 4 char
 )
 
 space_nli = Classification("premises","hypothesis","label",dataset_name="tasksource/SpaceNLI")
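
The change in the hunk above raises the length cap from 200*4 to 500*4 characters, i.e. from roughly 200 to roughly 500 tokens at about 4 characters per token. A minimal standalone sketch of the same filter on a toy dataset (the toy data is hypothetical; the real task uses tasksource/icl-symbol-tuning-instruct):

from datasets import Dataset

# Toy stand-in for the real dataset's 'inputs' column.
ds = Dataset.from_dict({"inputs": ["a short prompt", "x" * 3000]})

MAX_CHARS = 500 * 4  # ~500 tokens at ~4 characters per token
filtered = ds.filter(lambda x: len(x["inputs"]) < MAX_CHARS)
print(filtered.num_rows)  # 1: the 3000-character example is dropped
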
@@ -1234,4 +1236,16 @@ def _nlgraph_binarize(x):
 lex_glue___ecthr_a = Classification(sentence1="text", labels="labels",dataset_name="coastalcph/lex_glue",config_name="ecthr_a") # too long
 lex_glue___ecthr_b = Classification(sentence1="text", labels="labels") # too long
 
-ultrafeedback = MultipleChoice("question", choices=['response_j','reponse_k'],labels=constant(0), dataset_name="pushpdeep/UltraFeedback-paired")
+ultrafeedback = MultipleChoice("question", choices=['response_j','response_k'],labels=constant(0), dataset_name="pushpdeep/UltraFeedback-paired")
+
+essay_scoring = Classification("full_text",labels="score",dataset_name='tasksource/AES2-essay-scoring')
+
+argument_feedback = Classification("discourse_text",labels="discourse_effectiveness", dataset_name="tasksource/argument-feedback")
+
+eg = lambda x: Classification("full_text", labels=lambda y:int(y[x]), dataset_name="tasksource/english-grading")
+grading__cohesion = eg('cohesion')
+grading__syntax = eg('syntax')
+grading__vocabulary = eg('vocabulary')
+grading__phraseology = eg('phraseology')
+grading__grammar = eg('grammar')
+grading__conventions = eg('conventions')
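
Each assignment in this file builds a lazy task definition; nothing is downloaded until the task is loaded. A minimal usage sketch for one of the newly added tasks, assuming task objects are exposed at the tasksource package top level and provide a load() method returning a Hugging Face DatasetDict (check your installed version):

import tasksource

# Hypothetical usage: materialize the new essay-scoring task, which maps
# the dataset's 'full_text' column to the input and 'score' to the label.
dataset = tasksource.essay_scoring.load()
print(dataset)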