# main.py

# -*- coding: utf-8 -*-
"""Main.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1OqjgZ6FVALYKFULSFyB5Y13CcysCf2qX
"""

import pandas as pd
import preprocessing_data
import generate_answers_allennlp
import generate_distractors_conceptnet
import generate_questions_t5
import generate_distractors_wordnet
import keyword_exctraction
import bdg_distractor_generation
import get_distractors__sense2vec

"""# Read text Data"""

def read_data(path):
  """Return the contents of the file at `path` as a UTF-8 decoded string.

  The file is read in binary mode and decoded explicitly, so the bytes are
  decoded exactly as stored (no newline translation is applied).

  Raises:
    OSError: if the file cannot be opened.
    UnicodeDecodeError: if the content is not valid UTF-8.
  """
  with open(path, 'rb') as handle:
    return handle.read().decode("utf-8")

# Input document the question-generation pipeline runs on.
path = 'ML.txt'

if __name__ == '__main__':
  # Pipeline: clean the raw text, map keywords to the sentences containing
  # them, generate one question per (keyword, sentence) pair, then collect an
  # AllenNLP answer and a distractor for every question into a DataFrame.

  ## Call Preprocessing Functions
  text = read_data(path)
  text = preprocessing_data.lower_casing_text(text)
  text = preprocessing_data.remove_emailFormate(text)
  text = preprocessing_data.replace_comma(text)
  # NOTE(review): CONTRACTION_MAP is neither defined nor imported in this
  # file, so this call raises NameError as written. Presumably it lives in
  # preprocessing_data -- confirm and import it from there.
  text = preprocessing_data.expand_contractions(text, contraction_mapping=CONTRACTION_MAP)
  text = preprocessing_data.removing_Num_between_brackets(text)
  text = preprocessing_data.removing_special_characters(text)
  #text= preprocessing_data.removing_stopwords(text)
  text = preprocessing_data.spelling_correction(text)
  text = preprocessing_data.lemmatization(text)
  ###########################################################
  # Call functions from the keyword_exctraction module to generate the
  # keyword -> sentences mapping
  keyword_sentence_mapping = keyword_exctraction.keyword_sentenceMapping(text)
  ############################################################
  # Generate questions using the T5 model, with the keywords as answers
  questions = []
  contexts = []
  answers = []
  key_distractor_list = {}
  # Fetch the model/tokenizer pair once: the call takes no arguments, so
  # repeating it for every sentence only repeats the same setup work.
  question_model, question_tokenizer = generate_questions_t5.question_model_tokenizer()
  for answer, sentences in keyword_sentence_mapping.items():
    for context in sentences:
      ques = generate_questions_t5.get_question(context, answer, question_model, question_tokenizer)
      questions.append(ques)
      contexts.append(context)
      answers.append(answer)
  ###################################################################
  # Handles exposed by the distractor-generation module. They do not depend on
  # the question being processed, so they are bound once here instead of on
  # every loop iteration. None of them is referenced again in the visible code.
  tokenizer = bdg_distractor_generation.tokenizer
  model = bdg_distractor_generation.model
  dg_model = bdg_distractor_generation.dg_model
  dg_model_pm = bdg_distractor_generation.dg_model_pm
  dg_model_both = bdg_distractor_generation.dg_model_both

  # The result lists must be created BEFORE the loop; re-creating them inside
  # it would discard every row except the one for the last question.
  Allen_answers = []
  Questions = []
  Contexts = []
  Distractors = []
  for context, question in zip(contexts, questions):
    # Get another answer using the AllenNLP model
    allen_Answer = generate_answers_allennlp.get_Answers_Allen()
    # Get distractors using the distractor-generation model
    all_options = bdg_distractor_generation.get_all_Options()
    # NOTE(review): `selection` is neither defined nor imported in this file;
    # presumably it belongs to bdg_distractor_generation -- confirm.
    distractor = selection(context, question, allen_Answer, all_options)

    Allen_answers.append(allen_Answer)
    Questions.append(question)
    Contexts.append(context)
    Distractors.append(distractor)
  ############################################################################
  # Create DataFrame (one row per generated question)
  df = pd.DataFrame({
    'context': Contexts,
    'question': Questions,
    'Answer': Allen_answers,
    'Distractors': Distractors,
  })
  # A bare `df.head()` only renders in a notebook; print it so running the
  # script actually shows the preview.
  print(df.head())