Implementation of "Early screening of potential breakthrough technologies with enhanced interpretability: A patent-specific hierarchical attention network model"
Our experimental setup is as follows. First, install the dependencies:
pip install -r requirements.txt
Then load the pretrained language models compared in our experiments. Note that each snippet below overwrites tokenizer and model, so load only the checkpoint you need:

from transformers import AutoTokenizer, AutoModel, AutoModelForMaskedLM
# PatentBERT
tokenizer = AutoTokenizer.from_pretrained("dheerajpai/patentbert")
model = AutoModelForMaskedLM.from_pretrained("dheerajpai/patentbert")

# BERT (arXiv:1810.04805)
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
model = AutoModelForMaskedLM.from_pretrained("google-bert/bert-base-uncased")

# SciBERT (arXiv:1903.10676)
tokenizer = AutoTokenizer.from_pretrained("allenai/scibert_scivocab_uncased")
model = AutoModel.from_pretrained("allenai/scibert_scivocab_uncased")

# BioBERT
tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-base-cased-v1.2")
model = AutoModelForMaskedLM.from_pretrained("dmis-lab/biobert-base-cased-v1.2")

# PubMedBERT
tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext")
model = AutoModel.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext")