-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
121 lines (102 loc) · 4.9 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import requests
import time
import os
import json
endpoint = 'https://api.assemblyai.com/v2/transcript'
# get token from environment variable
api_token = os.environ.get('ASSEMBLY_AI_TOKEN')
content_url = os.environ.get('CONTENT_URL')
headers = {
'authorization': api_token,
'content-type': 'application/json'
}
transcript_endpoint = 'https://api.assemblyai.com/v2/transcript'
def request_transcript():
transcript_request = {
'audio_url': content_url,
#'filter_profanity': True, # Profanity Filtering https://www.assemblyai.com/docs/core-transcription#profanity-filtering
#'punctuate': True, # Automate Punctuation and Casing https://www.assemblyai.com/docs/core-transcription#automatic-punctuation-and-casing
#'language_detection': True, # Automatic Language Detection https://www.assemblyai.com/docs/core-transcription#automatic-language-detection
#'auto_highlights': True, # Detect Important Phrases and Words https://www.assemblyai.com/docs/audio-intelligence#detect-important-phrases-and-words
#'content_safety': True, # Content Moderation https://www.assemblyai.com/docs/audio-intelligence#content-moderation
#'iab_categories': True, # Topic Detection(IAB Categories) https://www.assemblyai.com/docs/audio-intelligence#topic-detection-iab-classification
#'sentiment_analysis': True, # Sentiment Analysis https://www.assemblyai.com/docs/audio-intelligence#sentiment-analysis
#'summary_type': 'bullets', # Summary bullets https://www.assemblyai.com/docs/audio-intelligence#summarization
#'summary_type': 'gist', # Summary gist
#'summary_type': 'headline', # Summary headline
#'summary_type': 'paragraph', # Summary paragraph
#'auto_chapters': True, # Automatic Chapters https://www.assemblyai.com/docs/audio-intelligence#auto-chapters
#'entity_detection': True, # Entity Detection https://www.assemblyai.com/docs/audio-intelligence#entity-detection
'dual_channel': False
}
transcript_response = requests.post(
transcript_endpoint,
json=transcript_request,
headers=headers
)
return transcript_response.json()
def make_polling_endpoint(transcript_response):
polling_endpoint = 'https://api.assemblyai.com/v2/transcript/'
polling_endpoint += transcript_response['id']
print('Polling endpoint: ' + polling_endpoint)
return polling_endpoint
def wait_for_completion(polling_endpoint):
while True:
polling_response = requests.get(polling_endpoint, headers=headers)
polling_response = polling_response.json()
if polling_response['status'] == 'completed':
return polling_response
if polling_response['status'] == 'error':
print('Error: ' + polling_response['error'])
break
print ('Status: ' + polling_response['status'])
time.sleep(5)
def get_paragraphs(polling_endpoint):
response = requests.get(polling_endpoint + '/paragraphs', headers=headers)
response = response.json()
data = []
for para in response['paragraphs']:
data.append(para)
return data
def get_sentences(polling_endpoint):
response = requests.get(polling_endpoint + '/sentences', headers=headers)
response = response.json()
data = []
for para in response['sentences']:
data.append(para)
return data
def main():
# save request transcript
transcript_response = request_transcript()
json_object = json.dumps(transcript_response, indent=4)
with open('data.request.json', 'w') as outfile:
outfile.write(json_object)
# poll and wait
polling_endpoint = make_polling_endpoint(transcript_response)
transcription = wait_for_completion(polling_endpoint)
# save transcription
json_object = json.dumps(transcription, indent=4)
with open('data.transcription.json', 'w') as outfile:
outfile.write(json_object)
# save paragraphs
paragraphs = get_paragraphs(polling_endpoint)
json_object = json.dumps(paragraphs, indent=4)
with open('data.paragraphs.json', 'w') as outfile:
outfile.write(json_object)
# save sentences
sentences = get_sentences(polling_endpoint)
json_object = json.dumps(sentences, indent=4)
with open('data.sentences.json', 'w') as outfile:
outfile.write(json_object)
# save srt
response = requests.get(polling_endpoint + '/srt', headers=headers)
response = response.text
with open('data.srt.txt', 'w') as outfile:
outfile.write(response)
# save vtt
response = requests.get(polling_endpoint + '/vtt', headers=headers)
response = response.text
with open('data.vtt.txt', 'w') as outfile:
outfile.write(response)
if __name__ == '__main__':
main()