-
Notifications
You must be signed in to change notification settings - Fork 20
/
transcript-format.js
162 lines (137 loc) · 5.22 KB
/
transcript-format.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
// This command is used to format the transcript files
// Usage
// Use command: node transcript-format [add transcript filename]
'use strict';
const fs = require('fs');
const dir = './src/transcripts/';
const panelists = require('./content/panelists.json');
const guests = require('./content/episodes.json');
const { functionsIn } = require('lodash');
// Transcript text; every formatting step below rewrites this variable.
let newContent;
// First CLI argument after the script name: the transcript file to format.
let fileName = process.argv[2];
// Unique guest names, filled in further down from the episode data.
const guestNames = [];

// Without a filename there is nothing to format. Exit with a usage hint
// instead of crashing with a TypeError on `undefined.includes`.
if (!fileName) {
  console.error('Usage: node transcript-format [transcript filename]');
  process.exit(1);
}

// if fileName doesn't have .js file extension add it
// (endsWith checks the actual extension; includes() would also accept a
// ".js" that appears in the middle of the name)
if (!fileName.endsWith('.js')) {
  fileName = `${fileName}.js`;
}

// get the file content
newContent = fs.readFileSync(dir + fileName, 'utf-8');

// remove transcribed by otter.ai
// The credit line is swapped for a closing </p> tag.
newContent = newContent.replace('Transcribed by https://otter.ai', '</p>');
// replace panelist names with formatted speaker labels
// Every "<name> " in the transcript becomes the start of a new paragraph with
// the name in bold. split/join treats the name as literal text, so regex
// metacharacters in a name cannot mis-match or throw, and every occurrence is
// rewritten in one pass.
panelists.forEach(function({ name: panelistName }) {
  newContent = newContent
    .split(`${panelistName} `)
    .join(`</p>\n<p><strong>${panelistName}</strong><br />`);
});
// build an array of all guest names
// `guests` is the data loaded from episodes.json; each entry carries its own
// `guests` list. Names are collected once each, in first-seen order.
for (const episode of guests) {
  // an empty guests list simply contributes nothing
  for (const { name } of episode.guests) {
    if (guestNames.includes(name) === false) {
      guestNames.push(name);
    }
  }
}
// find guest name strings and turn them into speaker labels
// Every "<name> " becomes the start of a new paragraph with the name in bold.
// split/join matches the name as literal text (no regex metacharacter
// surprises) and rewrites every occurrence in one pass.
guestNames.forEach(function(guestName) {
  newContent = newContent
    .split(`${guestName} `)
    .join(`</p>\n<p><strong>${guestName}</strong><br />`);
});
// find and format references to All
// Each "All " marker opens a new paragraph labelled "All"; nothing happens
// when the transcript contains no such marker.
newContent = newContent.replace(/All /g, '</p>\n<p><strong>All</strong><br />');
// fix weird references to Ryan's twitter
const ryanTwitter = ['Burgess D Ryan'];
ryanTwitter.forEach(function(string) {
  // split/join rewrites every occurrence; replace() with a string pattern
  // would stop after the first one.
  newContent = newContent.split(string).join('@burgessdryan');
});
// Fix references to Jem's name
// "Jim" and "Gem" are rewritten to "Jem" everywhere in the transcript, not
// just at the first hit. The word boundaries keep longer words that merely
// contain those letters untouched and leave trailing punctuation
// ("Gem." / "Gem,") in place.
newContent = newContent.replace(/\b(?:Jim|Gem)\b/g, 'Jem');
// Fix references to Mars' and Augustus' names
// Each pair is [misspelling as it appears in the transcript, corrected spelling].
const nameCorrections = [
  [/Mars Julian/g, 'Mars Jullian'],
  [/Augustus Yun/g, 'Augustus Yuan'],
];
for (const [misspelled, corrected] of nameCorrections) {
  newContent = newContent.replace(misspelled, corrected);
}
// replace weird transcription errors
// website strings
const websiteStrings = ['front end happy hour comm', 'front happier.com', 'front and happy hour dot com', 'front end happier, calm'];
websiteStrings.forEach(function(string) {
  // split/join fixes every occurrence of the variant; replace() with a string
  // pattern would only fix the first one.
  newContent = newContent.split(string).join('FrontEndHappyHour.com');
});
// replace references to front end h h to make it a twitter handle
const fehhTwitterStrings = [' front end hh', ' front end h h', ' front end, h h', ' front end HGH', 'front end, H H', 'front end ah ah'];
fehhTwitterStrings.forEach(function(string) {
  // split/join fixes every occurrence of the variant; replace() with a string
  // pattern would only fix the first one.
  newContent = newContent.split(string).join(' @frontendhh');
});
// replace chatGPT references
const chatGPTStrings = [' chat GVT', ' chat GPT', ' Chaturthi GPT', ' chat DBT'];
chatGPTStrings.forEach(function(string) {
  // split/join fixes every occurrence of the variant; replace() with a string
  // pattern would only fix the first one.
  newContent = newContent.split(string).join(' ChatGPT');
});
// create website link for FrontEndHappyHour.com
// Every mention is linked, not only the first. The href is lowercase, so the
// inserted markup does not itself contain the case-sensitive search text
// outside the link label, and split/join works on the original text in a
// single pass.
newContent = newContent
  .split('FrontEndHappyHour.com')
  .join('<a href="https://frontendhappyhour.com">FrontEndHappyHour.com</a>');
// create twitter links
// One global pass wraps each @handle exactly where it occurs. Replacing
// handle-by-handle with a string pattern always hits the first occurrence in
// the text, so a repeated handle would be wrapped again inside the anchor
// that was just inserted while later mentions stayed unlinked.
// \w+ (rather than \w*) skips a bare "@" with no handle after it.
newContent = newContent.replace(/@\w+/g, function(twitterName) {
  const noAt = twitterName.slice(1);
  return `<a href="https://twitter.com/${noAt}">${twitterName}</a>`;
});
// remove the first </p> tag
// (speaker labels are inserted as "</p>\n<p>...", so the first closing tag in
// the text has no paragraph to close)
newContent = newContent.replace('</p>', '');

// add a console message if a speaker was missed and wasn't labeled
const unknownSpeaker = newContent.match(/Unknown Speaker /g);
if (unknownSpeaker !== null) {
  // check to see how many unknown speakers were identified.
  // The content is left unwrapped in this case and is still saved below.
  if (unknownSpeaker.length <= 1) {
    console.log(`Unknown speaker identified`);
  } else {
    console.log(`Unknown speaker identified ${unknownSpeaker.length} times`);
  }
} else {
  // If there are no unknown speakers identified finish content and wrap JS module.
  // The transcript ends up inside a template literal in the generated file, so
  // backslashes, backticks and "${" are escaped first; otherwise they would
  // terminate the literal early or be evaluated as code/escape sequences.
  const escapedContent = newContent.replace(/[\\`]|\$\{/g, '\\$&');
  // add JS module exports to wrap content
  newContent = `module.exports = function() {
const transcript =\`
${escapedContent}\`;
return transcript;
};`;
}

// save updated content
fs.writeFileSync(dir + fileName, newContent, 'utf-8');