-
Notifications
You must be signed in to change notification settings - Fork 0
/
format_extrator.py
54 lines (42 loc) · 1.84 KB
/
format_extrator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import csv
import json
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
import multiprocessing
# Select the 'forkserver' start method for multiprocessing. This statement
# sits at module level, so it runs whenever the module is imported or executed.
# NOTE(review): no worker process is started anywhere in the visible code, and
# the multiprocessing docs say set_start_method() should be called at most
# once, under the ``if __name__ == '__main__'`` guard -- confirm whether this
# call is still needed and consider moving it there.
multiprocessing.set_start_method('forkserver')
def csvWriter(list):
    """Append a single row to ``data/sorted_csv.csv``.

    The file is opened in append mode on every call, so rows accumulate
    across calls (and across runs of the script).

    Args:
        list: Iterable of field values that make up the row. The name
            shadows the builtin ``list`` inside this function only; it is
            kept unchanged so existing keyword callers keep working.
    """
    # newline="" hands line-ending control to the csv module. Without it,
    # platforms that translate "\n" on write turn the writer's "\r\n" into
    # "\r\r\n", which shows up as a blank line after every row.
    with open("data/sorted_csv.csv", "a", newline="") as csvfile:
        csv.writer(csvfile).writerow(list)
# Column names for the CSV header row, listed in the same order in which
# extractFromJson appends values to each row.
headers = ['created_at','location','followers_count','friends_count',
           'listed_count','favourites_count','sentiment_class',
           'sentiment_pos','sentiment_neg','retweet_count']

# Parse the training dataset at module level. The ``with`` block closes the
# file handle as soon as parsing finishes (it was previously left open for the
# lifetime of the process); ``myJsonString`` stays bound to the closed file.
with open('data/train.json') as myJsonString:
    jsonObjects = json.load(myJsonString)
def extractFromJson(jsonObjects):
    """Flatten each JSON object into a row and append it via ``csvWriter``.

    For every object the row holds, in order: ``created_at`` with the
    substring ``"+0000 "`` removed, five fields of the nested ``user``
    mapping (``location``, ``followers_count``, ``friends_count``,
    ``listed_count``, ``favourites_count``), the first three items of the
    TextBlob sentiment computed on ``text`` (class, positive score, negative
    score), and ``retweet_count``.

    Args:
        jsonObjects: Iterable of mappings providing the keys listed above.
            A missing key propagates as the mapping's lookup error before
            that object's row is written.

    Returns:
        None. Output is produced only through ``csvWriter``.
    """
    # One analyzer is shared by every TextBlob. NaiveBayesAnalyzer trains its
    # classifier lazily per instance, so building a new one for each object
    # (as was done before) repeats that training on every iteration.
    analyzer = NaiveBayesAnalyzer()

    for jsonObject in jsonObjects:
        row = [jsonObject['created_at'].replace("+0000 ", "")]

        user = jsonObject['user']
        row.append(user['location'])
        row.append(user['followers_count'])
        row.append(user['friends_count'])
        row.append(user['listed_count'])
        row.append(user['favourites_count'])

        sentiment_values = TextBlob(jsonObject['text'], analyzer=analyzer).sentiment
        row.append(sentiment_values[0])  # class
        row.append(sentiment_values[1])  # pos
        row.append(sentiment_values[2])  # negative

        row.append(jsonObject['retweet_count'])
        csvWriter(row)
if __name__ == '__main__':
    # Script entry point: reload the dataset and (re)create data/nb_csv.csv
    # containing only the header row.
    # NOTE(review): extractFromJson is never called here, and csvWriter appends
    # to data/sorted_csv.csv rather than data/nb_csv.csv -- confirm intent.
    datasetCsv = 'data/sorted_dataset.csv'  # not referenced in the visible code
    datasetJson = 'data/train.json'
    # Not referenced in the visible code; the row written below uses the
    # module-level ``headers`` list instead.
    csvHeaders = ['created_at', 'location', 'followers_count', 'friends_count',
                  'listed_count', 'favourites_count']

    # Close the dataset file once parsed instead of leaking the handle.
    with open(datasetJson) as myJsonString:
        jsonObjects = json.load(myJsonString)

    # newline="" is what the csv module expects of files it writes to; it
    # prevents doubled line endings on platforms that translate "\n".
    # The ``with`` block closes the file, so no explicit close() is needed.
    with open("data/nb_csv.csv", "w", newline="") as outfile:
        writer = csv.writer(outfile)
        writer.writerow(headers)