-
Notifications
You must be signed in to change notification settings - Fork 2
/
producer.py
30 lines (28 loc) · 1.17 KB
/
producer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
## simulating game data produced in real time
import pandas as pd
import subprocess
import time
from os import listdir
from os.path import isfile, join
#
# Replay recorded game data into Kafka, one match at a time.
#
# Every regular file in `mypath` is loaded as CSV, rows without a
# 'player_name' are dropped, and the remaining rows are grouped by
# 'match_id'. Each group is written to a scratch CSV and that file is piped
# into the Kafka console producer on stdin.

# Directory holding the input CSV files.
mypath = "C:/Users/karan/OneDrive/Documents/BigData-Spring2019/Project/data/data"
# Scratch directory for the per-match CSV slices handed to the producer.
temp_dir = "C:/Users/karan/OneDrive/Documents/BigData-Spring2019/Project/data/temp"
# Console-producer invocation (broker localhost:9092, topic "test").
# It is loop-invariant, so it is built once here.
cmd = ["C:/opt/spark/kafka_2.12-2.2.0/bin/windows/kafka-console-producer.bat",
       "--broker-list", "localhost:9092", "--topic", "test"]

# Only regular files are processed, in sorted filename order.
onlyfiles = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))
print(len(onlyfiles))

for file in onlyfiles:
    print(file)
    df = pd.read_csv(mypath + "/" + file)
    # Rows with no player name are discarded before grouping.
    df = df.dropna(axis=0, subset=['player_name'])
    grouped = df.groupby('match_id')

    # The counter restarts for every input file, so scratch files
    # names0.csv, names1.csv, ... are overwritten from one file to the next.
    for i, (name, group) in enumerate(grouped):
        chunk_path = temp_dir + "/names" + str(i) + ".csv"
        # header=False omits the column names; the index column is still
        # written (pandas default).
        group.to_csv(chunk_path, header=False)

        # The `with` block closes the stdin file once the producer has
        # finished, instead of leaking one open handle per match.
        with open(chunk_path) as src:
            out = subprocess.Popen(cmd, stdin=src, stdout=subprocess.PIPE)
            # communicate() waits for the producer to exit and returns a
            # (stdout, stderr) tuple; stderr is None as it is not piped.
            stdout = out.communicate()
        print(stdout)

        # NOTE(review): the pause is assumed to apply per match (pacing the
        # replay); confirm against the original indentation.
        time.sleep(2)