-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_xml_to_csv.py
36 lines (35 loc) · 1.57 KB
/
convert_xml_to_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os
import re

import pandas as pd
import xmltodict
# Directory scanned for "*benign.xml" / "*malware.xml" event exports
# (presumably Sysmon event logs, judging by the directory name).
INPUT_DIR = '/home/kosim/final_result/group_sysmon/'
# Directory that receives one CSV per converted XML file.
OUTPUT_DIR = '/home/kosim/final_result/csv/'

# <System> children that are expanded attribute by attribute into
# "<Tag>_<Attribute>" columns instead of being stored as a single value.
ATTRIBUTE_SYSTEM_FIELDS = ('Provider', 'Execution', 'TimeCreated', 'Security')

# Matches an "&" that does NOT start one of the five predefined XML entities
# or a numeric character reference, i.e. an ampersand the parser would reject.
BARE_AMPERSAND = re.compile(
    r'&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9A-Fa-f]+);)'
)


def escape_bare_ampersands(text):
    """Return *text* with every unescaped ``&`` replaced by ``&amp;``.

    Entities that are already well-formed (``&amp;``, ``&lt;``, ``&#38;``,
    ``&#x26;`` ...) are left alone so they are not double-escaped.
    """
    return BARE_AMPERSAND.sub('&amp;', text)


def label_for_filename(filename):
    """Return 0 for ``*benign.xml``, 1 for ``*malware.xml``, else ``None``."""
    if filename.endswith('benign.xml'):
        return 0
    if filename.endswith('malware.xml'):
        return 1
    return None


def flatten_event(item, label):
    """Flatten one parsed ``<Event>`` mapping into a single flat row dict.

    Args:
        item: The xmltodict mapping for one ``<Event>`` element; its
            ``System`` and ``EventData`` entries are read.
        label: Class label stored under the ``'label'`` key of the row.

    Returns:
        A dict whose keys appear in this order: the ``System`` fields, the
        named ``EventData`` values that carry text, then ``'label'``.
    """
    row = {}

    # A missing or empty <System> is treated as "no fields".
    system = item.get('System') or {}
    for tag, value in system.items():
        if tag in ATTRIBUTE_SYSTEM_FIELDS and isinstance(value, dict):
            # xmltodict prefixes attribute names with "@"; turn e.g.
            # Provider/@Name into the column "Provider_Name".
            for attr, attr_value in value.items():
                row[tag + attr.replace('@', '_')] = attr_value
        else:
            # Plain text elements, and attribute-only elements that turned
            # out to be empty (None), are stored under the tag name as-is.
            row[tag] = value

    # <EventData/> parses to None, and some events have no EventData at all.
    event_data = item.get('EventData') or {}
    for children in event_data.values():
        if isinstance(children, dict):
            # xmltodict yields a bare dict, not a one-element list, when
            # the parent has exactly one child with that tag.
            children = [children]
        elif not isinstance(children, list):
            # Attributes or stray text on <EventData> itself: nothing to do.
            continue
        for child in children:
            # Only named children that carry text become columns.
            if isinstance(child, dict) and '@Name' in child and '#text' in child:
                row[child['@Name']] = child['#text']

    row['label'] = label
    return row


def convert_file(filename, label):
    """Convert one XML export in ``INPUT_DIR`` to a CSV in ``OUTPUT_DIR``.

    The file content is wrapped in a synthetic ``<Root>`` element before
    parsing, since the script treats it as a sequence of ``<Event>``
    elements without a single enclosing root.
    """
    with open(os.path.join(INPUT_DIR, filename), 'r') as f:
        str_xml = '<Root>' + escape_bare_ampersands(f.read()) + '</Root>'

    # An empty file gives {'Root': None}; a single event gives a dict
    # instead of a list. Normalise both to a list of event mappings.
    root = xmltodict.parse(str_xml).get('Root') or {}
    events = root.get('Event') or []
    if isinstance(events, dict):
        events = [events]

    rows = [flatten_event(event, label) for event in events]

    # Swap only the trailing ".xml"; callers guarantee the suffix is present.
    csv_name = filename[:-len('.xml')] + '.csv'
    pd.DataFrame(rows).to_csv(os.path.join(OUTPUT_DIR, csv_name), index=False)


def main():
    """Convert every ``*benign.xml`` / ``*malware.xml`` file in ``INPUT_DIR``."""
    # Create the output directory up front so a missing directory does not
    # abort the run after the first file has already been parsed.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for filename in os.listdir(INPUT_DIR):
        label = label_for_filename(filename)
        if label is None:
            continue
        convert_file(filename, label)


if __name__ == "__main__":
    main()