-
Notifications
You must be signed in to change notification settings - Fork 1
/
preprocessing.py
69 lines (57 loc) · 2.59 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import ast
import io
import json
import os
import subprocess

import ruamel.yaml
from graphqlclient import GraphQLClient
# Read every available service from the database (GraphQL endpoint).
url = 'http://ec2-3-64-237-95.eu-central-1.compute.amazonaws.com:8082/'
client = GraphQLClient(url)
result = client.execute('''query { TiltNodes(first:10000) { edges { node { meta { name language} } } } } ''')
# Parse the response as JSON instead of as a Python literal, so that
# JSON-only tokens (null/true/false) and string escapes are decoded
# correctly and names that merely contain "null" are left untouched.
result_dict = json.loads(result)["data"]["TiltNodes"]["edges"]

# Read every possible third party (data recipient) from the database.
result_company = client.execute('''query { TiltNodes { edges { node { dataDisclosed { recipients {name} } } } } } ''')
result_dict_company = json.loads(result_company)["data"]["TiltNodes"]["edges"]

# Collect the names of all services whose metadata language is German.
list_services = []
for edge in result_dict:
    meta = edge["node"]["meta"] or {}
    if meta.get("language") == "de" and meta.get("name") is not None:
        list_services.append(meta["name"])

# Append the names of all third parties. JSON null arrives as None and is
# skipped at every level (missing disclosure list, missing recipient list,
# null recipient, null name).
for edge in result_dict_company:
    for disclosure in edge["node"]["dataDisclosed"] or []:
        if not isinstance(disclosure, dict):
            continue
        for recipient in disclosure["recipients"] or []:
            if isinstance(recipient, dict) and recipient["name"] is not None:
                list_services.append(recipient["name"])

# Remove duplicates while keeping the first-seen order.
list_services = list(dict.fromkeys(list_services))

# Combine services and third parties into one string with one annotated
# example per line: "- [<name>](service_company)".
string_services = "".join(
    "- [{}](service_company)\n".format(name) for name in list_services
)
# Fill the "services" intent with all services and companies (third parties).
# NOTE(review): ruamel.yaml.load/dump with Loader=/Dumper= is the legacy
# function API; newer ruamel.yaml releases favour ruamel.yaml.YAML() --
# confirm against the pinned ruamel.yaml version before upgrading.
with open(r'data/nlu.yml', 'r', encoding="utf-8") as yaml_file:
    code = ruamel.yaml.load(yaml_file, Loader=ruamel.yaml.RoundTripLoader)

# Look the intent up directly instead of assigning to the entry while
# iterating over its keys, which fails when "examples" is not yet present.
for entry in code["nlu"]:
    if entry.get("intent") == "services":
        entry["examples"] = string_services

# Serialise first: opening the file in 'w' mode truncates it, so a failure
# during dumping must happen before the file is touched.
dump = ruamel.yaml.dump(
    code,
    default_flow_style=False,
    allow_unicode=True,
    encoding=None,
    Dumper=ruamel.yaml.RoundTripDumper,
)
with open(r'data/nlu.yml', 'w', encoding="utf-8") as yaml_file:
    yaml_file.write(dump)
# Run "rasa train" and store the result under the fixed model name "de-model".
# An argument list (no shell) avoids platform-specific quote handling, and
# check=True makes a failed training run fail this script instead of being
# silently ignored.
subprocess.run(["rasa", "train", "--fixed-model-name", "de-model"], check=True)