-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexampleGerman.py
73 lines (49 loc) · 2.84 KB
/
exampleGerman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from core.discovery import get_graphs
import networkx as nx
import pandas as pd
import os
from langchain.chat_models import ChatOpenAI
def load_germandataset(nodes, data_path="data/german_data_credit_dataset.csv",
                       codes_path="dict_german.txt"):
    '''
    Read and preprocess the German credit dataset.

    Steps performed:
      1. load the CSV at ``data_path``;
      2. binarize ``classification`` (raw 1 -> 1 'good', raw 2 -> 0 'bad');
      3. replace every categorical column by its integer category code and
         remember the code -> label lists;
      4. derive a ``gender`` column from the ``statussex`` codes
         (1 = female, 0 = male);
      5. cast everything to float64, then ``classification`` to int32;
      6. write ``str()`` of the code -> label dictionary to ``codes_path``.

    Args:
        nodes: column label(s) to return; used directly as ``df[nodes]``.
        data_path: CSV file to read. Defaults to the original hard-coded path.
        codes_path: text file that receives the category dictionary.
            Defaults to the original hard-coded path.

    Returns:
        The preprocessed data restricted to ``nodes``.

    Raises:
        KeyError: if a requested or expected column is missing from the CSV.
    '''
    df = pd.read_csv(data_path)

    # Labels of the categorical columns; all remaining columns are treated as
    # numeric and only go through the float cast below.
    catvars = ['existingchecking', 'credithistory', 'purpose', 'savings', 'employmentsince',
               'statussex', 'otherdebtors', 'property', 'otherinstallmentplans', 'housing', 'job',
               'telephone', 'foreignworker']

    # Binarize the target: 0 = 'bad' credit, 1 = 'good' credit.
    # Assign the result back to the column instead of calling
    # ``replace(..., inplace=True)`` on ``df.classification``: that chained
    # in-place call does not update ``df`` under pandas Copy-on-Write.
    df["classification"] = df["classification"].replace([1, 2], [1, 0])

    # Encode each categorical column as integer codes and keep the label list,
    # e.g. ['A11', 'A12', 'A13', 'A14'], so that codes can be decoded later.
    dict_categorical = {}
    for c in catvars:
        as_category = df[c].astype("category")
        dict_categorical[c] = list(as_category.cat.categories)
        df[c] = as_category.cat.codes

    # Gender from the statussex code: 1 = female, 0 = male. Codes without an
    # entry (e.g. -1 for a missing value) become NaN.
    # NOTE(review): codes are positions in the sorted category list, so this
    # mapping presumably relies on the statussex labels sorting in the
    # expected order with none of the earlier ones absent - confirm against
    # the data file.
    gender_by_code = {0: 0, 1: 1, 2: 0, 3: 0, 4: 1}
    df["gender"] = df["statussex"].map(gender_by_code)

    # All features as float, target as a 32-bit integer.
    df = df.astype("float64")
    df["classification"] = df["classification"].astype("int32")

    # Save the code -> label lists next to the run.
    with open(codes_path, 'w') as f:
        f.write(str(dict_categorical))

    return df[nodes]
# Free-text description of every column used in this example, keyed by the
# column label. The long texts are wrapped with implicit string concatenation;
# the resulting values are single-line strings.
descriptions = {
    "gender": (
        "Individual's gender, potentially influencing risk profile, "
        "financial inclusion, and product design requirements in "
        "credit lending."
    ),
    "age": (
        "Individual's age, affecting eligibility, health status, and "
        "financial behavior which could impact credit risk assessment."
    ),
    "creditamount": (
        "Requested loan amount, providing insight into the individual's "
        "borrowing needs and repayment capacity."
    ),
    "duration": (
        "Loan repayment period, reflecting the loan term from "
        "disbursement to final payment, crucial in assessing "
        "repayment capability."
    ),
    "classification": (
        "Derived prediction of creditworthiness based on other features, "
        "used to gauge the individual's risk level for lenders and "
        "estimate probability of timely repayment or default. "
        "This feature cannot cause any variation in other features."
    ),
}
# Features handed to get_graphs as the "immutable" list.
immutable_features = ["gender", "age"]

# Load only the columns that have an entry in `descriptions`.
df = load_germandataset(
    ["gender", "age", "creditamount", "duration", "classification"]
)

# GPT-4 chat model with temperature 0.
llm = ChatOpenAI(model="gpt-4", temperature=0)

# Run the graph discovery. The three string arguments are presumably the
# domain context, the target column and the output location - confirm
# against core.discovery.get_graphs.
result = get_graphs(
    df,
    descriptions,
    immutable_features,
    "credit lending in germany",
    "classification",
    "results",
    llm,
)