-
Notifications
You must be signed in to change notification settings - Fork 1
/
reader.py
174 lines (122 loc) · 4.37 KB
/
reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import csv, itertools
csvfile=open('for Sequence_mining (full).csv', 'r')
# open file with questionnaire for forming sequences
sreader = csv.reader(csvfile, delimiter=';', quotechar='|')
names=sreader.next()# stores attributes' names
#cnt=0
dataList=[]
for row in sreader:
# reads all lines in the csv file
# and stores them in a dictionary with the value of an attribute
dataDic={}
attrInd=0
for cell in row:
if cell<>'':
dataDic[names[attrInd]]=int(cell)
attrInd+=1
dataList.append(dataDic)
#cnt+=1
#if cnt==20: break
csvfile.close()
def DataListToPairsList(DataList, names):
#converts dictionaries with values to values 1 or 0 for pairs
# (a,b) depending on whether a precedes b
PairsList=[]
attrs=[]
for pair in itertools.product(names,names):
if pair[0]<>pair[1]: attrs.append(pair)
for row in DataList:
lst=[]
for attr in attrs:
if attr[0] in row.keys() and attr[1] in row.keys() and row[attr[0]]<=row[attr[1]]:
lst.append(attr)
PairsList.append(lst)
return PairsList
def DataListToSequenceList(DataList, names):
#maps attributes to a sequence based on sorting them by age in an ascending order
SequenceList=[]
for row in DataList:
SequenceList.append(sorted(row.keys(), key=row.get))
return SequenceList
def DataListToSequenceList2(DataList, names):
# maps attributes to a sequence based on sorting them by age in an ascending order
# taking into account equal ages
SequenceList=[]
#names=sorted(names)
for row in DataList:
tempSeq=sorted(row.keys(), key=row.get)
sequence=[]
prevEv=''
for ev in tempSeq:
if prevEv=='':
sequence.append([ev])
#print row
elif row[prevEv]==row[ev]:
#print row
sequence[-1].append(ev)
else:
#print row
sequence.append([ev])
prevEv=ev
SequenceList.append(sequence)
return SequenceList
def PairsToContext(pairs, names, filename):
outfile=open(filename+'.txt', 'w')
attrs=[]
for pair in itertools.product(names,names):
if pair[0]<>pair[1]: attrs.append(pair)
outfile.write('\t'.join([str(attr) for attr in attrs])+'\n\n')
for el in pairs:
line=[]
for attr in attrs:
if attr in el:
line.append('1')
else: line.append('0')
outfile.write('\t'.join(line)+'\n')
outfile.close()
return []
def SequenceToSPMF(SequenceList, filename):
outfile=open(filename+'.txt', 'w')
for seq in SequenceList:
for el in seq:
if len(el)==1:
outfile.write(el[0]+' -1 ')
else:
for el2 in el:
outfile.write(el2+' ')
outfile.write('-1 ')
outfile.write('-2\n')
print filename
outfile.close()
return []
def AttrAndSeqFusion(SequenceList, AttrFileName):
csvfile=open(AttrFileName, 'rb')
areader = csv.reader(csvfile, delimiter=';', quotechar='|')
names=areader.next()# stores attributes' names
#cnt=0
dataList=[]
for row in areader:
# reads all lines in the csv file
# and stores in a list the value of an attribute
attrList=[]
attrInd=0
for cell in row:
attrList.append([str(names[attrInd])+'='+str(cell)])
attrInd+=1
dataList.append(attrList)
#cnt+=1
#if cnt==20: break
csvfile.close()
[dataList[i].extend(SequenceList[i]) for i in range(len(dataList))]
return dataList
def exp1():
#dl= DataListToPairsList(dataList, names)
ds= DataListToSequenceList2(dataList, names)
#PairsToContext(dl, names, 'pairs_context')
#SequenceToSPMF(ds, 'sequence_context(p_and_m)')
fl=AttrAndSeqFusion(ds,'SocialAttributes.csv')
SequenceToSPMF(fl, 'socAttrAndSeqFusion(full)')
def exp2():
ds= DataListToSequenceList2(dataList, names)
SequenceToSPMF(ds, 'Seq')
exp2()