forked from bigdata-ustc/Neural_Cognitive_Diagnosis-NeuralCD
-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_loader.py
94 lines (81 loc) · 3.16 KB
/
data_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import json
import torch
class TrainDataLoader(object):
    '''
    Sequential mini-batch loader for the training response logs.

    The data file is parsed as JSON and indexed as a flat list of logs; from
    each log the keys 'user_id', 'exer_id', 'knowledge_code' and 'score' are
    read. Batches are served in file order (no shuffling is done here), and a
    trailing group of fewer than ``batch_size`` logs is never returned.
    '''

    def __init__(self, batch_size=32, data_file='data/train_set.json',
                 config_file='config.txt'):
        '''
        Load the training logs and the knowledge dimension.

        :param batch_size: number of logs per batch; must be positive.
        :param data_file: path of the JSON file holding the training logs.
        :param config_file: path of the text config file whose second line
            is "<a>,<b>,<knowledge_n>".
        :raises ValueError: if ``batch_size`` is not positive (otherwise
            is_end() could never become true and iteration would not stop).
        '''
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer')
        self.batch_size = batch_size
        self.ptr = 0  # index of the first log of the next batch
        with open(data_file, encoding='utf8') as i_f:
            self.data = json.load(i_f)
        self.knowledge_dim = self._read_knowledge_dim(config_file)

    @staticmethod
    def _read_knowledge_dim(config_file):
        '''Return the third comma-separated field of the config's 2nd line.'''
        with open(config_file) as i_f:
            i_f.readline()  # the first line is skipped, only the second is used
            _, _, knowledge_n = i_f.readline().split(',')
        return int(knowledge_n)

    def _multi_hot(self, knowledge_codes):
        '''Return a knowledge_dim-long 0/1 float list with 1.0 at each code - 1.'''
        knowledge_emb = [0.] * self.knowledge_dim
        for knowledge_code in knowledge_codes:
            # codes are shifted by one (presumably 1-based in the data file)
            knowledge_emb[knowledge_code - 1] = 1.0
        return knowledge_emb

    def next_batch(self):
        '''
        Return the next ``batch_size`` logs as tensors and advance the pointer.

        :return: (student ids, exercise ids, knowledge vectors, scores) as
            (LongTensor, LongTensor, Tensor, LongTensor); user and exercise
            ids are shifted by -1. Returns (None, None, None, None) once
            is_end() is true.
        '''
        if self.is_end():
            return None, None, None, None
        # is_end() guarantees the slice holds exactly batch_size logs
        batch = self.data[self.ptr:self.ptr + self.batch_size]
        input_stu_ids, input_exer_ids, input_knowledge_embs, ys = [], [], [], []
        for log in batch:
            input_stu_ids.append(log['user_id'] - 1)
            input_exer_ids.append(log['exer_id'] - 1)
            input_knowledge_embs.append(self._multi_hot(log['knowledge_code']))
            ys.append(log['score'])
        self.ptr += self.batch_size
        return (torch.LongTensor(input_stu_ids),
                torch.LongTensor(input_exer_ids),
                torch.Tensor(input_knowledge_embs),
                torch.LongTensor(ys))

    def is_end(self):
        '''Return True when fewer than ``batch_size`` logs remain unread.'''
        return self.ptr + self.batch_size > len(self.data)

    def reset(self):
        '''Rewind to the first log so the data can be iterated again.'''
        self.ptr = 0
class ValTestDataLoader(object):
    '''
    Per-student batch loader for the validation or test set.

    The data file is parsed as JSON and indexed as a list of entries, each
    providing a 'user_id' and a list of 'logs'; from every log the keys
    'exer_id', 'knowledge_code' and 'score' are read. One call to
    next_batch() yields all logs of one entry.
    '''

    def __init__(self, d_type='validation', data_file=None,
                 config_file='config.txt'):
        '''
        Load the evaluation logs and the knowledge dimension.

        :param d_type: 'validation' selects 'data/val_set.json'; any other
            value selects 'data/test_set.json'.
        :param data_file: optional explicit path of the JSON data file; when
            given it takes precedence over the path implied by ``d_type``.
        :param config_file: path of the text config file whose second line
            is "<a>,<b>,<knowledge_n>".
        '''
        self.ptr = 0  # index of the next entry to serve
        self.d_type = d_type
        if data_file is None:
            if d_type == 'validation':
                data_file = 'data/val_set.json'
            else:
                data_file = 'data/test_set.json'
        with open(data_file, encoding='utf8') as i_f:
            self.data = json.load(i_f)
        self.knowledge_dim = self._read_knowledge_dim(config_file)

    @staticmethod
    def _read_knowledge_dim(config_file):
        '''Return the third comma-separated field of the config's 2nd line.'''
        with open(config_file) as i_f:
            i_f.readline()  # the first line is skipped, only the second is used
            _, _, knowledge_n = i_f.readline().split(',')
        return int(knowledge_n)

    def _multi_hot(self, knowledge_codes):
        '''Return a knowledge_dim-long 0/1 float list with 1.0 at each code - 1.'''
        knowledge_emb = [0.] * self.knowledge_dim
        for knowledge_code in knowledge_codes:
            # codes are shifted by one (presumably 1-based in the data file)
            knowledge_emb[knowledge_code - 1] = 1.0
        return knowledge_emb

    def next_batch(self):
        '''
        Return every log of the next entry as tensors and advance the pointer.

        :return: (student ids, exercise ids, knowledge vectors, scores) as
            (LongTensor, LongTensor, Tensor, LongTensor); the entry's user id
            (shifted by -1) is repeated once per log and exercise ids are
            shifted by -1. Returns (None, None, None, None) once is_end() is
            true.
        '''
        if self.is_end():
            return None, None, None, None
        entry = self.data[self.ptr]
        logs = entry['logs']
        user_id = entry['user_id']
        input_stu_ids, input_exer_ids, input_knowledge_embs, ys = [], [], [], []
        for log in logs:
            input_stu_ids.append(user_id - 1)
            input_exer_ids.append(log['exer_id'] - 1)
            input_knowledge_embs.append(self._multi_hot(log['knowledge_code']))
            ys.append(log['score'])
        self.ptr += 1
        return (torch.LongTensor(input_stu_ids),
                torch.LongTensor(input_exer_ids),
                torch.Tensor(input_knowledge_embs),
                torch.LongTensor(ys))

    def is_end(self):
        '''Return True when every entry has been served.'''
        return self.ptr >= len(self.data)

    def reset(self):
        '''Rewind to the first entry so the data can be iterated again.'''
        self.ptr = 0