-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
70 lines (47 loc) · 2.37 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import copy
import os
from typing import List
import anndata as ad
import scanpy as sc
import csv_handler as csv
import model as m
import util as u
def build_model_from_csv(filename: str, separator: str = ',', rstrip: bool = True) -> m.DataMatrix:
    """Read a CSV file and wrap its rows in a ``m.DataMatrix``.

    Args:
        filename: Path of the CSV file, forwarded to ``csv.readcsv``.
        separator: Field separator, forwarded to ``csv.readcsv``.
        rstrip: Forwarded unchanged to ``csv.readcsv``.

    Returns:
        The result of ``m.DataMatrix.from_list_of_list`` applied to the rows
        returned by ``csv.readcsv``.
    """
    # ``csv`` here is the project's ``csv_handler`` module, not the stdlib one.
    rows = csv.readcsv(filename, separator=separator, rstrip=rstrip)
    return m.DataMatrix.from_list_of_list(list_of_list=rows)
def build_model_from_selected_attributes(filename: str, attributes: List[str], separator: str = ',', rstrip: bool = True) -> m.DataMatrix:
    """Load a CSV file and keep only the requested attributes.

    Args:
        filename: Path of the CSV file, forwarded to ``build_model_from_csv``.
        attributes: Names of the attributes to keep, forwarded to
            ``build_subset_from_selected_attributes``.
        separator: Field separator, forwarded to ``build_model_from_csv``.
        rstrip: Forwarded unchanged to ``build_model_from_csv``.

    Returns:
        The ``m.DataMatrix`` produced by
        ``build_subset_from_selected_attributes`` for the loaded matrix.
    """
    # Step 1: load the complete matrix from disk.
    full_matrix = build_model_from_csv(filename, separator=separator, rstrip=rstrip)
    # Step 2: narrow it down to the selected attributes.
    return build_subset_from_selected_attributes(full_matrix, attributes)
def bmfsa(f: str, a: List[str], s: str = ',', rs: bool = True) -> m.DataMatrix:
    """Short alias for ``build_model_from_selected_attributes``.

    Args:
        f: File name (passed as ``filename``).
        a: Attribute names (passed as ``attributes``).
        s: Separator (passed as ``separator``).
        rs: Passed as ``rstrip``.

    Returns:
        Whatever ``build_model_from_selected_attributes`` returns.
    """
    # Positional order matches (filename, attributes, separator, rstrip).
    return build_model_from_selected_attributes(f, a, s, rs)
def build_subset_from_selected_attributes(datamatrix: m.DataMatrix, attributes: List[str]) -> m.DataMatrix:
    """Build a new ``m.DataMatrix`` restricted to the given attributes.

    Each requested attribute name is stripped of surrounding whitespace and
    looked up in ``datamatrix.attributes``; the resulting positions are passed
    to ``get_datapoints`` of every sample to obtain its reduced datapoints.

    Args:
        datamatrix: Source matrix providing the samples, attributes, class
            labels, unique class labels and dataset name.
        attributes: Names of the attributes to keep, in the order they should
            appear in the result.

    Returns:
        A new ``m.DataMatrix`` built from the reduced samples. Class labels and
        unique class labels are deep-copied from ``datamatrix``; the dataset
        name is reused as-is.

    Raises:
        ValueError: Presumably raised by ``datamatrix.attributes.index`` when a
            stripped name is not present (assumes ``attributes`` is a list —
            confirm against ``m.DataMatrix``). The lookup now happens once up
            front, so this surfaces even if ``datamatrix.samples`` is empty.
    """
    # The positions depend only on the attribute names, so resolve them once
    # instead of repeating the lookup for every sample.
    selected_indices = [
        datamatrix.attributes.index(attribute.strip()) for attribute in attributes
    ]

    # NOTE(review): the names stored on the result are the ones passed in (not
    # stripped), although the lookup above strips them — confirm this is
    # intended.
    new_samples: List[m.Sample] = [
        m.Sample(
            # Hand every sample its own index list, as the original code did,
            # in case ``get_datapoints`` keeps or mutates its argument.
            sample.get_datapoints(list(selected_indices)),
            associated_attributes=copy.deepcopy(attributes),
            classlabel=sample.classlabel,
        )
        for sample in datamatrix.samples
    ]

    return m.DataMatrix(
        new_samples,
        attributes=copy.deepcopy(attributes),
        classlabels=copy.deepcopy(datamatrix.classlabels),
        unique_classlabels=copy.deepcopy(datamatrix.unique_classlabels),
        dataset_name=datamatrix.dataset_name,
    )
def bfsfa(d: m.DataMatrix, a: List[str]) -> m.DataMatrix:
    """Short alias for ``build_subset_from_selected_attributes``.

    Args:
        d: Source matrix (passed as ``datamatrix``).
        a: Attribute names to keep (passed as ``attributes``).

    Returns:
        Whatever ``build_subset_from_selected_attributes`` returns.
    """
    return build_subset_from_selected_attributes(datamatrix=d, attributes=a)
def bmsa(filename: str, attributes: List[str], separator: str = ',', rstrip: bool = True) -> m.DataMatrix:
    """Short alias for ``build_model_from_selected_attributes``.

    Args:
        filename: Forwarded as ``filename``.
        attributes: Forwarded as ``attributes``.
        separator: Forwarded as ``separator``.
        rstrip: Forwarded as ``rstrip``.

    Returns:
        Whatever ``build_model_from_selected_attributes`` returns.
    """
    # Positional order matches (filename, attributes, separator, rstrip).
    return build_model_from_selected_attributes(filename, attributes, separator, rstrip)
def read_as_anndata(list_of_list: List[List[float]], roundoff_decimal: int = 5, filename: str = None) -> ad.AnnData:
    """Convert a list of rows into an ``AnnData`` via a temporary CSV file.

    Every value is passed through ``u.roundoff``, the rounded rows are written
    with ``csv.writecsv`` into the ``__temp__`` folder, and the written file is
    read back with ``sc.read_csv``.

    Args:
        list_of_list: Rows of values to convert.
        roundoff_decimal: Second argument handed to ``u.roundoff`` for every
            value.
        filename: Name of the file to create inside ``__temp__``. When omitted
            (``None``), a unique ``<hex>.csv`` name is generated; previously
            the ``None`` default made ``os.path.join`` raise ``TypeError``.

    Returns:
        The object returned by ``sc.read_csv`` for the written file.
    """
    import uuid  # local import: only needed for the generated fallback name

    temp_folder: str = '__temp__'

    if filename is None:
        # The signature advertises ``filename`` as optional, so make the
        # default usable instead of failing inside ``os.path.join``.
        # NOTE(review): assumes ``csv.writecsv`` writes exactly
        # ``directory/filename`` (the read path below relies on that too).
        filename = f'{uuid.uuid4().hex}.csv'

    complete_file_path: str = os.path.join(temp_folder, filename)
    list_of_list = [[u.roundoff(value, roundoff_decimal) for value in row] for row in list_of_list]

    u.create_path_if_not_exists(temp_folder)
    csv.writecsv(filename, list_of_list, directory=temp_folder)

    # The file is left in place after reading, exactly as for a
    # caller-supplied name.
    return sc.read_csv(complete_file_path)
def rad(list_of_list: List[List[float]], roundoff_decimal: int = 5, filename: str = None) -> ad.AnnData:
    """Short alias for ``read_as_anndata``.

    Args:
        list_of_list: Forwarded as ``list_of_list``.
        roundoff_decimal: Forwarded as ``roundoff_decimal``.
        filename: Forwarded as ``filename``.

    Returns:
        Whatever ``read_as_anndata`` returns.
    """
    # Positional order matches (list_of_list, roundoff_decimal, filename).
    return read_as_anndata(list_of_list, roundoff_decimal, filename)