Skip to content

Commit

Permalink
add single cell preprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
Debabrata Acharya authored and Debabrata Acharya committed Jul 14, 2020
1 parent 76687ec commit 6f9ad1b
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 14,304 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,5 @@ dmypy.json
pytoolkit.iml
test.py
output/

__temp__/
_temp_/
5,899 changes: 0 additions & 5,899 deletions __temp__/J6CASJX938-cell_filtered.csv

This file was deleted.

8,390 changes: 0 additions & 8,390 deletions __temp__/J6CASJX938.csv

This file was deleted.

6 changes: 0 additions & 6 deletions mathematics.py

This file was deleted.

16 changes: 9 additions & 7 deletions preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import model
import data
import normalization as nz
import mathematics as mt
import util as u
import statistics as st
import copy
Expand All @@ -13,6 +12,7 @@
from scipy import sparse
import csv_handler as csv
import os
import math


def filter_csv_by_sd(filename: str, attr_count: int, separator: str = ',', rstrip: bool = True) -> model.DataMatrix:
Expand All @@ -31,7 +31,7 @@ def zscore_normalize(datamatrix: model.DataMatrix, roundoff: bool = True, decima
for i in range(0, datamatrix.attribute_count()):
attributes: List[float] = datamatrix.get_float_attribute_list(i)
nz_attributes: List[float] = [
mt.roundoff(nz.zscore(attr, attributes), decimal_place) for attr in attributes
u.roundoff(nz.zscore(attr, attributes), decimal_place) for attr in attributes
] if roundoff else [
nz.zscore(attr, attributes) for attr in attributes
]
Expand All @@ -55,7 +55,7 @@ def minmax_normalize(
for i in range(0, datamatrix.attribute_count()):
attributes: List[float] = datamatrix.get_float_attribute_list(i)
nz_attributes: List[float] = [
mt.roundoff(nz.minmax(attr, min(attributes), max(attributes), scaled_min, scaled_max), decimal_place) for attr in attributes
u.roundoff(nz.minmax(attr, min(attributes), max(attributes), scaled_min, scaled_max), decimal_place) for attr in attributes
] if roundoff else [
nz.minmax(attr, min(attributes), max(attributes), scaled_min, scaled_max) for attr in attributes
]
Expand Down Expand Up @@ -130,7 +130,7 @@ def fasd(datamatrix: model.DataMatrix, attr_count: int) -> model.DataMatrix:
return filter_attributes_by_sd(datamatrix, attr_count)


def filter_cells(datamatrix: model.DataMatrix, min_counts: int) -> model.DataMatrix:
def filter_cells(datamatrix: model.DataMatrix, min_counts: int, roundoff_decimal: int = 5) -> model.DataMatrix:

list_of_list: List[List[float]] = datamatrix.get_list_of_list(append_attribute_labels=False, append_classlabels=False)
cell_filtered_lol: List[Union[List[str], List[Union[float, str]]]] = list()
Expand All @@ -141,6 +141,8 @@ def filter_cells(datamatrix: model.DataMatrix, min_counts: int) -> model.DataMat
cf_filename: str = filehash + '-cell_filtered.csv'
complete_file_path: str = os.path.join(temp_folder, filename)

list_of_list = [[u.roundoff(value, roundoff_decimal) for value in row] for row in list_of_list]

u.create_path_if_not_exists(temp_folder)
pd.DataFrame(list_of_list).to_csv(complete_file_path, index=False, index_label=False, header=False)

Expand All @@ -150,7 +152,7 @@ def filter_cells(datamatrix: model.DataMatrix, min_counts: int) -> model.DataMat
sc.pp.filter_cells(sc_object, min_counts=min_counts)

for row in sc_object.X:
cell_filtered_lol.append(row.tolist())
cell_filtered_lol.append([u.roundoff(value, roundoff_decimal) for value in row.tolist()])

# delete
count: int = 0
Expand Down Expand Up @@ -187,5 +189,5 @@ def filter_cells(datamatrix: model.DataMatrix, min_counts: int) -> model.DataMat



def fc(datamatrix: model.DataMatrix, min_counts: int) -> model.DataMatrix:
return filter_cells(datamatrix, min_counts)
def fc(datamatrix: model.DataMatrix, min_counts: int, rd: int = 5) -> model.DataMatrix:
return filter_cells(datamatrix, min_counts, roundoff_decimal=rd)
8 changes: 7 additions & 1 deletion util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import string
import random
import os
import math


def hash(length=10) -> str:
Expand All @@ -19,13 +20,18 @@ def create_path_if_not_exists(path: str):
os.mkdir(path)


def roundoff(value: float, decimal_place: int) -> float:
    """Round ``value`` up to ``decimal_place`` decimal places.

    The value is multiplied by ``10 ** decimal_place``, passed through
    ``math.ceil`` and divided back, so the result is always rounded towards
    positive infinity (not to the nearest step).
    """
    # Keep the scale factor in its own name instead of overwriting the
    # ``decimal_place`` parameter.
    scale = pow(10, decimal_place)
    scaled_up = math.ceil(value * scale)
    return scaled_up / scale


def equal_lists(list1: List[float], list2: List[float]) -> bool:

if len(list1) != len(list2):
return False

for i in range(len(list1)):
if str(list1[i]) != str(list2[i]):
if not math.isclose(list1[i], list2[i], rel_tol=0.00001) and not math.isclose(list1[i], list2[i], abs_tol=0.00001):
return False

return True
Expand Down

0 comments on commit 6f9ad1b

Please sign in to comment.