-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimport_ethnicity.m
81 lines (69 loc) · 1.82 KB
/
import_ethnicity.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
%% Load data
% Read cells A1:B616 of the first worksheet as numeric data.
filename = 'test3_stats.xlsx';
sheet = 1;
xlRange = 'A1:B616';
subsetA = xlsread(filename,sheet,xlRange);
%% Clean Data
nf = 6; % number of features loaded
% Drop NaN entries (presumably the non-numeric cells of the sheet - confirm).
% Logical indexing returns the remaining values as one column vector.
subsetA = subsetA(~isnan(subsetA));
% The values are regrouped in runs of nf, so their count must be a multiple of nf.
if isempty(subsetA) || mod(numel(subsetA), nf) ~= 0
    error('import_ethnicity:badFeatureCount', ...
        'Expected a non-empty multiple of %d numeric values, found %d.', ...
        nf, numel(subsetA));
end
% Each consecutive run of nf values becomes one row of subsetA_M; the number
% of rows is derived from the data instead of being hard-coded.
subsetA_M = reshape(subsetA, nf, []).';
% Per-column statistics over the rows.
mean_ethn = mean(subsetA_M, 1);
% NOTE: the second argument of std is the weight (1 => normalise by N), not
% the dimension. The dimension is passed explicitly as the third argument so
% the result is also column-wise when subsetA_M has a single row.
std_ethn = std(subsetA_M, 1, 1);
%% First visualization
%% mean and standard deviation
% One filled marker per feature column: the per-column mean first, then the
% per-column standard deviation overlaid on the same axes.
feature_idx = 1:numel(mean_ethn); % follows the number of loaded features
figure
scatter(feature_idx, mean_ethn, 'filled');
hold on
scatter(feature_idx, std_ethn, 'filled');
%[a, b]= sort(std_ethn);
%% ethnicity distribution per CA
% The x axis spans the rows actually present in subsetA_M rather than a
% hard-coded count. Column meaning is taken from the legends below.
row_idx = 1:size(subsetA_M, 1);
figure
plot(row_idx, subsetA_M(:,3:5), 'LineWidth', 1.5);
legend('black', 'hispanic', 'white');
figure
plot(row_idx, subsetA_M(:,[2,6]), 'LineWidth', 1.5);
legend('asian', 'other');
%% Normalization
% Divide every row by its first-column entry; column 1 becomes all ones.
% NOTE(review): a zero in column 1 would produce Inf/NaN here - confirm the
% data never contains one.
subsetA_M = bsxfun (@rdivide, subsetA_M, subsetA_M(:,1));
%% Features selection
% Keep columns 1, 3, 4 and 5 of the normalised matrix.
subsetA_M = subsetA_M(:, [1, 3 , 4, 5]);
%% Get adjacency_matrix W from a exponential kernel
t = 1e-5;
% Named kernel_std rather than std: a variable called std shadows the
% built-in std function used earlier in this script, which breaks a re-run
% in the same workspace.
kernel_std = 1e-1 * .5; %1e4*0.8;
% Presumably kernel_std is the kernel width and t a threshold - confirm
% against get_adjacency_matrix.
W = get_adjacency_matrix(subsetA_M, kernel_std, t);
% Open a dedicated figure so the image does not overwrite the current plot.
figure
imagesc(W);
%% Graph initialization
% X is the signal analysed below. It is never assigned in this script, so it
% must already exist in the workspace; fail early with a clear message if not.
% Presumably X has one row per graph vertex and one column per time step
% (size(X,2) is passed to gsp_jtv_graph) - confirm.
if exist('X', 'var') ~= 1
    error('import_ethnicity:missingSignal', ...
        'Variable X (signal matrix) must be defined before running this section.');
end
% Build the graph from the adjacency matrix W, with the selected features
% passed as the second argument of gsp_graph.
G = gsp_graph(W,subsetA_M);
G = gsp_compute_fourier_basis(G);
G = gsp_jtv_graph(G,size(X,2));
%% Compute spectrum
X_GFT = gsp_gft(G, X);
X_JFT = gsp_jft(G, X);
%% Visualization
%% - GFT
% Plot the row-wise average of the GFT coefficients in the spectral domain.
figure
gsp_plot_signal_spectral(G, mean(X_GFT, 2))
%% - JFT
% Subtract the global mean of the JFT coefficients before plotting.
X_JFT = X_JFT - mean(X_JFT(:));
figure
% logscale = 1 -> log/db scale; dim = '3d' -> plot a 3D figure
param = struct('logscale', 1, 'dim', '3d');
% param.NFFT = 1024; % upsample the number of frequency points in the time domain
gsp_plot_jft(G, X_JFT, param);
xlabel('\lambda_l')
ylabel('\omega')
%% Show graph and W
% The same param struct is extended here, so logscale and dim are passed on
% to gsp_plot_signal as well.
[param.vertex_size, param.show_edges] = deal(100, 1);
% Adjacency matrix as an image in its own figure.
figure
imagesc(W);
% Row-wise average of X drawn on the graph in a second figure.
figure;
gsp_plot_signal(G, mean(X, 2), param)
%% Import Labels
% Open the label file and stop with a clear message if that fails, instead
% of passing an invalid identifier to textscan.
[fid, open_msg] = fopen('test4.csv');
if fid == -1
    error('import_ethnicity:cannotOpenLabels', ...
        'Cannot open test4.csv: %s', open_msg);
end
% The unused variable "format" ('%s %s') was removed: it shadowed the
% built-in format command and was never passed to textscan.
try
    % Read the first string token of the file as the header.
    C_headers = textscan(fid,'%s',1);
    % Read the remaining comma-delimited fields as strings into one cell
    % column. Presumably 'headerlines',1 skips the rest of the header line on
    % this resumed scan - confirm against the textscan documentation.
    test_data = textscan(fid, '%s', 'headerlines',1,'delimiter',',');
catch read_err
    % Release the file handle before propagating the error.
    fclose(fid);
    rethrow(read_err);
end
fclose(fid);
%%