-
Notifications
You must be signed in to change notification settings - Fork 3
/
cumulativedistribution.py
33 lines (32 loc) · 1.46 KB
/
cumulativedistribution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import numpy as np
def cumulativedistribution(input_data, bins=50):
    """
    % (C) Nick Holschuh - Amherst College -- 2024 (Nick.Holschuh@gmail.com)
    % This function takes an input dataset and calculates a binned
    % (empirical) cumulative distribution function
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % The inputs are as follows:
    %
    %     input_data -- the array of values to use to calculate the cdf.
    %                   NaN entries in floating-point input are ignored.
    %     bins -- the number of bins to assume in calculating the cdf
    %             (passed straight through to np.histogram). Default is 50.
    %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % The outputs are as follows:
    %
    %     bin_vals -- These are the x coordinates on the cumulative
    %                 distribution, that correspond to the values on the
    %                 right edge of each bin
    %     cdf -- This is the cumulative distribution (fraction of the counted
    %            samples at or below each right bin edge)
    %     pdf -- [NOT STRICTLY CORRECT] This is the fraction (0 to 1) of the
    %            distribution that falls within each bin. This changes with
    %            bin size, so it is not a true pdf (which, in principle, has
    %            infinite bins)
    %
    %     If no samples are counted (empty or all-NaN input), cdf and pdf
    %     are filled with NaN.
    %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%
    """
    # np.histogram cannot auto-detect a bin range when the data contain NaN
    # (it raises ValueError), so NaN samples are removed before binning.
    values = np.asarray(input_data)
    if values.dtype.kind == "f":
        values = values[~np.isnan(values)]

    # Per-bin sample counts and the bin edges (one more edge than bins).
    count, bin_vals = np.histogram(values, bins=bins)

    total = count.sum()
    if total == 0:
        # Nothing was counted: the distribution is undefined. Return NaN
        # explicitly rather than dividing 0/0 and emitting a RuntimeWarning.
        pdf = np.full(count.shape, np.nan)
        cdf = pdf.copy()
    else:
        pdf = count / total
        # Accumulate the integer counts and divide once, so the final value
        # is exactly 1.0 instead of carrying float round-off from summing
        # the fractions.
        cdf = np.cumsum(count) / total

    # Drop the left-most edge so each x value is the right edge of its bin.
    return bin_vals[1:], cdf, pdf