Class OpenStat.Data1
Exploratory Data Analysis of one variable
import numpy as np
import OpenStat as sta
data=np.loadtxt('filedata.txt')
d=sta.data1(data)
Attributes
----------
data : dataset (array)
datamin: minimum
datamax: maximum
r: range
mean: sample mean
std: sample standard deviation
cov: sample coefficient of variation
Q: sample quantile
Q25: sample quantile 25%
Q50: sample quantile 50%
Q75: sample quantile 75%
median: sample median
iqr: sample interquartile range
g1: sample skewness
g2: sample kurtosis
prob: sample probability
xx: domain variable
px: domain probabilities
xemp: domain empirical distribution
Pemp: empirical CDF
Qemp: empirical quantile
​
​
Methods
----------
def __init__(self,data):
input: data (array)
​
def disp_summary(self):
"""
print_summary_dataset
input:
output: print summary
"""
​
get_cdf_emp(self):
"""
evaluate the empirical cdf
input:
output: self.Femp,
self.Qemp
"""
get_cov(self):
"""
evaluate the cov of data
input:
output: self.cov
"""
​
get_datamax(self):
"""
evaluate the max of the dataset
input:
output: self.datamax
"""
​
get_datamin(self):
"""
evaluate the min of the data
input:
output: self.datamin
"""
​
get_iqr(self):
"""
evaluate the interquartile range (iqr) of the data
input:
output: self.iqr
"""
​
get_kurtosis(self, fisher=False):
"""
evaluate the kurtosis of data
input:
output: self.g2
"""
​
get_mean(self):
"""
evaluate the mean of data
input:
output: self.mean
"""
​
get_median(self):
"""
evaluate the median of the data
input:
output: self.median
"""
​
get_prob(self, lambda0, method='lower'):
"""
evaluate the probability of the data being lower/upper than a threshold lambda0
input: lambda0,
method='lower'/'upper'
output: self.prob, p(x<=lambda0) or p(x>=lambda0)
"""
​
get_quantile(self, p):
"""
evaluate the quantile of the data for given probability p
input: p, probability (e.g. p=0.25)
output: self.Q
"""
​
get_r (self):
"""
evaluate the range of data
input:
output: self.r
"""
​
get_skewness (self):
"""
evaluate the skewness of data
input:
output: self.g1
"""
​
get_std (self):
"""
evaluate the standard deviation of the data
input:
output: self.std
"""
​
get_summary (self):
"""
evaluate the statistics of data
input:
output: different statistics
"""
​
plot_cdf_emp(self):
"""
plot_cdf_emp: plot the empirical cdf
input:
output: plot, self.fig, self.ax
"""
​
plot_box(self,orient=None):
"""
plot_box: plot boxplot
input: orient='h', 'v' (optional)
output: plot
self.fig
self.ax
"""
​
plot_hist(self,stat='count',bins='auto',
color='blue',legend=False):
"""
plot histograms from data
input: stat='count', 'density', 'probability'
bins: 'auto', edges
color:
legend
output: plot
self.fig
self.ax
"""
​
plot_quantile_emp(self):
"""
plot the empirical quantile function
input:
output: plot
self.fig
self.ax
"""