Class OpenStat.data2
Exploratory Data Analysis of multiple variables
import numpy as np
import OpenStat as sta
dataset=np.loadtxt('filename.txt')
features=['f1','f2',...,'fn']
D=sta.data2(dataset, features=features)
Attributes
Attributes
-----------
dataset : dataset (array)
dataset_df: dataset (Dataframe)
features: feature names (list)
d: feature objects (list of sta.data1)
R: correlation matrix (array)
R_df: correlation matrix (dataframe)
r: correlation vector (array)
summary: summary of the dataset (dataframe)
fig: figure (fig)
ax: subplots (ax)
Methods
def __init__(self,dataset, features=None):
"""
obj: class data2
input: dataset (array)
features (list)
output: dataset (array)
dataset_df (dataframe)
features (list)
d: feature objects (list of sta.data1)
R: correlation matrix (array)
R_df: correlation matrix (dataframe)
r: correlation vector (array)
"""
​
def disp_summary(self, content='reduced', output='yes'):
"""
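disp_summary: display a summary of the dataset
input: content: selects the summary content (str, default 'reduced'); other accepted values are not listed here
output='yes','no': print the summary on screen (str)
output: summary (dataframe): summary of the dataset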
"""
​
def plot(self,x0=0,y0=-1, hue=None, size=None, color='blue',
linecolor='red',linewidth=2, hist='no',regression='no',
pair='no', box='no'):
"""
plot: plot of the dataset
input: x0: index of the x-feature, starting from "0" (int)
y0: index of the y-feature, starting from "0" (int)
hue: hue option (default None); its meaning is not described here
size: size of the scatter plot points (int)
color: color of the scatter plot points (str)
linecolor: color of the regression line in the scatter plot (str)
linewidth: line width of the regression line in the scatter plot (int)
hist='yes','no': plot histograms with the scatter plot (if activated, seaborn's jointplot is used) (str)
regression='yes','no': plot the regression line with the scatter plot (str)
pair='yes','no': plot the seaborn pairplot (str)
box='yes','no': plot the seaborn boxplot (str)
output: fig: figure (fig)
ax: subplot (ax)
"""
​
def plot_corr(self,heatmap='no',output='yes'):
"""
plot_corr: print/plot correlations
input: heatmap='yes','no': plot the heatmap of the correlation matrix (str)
output='yes','no': print the correlation matrix (str)
output: fig: figure (fig)
ax: subplot (ax)
"""
​