Note
Click here to download the full example code or to run this example in your browser via Binder
Beach water quality: exploratory data analysis of the Busan beach dataset
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-datasets/envs/latest/lib/python3.7/site-packages/sklearn/experimental/enable_hist_gradient_boosting.py:17: UserWarning: Since version 1.0, it is not needed to import enable_hist_gradient_boosting anymore. HistGradientBoostingClassifier and HistGradientBoostingRegressor are now stable and can be normally imported from sklearn.ensemble.
"Since version 1.0, "
**********Tensorflow models could not be imported **********
python version: 3.7.9 (default, Oct 19 2020, 15:13:17)
[GCC 7.5.0]
os version: posix
ai4water version: 1.06
easy_mpl version: 0.21.2
SeqMetrics version: 1.3.4
numpy version: 1.21.6
pandas version: 1.2.3
matplotlib version: 3.5.3
joblib version: 1.2.0
data = busan_beach(target=['ecoli', 'sul1_coppml', 'aac_coppml',
'tetx_coppml', 'blaTEM_coppml'])
print(data.shape)
(1446, 18)
data.isna().sum()
tide_cm 0
wat_temp_c 0
sal_psu 0
air_temp_c 0
pcp_mm 0
pcp3_mm 0
pcp6_mm 0
pcp12_mm 0
wind_dir_deg 0
wind_speed_mps 0
air_p_hpa 0
mslp_hpa 0
rel_hum 0
ecoli 1279
sul1_coppml 1228
aac_coppml 1228
tetx_coppml 1228
blaTEM_coppml 1228
dtype: int64
data.isna().sum()
tide_cm 0
wat_temp_c 0
sal_psu 0
air_temp_c 0
pcp_mm 0
pcp3_mm 0
pcp6_mm 0
pcp12_mm 0
wind_dir_deg 0
wind_speed_mps 0
air_p_hpa 0
mslp_hpa 0
rel_hum 0
ecoli 1279
sul1_coppml 1228
aac_coppml 1228
tetx_coppml 1228
blaTEM_coppml 1228
dtype: int64
eda = EDA(data, save=False)
eda.heatmap()
<AxesSubplot:ylabel='Examples'>
_ = eda.plot_missing()
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-datasets/envs/latest/lib/python3.7/site-packages/ai4water/eda/_main.py:377: UserWarning: FixedFormatter should only be used together with FixedLocator
ax1.set_yticklabels(ax1.get_yticks(), fontsize="18")
# _ = eda.plot_data(subplots=True, max_cols_in_plot=20, figsize=(14, 20))
#
# ###########################################################
eda.plot_data(subplots=True, max_cols_in_plot=20, figsize=(14, 20),
ignore_datetime_index=True)
_ = eda.plot_histograms()
eda.box_plot(max_features=18, palette="Set3")
<AxesSubplot:>
eda.box_plot(max_features=18, palette="Set3", violen=True)
<AxesSubplot:>
eda.correlation(figsize=(14, 14))
# ###########################################################
#
#
# eda.grouped_scatter(max_subplots=18)
<AxesSubplot:>
_ = eda.autocorrelation(n_lags=15)
cannot plot autocorrelation for ecoli feature
cannot plot autocorrelation for sul1_coppml feature
cannot plot autocorrelation for aac_coppml feature
_ = eda.partial_autocorrelation(n_lags=15)
cannot plot autocorrelation for ecoli feature
cannot plot autocorrelation for sul1_coppml feature
cannot plot autocorrelation for aac_coppml feature
_ = eda.lag_plot(n_lags=14, s=0.4)
_ = eda.plot_ecdf(figsize=(10, 14))
eda.normality_test()
Total running time of the script: ( 0 minutes 38.103 seconds)