[1]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
Rainfall-runoff dataset of Laos
[2]:
# sphinx_gallery_thumbnail_number = -1
from easy_mpl import pie
from ai4water.eda import EDA
from ai4water.datasets import MtropicsLaos, ecoli_mekong
import os

# Directory where the MtropicsLaos files are stored (or will be downloaded to).
# The original machine-specific location is kept as the default, so behaviour
# is unchanged unless the MTROPICS_LAOS_PATH environment variable is set; set
# it to run this notebook on another machine without editing the cell.
laos = MtropicsLaos(
    path=os.environ.get("MTROPICS_LAOS_PATH", "F:\\data\\MtropicsLaos")
)
**********Tensorflow models could not be imported **********
Not downloading the data since the directory
F:\data\MtropicsLaos already exists.
Use overwrite=True to remove previously saved files and download again
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\sklearn\experimental\enable_hist_gradient_boosting.py:16: UserWarning: Since version 1.0, it is not needed to import enable_hist_gradient_boosting anymore. HistGradientBoostingClassifier and HistGradientBoostingRegressor are now stable and can be normally imported from sklearn.ensemble.
warnings.warn(
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:901: UserWarning: preprocessing of shapefiles can not be done because no fiona installation is found.
warnings.warn("preprocessing of shapefiles can not be done because no fiona installation is found.")
precipitation
[3]:
# Fetch the precipitation data and print its shape.
pcp = laos.fetch_pcp()
print(pcp.shape)
(1665361, 1)
weather station
[4]:
# Fetch the weather-station data and print its shape.
w = laos.fetch_weather_station_data()
print(w.shape)
(166536, 4)
[5]:
# fetch_hydro() returns two objects, unpacked here as `wl` and `spm`.
# NOTE(review): presumably water level and suspended particulate matter
# (cf. the `water_level` and `susp_pm` columns produced by make_regression
# later in this notebook) -- confirm against the ai4water documentation.
wl, spm = laos.fetch_hydro()
print(wl.shape)
(454694, 1)
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:576: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
return wl.loc[st:en], spm.loc[st:en]
[6]:
# Shape of the second object returned by fetch_hydro().
print(spm.shape)
(6428, 1)
[7]:
# Fetch the E. coli data with the default feature selection and print its shape.
ecoli = laos.fetch_ecoli()
print(ecoli.shape)
(409, 1)
[8]:
# First rows of the E. coli data (plain-text rendering via print).
print(ecoli.head())
Ecoli_mpn100
Date_Time
2011-05-25 10:00:00 NaN
2011-05-25 16:40:00 1100.0
2011-05-25 17:06:00 1400.0
2011-05-25 17:10:00 NaN
2011-05-25 17:24:00 14000.0
[9]:
# Last rows of the E. coli data fetched with the default features.
print(ecoli.tail())
# Fetch the E. coli data again, this time requesting all available features,
# and print the shape of the result.
ecoli_all = laos.fetch_ecoli(features='all')
print(ecoli_all.shape)
Ecoli_mpn100
Date_Time
2021-02-25 14:10:00 250.0
2021-03-07 14:10:00 200.0
2021-03-17 15:11:00 290.0
2021-03-27 15:22:00 720.0
2021-04-06 15:05:00 560.0
(409, 3)
[10]:
# First rows of the E. coli data fetched with features='all'.
ecoli_all.head()
[10]:
Ecoli_LL_mpn100 | Ecoli_mpn100 | Ecoli_UL_mpn100 | |
---|---|---|---|
Date_Time | |||
2011-05-25 10:00:00 | NaN | NaN | NaN |
2011-05-25 16:40:00 | 715.0 | 1100.0 | 1550.0 |
2011-05-25 17:06:00 | 1000.0 | 1400.0 | 1900.0 |
2011-05-25 17:10:00 | NaN | NaN | NaN |
2011-05-25 17:24:00 | 9000.0 | 14000.0 | 22000.0 |
[11]:
# Fetch a single feature, 'T_deg', via fetch_physiochem and print the shape.
# NOTE(review): 'T_deg' is presumably water temperature in degrees -- confirm.
phy_chem = laos.fetch_physiochem('T_deg')
print(phy_chem.shape)
(411, 1)
physicochemical attributes
[12]:
# Same fetch as for a single feature, but requesting all available features.
phy_chem_all = laos.fetch_physiochem(features='all')
print(phy_chem_all.shape)
(411, 8)
rain gauges
[13]:
# Fetch the rain-gauge data and print its shape.
rg = laos.fetch_rain_gauges()
print(rg.shape)
(6939, 7)
regression
[14]:
# Build the regression dataset with default arguments and print its shape.
df = laos.make_regression()
print(df.shape)
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:576: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
return wl.loc[st:en], spm.loc[st:en]
(650741, 8)
[15]:
# First rows of the regression dataset built with default arguments.
df.head()
[15]:
air_temp | rel_hum | wind_speed | sol_rad | water_level | pcp | susp_pm | Ecoli_mpn100 | |
---|---|---|---|---|---|---|---|---|
2011-05-25 14:00:00 | 22.536906 | 96.167816 | 7.0 | 256.0 | 133.0 | 0.0 | 0.9 | NaN |
2011-05-25 14:06:00 | 22.537042 | 96.168046 | 6.7 | 246.2 | 133.0 | 0.0 | 0.9 | NaN |
2011-05-25 14:12:00 | 22.537179 | 96.168276 | 6.4 | 236.4 | 133.0 | 0.0 | 0.9 | NaN |
2011-05-25 14:18:00 | 22.537315 | 96.168506 | 6.1 | 226.6 | 133.0 | 0.0 | 0.9 | NaN |
2011-05-25 14:24:00 | 22.537452 | 96.168736 | 5.8 | 216.8 | 133.0 | 0.0 | 0.9 | NaN |
[16]:
# Rebuild the regression dataset with lookback_steps=30; this rebinds `df`.
# In the recorded run the result has far fewer rows than with the defaults.
# NOTE(review): the exact row-selection rule behind lookback_steps is not
# visible here -- see the ai4water documentation for make_regression.
df = laos.make_regression(lookback_steps=30)
print(df.shape)
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:576: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
return wl.loc[st:en], spm.loc[st:en]
(5948, 8)
[17]:
# First rows of the regression dataset built with lookback_steps=30.
df.head()
[17]:
air_temp | rel_hum | wind_speed | sol_rad | water_level | pcp | susp_pm | Ecoli_mpn100 | |
---|---|---|---|---|---|---|---|---|
2011-05-25 14:00:00 | 22.536906 | 96.167816 | 7.0 | 256.0 | 133.0 | 0.0 | 0.9 | NaN |
2011-05-25 14:06:00 | 22.537042 | 96.168046 | 6.7 | 246.2 | 133.0 | 0.0 | 0.9 | NaN |
2011-05-25 14:12:00 | 22.537179 | 96.168276 | 6.4 | 236.4 | 133.0 | 0.0 | 0.9 | NaN |
2011-05-25 14:18:00 | 22.537315 | 96.168506 | 6.1 | 226.6 | 133.0 | 0.0 | 0.9 | NaN |
2011-05-25 14:24:00 | 22.537452 | 96.168736 | 5.8 | 216.8 | 133.0 | 0.0 | 0.9 | NaN |
[18]:
# Number of missing values per column of the regression dataset.
print(df.isna().sum())
air_temp 0
rel_hum 0
wind_speed 0
sol_rad 0
water_level 0
pcp 0
susp_pm 0
Ecoli_mpn100 5690
dtype: int64
[19]:
# Plot the regression dataset with ai4water's EDA helper, requesting subplots
# on a tall 14 x 20 figure and ignoring the datetime index.
eda = EDA(data=df)
eda.plot_data(subplots=True, figsize=(14, 20),
ignore_datetime_index=True)
classification
[20]:
# Build the classification dataset with lookback_steps=30 and print its shape.
# This rebinds `df`, replacing the regression dataset from the cells above.
df = laos.make_classification(lookback_steps=30)
print(df.shape)
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:576: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
return wl.loc[st:en], spm.loc[st:en]
(5948, 8)
[21]:
# First rows of the classification dataset.
df.head()
[21]:
air_temp | rel_hum | wind_speed | sol_rad | water_level | pcp | susp_pm | Ecoli_mpn100 | |
---|---|---|---|---|---|---|---|---|
2011-05-25 14:00:00 | 22.536906 | 96.167816 | 7.0 | 256.0 | 133.0 | 0.0 | 0.9 | NaN |
2011-05-25 14:06:00 | 22.537042 | 96.168046 | 6.7 | 246.2 | 133.0 | 0.0 | 0.9 | NaN |
2011-05-25 14:12:00 | 22.537179 | 96.168276 | 6.4 | 236.4 | 133.0 | 0.0 | 0.9 | NaN |
2011-05-25 14:18:00 | 22.537315 | 96.168506 | 6.1 | 226.6 | 133.0 | 0.0 | 0.9 | NaN |
2011-05-25 14:24:00 | 22.537452 | 96.168736 | 5.8 | 216.8 | 133.0 | 0.0 | 0.9 | NaN |
[22]:
# Number of missing values per column of the classification dataset.
print(df.isna().sum())
air_temp 0
rel_hum 0
wind_speed 0
sol_rad 0
water_level 0
pcp 0
susp_pm 0
Ecoli_mpn100 5690
dtype: int64
ecoli_mekong
[23]:
# Load the ecoli_mekong dataset and print its shape.
# NOTE: this rebinds `ecoli`, which earlier held the result of
# laos.fetch_ecoli().
ecoli = ecoli_mekong()
print(ecoli.shape)
Not downloading the data since the directory
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\data\ecoli_houay_pano already exists.
Use overwrite=True to remove previously saved files and download again
Not downloading the data since the directory
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\data\ecoli_mekong_2016 already exists.
Use overwrite=True to remove previously saved files and download again
Not downloading the data since the directory
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\data\ecoli_mekong_loas already exists.
Use overwrite=True to remove previously saved files and download again
(1602, 10)
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:1216: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
df = df.loc[st:en]
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:1216: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
df = df.loc[st:en]
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:1216: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
df = df.loc[st:en]
[24]:
# First rows of the ecoli_mekong dataset (plain-text rendering via print).
print(ecoli.head())
station_name T EC DOpercent DO pH ORP \
Date_Time
2011-05-25 10:00:00 Houay Pano 24.11 273.0 87.5 7.34 7.67 53.5
2011-05-25 16:40:00 Houay Pano NaN NaN NaN NaN NaN NaN
2011-05-25 17:06:00 Houay Pano NaN NaN NaN NaN NaN NaN
2011-05-25 17:10:00 Houay Pano NaN NaN NaN NaN NaN NaN
2011-05-25 17:24:00 Houay Pano NaN NaN NaN NaN NaN NaN
Turbidity TSS E-coli_4dilutions
Date_Time
2011-05-25 10:00:00 13.47 NaN NaN
2011-05-25 16:40:00 1380.00 0.899281 1100.0
2011-05-25 17:06:00 1116.00 0.993190 1400.0
2011-05-25 17:10:00 1392.00 1.107880 NaN
2011-05-25 17:24:00 702.00 1.325030 14000.0
[25]:
# `df` is still the frame returned by laos.make_classification(lookback_steps=30);
# the ecoli_mekong cells above rebind `ecoli`, not `df`.
# Drop rows containing NaN, take the last column, cast it to int and draw a
# pie chart of the values; the return value is discarded via `_`.
# NOTE(review): `explode` has two entries, so two wedges (two classes) are
# presumably expected -- confirm how easy_mpl.pie aggregates the values.
_ = pie(df.dropna().iloc[:, -1].values.astype(int), explode=(0, 0.05))