[1]:
%matplotlib inline

Rainfall-runoff dataset of Laos

[2]:
# sphinx_gallery_thumbnail_number = -1

from easy_mpl import pie
from ai4water.eda import EDA
from ai4water.datasets import MtropicsLaos, ecoli_mekong

# Dataset handle used by every laos.fetch_*/laos.make_* call in this notebook.
# NOTE(review): `path` is an absolute, machine-specific Windows directory; the
# recorded output shows the download is skipped because this directory already exists.
laos = MtropicsLaos(path="F:\\data\\MtropicsLaos")

**********Tensorflow models could not be imported **********


    Not downloading the data since the directory
    F:\data\MtropicsLaos already exists.
    Use overwrite=True to remove previously saved files and download again
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\sklearn\experimental\enable_hist_gradient_boosting.py:16: UserWarning: Since version 1.0, it is not needed to import enable_hist_gradient_boosting anymore. HistGradientBoostingClassifier and HistGradientBoostingRegressor are now stable and can be normally imported from sklearn.ensemble.
  warnings.warn(
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:901: UserWarning: preprocessing of shapefiles can not be done because no fiona installation is found.
  warnings.warn("preprocessing of shapefiles can not be done because no fiona installation is found.")

precipitation

[3]:
# Precipitation record; the recorded run printed a single-column shape.
pcp = laos.fetch_pcp()
print(pcp.shape)
(1665361, 1)

weather station

[4]:
# Weather-station observations (four columns in the recorded run).
w = laos.fetch_weather_station_data()
print(w.shape)
(166536, 4)
[5]:
# fetch_hydro() returns a pair, unpacked here as `wl` and `spm`
# (presumably water level and suspended particulate matter -- TODO confirm
# against the ai4water documentation).
wl, spm = laos.fetch_hydro()
print(wl.shape)
(454694, 1)
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:576: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
  return wl.loc[st:en], spm.loc[st:en]
[6]:
# `spm` is the second object returned by the laos.fetch_hydro() call.
print(spm.shape)
(6428, 1)
[7]:
# With default arguments the recorded run returns a single column, Ecoli_mpn100.
ecoli = laos.fetch_ecoli()
print(ecoli.shape)
(409, 1)
[8]:
# First rows of the E. coli frame; the recorded output includes NaN entries.
print(ecoli.head())
                     Ecoli_mpn100
Date_Time
2011-05-25 10:00:00           NaN
2011-05-25 16:40:00        1100.0
2011-05-25 17:06:00        1400.0
2011-05-25 17:10:00           NaN
2011-05-25 17:24:00       14000.0
[9]:
# Last rows of the single-column E. coli frame.
print(ecoli.tail())

# features='all' yields three columns in the recorded run:
# Ecoli_LL_mpn100, Ecoli_mpn100 and Ecoli_UL_mpn100.
ecoli_all = laos.fetch_ecoli(features='all')
print(ecoli_all.shape)
                     Ecoli_mpn100
Date_Time
2021-02-25 14:10:00         250.0
2021-03-07 14:10:00         200.0
2021-03-17 15:11:00         290.0
2021-03-27 15:22:00         720.0
2021-04-06 15:05:00         560.0
(409, 3)
[10]:
# Bare expression so the notebook renders the first rows of the three-column frame.
ecoli_all.head()
[10]:
Ecoli_LL_mpn100 Ecoli_mpn100 Ecoli_UL_mpn100
Date_Time
2011-05-25 10:00:00 NaN NaN NaN
2011-05-25 16:40:00 715.0 1100.0 1550.0
2011-05-25 17:06:00 1000.0 1400.0 1900.0
2011-05-25 17:10:00 NaN NaN NaN
2011-05-25 17:24:00 9000.0 14000.0 22000.0
[11]:
# Request one feature by name ('T_deg'); the recorded run returned one column.
phy_chem = laos.fetch_physiochem('T_deg')
print(phy_chem.shape)
(411, 1)

physicochemical attributes

[12]:
# features='all' returned eight columns in the recorded run, with the same
# number of rows as the single-feature request.
phy_chem_all = laos.fetch_physiochem(features='all')
print(phy_chem_all.shape)
(411, 8)

rain gauges

[13]:
# Rain-gauge data (seven columns in the recorded run).
rg = laos.fetch_rain_gauges()
print(rg.shape)
(6939, 7)

regression

[14]:
# Regression table built with default arguments: eight columns in the recorded
# run, the last one being Ecoli_mpn100.
df = laos.make_regression()
print(df.shape)
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:576: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
  return wl.loc[st:en], spm.loc[st:en]
(650741, 8)
[15]:
# Bare expression so the notebook renders the first rows of the regression table.
df.head()
[15]:
air_temp rel_hum wind_speed sol_rad water_level pcp susp_pm Ecoli_mpn100
2011-05-25 14:00:00 22.536906 96.167816 7.0 256.0 133.0 0.0 0.9 NaN
2011-05-25 14:06:00 22.537042 96.168046 6.7 246.2 133.0 0.0 0.9 NaN
2011-05-25 14:12:00 22.537179 96.168276 6.4 236.4 133.0 0.0 0.9 NaN
2011-05-25 14:18:00 22.537315 96.168506 6.1 226.6 133.0 0.0 0.9 NaN
2011-05-25 14:24:00 22.537452 96.168736 5.8 216.8 133.0 0.0 0.9 NaN
[16]:
# NOTE: rebinds `df`, discarding the frame from the default make_regression() call.
# With lookback_steps=30 the recorded run has 5948 rows instead of 650741.
df = laos.make_regression(lookback_steps=30)
print(df.shape)
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:576: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
  return wl.loc[st:en], spm.loc[st:en]
(5948, 8)
[17]:
# First rows of the lookback_steps=30 regression table.
df.head()
[17]:
air_temp rel_hum wind_speed sol_rad water_level pcp susp_pm Ecoli_mpn100
2011-05-25 14:00:00 22.536906 96.167816 7.0 256.0 133.0 0.0 0.9 NaN
2011-05-25 14:06:00 22.537042 96.168046 6.7 246.2 133.0 0.0 0.9 NaN
2011-05-25 14:12:00 22.537179 96.168276 6.4 236.4 133.0 0.0 0.9 NaN
2011-05-25 14:18:00 22.537315 96.168506 6.1 226.6 133.0 0.0 0.9 NaN
2011-05-25 14:24:00 22.537452 96.168736 5.8 216.8 133.0 0.0 0.9 NaN
[18]:
# Missing values per column; in the recorded run only Ecoli_mpn100 has NaNs
# (5690 of 5948 rows).
print(df.isna().sum())
air_temp           0
rel_hum            0
wind_speed         0
sol_rad            0
water_level        0
pcp                0
susp_pm            0
Ecoli_mpn100    5690
dtype: int64
[19]:
# Plot every column of the lookback_steps=30 regression table in its own subplot.
plot_options = dict(
    subplots=True,
    figsize=(14, 20),
    ignore_datetime_index=True,
)
eda = EDA(data=df)
eda.plot_data(**plot_options)
../_images/_notebooks_mtropics_laos_24_0.png

classification

[20]:
# NOTE: rebinds `df` again, replacing the regression table. The recorded shape
# matches the lookback_steps=30 regression table (5948 rows, 8 columns).
df = laos.make_classification(lookback_steps=30)
print(df.shape)
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:576: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
  return wl.loc[st:en], spm.loc[st:en]
(5948, 8)
[21]:
# First rows of the classification table; Ecoli_mpn100 is NaN in all five
# displayed rows of the recorded run.
df.head()
[21]:
air_temp rel_hum wind_speed sol_rad water_level pcp susp_pm Ecoli_mpn100
2011-05-25 14:00:00 22.536906 96.167816 7.0 256.0 133.0 0.0 0.9 NaN
2011-05-25 14:06:00 22.537042 96.168046 6.7 246.2 133.0 0.0 0.9 NaN
2011-05-25 14:12:00 22.537179 96.168276 6.4 236.4 133.0 0.0 0.9 NaN
2011-05-25 14:18:00 22.537315 96.168506 6.1 226.6 133.0 0.0 0.9 NaN
2011-05-25 14:24:00 22.537452 96.168736 5.8 216.8 133.0 0.0 0.9 NaN
[22]:
# Missing values per column of the classification table; only Ecoli_mpn100
# has NaNs in the recorded run.
print(df.isna().sum())
air_temp           0
rel_hum            0
wind_speed         0
sol_rad            0
water_level        0
pcp                0
susp_pm            0
Ecoli_mpn100    5690
dtype: int64

ecoli_mekong

[23]:
# NOTE: rebinds `ecoli`, which previously held the result of laos.fetch_ecoli().
# The recorded run reports three cached data directories and a 10-column frame.
ecoli = ecoli_mekong()
print(ecoli.shape)

    Not downloading the data since the directory
    D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\data\ecoli_houay_pano already exists.
    Use overwrite=True to remove previously saved files and download again

    Not downloading the data since the directory
    D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\data\ecoli_mekong_2016 already exists.
    Use overwrite=True to remove previously saved files and download again

    Not downloading the data since the directory
    D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\data\ecoli_mekong_loas already exists.
    Use overwrite=True to remove previously saved files and download again
(1602, 10)
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:1216: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
  df = df.loc[st:en]
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:1216: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
  df = df.loc[st:en]
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\mtropics.py:1216: FutureWarning: Value based partial slicing on non-monotonic DatetimeIndexes with non-existing keys is deprecated and will raise a KeyError in a future Version.
  df = df.loc[st:en]
[24]:
# Plain-text print of the 10-column frame; the recorded output wraps the
# columns into two stacked blocks.
print(ecoli.head())
                    station_name      T     EC  DOpercent    DO    pH   ORP  \
Date_Time
2011-05-25 10:00:00   Houay Pano  24.11  273.0       87.5  7.34  7.67  53.5
2011-05-25 16:40:00   Houay Pano    NaN    NaN        NaN   NaN   NaN   NaN
2011-05-25 17:06:00   Houay Pano    NaN    NaN        NaN   NaN   NaN   NaN
2011-05-25 17:10:00   Houay Pano    NaN    NaN        NaN   NaN   NaN   NaN
2011-05-25 17:24:00   Houay Pano    NaN    NaN        NaN   NaN   NaN   NaN

                     Turbidity       TSS  E-coli_4dilutions
Date_Time
2011-05-25 10:00:00      13.47       NaN                NaN
2011-05-25 16:40:00    1380.00  0.899281             1100.0
2011-05-25 17:06:00    1116.00  0.993190             1400.0
2011-05-25 17:10:00    1392.00  1.107880                NaN
2011-05-25 17:24:00     702.00  1.325030            14000.0
[25]:
# `df` is still the frame from laos.make_classification(lookback_steps=30);
# the ecoli_mekong() cells rebind `ecoli`, not `df`.
# Keep complete rows only, take the last column and cast it to int for the pie chart.
target_labels = df.dropna().iloc[:, -1].values.astype(int)
_ = pie(target_labels, explode=(0, 0.05))
../_images/_notebooks_mtropics_laos_32_0.png