import glob
import os

import numpy as np
import pandas as pd
def make_date(x):
    """
    Combines the 'Date' and 'Time' entries of `x` into datetime values.

    The two entries are joined with a single space and parsed as
    "year/month/day hour:minute AM-or-PM" (12-hour clock).
    """
    stamp = x['Date'] + " " + x['Time']
    return pd.to_datetime(stamp, format="%Y/%m/%d %I:%M %p")
def compute_precip(x):
    """
    Returns for each entry the amount of precipitation that has accumulated
    in the previous five minutes.

    The amount is the difference between consecutive values of the
    'Precip_Accum_mm' column, with negative differences replaced by zero.
    NA is inserted for any entry whose previous entry (by the 'Date' column)
    was not exactly five minutes earlier; this includes the first entry,
    which has no previous entry.
    """
    # Use total_seconds() rather than .dt.seconds: the latter is only the
    # seconds *component* of the timedelta, so a gap of e.g. one day and
    # five minutes would also read as 300 and be accepted by mistake.
    dt = x["Date"].diff().dt.total_seconds()
    # Clamp negative differences to zero, then blank out every entry that
    # does not follow its predecessor by exactly 300 seconds.
    dp = np.maximum(0, x['Precip_Accum_mm'].diff()).mask(dt != 300, pd.NA)
    return dp
def read_weather_files(ddir):
    """
    Reads in all CSV files in the directory `ddir`, and returns a concatenated
    data frame. For each file, inserts the part of the file name that precedes
    the first underscore into the "code" column of the result for that file.

    Each file must provide the 'Date', 'Time' and 'Precip_Accum_mm' columns;
    'Date' is replaced by a parsed datetime and a 'Precip_Amount_mm' column
    is added, both computed per file.

    Files are read in sorted order of their paths, so the row order of the
    result does not depend on the order in which the file system lists them.

    Raises FileNotFoundError if `ddir` contains no CSV files.
    """
    wfiles = sorted(glob.glob(os.path.join(ddir, "*.csv")))
    if not wfiles:
        raise FileNotFoundError(f"No files found in {ddir!r}.")
    xl = []
    for f in wfiles:
        x = pd.read_csv(f).convert_dtypes()
        x['Date'] = make_date(x)
        # os.path.basename handles the platform's path separator, so no
        # manual "/" vs "\\" switch is needed.
        # NOTE(review): this takes the text before the FIRST underscore; if
        # file names are really "something_CODE.csv", the last token (minus
        # the extension) is wanted instead -- confirm against the data.
        x['code'] = os.path.basename(f).split("_")[0]
        # Computed per file so differences never span two files.
        x['Precip_Amount_mm'] = compute_precip(x)
        xl.append(x)
    return pd.concat(xl)