Skip to content

Commit

Permalink
Add missing EPFL readers (#155)
Browse files Browse the repository at this point in the history
  • Loading branch information
ghiggi authored Jan 26, 2023
1 parent a7db617 commit a967207
Show file tree
Hide file tree
Showing 7 changed files with 432 additions and 15 deletions.
4 changes: 4 additions & 0 deletions disdrodb/L0/L0A_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,10 @@ def read_L0A_raw_file_list(
if df_sanitizer_fun is not None:
df = df_sanitizer_fun(df, lazy=lazy)

# Remove duplicated timesteps
# - TODO: Log info !!!
df = df.drop_duplicates(subset="time", keep="first")

# ------------------------------------------------------.
# Check column names meet DISDRODB standards
check_L0A_column_names(df, sensor_name=sensor_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------.
"""Reader for HYMEX campaign."""
"""Reader for HYMEX SOP2 campaign."""
from disdrodb.L0 import run_L0
from disdrodb.L0.L0_processing import reader_generic_docstring, is_documented_by

Expand Down Expand Up @@ -101,6 +101,10 @@ def df_sanitizer_fun(df, lazy=False):
# - Convert time column to datetime
df["time"] = dd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S")

# - Drop rows when "raw_drop_number" is "NA"
# --> This is used to drop all rows where all values are "NA"
df = df.dropna(subset="raw_drop_number", axis=0)

# - Drop columns not agreeing with DISDRODB L0 standards
columns_to_drop = [
"datalogger_debug",
Expand Down
141 changes: 141 additions & 0 deletions disdrodb/L0/readers/EPFL/HYMEX_LTE_SOP3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------.
# Copyright (c) 2021-2022 DISDRODB developers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------.
"""Reader for HYMEX SOP3 campaign."""
from disdrodb.L0 import run_L0
from disdrodb.L0.L0_processing import reader_generic_docstring, is_documented_by


@is_documented_by(reader_generic_docstring)
def reader(
    raw_dir,
    processed_dir,
    l0a_processing=True,
    l0b_processing=True,
    keep_l0a=False,
    force=False,
    verbose=False,
    debugging_mode=False,
    lazy=True,
    single_netcdf=True,
):
    # NOTE(review): no docstring is written here on purpose; the
    # ``is_documented_by`` decorator is presumably what attaches the shared
    # ``reader_generic_docstring`` to this function - confirm in L0_processing.
    #
    # This reader only assembles the campaign-specific settings (column names,
    # text-reader options, dataframe sanitizer, file glob pattern) and forwards
    # them, together with all the arguments received, to ``run_L0``.

    ##------------------------------------------------------------------------.
    #### - Define column names
    # - When no data are logged (every 30 seconds), all columns (except time)
    #   have "NA" values
    column_names = [
        "time",
        "id",
        "datalogger_temperature",
        "datalogger_voltage",
        "rainfall_rate_32bit",
        "rainfall_accumulated_32bit",
        "weather_code_synop_4680",
        "weather_code_synop_4677",
        "reflectivity_32bit",
        "mor_visibility",
        "laser_amplitude",
        "number_particles",
        "sensor_temperature",
        "sensor_heating_current",
        "sensor_battery_voltage",
        "sensor_status",
        "rainfall_amount_absolute_32bit",
        "datalogger_debug",
        "raw_drop_concentration",
        "raw_drop_average_velocity",
        "raw_drop_number",
        "datalogger_error",
    ]

    ##------------------------------------------------------------------------.
    #### - Define reader options
    reader_kwargs = {}
    # - Define delimiter
    reader_kwargs["delimiter"] = ","
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: ‘#N/A’, ‘#N/A N/A’, ‘#NA’, ‘-1.#IND’, ‘-1.#QNAN’,
    #                       ‘-NaN’, ‘-nan’, ‘1.#IND’, ‘1.#QNAN’, ‘<NA>’, ‘N/A’,
    #                       ‘NA’, ‘NULL’, ‘NaN’, ‘n/a’, ‘nan’, ‘null’
    reader_kwargs["na_values"] = ["na", "", "error", "-.-", " NA"]
    # - Define max size of dask dataframe chunks (if lazy=True)
    #   - If None: use a single block for each file
    #   - Otherwise: "<max_file_size>MB" by which to cut up larger files
    reader_kwargs["blocksize"] = None  # "50MB"

    ##------------------------------------------------------------------------.
    #### - Define dataframe sanitizer function for L0 processing
    def df_sanitizer_fun(df, lazy=False):
        # Import dask or pandas under a common alias, so that the same
        # code path serves both the lazy and the in-memory dataframe
        if lazy:
            import dask.dataframe as dd
        else:
            import pandas as dd

        # - Convert time column to datetime
        df["time"] = dd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S")

        # - Drop rows when "raw_drop_number" is "NA"
        #   --> This is used to drop all rows where all values are "NA"
        #   --> Do not pass 'axis': dask's DataFrame.dropna does not accept it
        #       (rows are dropped by default in pandas too). 'subset' is given
        #       as a list because a bare label requires pandas >= 1.5.
        df = df.dropna(subset=["raw_drop_number"])

        # - Drop columns not agreeing with DISDRODB L0 standards
        columns_to_drop = [
            "datalogger_debug",
            "datalogger_voltage",
            "id",
            "datalogger_temperature",
            "datalogger_error",
        ]
        df = df.drop(columns=columns_to_drop)
        return df

    ##------------------------------------------------------------------------.
    #### - Define glob pattern to search data files in <raw_dir>/data/<station_id>
    files_glob_pattern = "*.dat*"

    ####----------------------------------------------------------------------.
    #### - Create L0 products
    run_L0(
        raw_dir=raw_dir,
        processed_dir=processed_dir,
        l0a_processing=l0a_processing,
        l0b_processing=l0b_processing,
        keep_l0a=keep_l0a,
        force=force,
        verbose=verbose,
        debugging_mode=debugging_mode,
        lazy=lazy,
        single_netcdf=single_netcdf,
        # Custom arguments of the reader
        files_glob_pattern=files_glob_pattern,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        df_sanitizer_fun=df_sanitizer_fun,
    )
141 changes: 141 additions & 0 deletions disdrodb/L0/readers/EPFL/HYMEX_LTE_SOP4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------.
# Copyright (c) 2021-2022 DISDRODB developers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------.
"""Reader for HYMEX SOP4 campaign."""
from disdrodb.L0 import run_L0
from disdrodb.L0.L0_processing import reader_generic_docstring, is_documented_by


@is_documented_by(reader_generic_docstring)
def reader(
    raw_dir,
    processed_dir,
    l0a_processing=True,
    l0b_processing=True,
    keep_l0a=False,
    force=False,
    verbose=False,
    debugging_mode=False,
    lazy=True,
    single_netcdf=True,
):
    # NOTE(review): no docstring is written here on purpose; the
    # ``is_documented_by`` decorator is presumably what attaches the shared
    # ``reader_generic_docstring`` to this function - confirm in L0_processing.
    #
    # This reader only assembles the campaign-specific settings (column names,
    # text-reader options, dataframe sanitizer, file glob pattern) and forwards
    # them, together with all the arguments received, to ``run_L0``.

    ##------------------------------------------------------------------------.
    #### - Define column names
    # - When no data are logged (every 30 seconds), all columns (except time)
    #   have "NA" values
    column_names = [
        "time",
        "id",
        "datalogger_temperature",
        "datalogger_voltage",
        "rainfall_rate_32bit",
        "rainfall_accumulated_32bit",
        "weather_code_synop_4680",
        "weather_code_synop_4677",
        "reflectivity_32bit",
        "mor_visibility",
        "laser_amplitude",
        "number_particles",
        "sensor_temperature",
        "sensor_heating_current",
        "sensor_battery_voltage",
        "sensor_status",
        "rainfall_amount_absolute_32bit",
        "datalogger_debug",
        "raw_drop_concentration",
        "raw_drop_average_velocity",
        "raw_drop_number",
        "datalogger_error",
    ]

    ##------------------------------------------------------------------------.
    #### - Define reader options
    reader_kwargs = {}
    # - Define delimiter
    reader_kwargs["delimiter"] = ","
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: ‘#N/A’, ‘#N/A N/A’, ‘#NA’, ‘-1.#IND’, ‘-1.#QNAN’,
    #                       ‘-NaN’, ‘-nan’, ‘1.#IND’, ‘1.#QNAN’, ‘<NA>’, ‘N/A’,
    #                       ‘NA’, ‘NULL’, ‘NaN’, ‘n/a’, ‘nan’, ‘null’
    reader_kwargs["na_values"] = ["na", "", "error", "-.-", " NA"]
    # - Define max size of dask dataframe chunks (if lazy=True)
    #   - If None: use a single block for each file
    #   - Otherwise: "<max_file_size>MB" by which to cut up larger files
    reader_kwargs["blocksize"] = None  # "50MB"

    ##------------------------------------------------------------------------.
    #### - Define dataframe sanitizer function for L0 processing
    def df_sanitizer_fun(df, lazy=False):
        # Import dask or pandas under a common alias, so that the same
        # code path serves both the lazy and the in-memory dataframe
        if lazy:
            import dask.dataframe as dd
        else:
            import pandas as dd

        # - Convert time column to datetime
        df["time"] = dd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S")

        # - Drop rows when "raw_drop_number" is "NA"
        #   --> This is used to drop all rows where all values are "NA"
        #   --> Do not pass 'axis': dask's DataFrame.dropna does not accept it
        #       (rows are dropped by default in pandas too). 'subset' is given
        #       as a list because a bare label requires pandas >= 1.5.
        df = df.dropna(subset=["raw_drop_number"])

        # - Drop columns not agreeing with DISDRODB L0 standards
        columns_to_drop = [
            "datalogger_debug",
            "datalogger_voltage",
            "id",
            "datalogger_temperature",
            "datalogger_error",
        ]
        df = df.drop(columns=columns_to_drop)
        return df

    ##------------------------------------------------------------------------.
    #### - Define glob pattern to search data files in <raw_dir>/data/<station_id>
    files_glob_pattern = "*.dat*"

    ####----------------------------------------------------------------------.
    #### - Create L0 products
    run_L0(
        raw_dir=raw_dir,
        processed_dir=processed_dir,
        l0a_processing=l0a_processing,
        l0b_processing=l0b_processing,
        keep_l0a=keep_l0a,
        force=force,
        verbose=verbose,
        debugging_mode=debugging_mode,
        lazy=lazy,
        single_netcdf=single_netcdf,
        # Custom arguments of the reader
        files_glob_pattern=files_glob_pattern,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        df_sanitizer_fun=df_sanitizer_fun,
    )
Loading

0 comments on commit a967207

Please sign in to comment.