Commit 7530577b authored by Paulo Medeiros
Browse files

Add "thin" command

parent abe4342f
......@@ -12,6 +12,7 @@ from .commands_functions import (
csv2obsoul,
select_stations,
show,
thin_data_from_csv_files,
)
......@@ -196,6 +197,21 @@ def get_parsed_args(program_name):
parser_csv2obsoul.set_defaults(func=csv2obsoul)
###########################################
# Configure parser for the "thin" command #
###########################################
parser_thin = subparsers.add_parser(
"thin", help="Thin data from input file(s) using domain's grid.",
)
parser_thin.add_argument(
"paths",
nargs="*",
type=Path,
default=list(Path(".").glob("*.csv")),
help="Path(s) to input CSVs containing at least (lon, lat) data.",
)
parser_thin.set_defaults(func=thin_data_from_csv_files)
###########################################
# Configure parser for the "apps" command #
###########################################
......
......@@ -36,7 +36,7 @@ from .plots import (
make_clustering_fig,
show_cmd_get_fig_from_dataframes,
)
from .save_data import netatmoqc_input2output
from .save_data import netatmoqc_input2output, save_df_as_netatmo_csv
logger = logging.getLogger(__name__)
......@@ -501,6 +501,65 @@ def csv2obsoul(args):
)
######################################
# Code related to the "thin" command #
######################################
def _collect_thin_input_files(paths):
    """Expand the user-supplied paths into a flat list of candidate files.

    Directories are searched recursively for ``*.csv`` files. Any other path
    is kept as given; its suffix is validated later by the caller. A path that
    shows up more than once (e.g. passed explicitly and also found inside a
    directory argument) is kept only the first time, preserving order.

    Args:
        paths (iterable of pathlib.Path): Paths given on the command line.

    Returns:
        list of pathlib.Path: Candidate input files, without duplicates.
    """
    candidates = []
    for path in paths:
        if path.is_dir():
            candidates.extend(path.rglob("*.csv"))
        else:
            candidates.append(path)
    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(candidates))


def _thin_output_dir(outdir_prefix, fpath):
    """Return the directory under ``outdir_prefix`` where ``fpath`` is saved.

    The input file's parent directory is mirrored below ``outdir_prefix``
    (with the filesystem anchor, if any, removed). If that parent contains
    ``..`` components it is resolved first; otherwise joining it to
    ``outdir_prefix`` would climb out of the output directory.

    Args:
        outdir_prefix (pathlib.Path): Root directory for this run's outputs.
        fpath (pathlib.Path): Path to the input file.

    Returns:
        pathlib.Path: Output directory for the thinned version of ``fpath``.
    """
    parent = fpath.parent
    if ".." in parent.parts:
        parent = parent.resolve()
    return outdir_prefix / parent.relative_to(parent.anchor)


def thin_data_from_csv_files(args):
    """Implement the 'thin' command.

    Every csv file selected via ``args.paths`` is read with pandas, passed
    through ``domain.trim_obs`` and then through
    ``domain.thinning_grid.thin_obs(..., method="nearest")``, where the domain
    is built from the configuration file. Results are saved with
    ``save_df_as_netatmo_csv`` under a timestamped
    ``<outdir>/<YYYY-mm-dd_HH.MM.SS>_netatmoqc_thin`` directory, mirroring the
    directory layout of the input files.

    Files that do not have a ``.csv`` suffix, cannot be found, or contain no
    data are skipped with a warning.

    Args:
        args (argparse.Namespace): Parsed command line arguments. The
            attributes read here are ``loglevel``, ``config_file`` and
            ``paths``.
    """
    logger = get_logger(__name__, args.loglevel)
    config = read_config(args.config_file)
    domain = Domain.construct_from_dict(config.domain)

    # A single timestamp for the whole run keeps all outputs together.
    outdir_prefix = config.general.outdir / "{}_netatmoqc_thin".format(
        datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
    )

    for fpath in _collect_thin_input_files(args.paths):
        if fpath.suffix != ".csv":
            logger.warning(
                "Only csv files supported. Skipping file '%s'", fpath
            )
            continue

        logger.info("Parsing data from file %s", fpath)
        logger.debug("Read data from file %s", fpath)
        try:
            df = pd.read_csv(fpath)
        except FileNotFoundError:
            logger.warning("File %s not found.", fpath)
            continue
        except pd.errors.EmptyDataError:
            logger.warning('No data in file "%s"', fpath)
            continue

        logger.debug("Thin data from file %s", fpath)
        df = domain.trim_obs(df)
        df = domain.thinning_grid.thin_obs(df, method="nearest")

        # Save results
        outdir = _thin_output_dir(outdir_prefix, fpath)
        outdir.mkdir(parents=True, exist_ok=True)
        out_fpath = outdir / fpath.name
        logger.info("Saving thinned data to file %s\n", out_fpath)
        save_df_as_netatmo_csv(df, out_fpath, overwrite=True)
######################################
# Code related to the "show" command #
######################################
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment