Commit c615173b authored by Paulo Medeiros
Browse files

Minor fixes

parents 7556e72d a667f561
Pipeline #9739 passed with stages
in 3 minutes and 15 seconds
......@@ -16,16 +16,16 @@ stages:
- ls poetry.lock || poetry lock
- poetry install -v --remove-untracked
create_py36:
create_py38:
stage: "Prepare Environments"
script:
- conda activate gitlab_runner_py36 2>/dev/null || conda create -y --name gitlab_runner_py36 python=3.6
- conda activate gitlab_runner_py38 2>/dev/null || conda create -y --name gitlab_runner_py38 python=3.8
- !reference [.install_deps, script]
create_py38:
create_py39:
stage: "Prepare Environments"
script:
- conda activate gitlab_runner_py38 2>/dev/null || conda create -y --name gitlab_runner_py38 python=3.8
- conda activate gitlab_runner_py39 2>/dev/null || conda create -y --name gitlab_runner_py39 python=3.9
- !reference [.install_deps, script]
......@@ -42,24 +42,27 @@ create_py38:
isort:
stage: lint
needs: ["create_py38"]
script:
- conda activate gitlab_runner_py36
- conda activate gitlab_runner_py38
- isort --check-only .
extends:
- .run_lint_conditions
black:
stage: lint
needs: ["create_py38"]
script:
- conda activate gitlab_runner_py36
- conda activate gitlab_runner_py38
- black --check .
extends:
- .run_lint_conditions
pylint:
stage: lint
needs: ["create_py38"]
script:
- conda activate gitlab_runner_py36
- conda activate gitlab_runner_py38
- pylint netatmoqc
allow_failure: true
extends:
......@@ -67,8 +70,9 @@ pylint:
flake8:
stage: lint
needs: ["create_py38"]
script:
- conda activate gitlab_runner_py36
- conda activate gitlab_runner_py38
# Set FLAKEHELL_CACHE to a random new dir. Cached data was causing issues with pylint configs.
- export FLAKEHELL_CACHE="${TMPDIR:-${TEMP:-${TMP:-/tmp}}}/flakehell_cache.$(openssl rand -hex 12)"
- flake8helled .
......@@ -87,16 +91,18 @@ flake8:
- pyproject.toml
- .gitlab-ci.yml
Unit Tests Python 3.6:
Unit Tests Python 3.8:
stage: test
needs: ["create_py38"]
script:
- conda activate gitlab_runner_py36
- conda activate gitlab_runner_py38
- pytest -v tests
extends: .run_unit_tests_conditions
Unit Tests Python 3.8:
Unit Tests Python 3.9:
stage: test
needs: ["create_py39"]
script:
- conda activate gitlab_runner_py38
- conda activate gitlab_runner_py39
- pytest -v tests
extends: .run_unit_tests_conditions
......@@ -4,7 +4,7 @@ repos:
hooks:
- id: isort
- repo: https://github.com/psf/black
rev: stable
rev: 5d33f20a2a2c85cfb521ae9c5f9254bfe9fc2fd9
hooks:
- id: black
language_version: python3.6
......@@ -15,7 +15,7 @@ repos:
entry: pylint
language: system
types: [python]
- repo: https://github.com/life4/flakehell
rev: fb5b9b4e744af29f85658466b17f31c7048f16b2
- repo: https://github.com/flakehell/flakehell
rev: 91a470fa2adfd0010162ee0c2b54f046c053fef7
hooks:
- id: flakehell
......@@ -210,7 +210,9 @@ def generate_control_card():
value=5,
step=1,
required=True,
style=dict(display="table-cell",),
style=dict(
display="table-cell",
),
),
],
style=dict(display="table-cell"),
......@@ -233,7 +235,9 @@ def generate_control_card():
min=2,
value=5,
step=1,
style=dict(display="table-cell",),
style=dict(
display="table-cell",
),
),
],
style=dict(display="table-cell"),
......@@ -250,7 +254,9 @@ def generate_control_card():
inputMode="numeric",
min=0.0,
value=10.0,
style=dict(display="table-cell",),
style=dict(
display="table-cell",
),
),
],
style=dict(display="table-cell"),
......@@ -391,8 +397,14 @@ def indicator(text, id_value):
children=[
html.Div(
children=[
html.P(id=id_value, className="indicator_value",),
html.P(text, className="indicator_text",),
html.P(
id=id_value,
className="indicator_value",
),
html.P(
text,
className="indicator_text",
),
],
),
],
......@@ -423,7 +435,11 @@ def generate_right_column_elements():
children = [
html.Div(
id="clustering_plot_div",
children=[dcc.Graph(id="clustering_plot",)],
children=[
dcc.Graph(
id="clustering_plot",
)
],
),
html.Div(
id="clustered_data_table_card",
......@@ -438,7 +454,9 @@ def generate_right_column_elements():
sort_action="native",
filter_action="native",
# Styling
style_cell=dict(padding="10px",),
style_cell=dict(
padding="10px",
),
style_header=dict(
backgroundColor="rgb(2,21,70)",
color="white",
......@@ -486,7 +504,10 @@ app.layout = html.Div(
children=[
generate_indicators(),
html.B("Visualisation of Clusters"),
html.Div(id="calculated_dist", children=[],),
html.Div(
id="calculated_dist",
children=[],
),
html.Hr(),
dcc.Loading(
# Embed the right-hand side column inside a dcc.Loading
......@@ -674,7 +695,9 @@ def run_clustering_and_make_plot(
time_start_clustering = time.time()
logger.info("Running %s...", method)
df = cluster_netatmo_obs(
df=df, config=clustering_config, calc_silhouette_samples=True,
df=df,
config=clustering_config,
calc_silhouette_samples=True,
)
df = sort_df_by_cluster_size(df)
time_end_clustering = time.time()
......
......@@ -47,7 +47,10 @@ def description_card():
children=[
html.H5("NetAtmo Data Explorer"),
html.H3("NetAtmo Data Explorer Dashboard"),
html.Div(id="intro", children="An aid to explore NetAtmo data",),
html.Div(
id="intro",
children="An aid to explore NetAtmo data",
),
],
style={"text-align": "center"},
)
......
......@@ -307,7 +307,9 @@ def run_clustering_on_df(
metric="precomputed",
).fit(distance_matrix)
logger.debug(
" * Done with %s. Elapsed: %.2fs", method, time.time() - tstart,
" * Done with %s. Elapsed: %.2fs",
method,
time.time() - tstart,
)
# Update df with cluster label info. It is important that this is done
# right before calling filter_outliers, as the filter_outliers function
......
......@@ -145,7 +145,10 @@ def _select_stations_single_dtg(dtg, config, args):
df, df_moving = remove_irregular_stations(df)
df = cluster_netatmo_obs(
df=df, config=config, n_jobs=cpu_share, calc_silhouette_samples=False,
df=df,
config=config,
n_jobs=cpu_share,
calc_silhouette_samples=False,
)
selected_cols = ["id", "lat", "lon", "alt"]
......@@ -446,6 +449,9 @@ def csv2obsoul(args):
Args:
args (argparse.Namespace): Parsed command line arguments.
Raises:
NotImplementedError: If args.selected_stations_fpath is not a CSV.
"""
config = read_config(args.config_file)
......@@ -581,7 +587,11 @@ def show(args):
fig = domain.get_fig(display_grid_max_gsize=20000.0)
fig.update_layout(
legend=dict(
orientation="h", xanchor="center", x=0.5, yanchor="top", y=0.0,
orientation="h",
xanchor="center",
x=0.5,
yanchor="top",
y=0.0,
)
)
fig.show(config=DEF_FIGSHOW_CONFIG)
......
......@@ -357,7 +357,9 @@ def parsed_path(path):
with config_section("general") as section:
# in/out dirs
config_metadata.register(
"data_rootdir", default=".", astype=parsed_path,
"data_rootdir",
default=".",
astype=parsed_path,
)
config_metadata.register(
"outdir",
......
......@@ -420,7 +420,10 @@ class Domain:
)
# (d) Set _grid attr
return DomainGrid(
xaxis=grid_xaxis, yaxis=grid_yaxis, proj=proj, tstep=tstep,
xaxis=grid_xaxis,
yaxis=grid_yaxis,
proj=proj,
tstep=tstep,
)
self._grid = init_grid(ngrid_lonlat, grid_spacing, ezone_ngrid)
......
......@@ -588,7 +588,9 @@ class HollowSymmetricMatrix(np.lib.mixins.NDArrayOperatorsMixin):
new_data = self[indices[:, np.newaxis], indices]
return self.__class__(
new_data, dtype=self.dtype, optimize_mode=self.optimize_mode,
new_data,
dtype=self.dtype,
optimize_mode=self.optimize_mode,
)
def convert_to_dense_storage(self):
......
......@@ -354,7 +354,11 @@ def rm_overlapping_stations(df):
overlapping_stations = (
df[["id", "lat", "lon"]]
.round(6)
.groupby(["lat", "lon"], as_index=False, sort=False,)
.groupby(
["lat", "lon"],
as_index=False,
sort=False,
)
.filter(lambda grp: len(grp["id"].unique()) != 1)["id"]
.unique()
)
......@@ -397,7 +401,11 @@ def remove_duplicates_within_cycle(df, dtg):
msg += "Keeping only the one closest to the DTG: "
msg += "%s obs now became %s"
logger.debug(
msg, n_stations_with_duplicates, dtg, orig_nobs, new_nobs,
msg,
n_stations_with_duplicates,
dtg,
orig_nobs,
new_nobs,
)
return df
......
......@@ -182,7 +182,10 @@ def _filter_outliers_iterative(
# We use "-2" as a "removed by refining methods" flag
n_removed_old = np.count_nonzero(df[:, -1] == -2)
df = _filter_outliers_iterative_one_iter(
df, max_n_stdev_around_mean, truncate=trunc_perc, weights=weights,
df,
max_n_stdev_around_mean,
truncate=trunc_perc,
weights=weights,
)
n_removed_new = np.count_nonzero(df[:, -1] == -2)
n_removed_this_iter = n_removed_new - n_removed_old
......
......@@ -238,8 +238,16 @@ def get_domain_fig(
traceorder="reversed",
),
geo=dict(
lataxis=dict(range=latrange, showgrid=True, dtick=10,),
lonaxis=dict(range=lonrange, showgrid=True, dtick=15,),
lataxis=dict(
range=latrange,
showgrid=True,
dtick=10,
),
lonaxis=dict(
range=lonrange,
showgrid=True,
dtick=15,
),
),
)
......@@ -436,7 +444,10 @@ def make_clustering_fig(df, domain, **kwargs):
trace_visible = True
trace = get_obs_scattergeo_trace(
cluster_df, trace_name=label, marker=marker, visible=trace_visible,
cluster_df,
trace_name=label,
marker=marker,
visible=trace_visible,
)
fig.add_trace(trace)
......@@ -526,7 +537,9 @@ def generate_single_frame(df, dataset_var, frame_duration, frame=None):
opacity=0.5,
line=dict(color="black", width=0.25),
colorbar=dict(
titleside="right", ticks="outside", showticksuffix="last",
titleside="right",
ticks="outside",
showticksuffix="last",
),
)
trace = get_obs_scattergeo_trace(df, marker=marker)
......@@ -581,7 +594,10 @@ def init_fig_dict(domain, dataset_var, frame_duration):
args=[
None,
dict(
frame=dict(duration=frame_duration, redraw=True,),
frame=dict(
duration=frame_duration,
redraw=True,
),
fromcurrent=True,
transition=dict(
duration=frame_duration / 2,
......@@ -615,9 +631,15 @@ def init_fig_dict(domain, dataset_var, frame_duration):
y=0,
pad=dict(b=10, t=50),
currentvalue=dict(
font=dict(size=20), prefix="DTG: ", visible=True, xanchor="right",
font=dict(size=20),
prefix="DTG: ",
visible=True,
xanchor="right",
),
transition=dict(
duration=frame_duration / 2,
easing="cubic-in-out",
),
transition=dict(duration=frame_duration / 2, easing="cubic-in-out",),
steps=[],
)
......
......@@ -284,7 +284,10 @@ def _input2output_single_dtg(
logger = get_logger(__name__, loglevel)
logger.debug(
"Reading data for %sDTG=%s%s...", logcolor.cyan, dtg, logcolor.reset,
"Reading data for %sDTG=%s%s...",
logcolor.cyan,
dtg,
logcolor.reset,
)
try:
# read_netatmo_data_for_dtg will raise DataNotFoundError if
......@@ -385,7 +388,9 @@ def netatmoqc_input2output(
outdir = Path()
logger.info(
"%sSaving selected observations...%s", logcolor.cyan, logcolor.reset,
"%sSaving selected observations...%s",
logcolor.cyan,
logcolor.reset,
)
outdir_csv = None
......@@ -393,7 +398,10 @@ def netatmoqc_input2output(
if save_csv:
outdir_csv = Path(outdir) / "csv_files"
logger.info(
"%s> CSV outdir:%s %s", logcolor.cyan, logcolor.reset, outdir_csv,
"%s> CSV outdir:%s %s",
logcolor.cyan,
logcolor.reset,
outdir_csv,
)
if save_obsoul:
outdir_obsoul = Path(outdir) / "obsoul_files"
......
[tool.poetry]
name = "netatmoqc"
version = "0.3.8"
version = "0.3.9"
description = "Use machine learning clustering methods to perform quality control over NetAtmo data"
authors = [
"Paulo V. C. Medeiros <paulo.medeiros@smhi.se>"
......@@ -21,49 +21,49 @@
]
[tool.poetry.dependencies]
attrs = "^20.2.0"
python = "^3.8.8,<3.10"
attrs = "^21.2.0"
dash = "^1.13.4"
dash-daq = "^0.5.0"
dotmap = "^1.3.17"
flask-caching = "^1.9.0"
hdbscan = "^0.8.26"
humanize = "^2.6.0"
importlib-metadata = "^1.7.0"
humanize = "^3.7.0"
importlib-metadata = "^4.5.0"
joblib = "^1.0.0"
llvmlite = "0.33.^0.dev0" # numba 0.50.1 requires llvmlite <0.34,>=0.33.0.dev0
llvmlite = "0.36.0"
mpi4py = {version="^3.0.3", optional=true}
numba = "^0.50.1"
numba = "^0.53.1"
numpy = "^1.19.0"
pandas = "^1.0.5"
pandas = "^1.2.4"
plotly = "^4.8.2"
psutil = "^5.7.0"
pyproj = "^2.6.1"
python = "^3.6.10"
pytz = "^2020.1"
pyproj = "^3.1.0"
pytz = "^2021.1"
redis = "^3.5.3"
scikit-learn = "^0.23.1"
scikit-learn = "^0.24.2"
# tbb>=2019.5 is needed by numba when using parallel=True in jit
# This version, however, is not available for sys_platform==darwin (on 2020-08-25)
tbb = { version = "^2020.0.133", markers = "sys_platform != 'darwin'" }
tbb = { version = "^2021.2.0", markers = "sys_platform != 'darwin'" }
toml = "^0.10.1"
[tool.poetry.extras]
mpi = ["mpi4py"]
[tool.poetry.dev-dependencies]
black = "^19.10b0"
flakehell = "^0.6.1"
black = "^21.5b2"
flakehell = "^0.9.0"
isort = "^5.6.4"
pre-commit = "^2.6.0"
pyment = "0.3.3"
pylint = "^2.6.0"
pylint = "^2.8.3"
pytest = "^6.0.0"
pytest-timeout = "^1.4.1"
tox = "^3.19.0"
# flake8 plugins
darglint = "^1.5.5"
flake8-bandit = "^2.1.2"
flake8-bugbear = "^20.1.4"
flake8-bugbear = "^21.4.3"
flake8-builtins = "^1.5.3"
flake8-comprehensions = "^3.2.3"
flake8-docstrings = "^1.5.0"
......@@ -117,12 +117,13 @@
[tool.flakehell]
base = ".flakehell.toml"
extended_default_ignore = [] # See <https://github.com/flakehell/flakehell/issues/10>
[tool.tox]
legacy_tox_ini = """
[tox]
isolated_build = True
envlist = py3{6,8}
envlist = py3{8,9}
skip_missing_interpreters = true
[testenv]
......
......@@ -138,6 +138,7 @@ class TestParsedConfig:
if __name__ == "__main__":
logging.basicConfig(
level=logging.WARNING, format="%(levelname)-s: %(message)s",
level=logging.WARNING,
format="%(levelname)-s: %(message)s",
)
pytest.main()
......@@ -272,6 +272,7 @@ class TestDtgContainer:
if __name__ == "__main__":
logging.basicConfig(
level=logging.ERROR, format="%(levelname)-s: %(message)s",
level=logging.ERROR,
format="%(levelname)-s: %(message)s",
)
pytest.main()
......@@ -83,6 +83,7 @@ class TestsCustomMetrics:
if __name__ == "__main__":
logging.basicConfig(
level=logging.DEBUG, format="%(levelname)-s: %(message)s",
level=logging.DEBUG,
format="%(levelname)-s: %(message)s",
)
pytest.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment