Commit 0a70c72d authored by Paulo Medeiros's avatar Paulo Medeiros
Browse files

Lint code with "isort", "flake8(helled)" and pylint

This is a merge commit.
The previous commit in the branch was 22e5a889
parents 22e5a889 d700b6c8
Pipeline #9234 passed with stages
in 1 minute and 15 seconds
[tool.flakehell]
exclude = [".*/", "tmp/", "*/tmp/"]
# Group output by file. Colored.
format = "grouped"
# Show line of source code in output, with syntax highlighting
show_source = true
# flake8-darglint params
docstring_style = "google"
strictness = "short"
# list of plugins and rules for them
[tool.flakehell.plugins]
# Remove from flake8-bandit:
# "S403": Consider possible security implications associated with pickle
# "S404": Consider possible security implications associated with subprocess
# "S603": To allow using subprocess.call/run
# "S606": To allow using os.startfile
flake8-bandit = ["+*", "-S403", "-S404", "-S603", "-S606"]
flake8-bugbear = ["+*"]
flake8-builtins = ["+*"]
# Remove C408 from flake8-comprehensions because I think sometimes the "dict" syntax
# looks cleaner than literal "{}". Dict creation performance is not an issue here.
flake8-comprehensions = ["+*", "-C408"]
flake8-darglint = ["+*"]
flake8-docstrings = ["+*", "-D105"] # Remove "D105: Missing docstring in magic method"
flake8-eradicate = ["+*"]
flake8-logging-format = ["+*"]
flake8-mutable = ["+*"]
flake8-pytest-style = ["+*"]
mccabe = ["+*"]
pep8-naming = ["+*"]
# Exclude some errors from pycodestyle for compatibility with black.
# "E501" is for max_line_length violations. Leave this for black to handle.
# For the other excluded errors, see:
# <https://black.readthedocs.io/en/stable/the_black_code_style.html#slices>
# <https://black.readthedocs.io/en/stable/the_black_code_style.html#line-breaks-binary-operators>
pycodestyle = ["+*", "-W503", "-E203", "-E501"]
pyflakes = ["+*"]
# Disable pylint plugin at the moment. pylint will be run separately.
pylint = ["-*"]
[tool.flakehell.exceptions."*/wsgi.py"]
# Ignore "F401 (imported but unused)" in this case
pyflakes = ["-F401"]
# W0611: Unused import
pylint = ["-W0611"]
[tool.flakehell.exceptions."tests/*.py"]
# Disable some flake8-bandit checks in tests:
# "S101": To allow assert use
# "S301": To allow testing pickle/unpickle
flake8-bandit = ["-S101", "-S301"]
# Ignore "D101", "D102" and "D105" (missing docstring in public class/method) in unit tests.
# The unit tests class and method names are supposed to be self-explanatory.
flake8-docstrings = ["-D105", "-D101", "-D102"]
# C0102: Black listed name. We want to use "foo", "bar", etc in the tests.
# C0103: Method name doesn't conform to snake_case naming style
# C0115: Missing class docstring
# C0116: Missing function or method docstring
# R0201: Method could be a function
# R0903: Too few public methods
# W0621: Redefining name from outer scope (false positive for fixtures)
# W0212: Access to a protected member _data of a client class
pylint = ["-C0102", "-C0103", "-C0115", "-C0116", "-R0201", "-R0903", "-W0621", "-W0212"]
stages:
- "Prepare Environments"
- lint
- test
before_script:
- curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python3
- source $HOME/.poetry/env
- pip install black
Python Code Lint:
########################
# Prepare environments #
########################
# Template code for dependency install
.install_deps:
script:
- curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python3
- source $HOME/.poetry/env
- ls poetry.lock || poetry lock
- poetry install -v --remove-untracked
create_py36:
stage: "Prepare Environments"
script:
- conda activate gitlab_runner_py36 2>/dev/null || conda create -y --name gitlab_runner_py36 python=3.6
extends:
- .install_deps
create_py38:
stage: "Prepare Environments"
script:
- conda activate gitlab_runner_py38 2>/dev/null || conda create -y --name gitlab_runner_py38 python=3.8
extends:
- .install_deps
##################################
# Configs for code linting stage #
##################################
.run_lint_conditions:
only:
changes:
- "*.py"
- pyproject.toml
- .gitlab-ci.yml
isort:
stage: lint
script:
- conda activate gitlab_runner_py36
- isort --check-only .
extends:
- .run_lint_conditions
black:
stage: lint
script:
- conda activate gitlab_runner_py36
- black --check .
extends:
- .run_lint_conditions
pylint:
stage: lint
script:
- conda activate gitlab_runner_py36
- pylint netatmoqc
allow_failure: true
extends:
- .run_lint_conditions
flake8:
stage: lint
script:
- conda activate gitlab_runner_py36
# Set FLAKEHELL_CACHE to a random new dir. Cached data was causing issues with pylint configs.
- export FLAKEHELL_CACHE="${TMPDIR:-${TEMP:-${TMP:-/tmp}}}/flakehell_cache.$(openssl rand -hex 12)"
- flake8helled .
extends:
- .run_lint_conditions
##############################
# Configs for unit test runs #
##############################
.run_unit_tests_conditions:
only:
changes:
- "*.py"
- pyproject.toml
- .gitlab-ci.yml
Unit Tests Python 3.6:
stage: test
script:
- conda activate gitlab_runner_py36 2>/dev/null || conda create -y --name gitlab_runner_py36 python=3.6 && conda activate gitlab_runner_py36
- rm -f poetry.lock
- poetry install -v
- conda activate gitlab_runner_py36
- pytest -v tests
extends: .run_unit_tests_conditions
Unit Tests Python 3.8:
stage: test
script:
- conda activate gitlab_runner_py38 2>/dev/null || conda create -y --name gitlab_runner_py38 python=3.8 && conda activate gitlab_runner_py38
- rm -f poetry.lock
- poetry install -v
- conda activate gitlab_runner_py38
- pytest -v tests
extends: .run_unit_tests_conditions
repos:
- repo: https://github.com/PyCQA/isort
rev: 718fe45817628f8033b2b47aa9ce5a2d8c890ca7
hooks:
- id: isort
- repo: https://github.com/psf/black
rev: stable
hooks:
- id: black
language_version: python3.6
- repo: local
hooks:
- id: pylint
name: pylint
entry: pylint
language: system
types: [python]
- repo: https://github.com/life4/flakehell
rev: fb5b9b4e744af29f85658466b17f31c7048f16b2
hooks:
- id: flakehell
......@@ -139,10 +139,11 @@ the repo, please also run the following:
pre-commit install
This sets up the git hook scripts defined in the
[.pre-commit-config.yaml](.pre-commit-config.yaml) file and only needs to be
done once within the repo's directory. The [pre-commit](https://pre-commit.com)
package is installed when you run any of the `poetry install` commands listed
above.
[.pre-commit-config.yaml](.pre-commit-config.yaml) file and only needs to be run
(i) before the first commit, and (ii) after having modified the
[.pre-commit-config.yaml](.pre-commit-config.yaml) file. The
[pre-commit](https://pre-commit.com) package is installed when you run any of
the `poetry install` commands listed above.
### After Installation: Configuration File
......
......@@ -125,7 +125,7 @@
# The weights will multiply the normalised values of the observation diffs
# in the calculation of the pairwise distance matrix. If weight<=0, then
# the corresponding property will be ignored altogether. If a weight is not
# explicitly set, then it will be set to 1.
# explicitly set, then it will be set to 1.
#
temperature = 5.0
[clustering_method.hdbscan.outlier_removal]
......
#!/usr/bin/env python3
"""Common definitions."""
try:
    # importlib.metadata is in the stdlib from python3.8
    from importlib.metadata import PackageNotFoundError, version
except ModuleNotFoundError:
    # Prior to python3.8, use the importlib-metadata backport
    from importlib_metadata import PackageNotFoundError, version

try:
    __version__ = version(__name__)
except (PackageNotFoundError, ModuleNotFoundError):
    # Package metadata unavailable (e.g. running from a plain source checkout)
    __version__ = "?"
#!/usr/bin/env python3
"""Implement the interactive clustering app."""
import logging
import os
import tempfile
import time
from datetime import datetime
import dash
import dash_core_components as dcc
from dash.dependencies import Input, Output, State
import dash_html_components as html
import dash_table
from datetime import datetime
from flask_caching import Cache
import logging
import numpy as np
import pandas as pd
import tempfile
import time
import os
import redis
from dash.dependencies import Input, Output, State
from flask_caching import Cache
from server import server
from netatmoqc.clustering import cluster_netatmo_obs
from netatmoqc.clustering import cluster_netatmo_obs, sort_df_by_cluster_size
from netatmoqc.config_parser import (
ParsedConfig,
read_config,
UndefinedConfigValue,
read_config,
)
from netatmoqc.domains import Domain
from netatmoqc.dtgs import Dtg
from netatmoqc.load_data import read_netatmo_data_for_dtg
from netatmoqc.load_data import (
read_netatmo_data_for_dtg,
remove_irregular_stations,
)
from netatmoqc.logs import CustomFormatter
from netatmoqc.metrics import haversine_distance
from netatmoqc.plots import make_clustering_fig
......@@ -44,7 +50,6 @@ app = dash.Dash(
{"name": "viewport", "content": "width=device-width, initial-scale=1"}
],
)
# app.config.suppress_callback_exceptions = True
# Fix duplicate log items
# TODO: Find a better way to do this without affecting Flask's own logging
......@@ -62,7 +67,7 @@ try:
logger.info("Using redis caching")
redis_url = os.environ.get("REDIS_URL", "redis://localhost:6379")
CACHE_CONFIG = {"CACHE_TYPE": "redis", "CACHE_REDIS_URL": redis_url}
except (redis.exceptions.ConnectionError):
except redis.exceptions.ConnectionError:
cache_dir = tempfile.TemporaryDirectory(prefix="netatmo_clustering_tmp")
logger.info("Caching via redis unavailable. Caching to %s", cache_dir.name)
# See <https://stackoverflow.com/questions/12868222/
......@@ -78,9 +83,7 @@ cache.clear()
def description_card():
"""
:return: A Div containing dashboard title & descriptions.
"""
"""Return a Div containing dashboard title & descriptions."""
return html.Div(
id="description-card",
children=[
......@@ -96,9 +99,10 @@ def description_card():
def generate_obs_weights_panel():
def obs_weight_cell(
var_name, default=1.0, minval=0.0, maxval=float("Inf")
):
"""Generate the observation weights panel."""
def obs_weight_cell(var_name, default=1.0, minval=0.0, maxval=np.inf):
"""Return a div for an obs weight cell."""
# Get defaults from config file if defined. Use the ones
# defined in the calls to this function otherwise.
try:
......@@ -178,9 +182,7 @@ allowed_outlier_rm_method = ["Iterative", "GLOSH", "LOF", "Reclustering"]
def generate_control_card():
"""
:return: A Div containing controls for graphs.
"""
"""Return a Div containing controls for graphs."""
return html.Div(
id="control-card",
children=[
......@@ -208,14 +210,9 @@ def generate_control_card():
value=5,
step=1,
required=True,
style=dict(
display="table-cell",
# width='30%',
# verticalAlign="top",
),
style=dict(display="table-cell",),
),
],
# style=dict(display='inline-block', width='30%'),
style=dict(display="table-cell"),
),
# The min_cluster_size input should not show when using dbscan.
......@@ -236,11 +233,7 @@ def generate_control_card():
min=2,
value=5,
step=1,
style=dict(
display="table-cell",
# width='30%',
# verticalAlign="top",
),
style=dict(display="table-cell",),
),
],
style=dict(display="table-cell"),
......@@ -257,11 +250,7 @@ def generate_control_card():
inputMode="numeric",
min=0.0,
value=10.0,
style=dict(
display="table-cell",
# width='30%',
# verticalAlign="top",
),
style=dict(display="table-cell",),
),
],
style=dict(display="table-cell"),
......@@ -395,6 +384,7 @@ def generate_control_card():
# returns top indicator div
def indicator(text, id_value):
"""Return the html.Div for one app indicator."""
# Adapted from Dash gallery's app "dash-salesforce-crm"
return html.Div(
id="{}_div".format(id_value),
......@@ -412,6 +402,7 @@ def indicator(text, id_value):
def generate_indicators():
"""Generate the indicators used in the app."""
indicators = html.Div(
id="indicators_div",
children=[
......@@ -428,10 +419,11 @@ def generate_indicators():
def generate_right_column_elements():
"""Generate the elements of the right-hand side column."""
children = [
html.Div(
id="clustering_plot_div",
children=[dcc.Graph(id="clustering_plot",),],
children=[dcc.Graph(id="clustering_plot",)],
),
html.Div(
id="clustered_data_table_card",
......@@ -444,7 +436,6 @@ def generate_right_column_elements():
columns=[],
export_columns="all",
sort_action="native",
# sort_mode='multi',
filter_action="native",
# Styling
style_cell=dict(padding="10px",),
......@@ -453,11 +444,9 @@ def generate_right_column_elements():
color="white",
textAlign="center",
),
# Control table vertical scrolling
# page_size=10,
# Control table scrolling
# Don't use fixed_rows right now. It causes formatting
# issues at the moment (tested with dash v1.11 and v1.12)
# fixed_rows=dict(headers=True, data=0),
style_table=dict(
maxHeight="300px",
overflowY="scroll",
......@@ -477,7 +466,7 @@ app.layout = html.Div(
html.Div(
id="left-column",
className="three columns",
children=[description_card(), generate_control_card(),]
children=[description_card(), generate_control_card()]
+ [
html.Div(
["initial child"],
......@@ -497,11 +486,7 @@ app.layout = html.Div(
children=[
generate_indicators(),
html.B("Visualisation of Clusters"),
html.Div(
id="calculated_dist",
children=[],
# style=dict(display='inline'),
),
html.Div(id="calculated_dist", children=[],),
html.Hr(),
dcc.Loading(
# Embed the the right-hand side column inside a dcc.Loading
......@@ -532,8 +517,10 @@ app.layout = html.Div(
# and for all time.
# Memoize so repeated requests for the same (date, cycle) pair don't
# re-read the data files.
@cache.memoize()
def read_data_df(str_date, cycle):
    """Read and clean NetAtmo data for the DTG given by str_date and cycle.

    Args:
        str_date (str): Date in "%Y-%m-%d" format.
        cycle (int): Hour used to complete the DTG — presumably the
            assimilation cycle hour; confirm against the caller.

    Returns:
        The dataframe produced by read_netatmo_data_for_dtg, with
        irregular stations removed.
    """
    dtg = Dtg(datetime.strptime(str_date, "%Y-%m-%d").replace(hour=cycle))
    df = read_netatmo_data_for_dtg(dtg, rootdir=config.general.data_rootdir)
    # Keep only the regular stations; the removed ones are discarded here.
    df, _ = remove_irregular_stations(df)
    return df
......@@ -548,6 +535,7 @@ def read_data_df(str_date, cycle):
[Input(component_id="method-select", component_property="value")],
)
def show_hide_depending_on_method(method):
"""Show/hide menus depending on clustering method."""
if method == "dbscan":
eps_div_style = {"display": "table-cell"}
min_cluster_size_div_style = {"display": "none"}
......@@ -565,13 +553,13 @@ def show_hide_depending_on_method(method):
component_property="style",
),
],
[Input(component_id="outlier_rm_method", component_property="value"),],
[Input(component_id="outlier_rm_method", component_property="value")],
)
def show_hide_max_num_refining_iter(outlier_rm_method):
    """Show/hide max_num_refining_iter menu according to outlier_rm_method.

    The refining-iterations row only applies to the iterative
    outlier-removal method, so it is hidden for every other method.

    Args:
        outlier_rm_method (str): Name of the selected outlier-removal method.

    Returns:
        list: A one-element list with the CSS style dict for the menu row —
        displayed as a table row for "iterative", hidden otherwise.
    """
    if outlier_rm_method == "iterative":
        return [{"display": "table-row"}]
    return [{"display": "none"}]
# Producing plot
......@@ -626,6 +614,8 @@ def run_clustering_and_make_plot(
humidity_weight,
sum_rain_1_weight,
):
"""Control the app. Main app routine."""
# pylint: disable=locally-disabled, too-many-locals, too-many-arguments
empty_fig = make_clustering_fig(pd.DataFrame(), domain=domain)
empty_rtn = (empty_fig, [], [], "-", "-", "-", "-", "-", "-")
if n_clicks == 0:
......@@ -636,9 +626,7 @@ def run_clustering_and_make_plot(
df = read_data_df(date, cycle)
end_read_data = time.time()
logger.info(
"Done reading data. Elapsed: {:.1f}s".format(
end_read_data - start_read_data
)
"Done reading data. Elapsed: %.1fs", end_read_data - start_read_data
)
n_obs = len(df.index)
......@@ -684,18 +672,16 @@ def run_clustering_and_make_plot(
)
time_start_clustering = time.time()
logger.info("Running {}...".format(method))
logger.info("Running %s...", method)
df = cluster_netatmo_obs(
config=clustering_config,
df=df,
sort_by_cluster_size=True,
calc_silhouette_samples=True,
df=df, config=clustering_config, calc_silhouette_samples=True,
)
df = sort_df_by_cluster_size(df)
time_end_clustering = time.time()
logger.info(
"{0} performed in {1}s".format(
method, np.round(time_end_clustering - time_start_clustering, 2)
)
"%s performed in %.1fs",
method,
time_end_clustering - time_start_clustering,
)
noise_data_df = df[df["cluster_label"] < 0]
......@@ -707,10 +693,10 @@ def run_clustering_and_make_plot(
n_accepted = n_obs - noise_count
silhouette_score = df["silhouette_score"].mean(skipna=True)
logger.info("Estimated number of clusters: {}".format(n_clusters))
logger.info("Estimated number of noise points: {}".format(noise_count))
logger.info("Estimated number of clustered obs: {}".format(n_accepted))
logger.info("Mean silhouette score: {:.3f}".format(silhouette_score))
logger.debug("Estimated number of clusters: %d", n_clusters)
logger.debug("Estimated number of noise points: %d", noise_count)
logger.debug("Estimated number of clustered obs: %d", n_accepted)
logger.debug("Mean silhouette score: %.3f", silhouette_score)
fig = make_clustering_fig(df, domain=domain)
......@@ -763,7 +749,7 @@ def run_clustering_and_make_plot(
# Report distance between points upon selection fo two points
@app.callback(
[Output(component_id="calculated_dist", component_property="children"),],
[Output(component_id="calculated_dist", component_property="children")],
[
# Set clickmode='event+select' in the figure layout, and then
# use 'selectedData' here instead of 'clickData'
......@@ -772,18 +758,16 @@ def run_clustering_and_make_plot(
),
],
)
def geodist_upon_pt_pair_selection(selected_data):
    """Return geodist between selected points in the app.

    Args:
        selected_data: Plotly "selectedData" callback payload; expected to
            hold a "points" list of dicts with "lat" and "lon" keys —
            TODO confirm against the figure's clickmode settings.

    Returns:
        list: A one-element list containing an html.Span reporting the
        haversine distance in km, or an empty html.P when the selection
        is not exactly two points.
    """
    # Nothing to report unless exactly two points are selected.
    if (selected_data is None) or (len(selected_data["points"]) != 2):
        return [html.P("")]
    pt0 = [selected_data["points"][0][attr] for attr in ["lat", "lon"]]
    pt1 = [selected_data["points"][1][attr] for attr in ["lat", "lon"]]
    rtn = html.Span(
        "Haversine distance between selected points: {:.3f} km".format(
            haversine_distance(np.array(pt0), np.array(pt1))
        )
    )
    return [rtn]
......
#!/usr/bin/env python3
"""Code for the scattergeo_timeseries app."""
import logging
import os
from datetime import datetime as dt
import dash
import dash_core_components as dcc
from dash.dependencies import Input, Output, State
import dash_html_components as html
from datetime import datetime as dt
import logging
import numpy as np
import os
import pandas as pd
import plotly.graph_objects as go
from dash.dependencies import Input, Output, State
from server import server
from netatmoqc.config_parser import read_config
from netatmoqc.domains import Domain
from netatmoqc.load_data import read_netatmo_data_for_dtg
from netatmoqc.load_data import (
read_netatmo_data_for_dtg,
remove_irregular_stations,
)
from netatmoqc.logs import CustomFormatter
from netatmoqc.plots import init_fig_dict, generate_single_frame
from netatmoqc.plots import generate_single_frame, init_fig_dict
logger = logging.getLogger(__name__)
logger_handler = logging.StreamHandler()
......@@ -31,13 +38,10 @@ app = dash.Dash(
{"name": "viewport", "content": "width=device-width, initial-scale=1"}
],
)
# app.config.suppress_callback_exceptions = True
def description_card():
"""
:return: A Div containing dashboard title & descriptions.