Commit b722667c authored by Paulo Medeiros's avatar Paulo Medeiros
Browse files

Some more refactoring & update pre-commit hooks

parent 4b506b5c
Pipeline #10656 failed with stages
in 1 second
......@@ -10,49 +10,41 @@
# list of plugins and rules for them
[tool.flakehell.plugins]
# Activate all rules for all plugins by default
"*" = ["+*"]
# Remove from flake8-bandit:
# "S403": Consider possible security implications associated with pickle
# "S404": Consider possible security implications associated with subprocess
# "S603": To allow using subprocess.call/run
# "S606": To allow using os.startfile
flake8-bandit = ["+*", "-S403", "-S404", "-S603", "-S606"]
flake8-bugbear = ["+*"]
flake8-builtins = ["+*"]
# Remove C408 from flake8-comprehensions because I think sometimes the "dict" syntax
# looks cleaner than literal "{}". Dict creation performance is not an issue here.
flake8-comprehensions = ["+*", "-C408"]
flake8-darglint = ["+*"]
flake8-docstrings = ["+*", "-D105"] # Remove "D105: Missing docstring in magic method"
flake8-eradicate = ["+*"]
flake8-logging-format = ["+*"]
flake8-mutable = ["+*"]
flake8-pytest-style = ["+*"]
mccabe = ["+*"]
pep8-naming = ["+*"]
# Exclude some errors from pycodestyle for compatibility with black.
# "E501" is for max_line_length violations. Leave this for black to handle.
# For the other excluded errors, see:
# <https://black.readthedocs.io/en/stable/the_black_code_style.html#slices>
# <https://black.readthedocs.io/en/stable/the_black_code_style.html#line-breaks-binary-operators>
pycodestyle = ["+*", "-W503", "-E203", "-E501"]
pyflakes = ["+*"]
# Disable pylint plugin at the moment. pylint will be run separately.
pylint = ["-*"]
[tool.flakehell.exceptions."*/wsgi.py"]
# Ignore "F401 (imported but unused)" in this case
pyflakes = ["-F401"]
pyflakes = ["+*", "-F401"]
# W0611: Unused import
pylint = ["-W0611"]
pylint = ["+*", "-W0611"]
[tool.flakehell.exceptions."tests/*.py"]
# Disable some flake8-bandit checks in tests:
# "S101": To allow assert use
# "S301": To allow testing pickle/unpickle
flake8-bandit = ["-S101", "-S301"]
flake8-bandit = ["+*", "-S101", "-S301"]
# Ignore "-D105" and "-D102" (Missing docstring in public class/method) in unit tests.
# The unit tests class and method names are supposed to be self-explanatory.
flake8-docstrings = ["-D105", "-D101", "-D102"]
flake8-docstrings = ["+*", "-D105", "-D101", "-D102"]
# C0102: Black listed name. We want to use "foo", "bar", etc in the tests.
# C0103: Method name doesn't conform to snake_case naming style
# C0115: Missing class docstring
......@@ -61,4 +53,4 @@
# R0903: Too few public methods
# W0621: Redefining name from outer scope (false positive for fixtures)
# W0212: Access to a protected member _data of a client class
pylint = ["-C0102", "-C0103", "-C0115", "-C0116", "-R0201", "-R0903", "-W0621", "-W0212"]
pylint = ["+*", "-C0102", "-C0103", "-C0115", "-C0116", "-R0201", "-R0903", "-W0621", "-W0212"]
repos:
- repo: https://github.com/PyCQA/isort
rev: 718fe45817628f8033b2b47aa9ce5a2d8c890ca7
rev: 5.10.1
hooks:
- id: isort
- repo: https://github.com/psf/black
rev: 5d33f20a2a2c85cfb521ae9c5f9254bfe9fc2fd9
rev: 21.10b0
hooks:
- id: black
language_version: python3.6
language_version: python3.8
- repo: local
hooks:
- id: pylint
......@@ -15,7 +15,12 @@ repos:
entry: pylint
language: system
types: [python]
- repo: https://github.com/flakehell/flakehell
rev: 91a470fa2adfd0010162ee0c2b54f046c053fef7
exclude: ^tests/
- repo: local
hooks:
- id: flakehell
name: flakehell
language: python
types: [file, text]
files: \.(ipynb|md|py|rst|yaml|yml)$
entry: flakehell lint
......@@ -175,13 +175,13 @@ class ConfigDict(DotMap):
else:
super().__setitem__(attr, val)
def set_dynamic_flags(self, boolean):
"""Recursively set "_dynamic" to True/False."""
def recursively_set_dynamic_flags(self, boolean):
"""Recursively set the "_dynamic" attribute to True/False."""
boolean = bool(boolean)
self._dynamic = boolean
for key, val in self.items():
if isinstance(val, type(self)):
val.set_dynamic_flags(boolean)
val.recursively_set_dynamic_flags(boolean)
self[key] = val
self[key]._dynamic = boolean
else:
......@@ -713,7 +713,7 @@ class ParsedConfig:
# DTGs are treated a bit specially
self._parsed.general.dtgs = _parse_dtg_entires(self._parsed.general.dtgs)
self._parsed.set_dynamic_flags(False)
self._parsed.recursively_set_dynamic_flags(False)
# Define __setstate__ and __getstate__ to handle serialisation (pickling)
# and avoid recursion errors
......
......@@ -231,8 +231,6 @@ class DomainProjection:
def lonlat2xy(self, lon, lat):
    """Convert (lon, lat) in degrees to projected (x, y) in meters."""
    # Delegate to the instance's projection transformer; inputs are in
    # degrees, the returned coordinates are in meters.
    return self.transformer(longitude=lon, latitude=lat)
def xy2lonlat(self, x, y):
......
......@@ -43,7 +43,7 @@ class Dtg(datetime):
# Set a default of 3H for the length of a cycle.
# Without the context provided by a cycle length, a
# DTG has no meaning.
self.set_cycle_length(kwargs.get("cycle_length", "3H"))
self.cycle_length = kwargs.get("cycle_length", "3H")
def __new__(cls, *args, **kwargs):
"""Create new instance keeping only Dtg-compatible datetime attrs."""
......@@ -152,12 +152,44 @@ class Dtg(datetime):
"""Return a copy as a datetime.datetime object."""
return self.as_pandas_timestamp().to_pydatetime()
# Useful little methods/properties
@property
def cycle_length(self):
    """Return the length of the data assimilation cycle."""
    return self._cycle_length

@cycle_length.setter
def cycle_length(self, cycle_length):
    """Set the assimilation cycle length associated with the DTG.

    Args:
        cycle_length (str): The new cycle length.

    Raises:
        ValueError: If cycle_length is smaller than the min allowed, or if
            the Dtg is not compatible with cycle_length.

    """
    # A DTG has no meaning without the context provided by a cycle length,
    # so one is always kept (a default of "3H" is set at construction).
    cycle_length = to_offset(cycle_length)

    # Validate against the 1-hour minimum. Offsets are compared via their
    # effect on a fixed reference date.
    min_cycle_length = to_offset("1H")
    reference = datetime(2000, 1, 1)
    if reference + cycle_length < reference + min_cycle_length:
        raise ValueError(
            f"Min allowed cycle_length is {min_cycle_length}. "
            + f"Passed cycle_length={cycle_length}"
        )

    # The DTG's time-of-day must fall on a boundary of the new cycle.
    if not self.compatible_with_cycle_length(cycle_length):
        raise ValueError(
            f"Dtg {self} not compatible with cycle_length {cycle_length}"
        )
    self._cycle_length = cycle_length
# Useful little methods/properties
@property
def cycle_start(self):
"""Return the earliest observation time encompassed by the DTG."""
......@@ -173,15 +205,16 @@ class Dtg(datetime):
"""Return the DTG's assimilation window: [cycle_start, cycle_end)."""
return pd.Interval(self.cycle_start, self.cycle_end, closed="left")
def get_next(self):
@property
def next(self): # noqa: A003
"""Return the next DTG."""
return self + self.cycle_length
def get_previous(self):
@property
def previous(self):
"""Return the previous DTG."""
return self - self.cycle_length
# cycle_length-related methods
def compatible_with_cycle_length(self, cycle_length="self"):
"""Check if DTG is compatible with cycle length.
......@@ -205,37 +238,6 @@ class Dtg(datetime):
dtg = pd.Timestamp(self)
return (dtg - dtg.normalize()) % cycle_length == timedelta(0)
def set_cycle_length(self, cycle_length):
    """Set the assimilation cycle length associated with the DTG.

    Args:
        cycle_length (str): The new cycle length.

    Raises:
        ValueError: If cycle_length is smaller than the min allowed, or if
            the Dtg is not compatible with cycle_length.

    """
    # Without the context provided by a cycle length, a DTG has no meaning,
    # hence one is always carried (defaulting to "3H" at construction).
    cycle_length = to_offset(cycle_length)
    # Validate by applying the offsets to a fixed anchor date: the new
    # cycle length must advance time at least as much as the 1H minimum.
    min_cycle_length = to_offset("1H")
    anchor = datetime(2000, 1, 1)
    too_short = (anchor + cycle_length) < (anchor + min_cycle_length)
    if too_short:
        raise ValueError(
            f"Min allowed cycle_length is {min_cycle_length}. "
            + f"Passed cycle_length={cycle_length}"
        )
    # The DTG's time-of-day must land on a boundary of the new cycle.
    if not self.compatible_with_cycle_length(cycle_length):
        raise ValueError(
            f"Dtg {self} not compatible with cycle_length {cycle_length}"
        )
    self._cycle_length = cycle_length
# Override some methods to ensure results are consistent with Dtg type
def replace(self, *args, **kwargs):
"""Overrride datetime's "replace" method.
......
......@@ -98,7 +98,7 @@ def normalize_df(df):
@njit("f4[:](f8[:, :], f8[:], types.unicode_type)", parallel=True, cache=True)
def numba_calc_distance_matrix_haversine_plus(df, weights_array, method):
def numba_calc_distance_matrix_haversine_plus(df, weights, method):
"""Calculate distance matrix using python+numba.
Spatial distances are calculated using the haversine method.
......@@ -109,7 +109,7 @@ def numba_calc_distance_matrix_haversine_plus(df, weights_array, method):
df (numpy.ndarray): Multidimensional numpy array containing the data
entries, obtained from a pandas dataframe (numba doesn't work with
pandas datafames).
weights_array (numpy.array): Weights chosen for each observation parameter.
weights (numpy.array): Weights chosen for each observation parameter.
The weights determine the relative importance of the observation
parameters w.r.t. each other.
method (basestring): The method to be used for the non-spatial part of
......@@ -128,7 +128,7 @@ def numba_calc_distance_matrix_haversine_plus(df, weights_array, method):
nrows = df.shape[0]
# Set any negative weight value to zero
weights_array = np.where(weights_array < 0, 0.0, weights_array)
weights = np.where(weights < 0, 0.0, weights)
n_dists = (nrows * (nrows - 1)) // 2
rtn = np.zeros(n_dists, dtype=np.float32)
......@@ -143,13 +143,11 @@ def numba_calc_distance_matrix_haversine_plus(df, weights_array, method):
i, j = np.zeros(2, dtype=np.int64)
for idist in prange(n_dists): # pylint: disable=not-an-iterable
i, j = _data_index_to_matrix_index(nrows, idist, check_bounds=False)
rtn[idist] = weights_array[0] * haversine_distance(df[i], df[j])
rtn[idist] = weights[0] * haversine_distance(df[i], df[j])
if method == "manhattan":
rtn[idist] += np.sum(np.abs(weights_array[1:] * (df[j, 2:] - df[i, 2:])))
rtn[idist] += np.sum(np.abs(weights[1:] * (df[j, 2:] - df[i, 2:])))
elif method == "euclidean":
rtn[idist] += np.sqrt(
np.sum((weights_array[1:] * (df[j, 2:] - df[i, 2:])) ** 2)
)
rtn[idist] += np.sqrt(np.sum((weights[1:] * (df[j, 2:] - df[i, 2:])) ** 2))
return rtn
......@@ -177,13 +175,13 @@ def calc_distance_matrix_haversine_plus(df, config):
allowed_methods = ["manhattan", "euclidean"]
if method not in allowed_methods:
raise NotImplementedError(
f"Argument 'method' must be one of: {', '.join(allowed_methods)}. Received: {method}"
f"Arg 'method' must be one of: {', '.join(allowed_methods)}. Got: {method}"
)
return HollowSymmetricMatrix(
data=numba_calc_distance_matrix_haversine_plus(
df=df.to_numpy(),
weights_array=weights_dict_to_np_array(df, config=config),
weights=weights_dict_to_np_array(df, config=config),
method=method,
),
optimize_mode=config.metrics.optimize_mode,
......@@ -268,7 +266,7 @@ def calc_distance_matrix_considering_correlation(df, config, domain):
matrix.
"""
# Work with a copy, otherwise the df is modified out of scope too
# Work with a copy, otherwise the df seems to get modified out of this scope too
df = df.copy()
# Replace (lon, lat) with projected (x, y) in Km
......
......@@ -25,14 +25,13 @@ def mpi_parallel(fun, iterable):
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
if size > 1:
if rank == 0:
raise ValueError(
f"Received '-n {size}' from the MPI runner. Please "
+ "use '-n 1' when running this application with MPI, "
+ "and then select the maximum number N of parallel "
+ "MPI tasks by passing '-usize N'"
)
if size > 1 and rank == 0:
raise ValueError(
f"Received '-n {size}' from the MPI runner. Please "
+ "use '-n 1' when running this application with MPI, "
+ "and then select the maximum number N of parallel "
+ "MPI tasks by passing '-usize N'"
)
# Establish max #workers that will be dynamically spawn
# (i.a) Check if universe size passed explicitly (e.g., via "-usize")
......@@ -49,4 +48,5 @@ def mpi_parallel(fun, iterable):
# Spawn workers and perform tasks
with MPIPoolExecutor(max_workers=max_workers) as executor:
results = executor.map(fun, iterable)
return list(results)
[tool.poetry]
name = "netatmoqc"
version = "0.4.0.beta-0"
version = "0.4.0.beta-1"
description = "Use machine learning clustering methods to perform quality control over NetAtmo data"
authors = [
"Paulo V. C. Medeiros <paulo.medeiros@smhi.se>"
......
......@@ -63,7 +63,7 @@ class TestConfigDict:
assert conf_dict._dynamic is not dynamic_flag
orig_conf_dict = copy.deepcopy(conf_dict)
conf_dict.set_dynamic_flags(dynamic_flag)
conf_dict.recursively_set_dynamic_flags(dynamic_flag)
assert conf_dict._dynamic is dynamic_flag
assert orig_conf_dict._dynamic is not dynamic_flag
......
......@@ -47,8 +47,8 @@ class TestDtg:
def test_next_and_prev(self):
dtg = Dtg("20180110T12", cycle_length="3H")
assert dtg.get_next() == Dtg("20180110T15", cycle_length="3H")
assert dtg.get_previous() == Dtg("20180110T09", cycle_length="3H")
assert dtg.next == Dtg("20180110T15", cycle_length="3H")
assert dtg.previous == Dtg("20180110T09", cycle_length="3H")
def test_eq_and_neq_operators(self):
dtg1 = Dtg("20180110T12", cycle_length="3H")
......@@ -60,13 +60,8 @@ class TestDtg:
def test_assim_windows_dont_overlap(self):
dtg = Dtg("20180110T12", cycle_length="3H")
assert (
dtg.assimilation_window.overlaps(dtg.get_next().assimilation_window) is False
)
assert (
dtg.assimilation_window.overlaps(dtg.get_previous().assimilation_window)
is False
)
assert dtg.assimilation_window.overlaps(dtg.next.assimilation_window) is False
assert dtg.assimilation_window.overlaps(dtg.previous.assimilation_window) is False
def test_copy_dtg(self):
dtg = Dtg("20180110T12", cycle_length="3H")
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment