Commit 07c1aada authored by Paulo Medeiros
Browse files

Pass existing domain to clustering routines

parent ec0cfda7
......@@ -133,6 +133,7 @@ def get_silhouette_samples(df, distance_matrix):
def run_clustering_on_df(
df,
config,
domain,
distance_matrix=None,
remove_outliers=True,
calc_silhouette_samples=True,
......@@ -143,6 +144,7 @@ def run_clustering_on_df(
Args:
df (pandas.Dataframe): Dataframe with observations.
config (netatmoqc.config.ParsedConfig): Program's general configs.
domain (netatmoqc.domains.Domain): The adopted spatial domain.
distance_matrix (HollowSymmetricMatrix): Obs distance matrix.
Default value = None.
remove_outliers (bool): Use a post-clustering outlier removal method?
......@@ -177,7 +179,7 @@ def run_clustering_on_df(
# jit-compilation via numba, then the relative speed up can reach
# up to 120x.
distance_matrix = calc_distance_matrix(
df=df, config=config, num_threads=n_jobs
df=df, config=config, domain=domain, num_threads=n_jobs
)
# Running clustering with the computed distance matrix
......@@ -333,7 +335,7 @@ def self_consistent_reclustering(df, config, distance_matrix, **kwargs):
return df
def _cluster_netatmo_obs_one_domain(df, config, **kwargs):
def _cluster_netatmo_obs_one_domain(df, config, domain, **kwargs):
"""Cluster netatmo obs inside a given domain.
Helper for the main routine cluster_netatmo_obs
......@@ -341,6 +343,7 @@ def _cluster_netatmo_obs_one_domain(df, config, **kwargs):
Args:
df (pandas.Dataframe): Pandas dataframe with observations.
config (ParsedConfig): Parsed configs.
domain (netatmoqc.domains.Domain): The adopted spatial domain.
**kwargs: Keyword args passed to run_clustering_on_df.
Returns:
......@@ -349,7 +352,7 @@ def _cluster_netatmo_obs_one_domain(df, config, **kwargs):
"""
time_start_clustering = time.time()
logger.debug("Performing clustering...")
df = run_clustering_on_df(df=df, config=config, **kwargs)
df = run_clustering_on_df(df=df, config=config, domain=domain, **kwargs)
time_end_clustering = time.time()
logger.debug(
"Done with clustering. Elapsed: %.2fs",
......@@ -420,7 +423,10 @@ def cluster_netatmo_obs(df, config, **kwargs):
domain.n_subdomains,
)
df_sub = _cluster_netatmo_obs_one_domain(
df=df_sub, config=config, **pre_clustering_kwargs
df=df_sub,
config=config,
domain=subdomain,
**pre_clustering_kwargs,
)
domain_split_dfs.append(df_sub)
......@@ -463,7 +469,9 @@ def cluster_netatmo_obs(df, config, **kwargs):
"DTG=%s: Main clustering over whole domain...",
df.metadata_dict["dtg"],
)
df = _cluster_netatmo_obs_one_domain(df=df, config=config, **kwargs)
df = _cluster_netatmo_obs_one_domain(
df=df, config=config, domain=domain, **kwargs
)
if df_rejected is not None:
# Put back eventual obs rejected at the pre-clustering step
......
......@@ -311,7 +311,7 @@ def calc_distance_matrix_considering_correlation(
return rtn
def calc_distance_matrix(df, config, num_threads=-1):
def calc_distance_matrix(df, config, domain=None, num_threads=-1):
"""Calculate distance matrix between obs in dataframe df.
Spatial distances are calculated by projecting (lon, lat) into (x, y)
......@@ -322,6 +322,8 @@ def calc_distance_matrix(df, config, num_threads=-1):
Args:
df (pandas.Dataframe): Input data.
config (ParsedConfig): Parsed configs.
domain (netatmoqc.domains.Domain): The adopted spatial domain.
(Default value = None)
num_threads: Max number of threads used for the computation.
(Default value = -1)
......@@ -354,10 +356,16 @@ def calc_distance_matrix(df, config, num_threads=-1):
)
if method == "correlation":
if domain is None:
logger.warning(
"Domain not passed to '%s'. Constructing from configs.",
"calc_distance_matrix",
)
domain = Domain.construct_from_dict(config.domain)
return calc_distance_matrix_considering_correlation(
df=df,
weights_dict=weights_dict,
optimize_mode=config.general.custom_metrics_optimize_mode,
num_threads=num_threads,
domain=Domain.construct_from_dict(config.domain),
domain=domain,
)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment