Commit 135964c7 authored by Paulo Medeiros
Browse files

More descriptive names for metrics calc methods

haversine --> haversine_plus_manhattan
correlation --> correlation_aware_euclidean
parent 300fd05a
......@@ -49,9 +49,9 @@
[metrics]
# method:
# > default: "haversine"
# > choices: "correlation", "haversine"
method = "correlation"
# > default: "correlation_aware_euclidean"
# > choices: "correlation_aware_euclidean", "haversine_plus_manhattan"
method = "correlation_aware_euclidean"
# optimize_mode:
# > default: "memory"
# > choices: "memory", "speed_mem_compromise", "speed"
......
......@@ -393,8 +393,8 @@ with config_section("general") as section:
with config_section("metrics") as section:
config_metadata.register(
"method",
default="correlation",
choices=["correlation", "haversine"],
default="correlation_aware_euclidean",
choices=["correlation_aware_euclidean", "haversine_plus_manhattan"],
)
config_metadata.register(
"optimize_mode",
......
......@@ -92,7 +92,7 @@ def get_obs_norm_factors(obs_values):
@njit("f4[:](f8[:, :], f8[:])", parallel=True, cache=True)
def calc_distance_matrix_haversine_numba(df, weights):
def calc_distance_matrix_haversine_plus_manhattan_numba(df, weights):
"""Calculate distance matrix using python+numba.
Args:
......@@ -148,7 +148,9 @@ def calc_distance_matrix_haversine_numba(df, weights):
return rtn
def calc_distance_matrix_haversine(df, weights, optimize_mode, num_threads=-1):
def calc_distance_matrix_haversine_plus_manhattan(
df, weights, optimize_mode, num_threads=-1
):
"""Calculate distance matrix between obs in dataframe df.
Spatial distances are calculated using the haversine method.
......@@ -178,7 +180,7 @@ def calc_distance_matrix_haversine(df, weights, optimize_mode, num_threads=-1):
atexit.register(numba.set_num_threads, original_nthreads)
rtn = HollowSymmetricMatrix(
data=calc_distance_matrix_haversine_numba(
data=calc_distance_matrix_haversine_plus_manhattan_numba(
df.to_numpy(), weights=weights
),
optimize_mode=optimize_mode,
......@@ -330,7 +332,10 @@ def calc_distance_matrix(df, config, domain=None, num_threads=-1):
matrix.
"""
accepted_methods = ["correlation", "haversine"]
accepted_methods = [
"correlation_aware_euclidean",
"haversine_plus_manhattan",
]
method = config.metrics.method.lower()
if method not in accepted_methods:
raise NotImplementedError(
......@@ -341,8 +346,8 @@ def calc_distance_matrix(df, config, domain=None, num_threads=-1):
logger.debug("Computing distance matrix using the '%s' method", method)
weights_dict = config.get_clustering_opt("obs_weights")
df = df.copy().drop(config.general.unclusterable_data_columns, axis=1)
if method == "haversine":
return calc_distance_matrix_haversine(
if method == "haversine_plus_manhattan":
return calc_distance_matrix_haversine_plus_manhattan(
# Drop columns that won't be used in the clustering
df=df,
weights=weights_dict_to_np_array(df, config=config),
......@@ -350,7 +355,7 @@ def calc_distance_matrix(df, config, domain=None, num_threads=-1):
num_threads=num_threads,
)
if method == "correlation":
if method == "correlation_aware_euclidean":
if domain is None:
logger.warning(
"Domain not passed to '%s'. Constructing from configs.",
......
......@@ -40,7 +40,7 @@ class TestsCustomMetrics:
# Prepare a mock config
fpath = Path(__file__).resolve().parent / "test_config.toml"
config = ParsedConfig(fpath)
config.metrics.method = "haversine"
config.metrics.method = "haversine_plus_manhattan"
config.general.clustering_method = "hdbscan"
config.clustering_method.hdbscan.obs_weights = {"some_obs": 0.0}
config.metrics.optimize_mode = "speed"
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment