Commit e1ccbd01 authored by Paulo Medeiros
Browse files

Allow choice of HDBSCAN's cluster_selection_method

parent 38804a79
......@@ -204,9 +204,9 @@ def run_clustering_on_df(
metric="precomputed",
core_dist_n_jobs=n_jobs,
allow_single_cluster=True,
# Default cluster_selection_method: 'eom'. Sometimes it leads to
# clusters that are too big. Using 'leaf' seems better.
cluster_selection_method="leaf",
cluster_selection_method=config.get_clustering_opt(
"cluster_selection_method"
),
).fit(distance_matrix)
elif method == "optics":
db = OPTICS(
......
......@@ -417,6 +417,12 @@ with config_section("clustering_method.hdbscan") as section:
choices=[None, "glosh", "lof", "iterative", "reclustering"],
)
config_metadata.copy_template("outlier_removal.iterative")
# 'eom' may sometimes lead to clusters that are too big and to lower
# silhouette scores, but the final station selection may still turn out
# better than the one obtained with 'leaf'.
config_metadata.register(
"cluster_selection_method", default="leaf", choices=["eom", "leaf"]
)
# clustering_method.dbscan
with config_section("clustering_method.dbscan") as section:
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment