Commit 1fc97669 authored by Paulo Medeiros
Browse files

Fix InvalidIndexError: pandas df.at --> df.loc

The code was using the "at" accessor of pandas DataFrame with multiple
indices, which was accepted by pandas < 1.4.0. However, the pandas docs
state that "at" is for single-element access only, whereas "loc" is for
access to multiple elements.
parent b722667c
...@@ -613,7 +613,7 @@ class HollowSymmetricMatrix(np.lib.mixins.NDArrayOperatorsMixin): ...@@ -613,7 +613,7 @@ class HollowSymmetricMatrix(np.lib.mixins.NDArrayOperatorsMixin):
# HDBSCAN requires that the result of np.array(self) to have # HDBSCAN requires that the result of np.array(self) to have
# dtype=numpy.float64. There's an open feature request at # dtype=numpy.float64. There's an open feature request at
# <https://github.com/scikit-learn-contrib/hdbscan/issues/108> # <https://github.com/scikit-learn-contrib/hdbscan/issues/108>
# (checked on 2020-09-02) for support to other types. Let's wait. # (checked on 2022-02-09) for support to other types. Let's wait.
logger.debug("%s: Calling '__array__' method", type(self).__name__) logger.debug("%s: Calling '__array__' method", type(self).__name__)
return self.dense_form().astype(np.float64) return self.dense_form().astype(np.float64)
......
...@@ -252,7 +252,7 @@ def filter_outliers_glosh(df, outlier_scores): ...@@ -252,7 +252,7 @@ def filter_outliers_glosh(df, outlier_scores):
threshold = min(0.05, df[df["cluster_label"] > -1]["GLOSH"].quantile(0.75)) threshold = min(0.05, df[df["cluster_label"] > -1]["GLOSH"].quantile(0.75))
outliers_index = (df["cluster_label"] > -1) & (df["GLOSH"] > threshold) outliers_index = (df["cluster_label"] > -1) & (df["GLOSH"] > threshold)
# Use "-2" as a "removed by refining methods" flag # Use "-2" as a "removed by refining methods" flag
df.at[outliers_index, "cluster_label"] = -2 df.loc[outliers_index, "cluster_label"] = -2
return df return df
...@@ -317,7 +317,7 @@ def filter_outliers_lof(df, distance_matrix): ...@@ -317,7 +317,7 @@ def filter_outliers_lof(df, distance_matrix):
df["LOF"] = get_local_outlier_factors(df, distance_matrix) df["LOF"] = get_local_outlier_factors(df, distance_matrix)
outliers_index = (df["cluster_label"] > -1) & (df["LOF"] < -1.5) outliers_index = (df["cluster_label"] > -1) & (df["LOF"] < -1.5)
# Use "-2" as a "removed by refining methods" flag # Use "-2" as a "removed by refining methods" flag
df.at[outliers_index, "cluster_label"] = -2 df.loc[outliers_index, "cluster_label"] = -2
return df return df
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment