Commit 57df5ef8 authored by Paulo Medeiros's avatar Paulo Medeiros
Browse files

More refactoring

parent 0455b9ce
Pipeline #10394 failed with stages
in 0 seconds
......@@ -125,7 +125,7 @@ def numba_calc_distance_matrix_haversine_plus(df, weights_array, method):
if method not in ["manhattan", "euclidean"]:
raise NotImplementedError("'method' must be one of: manhattan, euclidean")
nrows, ncols = df.shape
nrows = df.shape[0]
# Set any negative weight value to zero
weights_array = np.where(weights_array < 0, 0.0, weights_array)
......@@ -192,7 +192,7 @@ def calc_distance_matrix_haversine_plus(df, config):
@njit(parallel=True, cache=True)
def calc_distance_matrix_data_considering_correlation_numba(
df, weights_array, covariance_matrix
df, weights, covariance_matrix
):
"""Calculate data for distance matrix between obs in dataframe df.
......@@ -201,7 +201,7 @@ def calc_distance_matrix_data_considering_correlation_numba(
Args:
df (numpy.array): Input data.
weights_array (numpy.array): Weights chosen for each observation param.
weights (numpy.array): Weights chosen for each observation param.
The weights determine the relative importance of the observation
parameters w.r.t. each other.
covariance_matrix (numpy.array): The covariance matrix for the data
......@@ -212,7 +212,7 @@ def calc_distance_matrix_data_considering_correlation_numba(
"""
# Set any negative weight value to zero
weights_array = np.where(weights_array < 0, 0.0, weights_array)
weights = np.where(weights < 0, 0.0, weights)
nrows, ncols = df.shape
n_dists = (nrows * (nrows - 1)) // 2
......@@ -229,24 +229,23 @@ def calc_distance_matrix_data_considering_correlation_numba(
for idist in prange(n_dists): # pylint: disable=not-an-iterable
i, j = _data_index_to_matrix_index(nrows, idist, check_bounds=False)
# Part with each obs type alone
dij_squared_part1 = 0.0
# Part with obs types correlated with each other
dij_squared_part2 = 0.0
for m in range(ncols):
dij_squared_part1 += (weights_array[m] * (df[i, m] - df[j, m])) ** 2
for n in range(m + 1, ncols):
dij_squared_part2 += (
weights_array[m]
* weights_array[n]
* (df[i, m] - df[j, m])
* (df[i, n] - df[j, n])
* covariance_matrix[m, n]
# Term 1 of the squared distance, involving each obs type alone
dij_sq_term1 = 0.0
# Term 2, involving correlations between different obs types
dij_sq_term2 = 0.0
for iparam1 in range(ncols):
dij_sq_term1 += (weights[iparam1] * (df[i, iparam1] - df[j, iparam1])) ** 2
for iparam2 in range(iparam1 + 1, ncols):
dij_sq_term2 += (
weights[iparam1]
* weights[iparam2]
* (df[i, iparam1] - df[j, iparam1])
* (df[i, iparam2] - df[j, iparam2])
* covariance_matrix[iparam1, iparam2]
)
dij_squared_part2 *= 2.0
dij_sq_term2 *= 2.0
distance_matrix_data[idist] = np.sqrt(dij_squared_part1 + dij_squared_part2)
distance_matrix_data[idist] = np.sqrt(dij_sq_term1 + dij_sq_term2)
return distance_matrix_data
......@@ -289,7 +288,7 @@ def calc_distance_matrix_considering_correlation(df, config, domain):
distance_matrix_data = calc_distance_matrix_data_considering_correlation_numba(
df=df.to_numpy(),
weights_array=weights_dict_to_np_array(df, config=config),
weights=weights_dict_to_np_array(df, config=config),
covariance_matrix=covariance_matrix.to_numpy(),
)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment