Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
iObs
WP2
Task 2.3
netatmoqc
Commits
b205df5b
Commit
b205df5b
authored
Nov 05, 2021
by
Paulo Medeiros
Browse files
More refactoring in metrics.py
parent
cb5ca9c5
Changes
1
Hide whitespace changes
Inline
Side-by-side
netatmoqc/metrics.py
View file @
b205df5b
...
...
@@ -37,10 +37,11 @@ def weights_dict_to_np_array(df, config, default=1):
"""Convert pairwise_diff_weights into a numpy array.
Takes a pandas dataframe and a {column_name:weight} dictionary and returns
an array of weights
as needed in the calc_distance_matrix_haversine_plus
routine
.
an array of weights
ordered in a way consistent with the order of the data
used in the routines to calculate distance matrices
.
Columns "lat" and "lon" in df are treated specially, in that they are
If using "haversine_plus_" distance matrix calculation methods, then the
columns "lat" and "lon" in df are treated specially, in that they are
not assigned a weight individually, but rather a single weight gets
assigned to the "geo_dist" property.
...
...
@@ -60,22 +61,29 @@ def weights_dict_to_np_array(df, config, default=1):
column.
"""
if
df
.
columns
.
get_loc
(
"lon"
)
-
df
.
columns
.
get_loc
(
"lat"
)
!=
1
:
raise
ValueError
(
"'lat' column is not followed by 'lon' column"
)
weights
=
[]
weights_dict
=
config
.
get_clustering_opt
(
"obs_weights"
)
unclusterable_cols
=
config
.
general
.
unclusterable_data_columns
col2weight
=
{
c
:
(
"geo_dist"
if
c
==
"lon"
else
c
)
for
c
in
df
.
columns
}
for
col
in
df
.
columns
[
~
df
.
columns
.
isin
(
unclusterable_cols
+
[
"lat"
])]:
try
:
weights
.
append
(
weights_dict
[
col2weight
[
col
]])
except
(
KeyError
,
TypeError
):
weights
.
append
(
default
)
weights
=
np
.
array
(
weights
,
dtype
=
np
.
float64
)
weights
=
np
.
where
(
weights
<
0
,
0.0
,
weights
)
if
config
.
metrics
.
method
.
lower
().
startswith
(
"haversine_plus_"
):
if
df
.
columns
.
get_loc
(
"lon"
)
-
df
.
columns
.
get_loc
(
"lat"
)
!=
1
:
raise
ValueError
(
"'lat' column is not followed by 'lon' column"
)
weights
=
[]
col2weight
=
{
c
:
(
"geo_dist"
if
c
==
"lon"
else
c
)
for
c
in
df
.
columns
}
for
col
in
df
.
columns
[
~
df
.
columns
.
isin
(
unclusterable_cols
+
[
"lat"
])]:
try
:
weights
.
append
(
weights_dict
[
col2weight
[
col
]])
except
(
KeyError
,
TypeError
):
weights
.
append
(
default
)
weights
=
np
.
array
(
weights
,
dtype
=
np
.
float64
)
else
:
# x and y should have their individual weights
unclusterable_cols
=
[
c
for
c
in
unclusterable_cols
if
c
not
in
[
"x"
,
"y"
]
]
selected_cols
=
[
c
for
c
in
df
.
columns
if
c
not
in
unclusterable_cols
]
weights
=
np
.
array
([
weights_dict
.
get
(
c
,
1.0
)
for
c
in
selected_cols
])
weights
=
np
.
where
(
weights
<
0
,
0.0
,
weights
)
return
weights
...
...
@@ -185,7 +193,7 @@ def calc_distance_matrix_haversine_plus(df, config):
return
HollowSymmetricMatrix
(
data
=
numba_calc_distance_matrix_haversine_plus
(
df
.
to_numpy
(),
df
=
df
.
to_numpy
(),
weights_array
=
weights_dict_to_np_array
(
df
,
config
=
config
),
method
=
method
,
),
...
...
@@ -291,17 +299,14 @@ def calc_distance_matrix_considering_correlation(df, config, domain):
df
.
insert
(
0
,
"x"
,
xvals
/
1000.0
)
df
=
df
.
drop
([
"lon"
,
"lat"
],
axis
=
1
)
# Make sure weights and df columns are consistent
weights_dict
=
config
.
get_clustering_opt
(
"obs_weights"
)
weights_array
=
np
.
array
([
weights_dict
.
get
(
c
,
1.0
)
for
c
in
df
.
columns
])
weights_array
=
np
.
where
(
weights_array
<
0
,
0.0
,
weights_array
)
covariance_matrix
=
df
.
corr
().
fillna
(
0
)
np
.
fill_diagonal
(
covariance_matrix
.
values
,
1.0
)
distance_matrix_data
=
(
calc_distance_matrix_data_considering_correlation_numba
(
df
.
to_numpy
(),
weights_array
,
covariance_matrix
.
to_numpy
()
df
=
df
.
to_numpy
(),
weights_array
=
weights_dict_to_np_array
(
df
,
config
=
config
),
covariance_matrix
=
covariance_matrix
.
to_numpy
(),
)
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment