plots.py 19.7 KB
Newer Older
1
#!/usr/bin/env python3
2
"""Code related to producing plots."""
3
4
from pathlib import Path

Paulo Medeiros's avatar
Paulo Medeiros committed
5
6
7
import humanize
import numpy as np
import pandas as pd
8
import plotly.express as px
9
import plotly.graph_objects as go
10

11
from .logs import get_logger
12

Paulo Medeiros's avatar
Paulo Medeiros committed
13
14
15
16
17
18
# Options for plotly's "fig.show" config. Note that this "config" is not our
# own parsed config file. For the supported keys, see
# <https://github.com/plotly/plotly.js/blob/master/src/plot_api/
#      plot_config.js>
DEF_FIGSHOW_CONFIG = {"editable": True, "displaylogo": False}


19
def get_obs_scattergeo_trace(df, trace_name=None, marker=None, visible=True):
    """Get a go.Scattergeo object from observations in dataframe df.

    Args:
        df (pandas.DataFrame): Observations to be plotted. The "lat" and "lon"
            columns give the marker positions. Columns whose names start
            with "_" are left out of the hover text.
        trace_name (str): Name of the trace. If given, it is also shown in
            bold at the top of the hover text. (Default value = None)
        marker (dict): Marker options. They are merged on top of the default
            marker style, overriding it where they clash.
            (Default value = None)
        visible: Passed to the "visible" opt of go.Scattergeo.
            (Default value = True)

    Returns:
        go.Scattergeo: Trace with one marker per row of df.

    """
    # Start from the default style and let the caller's options override it
    merged_marker = dict(
        color="blue", size=8, opacity=0.75, line=dict(width=1.0)
    )
    if marker is not None:
        merged_marker.update(marker)

    # Only the columns shown in the hover text are sent as customdata, so
    # that the "customdata[i]" indices used in the template always point to
    # the right column, regardless of where "_"-prefixed columns sit in df.
    hoverinfo_cols = [c for c in df.columns if not str(c).startswith("_")]
    hover_df = df[hoverinfo_cols]

    # Format popup hover text
    hover_lines = []
    if trace_name is not None:
        hover_lines.append("<b>%s</b><br>" % (trace_name,))
    for icol, col in enumerate(hoverinfo_cols):
        column = hover_df.iloc[:, icol]
        if pd.api.types.is_float_dtype(column):
            # d3-format spec: 5 decimals
            fmt = ": .5f"
        elif pd.api.types.is_datetime64_any_dtype(column):
            # d3-time-format spec
            fmt = "| %Y-%m-%d %H:%M:%S"
        else:
            fmt = ""
        hover_lines.append("%s: %%{customdata[%d]%s}" % (col, icol, fmt))

    return go.Scattergeo(
        name=trace_name,
        lat=df.lat,
        lon=df.lon,
        mode="markers",
        marker=merged_marker,
        customdata=hover_df,
        hovertemplate="<br>".join(hover_lines),
        hoverlabel=dict(namelength=0),
        visible=visible,
    )


62
63
64
65
66
67
68
69
70
def draw_boundaries(
    fig,
    domain,
    name="boundaries",
    corners=None,
    showlegend=True,
    legendgroup=None,
    **kwargs
):
    """Add to fig line segments connecting the given corners within the domain.

    Args:
        fig: Figure where the boundaries are to be drawn. It must provide
            an "add_trace" method (e.g., a go.Figure).
        domain (netatmoqc.domains.Domain): Model domain.
        name (str): Name of the boundaries (shown in legend).
            (Default value = "boundaries").
        corners: (x, y) coords of the corners of the closed boundary to be
            drawn. Consecutive corners are connected, and the last corner is
            connected back to the first. If corners is not passed, then the
            domain corners will be used. (Default value = None)
        showlegend (bool): Passed to go.Scattergeo. (Default value = True)
        legendgroup (str): Passed to go.Scattergeo. (Default value = None).
        **kwargs: Passed to the "line" opt of go.Scattergeo.

    Returns:
        The input figure, with the passed boundaries drawn.

    """
    if corners is None:
        corners = domain.grid.corners

    # Construct line segments
    # We interpolate using (x, y) instead of (lon, lat) to prevent distorted
    # line segments (segments that do not conform to the used projection).
    npts_per_segment = max(max(domain.grid.nlon, domain.grid.nlat) // 100, 5)
    seg_xvals = []
    seg_yvals = []
    for istart, start in enumerate(corners):
        end = corners[(istart + 1) % len(corners)]
        seg_xvals.append(np.linspace(start[0], end[0], npts_per_segment))
        seg_yvals.append(np.linspace(start[1], end[1], npts_per_segment))

    # Convert only the actual boundary points to (lon, lat). Nothing
    # uninitialised is ever handed over to the projection routine.
    pt_lons, pt_lats = domain.proj.xy2lonlat(
        np.concatenate(seg_xvals), np.concatenate(seg_yvals)
    )
    pt_lons = np.asarray(pt_lons, dtype=float)
    pt_lats = np.asarray(pt_lats, dtype=float)

    # Add all segments as a single trace, in go.Scattergeo's "single-trace"
    # style: (start, end, NaN) triplets, where NaN marks the gap between
    # consecutive pieces.
    lons = np.full(3 * pt_lons.size, np.nan)
    lats = np.full(3 * pt_lats.size, np.nan)
    # Start points
    lons[::3] = pt_lons
    lats[::3] = pt_lats
    # End points: each piece links a point to the previous one. Rolling makes
    # the first point link to the last one, which closes the boundary.
    lons[1::3] = np.roll(pt_lons, 1)
    lats[1::3] = np.roll(pt_lats, 1)

    fig.add_trace(
        go.Scattergeo(
            name=name,
            lat=lats,
            lon=lons,
            mode="lines",
            line=kwargs,
            legendgroup=legendgroup,
            showlegend=showlegend,
        )
    )

    return fig


136
def draw_grid_pts(
    fig, grid, display_grid_max_gsize=None, name="Grid", **marker_opts
):
    """Add to fig a trace containing a selection of grid points.

    Args:
        fig: Figure where the grid points are to be drawn.
        grid: Grid whose points are to be drawn. Its "x_spacing" attribute
            and "ij2lonlat_map" method are used.
        display_grid_max_gsize: Used, together with the grid spacing, to work
            out how many grid points to skip between drawn points. Nothing
            is drawn if it is <= 0. If not passed, the grid spacing is used,
            which means that all points are drawn. (Default value = None)
        name (str): Name of the trace (shown in legend).
            (Default value = "Grid")
        **marker_opts: Passed to the "marker" opt of go.Scattergeo.

    Returns:
        The input figure, with the selected grid points drawn (if any).

    """
    max_gsize = display_grid_max_gsize
    if max_gsize is None:
        max_gsize = grid.x_spacing
    if max_gsize <= 0:
        # Nothing to draw
        return fig

    # Draw one grid point every "stride" points along each grid direction
    stride = max(1, int(max_gsize / grid.x_spacing))
    lons, lats = grid.ij2lonlat_map()

    trace_name = name
    if stride > 1:
        trace_name = name + " (every %s point of)" % (humanize.ordinal(stride))

    selection = (slice(None, None, stride), slice(None, None, stride))
    grid_pts_trace = go.Scattergeo(
        name=trace_name,
        lat=lats[selection].flatten(),
        lon=lons[selection].flatten(),
        mode="markers",
        marker=marker_opts,
    )
    fig.add_trace(grid_pts_trace)

    return fig


161
162
def get_domain_fig(
    domain,
    # Show 1 grid point every "display_grid_max_gsize" meters.
    # Don't show any grid point if display_grid_max_gsize <=0.
    display_grid_max_gsize=0,
    lonrange=None,
    latrange=None,
    ezone=None,
    obs_df=None,
    **kwargs
):
    """Return map representation of "domain" (a ".domains.Domain" object).

    Args:
        domain (netatmoqc.domains.Domain): Model domain.
        display_grid_max_gsize: Passed on to draw_grid_pts.
            (Default value = 0)
        lonrange: Range of longitudes shown in the map. If not passed, it is
            taken from the domain, with a 2-degree margin, and clipped to
            [-180, 180]. (Default value = None)
        latrange: Range of latitudes shown in the map. If not passed, it is
            taken from the domain, with a 2-degree margin, and clipped to
            [-90, 90]. (Default value = None)
        ezone (bool): Whether or not to consider the extension zone. If not
            passed, the extension zone is considered whenever
            domain.grid.nx_ezone > 0. (Default value = None)
        obs_df (pandas.DataFrame): Observations to be added to the map.
            (Default value = None)
        **kwargs: Accepted, but not used in this function.

    Returns:
        go.Figure: Map with the domain boundaries and, depending on the
        passed args, grid points, observations, extension zone and
        subdomains.

    """
    # Should we plot the extension zone?
    if ezone is None:
        ezone = domain.grid.nx_ezone > 0

    # Define longitude range
    if lonrange is None:
        if domain.grid.nlon == 1:
            lon_bounds = (-180, 180)
        elif ezone:
            lon_bounds = (domain.ezone_minlon, domain.ezone_maxlon)
        else:
            lon_bounds = (domain.minlon, domain.maxlon)
        lonrange = np.clip(
            (lon_bounds[0] - 2.0, lon_bounds[1] + 2.0), -180, 180
        )

    # Define latitude range
    if latrange is None:
        if domain.grid.nlat == 1:
            lat_bounds = (-90, 90)
        elif ezone:
            lat_bounds = (domain.ezone_minlat, domain.ezone_maxlat)
        else:
            lat_bounds = (domain.minlat, domain.maxlat)
        latrange = np.clip((lat_bounds[0] - 2.0, lat_bounds[1] + 2.0), -90, 90)

    # Make main, base figure
    fig = go.Figure(go.Scattergeo())

    # Map from our projection names to the ones plotly knows about
    plotly_proj_names = {
        "merc": "mercator",
        "stere": "stereographic",
        "lcc": "conic conformal",
    }
    center = domain.center_lonlat
    fig.update_geos(
        # resolution is either 50 (higher) or 110 (lower)
        resolution=50,
        projection_type=plotly_proj_names[domain.proj.name],
        center={"lon": center[0], "lat": center[1]},
        showland=True,
        showcountries=True,
    )

    title_opts = {
        "text": "{} Domain Boundaries and Grid ({} Projection)".format(
            domain.name, domain.proj.full_name
        ),
        "xanchor": "center",
        "x": 0.5,
        "yanchor": "top",
        "y": 0.9,
    }
    legend_opts = {
        "orientation": "v",
        "xanchor": "left",
        "x": 1.0,
        "yanchor": "middle",
        "y": 0.5,
        # Use "reversed" instead of "normal" because we can't directly
        # control the order of the layers drawn next. We then draw them
        # from lower to upper and reverse the legend.
        "traceorder": "reversed",
    }
    geo_opts = {
        "lataxis": {"range": latrange, "showgrid": True, "dtick": 10},
        "lonaxis": {"range": lonrange, "showgrid": True, "dtick": 15},
    }
    fig.update_layout(
        height=800,
        margin={"r": 0, "l": 0, "b": 0},
        title=title_opts,
        legend=legend_opts,
        geo=geo_opts,
    )

    # Draw selection of grid points
    # (a) Support grid (if used)
    if domain.thinning_grid is not None:
        fig = draw_grid_pts(
            fig,
            domain.thinning_grid,
            display_grid_max_gsize=display_grid_max_gsize,
            name="Support Grid",
            color="red",
            size=1.5,
            opacity=0.5,
        )
    # (b) Main grid
    fig = draw_grid_pts(
        fig,
        domain.grid,
        display_grid_max_gsize=display_grid_max_gsize,
        color="black",
        size=1.5,
    )

    # Add observations, as well as line segments to mark the extension zone,
    # subdomain and domain boundaries. Layers are added from lower to upper.
    # (a) Observations
    if obs_df is not None:
        fig.add_trace(
            get_obs_scattergeo_trace(obs_df, trace_name="Observations")
        )

    # (b) Extension zone
    if ezone:
        fig = draw_boundaries(
            fig,
            domain,
            name="Extension Zone",
            corners=domain.grid.ezone_corners,
            color="blue",
            dash="dash",
        )

    # (c) Subdomain boundaries
    if domain.n_subdomains > 1:
        splits = domain.split()
        # Go through the subdomains from last to first. All of them share a
        # single legend entry, which is attached to the first subdomain.
        for isub in reversed(range(len(splits))):
            fig = draw_boundaries(
                fig,
                splits[isub],
                name="Subdomains",
                showlegend=(isub == 0),
                legendgroup="Subdomains",
                color="black",
                width=1,
            )

    # (d) Draw main domain boundaries
    fig = draw_boundaries(fig, domain, name="Domain", color="red")

    return fig


def make_clustering_fig(df, domain, **kwargs):
    """Make fig produced by/in "clustering" command/app.

    Negative values of "cluster_label" flag observations that did not end up
    in any cluster: -1 (rejected), -2 (outliers removed after the main
    clustering), -3 (outliers found in the preliminary clustering),
    -4 (missed due to domain splitting) and -5 (moving stations).

    Args:
        df (pandas.dataframe): Dataframe with clustering data. The
            "cluster_label" and "original_cluster_label" columns are used, if
            present. The passed dataframe is not modified.
        domain (netatmoqc.domains.Domain): Model domain.
        **kwargs: Passed on to the domain.get_fig method.

    Returns:
        Figure (as returned by domain.get_fig) depicting the clustering
        results, with one trace per group of observations.

    """
    # Make sure df index is sequential. This also returns a new dataframe, so
    # the columns added below do not show up in the caller's dataframe.
    df = df.reset_index(drop=True)

    if "cluster_label" not in df.columns:
        df["cluster_label"] = np.nan
    if "original_cluster_label" not in df.columns:
        df["original_cluster_label"] = df["cluster_label"]

    # Add a col to df with descriptive labels for plot
    label_counts = df["cluster_label"].value_counts()
    orig_label_counts = df["original_cluster_label"].value_counts()

    # Vectorize setting of plot labels. About 25x faster than looping over df.
    def _set_plot_label(label, orig_label):
        if label == -1:
            # Obs removed in the normal way with clustering
            legend_label = "Rejected: "
            label_count = label_counts[label]
        elif label == -2:
            # Outliers found after main clustering
            legend_label = "Cluster {}, removed outliers:".format(
                int(orig_label)
            )
            # N_removed = N_original - N_remaining. There may be no obs left
            # in the cluster, in which case its label is absent from
            # label_counts.
            label_count = orig_label_counts[orig_label] - label_counts.get(
                orig_label, 0
            )
        elif label == -3:
            legend_label = "Outliers, preliminary clustering:"
            label_count = label_counts[label]
        elif label == -4:
            legend_label = "Missed (due to domain splitting):"
            label_count = label_counts[label]
        elif label == -5:
            legend_label = "Moving stations:"
            label_count = label_counts[label]
        else:
            legend_label = "Cluster {}, accepted:".format(int(label))
            label_count = label_counts[label]
        legend_label += " {} obs".format(int(label_count))
        return legend_label

    set_plot_label = np.vectorize(_set_plot_label, otypes=[str])
    df["_plot_label"] = set_plot_label(
        df["cluster_label"], df["original_cluster_label"]
    )

    # Define colours to be used in the plot
    # See colorscales at <https://plotly.com/python/builtin-colorscales>
    color_discrete_sequence = px.colors.qualitative.Light24

    # Define symbols to be used in the plot
    # See all symbols at
    # <https://plotly.com/python/marker-style/#custom-marker-symbols>
    symbols = [
        "circle",
        "octagon",
        "hexagon2",
        "circle-dot",
        "octagon-dot",
        "circle-cross",
        "circle-x",
    ]

    # Now assign appropriate colors and symbols to each observation row.
    # Colours are looked up using the original labels, so these need to be
    # in the maps too, even when no obs is left in the corresponding cluster.
    # Current labels come first, so they get the same colours and symbols
    # regardless of whether or not such emptied clusters exist.
    current_labels = df["cluster_label"].unique()
    original_labels = df["original_cluster_label"].unique()
    cluster_labels = pd.unique(
        np.concatenate(
            [
                current_labels[current_labels > -1],
                original_labels[original_labels > -1],
            ]
        )
    )
    ncolors = len(color_discrete_sequence)
    label2color_map = {}
    label2symbol_map = {}
    for ilab, label in enumerate(cluster_labels):
        label2color_map[label] = color_discrete_sequence[ilab % ncolors]
        # Move on to the next symbol whenever we run out of colours
        label2symbol_map[label] = symbols[(ilab // ncolors) % len(symbols)]

    def _label2symbol(label):
        if label < 0:
            return "x-open"
        return label2symbol_map[label]

    label2symbol = np.vectorize(_label2symbol, otypes=[str])

    def _label2color(label):
        if label < 0:
            return "black"
        return label2color_map[label]

    label2color = np.vectorize(_label2color, otypes=[str])

    # Removed outliers keep the colour of the cluster they were removed from
    df["_plot_color"] = label2color(df["original_cluster_label"])
    df["_plot_symbol"] = label2symbol(df["cluster_label"])

    #########################
    # Now create the figure #
    #########################
    fig = domain.get_fig(max_ngrid=0, **kwargs)
    fig.update_layout(
        height=600,
        clickmode="event+select",
        title=None,
        margin=dict(r=0, l=0, b=0, t=0),
        legend_title="<b>Clustering of Observations</b>",
        legend_traceorder="normal",
    )

    # Add each group as a different trace
    for label, cluster_df in df.groupby("_plot_label", sort=False):
        marker = dict(
            color=cluster_df["_plot_color"].iloc[0],
            symbol=cluster_df["_plot_symbol"].iloc[0],
        )

        if cluster_df["cluster_label"].iloc[0] < 0:
            # Obs not in any cluster: Hidden until selected in the legend
            trace_visible = "legendonly"
            marker.update(dict(size=5, opacity=0.25))
        else:
            trace_visible = True

        trace = get_obs_scattergeo_trace(
            cluster_df,
            trace_name=label,
            marker=marker,
            visible=trace_visible,
        )
        fig.add_trace(trace)

    return fig
455
456


457
def show_cmd_get_fig_from_dataframes(args, dataframes, domain):
    """Make fig produced by "show" command.

    Args:
        args (argparse.Namespace): Parsed command line arguments.
        dataframes (dict): Mapping from file names to the pandas.dataframe
            objects with the data to be plotted as scattergeo.
        domain (netatmoqc.domains.Domain): Model domain.

    Returns:
        go.Figure: Fig composed with data in the input dataframes, one trace
        per dataframe.

    """
    logger = get_logger(__name__, args.loglevel)

    fig = domain.get_fig(max_ngrid=0)
    fig.update_layout(
        title={
            "text": "NetatmoQC Output",
            "xanchor": "center",
            "x": 0.5,
            "yanchor": "top",
            "y": 0.9,
        },
        margin={"r": 0, "l": 0, "b": 0},
    )

    # We'll use a fig dict so we can control the order in which data is added
    fig_dict = fig.to_dict()

    # Add contents from each file as a different trace
    obs_traces = []
    for fname, df in dataframes.items():
        n_obs = len(df.index)
        logger.info("Add data from file '%s' (%d obs)", fname, n_obs)

        if "accepted" in fname:
            trace_visible = True
            marker = {"color": "blue"}
        else:
            # Hidden until selected in the legend
            trace_visible = "legendonly"
            marker = {
                "size": 5,
                "opacity": 0.25,
                "symbol": "x-open",
                "color": "red" if "moving" in fname else "black",
            }

        trace_name = Path(fname).stem.replace("_", " ").title()
        trace_name = trace_name + " (%d obs)" % (n_obs)
        obs_traces.append(
            get_obs_scattergeo_trace(
                df, trace_name=trace_name, marker=marker, visible=trace_visible
            )
        )

    # Add data in reverse order. Needed for legend/layer order.
    fig_dict["data"] = list(reversed(obs_traces)) + fig_dict["data"]
    return go.Figure(fig_dict)
513
514
515
516
517
518
519


###########################################################
# Routines only used in apps/scattergeo_timeseries/app.py #
###########################################################


520
521
# For animations, see:
# <https://plotly.com/python/animations/#animated-figures-with-graph-objects>
522
def generate_single_frame(df, dataset_var, frame_duration, frame=None):
    """Generate a single frame of the animation produced by scattergeo app.

    Args:
        df (pandas.DataFrame): Observations shown in the frame.
        dataset_var (str): Name of the df column used to colour the markers.
        frame_duration: Duration of the frame and of the transition into it.
            Passed on to plotly's "animate" method.
        frame (dict): Frame to be populated. It must have a "name" entry,
            which is used to label the slider step. Its "data" entry is set
            in place. (Default value = None)

    Returns:
        tuple: (frame, slider_step), where frame is the input frame with its
        "data" entry set, and slider_step is the dict with the slider step
        that corresponds to the frame.

    Raises:
        KeyError: If frame has no "name" entry (which is always the case if
            frame is not passed).

    """
    if frame is None:
        # Dict is mutable and func args are created at func def time
        frame = {}

    # Controlling colour schemes
    if dataset_var == "temperature":
        color_scale = px.colors.diverging.RdBu_r
    else:
        color_scale = px.colors.sequential.haline_r

    marker = dict(
        color=df[dataset_var],
        colorscale=color_scale,
        opacity=0.5,
        line=dict(color="black", width=0.25),
        colorbar=dict(
            # "title.side" replaces the deprecated "titleside" attribute,
            # which is no longer accepted by recent plotly releases.
            title=dict(side="right"),
            ticks="outside",
            showticksuffix="last",
        ),
    )
    frame["data"] = get_obs_scattergeo_trace(df, marker=marker)

    frame_name = frame["name"]
    slider_step = dict(
        args=[
            [frame_name],
            dict(
                frame=dict(duration=frame_duration, redraw=True),
                mode="immediate",
                transition=dict(duration=frame_duration),
            ),
        ],
        label=frame_name,
        method="animate",
    )

    return frame, slider_step


564
def init_fig_dict(domain, dataset_var, frame_duration):
    """Initiate dict used to create plotly fig used in scattergeo app.

    Args:
        domain (netatmoqc.domains.Domain): Model domain.
        dataset_var (str): Name of the plotted variable (shown in the title).
        frame_duration: Duration of each frame when the animation is played.
            Transitions take half as long.

    Returns:
        tuple: (fig_dict, sliders_dict), where fig_dict is the dict
        representation of the fig, with an empty list of frames and the
        "Play" and "Pause" buttons in place, and sliders_dict is the dict
        with the slider configs, with an empty list of steps.

    """
    # We make the figure by constructing a dictionary and passing it to the
    # final plotly method.
    fig = domain.get_fig(max_ngrid=0)
    # resolution is either 50 (higher) or 110 (lower)
    fig.update_geos(resolution=110)
    fig_dict = fig.to_dict()
    fig_dict["frames"] = []

    # Figure layout
    fig_dict["layout"]["title"]["text"] = "NetAtmo Data: {}".format(
        dataset_var
    )

    play_button = dict(
        label="Play",
        method="animate",
        args=[
            # None: Animate all frames
            None,
            dict(
                frame=dict(duration=frame_duration, redraw=True),
                fromcurrent=True,
                transition=dict(
                    duration=frame_duration / 2,
                    easing="cubic-in-out",
                ),
            ),
        ],
    )
    pause_button = dict(
        label="Pause",
        method="animate",
        args=[
            # [None], in "immediate" mode: Interrupt the running animation.
            # Passing a bare None here would instead request all frames to
            # be animated, just as the "Play" button does.
            [None],
            dict(
                frame=dict(duration=0, redraw=True),
                mode="immediate",
                transition=dict(duration=0),
            ),
        ],
    )
    fig_dict["layout"]["updatemenus"] = [
        # <https://plotly.com/python/reference/layout/updatemenus>
        dict(
            type="buttons",
            direction="left",
            showactive=False,
            xanchor="right",
            yanchor="top",
            x=0.1,
            y=0,
            pad=dict(r=10, t=87),
            buttons=[play_button, pause_button],
        )
    ]

    sliders_dict = dict(
        active=0,
        len=0.9,
        yanchor="top",
        xanchor="left",
        x=0.1,
        y=0,
        pad=dict(b=10, t=50),
        currentvalue=dict(
            font=dict(size=20),
            prefix="DTG: ",
            visible=True,
            xanchor="right",
        ),
        transition=dict(
            duration=frame_duration / 2,
            easing="cubic-in-out",
        ),
        steps=[],
    )

    return fig_dict, sliders_dict