HW#2: Visualizing Historical Temperature Changes in Singapore#

Objectives

This homework will provide you a real-world example in terms of:

  • how to visualize the GEV distribution

  • how to visualize time series of temperature anomalies and the trend

  • how to make beautiful, accessible, and understandable data visualizations

Happy coding!

Submission Guide

Deadline: Sunday 11:59 pm, 16th November 2025 (Note: Late submissions will not be accepted).

Please upload your solutions to Canvas in a Jupyter Notebook format with the name “Homework2_StudentID.ipynb”. Make sure to write down your student ID and full name in the cell below.

For any questions, feel free to contact Prof. Xiaogang HE (hexg@nus.edu.sg), Kewei ZHANG (kewei_zhang@u.nus.edu) or Yifan LU (yifan_lu@u.nus.edu).

## Fill your student ID and full name below.

# Student ID:
# Full name:

Data: You will need to use the historical (1982-2020) daily mean air temperature data measured at Singapore’s Changi station for this homework. You can create a DataFrame using Pandas by reading the file “../../assets/data/Changi_daily_temperature.csv”.

Task 1: Visualize the GEV distribution (30 marks)#

To visualize the GEV distribution, you can:

  1. Fit the Generalized Extreme Value (GEV) distribution to annual maximum daily temperature data and estimate the GEV parameters using the L-Moments method.

  2. Plot the probability density function (PDF) curve to represent the distribution directly.

  3. Calculate the return level for a 10-year event and mark it on the plot.

  4. Shade the plot with different colors to distinguish areas above and below the calculated return level.

  5. Ensure that the necessary non-data elements are included, such as title, x/y axis labels, legend, etc. (you can check the Matplotlib tutorial for details).

Bonus: 10 marks

Illustrate how return levels vary across different return periods (e.g., 2, 5, 10, 20, 50, 100 years) using interactive sliders or dashboards. Resources such as the Plotly documentation and this tutorial video may be helpful for this task.

# Your solutions go here.
# Use the + icon in the toolbar to add a cell.
import numpy as np
import pandas as pd
import matplotlib as mpl
import math
import matplotlib.pyplot as plt
from scipy.stats import genextreme as gev
from scipy.optimize import fsolve
# Calculate L-moments based on samples
def samlmom3(sample):
    """
    samlmom3 returns the first three L-moments of samples
    sample is the 1-d array
    n is the total number of the samples, j is the j_th sample
    """
    n = len(sample)
    sample = np.sort(sample.reshape(n))[::-1]
    b0 = np.mean(sample)
    b1 = np.array([(n - j - 1) * sample[j] / n / (n - 1)
                   for j in range(n)]).sum()
    b2 = np.array([(n - j - 1) * (n - j - 2) * sample[j] / n / (n - 1) / (n - 2)
                   for j in range(n - 1)]).sum()
    lmom1 = b0
    lmom2 = 2 * b1 - b0
    lmom3 = 6 * (b2 - b1) + b0

    return lmom1, lmom2, lmom3

def pargev_fsolve(lmom):
    """
    pargev_fsolve estimates the parameters of the Generalized Extreme Value 
    distribution given the L-moments of samples
    """
    lmom_ratios = [lmom[0], lmom[1], lmom[2] / lmom[1]]
    f = lambda x, t: 2 * (1 - 3**(-x)) / (1 - 2**(-x)) - 3 - t
    G = fsolve(f, 0.01, lmom_ratios[2])[0]
    para3 = G
    GAM = math.gamma(1 + G)
    para2 = lmom_ratios[1] * G / (GAM * (1 - 2**-G))
    para1 = lmom_ratios[0] - para2 * (1 - GAM) / G
    return para1, para2, para3
# load and resample data
daytem = pd.read_csv('../assets/data/Changi_daily_temperature.csv', 
                     index_col=0, header=0, parse_dates=True)
ymaxtem = daytem.resample('YE').max()
ymaxtem.index = ymaxtem.index.year
ymaxtem.index.name = "Year"
ymaxtem.columns = ["Yearly Max Temperature"]

# GEV using L-moments method
LMM = samlmom3(ymaxtem.values)
locLMM, scaleLMM, cLMM = pargev_fsolve(LMM)
LMMGEV = gev(cLMM, loc=locLMM, scale=scaleLMM) 

# plot estimated PDF
bins = LMMGEV.ppf(np.linspace(0,1,1000))
fig, ax = plt.subplots()
ax.plot(bins, LMMGEV.pdf(bins),
         color='k', lw=2, label='Estimated PDF (MLE)')
ax.set_xlabel("Yearly max temperature [°C]")
ax.set_ylabel('PDF')
ax.set_ylim([0, 1])
ax.set_title('GEV distribution')
ax.legend(frameon=False, loc='upper left')
ax.spines[['top', 'right']].set_visible(False)
# calculate return level of 10-year event and mark it on the figure
return_period = 10
prob = 1-1/return_period
return_level = LMMGEV.ppf(prob)
plt.axvline(return_level, color='0.2', linestyle='dashed')
bbox = dict(boxstyle="round", fc="0.8")
arrowprops = dict(
    arrowstyle="->",
    connectionstyle="angle,angleA=90,rad=10")
plt.annotate('return level = %.2f' % (return_level),
            (return_level, 0.4), xytext=(30, 40), textcoords='offset points',
            bbox=bbox, arrowprops=arrowprops)

# color to distingusih
return_level_id = np.size(bins[bins<=return_level])
color = ['#56B4E9', '#D55E00']
ax.fill_between(bins[:return_level_id], np.zeros((return_level_id,)), LMMGEV.pdf(bins[:return_level_id]),
                 color=color[0], alpha=0.5)
ax.fill_between(bins[return_level_id:], np.zeros((1000-return_level_id,)), LMMGEV.pdf(bins[return_level_id:]), 
                 color=color[1], alpha=0.5)

plt.show()
../../_images/47a114da5281e0e00f4a5edf5078beac46f88769c44ff9122efb8c80e62a021a.png
import plotly.graph_objects as go
import plotly
from IPython.display import display, HTML
plotly.offline.init_notebook_mode()
display(HTML(
    '<script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_SVG"></script>'
))
# Function to calculate return levels
def calculate_return_level(return_period):
    prob = 1 - 1 / return_period
    return LMMGEV.ppf(prob)

# Create the initial plot
fig = go.Figure()
return_period_list = [2, 5, 10, 20, 50, 100]

# Add traces, one for each slider step
for rp in return_period_list:
    return_level = calculate_return_level(rp)
    return_level_id = np.size(bins[bins<=return_level])
    fig.add_trace(go.Scatter(
        x=bins[:return_level_id],
        y=LMMGEV.pdf(bins[:return_level_id]),
        fill='tozeroy',
        fillcolor='rgba(86, 180, 233, 0.5)',
        line=dict(color='black'),
        name='Below the return level',
        mode='lines',
        showlegend=False,
        visible=False,
    ))
    fig.add_trace(go.Scatter(
        x=bins[return_level_id:],
        y=LMMGEV.pdf(bins[return_level_id:]),
        fill='tozeroy',
        fillcolor='rgba(213, 94, 0, 0.5)',
        line=dict(color='black'),
        mode='lines',
        name='Above the return level',
        showlegend=False,
        visible=False,
    ))
    fig.add_trace(go.Scatter(
        x=[return_level, return_level],
        y=[0, 1],
        line=dict(color="grey", dash="dash"),
        name=f"{rp}-year",
        mode='lines',
        showlegend=False,
        visible=False,
    ))

# Initially show the traces for 10-year return period
fig.data[6].visible = True
fig.data[7].visible = True
fig.data[8].visible = True

# Add the slider steps
steps = []
for i in range(len(return_period_list)):
    step = dict(
        method="update",
        args=[{"visible": [False] * len(fig.data)},
            ],
        label=f"{return_period_list[i]}-year"
    )
    step["args"][0]["visible"][i*3] = True
    step["args"][0]["visible"][i*3+1] = True
    step["args"][0]["visible"][i*3+2] = True
    steps.append(step)

# Add the slider
sliders = [dict(
    active=2,  # Corresponds to the initial return period of 10 years
    currentvalue={"prefix": "Return period: "},
    pad={"t": 50},
    steps=steps
)]

fig.update_layout(
    sliders=sliders,
    title={'text': "GEV distribution with return period slider", 'x': 0.5, 'xanchor': 'center'},
    xaxis_title="Yearly max temperature [°C]",
    yaxis_title="PDF",
    yaxis=dict(range=[0, 1]),
    width=700, height=500
)

# Show the figure
fig.show()

Task 2: Visualize the trend of temperature anomaly (70 marks)#

Q1: Visualize the time series and local trend (slope) of historical temperature anomalies for Singapore (35 marks)#

  • Calculate the annual mean temperature from the daily data. This will result in a dataset of 39 values — one per year. (5 marks)

  • Calculate the annual temperature anomalies using the first 10-year period as the baseline. (Hint: subtract the mean temperature over 1982 to 1991 from the annual mean temperature for each year.) (5 marks)

  • Visualize the change in these annual temperature anomalies over time, ensuring that you include essential non-data elements. (10 marks)

  • Compute a rolling linear regression (10-year moving window) of annual mean temperature anomalies to estimate how the local warming rate evolves through time. (10 marks)

  • Plot the resulting slope (°C per decade) against the central year. (5 marks)

# Your solutions go here.
# Use the + icon in the toolbar to add a cell.
from sklearn.linear_model import LinearRegression

# calculate the annual mean temperature
yrtem = daytem.resample('YE').mean()
yrtem.index = yrtem.index.year
yrtem.index.name = "Year"
yrtem.columns = ["Yearly Mean Temperature"]

# calculate the annual temperature anomalies
tembase = np.mean(yrtem.loc[1982: 1991], axis=0)
temanom = yrtem - tembase

# Function to compute slope in a window
def rolling_slope(y_window):
    # Convert index to float for regression
    idx = y_window.index.values.astype(float)
    model = LinearRegression()
    model.fit(idx.reshape(-1,1), y_window.values)
    return model.coef_[0]

# Compute 10-year rolling slope (centered)
temanom_slope10 = temanom.rolling(10, center=True).apply(rolling_slope, raw=False)
fig, ax1 = plt.subplots(figsize=(10,5))

# Plot anomalies normally
ax1.plot(temanom.index, temanom['Yearly Mean Temperature'], color='k', label='Annual anomaly')
ax1.axhline(0, color='gray', linestyle='--', alpha=0.5)
ax1.set_ylabel("Temperature anomaly [°C]", color='k')
ax1.tick_params(axis='y', labelcolor='k')
ax1.set_xlabel("Year")

# Draw the figure to get axis positions
fig.canvas.draw()

# Get y-limits and zero fraction
y1min, y1max = ax1.get_ylim()
zero_rel = (0 - y1min) / (y1max - y1min)

# Slope min/max
slope_min = np.nanmin(temanom_slope10.values)
slope_max = np.nanmax(temanom_slope10.values)

# Distance from zero to extremes
dist_up = slope_max
dist_down = -slope_min

# Compute scale to fit all values, keeping zero at same relative position
scale = max(dist_up / (1 - zero_rel), dist_down / zero_rel)
scale *= 1.05
ax2_min_final = -scale * zero_rel
ax2_max_final = scale * (1 - zero_rel)

# Secondary y-axis
ax2 = ax1.twinx()
ax2.plot(temanom_slope10.index, temanom_slope10.values, color='b', label='10-year rolling trend')
ax2.axhline(0, color='gray', linestyle='--', alpha=0.5)
ax2.set_ylim(ax2_min_final, ax2_max_final)
ax2.set_ylabel("Trend strength [°C/year]", color='b')
ax2.tick_params(axis='y', labelcolor='b')

# Title and legend
ax1.set_title("Annual Temperature Anomalies and 10-year Rolling Trend")
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, frameon=False, loc='lower right')

plt.show()
../../_images/d629f515560fb1a284e3ab6c4c287b8067b26aed5961354d42d5c18ac2ffbce3.png

Q2: Add climate stripes for Singapore (35 marks)#

  • Reproduce the climate stripes for Singapore using Matplotlib. (20 marks)

  • Overlay the annual temperature anomaly time series and its local trend (from Q1) on top of your generated climate stripes. (15 marks)

Tips:

  • Refer to this GitHub repository for guidance on creating climate stripes.

  • Fine-tune the aesthetics of your chart (e.g., color palette of the diverging colorbar) to ensure it is visually appealing and accessible (colorblind safe).

# Your solutions go here.
# Use the + icon in the toolbar to add a cell.
fig, ax1 = plt.subplots(frameon=True)
ax1.spines[:].set_visible(False)

# Climate stripes
mplnor = mpl.colors.Normalize(vmin=np.nanmin(temanom),
                              vmax=np.nanmax(temanom))
for i in range(temanom.size):
    ax1.axvline(temanom.index[i],
                color=plt.cm.RdBu_r(mplnor(temanom.iloc[i, 0])),
                linewidth=10)

# Plot anomaly line in gray
ax1.plot(temanom.index, temanom.values, linestyle='solid', color='gray', alpha=0.8)

# Zero alignment for regression line
anom_min, anom_max = np.nanmin(temanom.values), np.nanmax(temanom.values)
zero_rel = (0 - anom_min) / (anom_max - anom_min)

slope_min, slope_max = np.nanmin(temanom_slope10.values), np.nanmax(temanom_slope10.values)
dist_up = slope_max
dist_down = -slope_min

# Scale limits for regression axis to align zero
scale = max(dist_up / (1 - zero_rel), dist_down / zero_rel)
scale *= 1.05  # 5% leeway
ax2_min_final = -scale * zero_rel
ax2_max_final = scale * (1 - zero_rel)

# Secondary axis for 10-year regression
ax2 = ax1.twinx()
ax2.plot(temanom_slope10.index, temanom_slope10.values, linestyle='solid', color='k')
ax2.set_ylim(ax2_min_final, ax2_max_final)
ax2.set_ylabel("10-year rolling trend strength [°C/year]", color='k')

# Regression annotation
ax2.annotate("Trend",
             (temanom_slope10.index[-1] + 0.5, float(temanom_slope10.iloc[-1, 0])),
             color='k')

# Labels and limits
ax1.set_xlim(temanom_slope10.index.min() - 0.5, temanom_slope10.index.max() + 0.5)
ax1.set_xlabel("Year")
ax1.set_ylabel("Temperature anomaly [$^\circ C$]", color='gray')
ax1.tick_params(axis='y', labelcolor='gray')
ax1.set_title("Climate stripes with 10-year rolling trend")

plt.show()
../../_images/63e50551a43f74a2eca01835c7b68e1aef3de61f28e99a12b10e4e50d43014f0.png

Bonus: 10 marks

Create an interactive plot where users can hover over climate stripes and trend lines to view detailed information. For inspiration, you can refer to this example.

import plotly.graph_objects as go

# Limits for plotting
ymax = temanom['Yearly Mean Temperature'].max() + 0.1
ymin = temanom['Yearly Mean Temperature'].min() - 0.1

# Normalize for colormap
mplnor = mpl.colors.Normalize(vmin=temanom['Yearly Mean Temperature'].min(),
                              vmax=temanom['Yearly Mean Temperature'].max())

# Function to convert RGBA to hex
def rgba_to_hex(rgba):
    return '#{:02x}{:02x}{:02x}'.format(int(rgba[0]*255),
                                        int(rgba[1]*255),
                                        int(rgba[2]*255))

# Align rolling slope with full index
slope_plot_aligned = temanom_slope10.reindex(temanom.index)
if isinstance(slope_plot_aligned, pd.DataFrame):
    slope_plot_aligned = slope_plot_aligned.iloc[:,0]  # convert to Series

# Prepare stripe colors
stripe_colors = [rgba_to_hex(plt.cm.RdBu_r(mplnor(temp))) for temp in temanom['Yearly Mean Temperature']]

# Create figure
fig = go.Figure()

# Add climate stripes as background rectangles
for year, color in zip(temanom.index, stripe_colors):
    fig.add_shape(
        type="rect",
        x0=year-0.5, x1=year+0.5,
        y0=ymin-0.5, y1=ymax+0.5,
        fillcolor=color,
        line=dict(width=0),
        layer='below'
    )

# Add annual temperature anomaly trace
fig.add_trace(go.Scatter(
    x=temanom.index,
    y=temanom['Yearly Mean Temperature'],
    mode='lines+markers',
    line=dict(color='gray'),
    name='Temp anomaly',
    marker=dict(size=6),
    customdata=temanom['Yearly Mean Temperature'],
    hovertemplate="<span style='color:gray'>Temp: %{customdata:.2f} °C</span><extra></extra>"
))

# Add 10-year rolling slope line on secondary y-axis (black) with hover
fig.add_trace(go.Scatter(
    x=slope_plot_aligned.index,
    y=slope_plot_aligned.values,
    mode='lines',
    line=dict(color='black', width=2),
    name='10-year slope',
    yaxis='y2',
    customdata=slope_plot_aligned.values,
    hovertemplate="<span style='color:black'>10-yr slope: %{customdata:.3f} °C/year</span><extra></extra>"
))

# Layout with dual y-axis
fig.update_layout(
    title={'text': 'Climate stripes with 10-year rolling trend', 'x': 0.5},
    xaxis_title='Year',
    yaxis=dict(title='Temperature anomaly [°C]', range=[ymin-0.5, ymax+0.5]),
    yaxis2=dict(title='10-year slope [°C/year]', overlaying='y', side='right'),
    width=900, height=500,
    hovermode="x unified",
    showlegend=True
)

# Remove gridlines
fig.update_xaxes(showgrid=False, zeroline=False)
fig.update_yaxes(showgrid=False, zeroline=False)

fig.show()