matplotlib/blssample.py at main · blortorbis/matplotlib · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
"""
Fetch U.S. Unemployment Rate and Consumer Price Index from the
Bureau of Labor Statistics public API (v1, no registration needed)
and plot them in two vertically stacked panels with matplotlib.

Series used:
  LNS14000000  - Unemployment Rate (seasonally adjusted)
  CUUR0000SA0  - CPI-U All Items, U.S. City Average (not seasonally adjusted)
"""

# --- Standard library imports (built into Python, no install needed) ---
import json           # Encode/decode JSON for sending to and receiving from the API
import ssl            # Handle HTTPS connections (needed for the SSL workaround below)
import urllib.request # Make HTTP requests — Python's built-in way to talk to web APIs
from datetime import datetime  # Create proper date objects that matplotlib understands

# --- Third-party imports (installed via "pip install matplotlib") ---
import matplotlib.pyplot as plt    # The main plotting interface; conventionally aliased as "plt"
import matplotlib.dates as mdates  # Helpers for formatting date-based x-axes

# ==========================================================================
# CONFIGURATION
# ==========================================================================

# The BLS public API endpoint. You POST JSON to this URL and get JSON back.
# v1 requires no registration; v2 offers higher limits but needs an API key.
# Docs: https://www.bls.gov/developers/home.htm
BLS_URL = "https://api.bls.gov/publicAPI/v1/timeseries/data/"

# Every BLS dataset has a unique "series ID." You can browse them at:
# https://www.bls.gov/data/  (click any subject, then look at the series ID)
#
# The two we're using:
#   LNS14000000 = Civilian unemployment rate (seasonally adjusted)
#   CUUR0000SA0 = CPI-U All Items, U.S. city average (not seasonally adjusted)
SERIES = ["LNS14000000", "CUUR0000SA0"]

# The free v1 API limits each request to 10 years of data. Both the start
# and the end year count, so 2015-2024 is already a full 10-year window and
# 2015-2025 would be 11 years — one too many.
# NOTE(review): an over-long window is presumably shortened by the API rather
# than rejected, which would silently drop the final year — confirm against
# the BLS API documentation.
#
# To cover 2005-2025 (21 calendar years) we therefore derive the request
# windows from the limit instead of hand-writing them:
#   [(2005, 2014), (2015, 2024), (2025, 2025)]
FIRST_YEAR = 2005
LAST_YEAR = 2025
MAX_YEARS_PER_REQUEST = 10
YEAR_RANGES = [
    (start, min(start + MAX_YEARS_PER_REQUEST - 1, LAST_YEAR))
    for start in range(FIRST_YEAR, LAST_YEAR + 1, MAX_YEARS_PER_REQUEST)
]

# SSL WORKAROUND: Some corporate networks intercept HTTPS traffic with their
# own certificate authority, which Python doesn't trust by default.
# These lines tell Python "don't verify the certificate." This is acceptable
# here because we're only reading public government data — not sending secrets.
#
# NOTE(review): with verification off, anything sitting between this machine
# and the server can alter the returned numbers undetected. Where possible,
# prefer trusting the corporate CA explicitly, e.g.
# ssl.create_default_context(cafile="corp-ca.pem"), and delete the two
# override lines below.
SSL_CTX = ssl.create_default_context()
SSL_CTX.check_hostname = False
SSL_CTX.verify_mode = ssl.CERT_NONE


# ==========================================================================
# DATA FETCHING
# ==========================================================================

def fetch_bls(series_ids, start_year, end_year, timeout=30):
    """Call the BLS public API and return the parsed JSON response.

    Args:
        series_ids: List of BLS series ID strings to request.
        start_year: First year to request (int or str; sent as a string).
        end_year: Last year to request (int or str; sent as a string).
        timeout: Seconds to wait on the network before giving up. Without a
            timeout a stalled connection would block the script indefinitely.

    Returns:
        The decoded JSON response body as a Python dict.

    Raises:
        urllib.error.URLError: If the request cannot be completed (this
            includes HTTP error statuses, raised as its HTTPError subclass).
        OSError: A timeout while reading the response may surface as a
            timeout error (an OSError subclass) rather than URLError.
        json.JSONDecodeError: If the response body is not valid JSON.
    """

    # Build the request body as a JSON string, then encode to bytes.
    # The API expects: {"seriesid": [...], "startyear": "YYYY", "endyear": "YYYY"}
    payload = json.dumps({
        "seriesid": series_ids,
        "startyear": str(start_year),
        "endyear": str(end_year),
    }).encode("utf-8")

    # Create an HTTP request. When you pass data=, urllib automatically sends
    # a POST request instead of a GET. We also set the Content-Type header so
    # the server knows we're sending JSON.
    req = urllib.request.Request(
        BLS_URL,
        data=payload,
        headers={"Content-Type": "application/json"},
    )

    # Send the request, read the response, and parse the JSON into a Python dict.
    # The "with" block ensures the connection is closed when we're done.
    # timeout= bounds how long we wait on a slow or unresponsive server.
    with urllib.request.urlopen(req, timeout=timeout, context=SSL_CTX) as resp:
        return json.loads(resp.read().decode("utf-8"))


def parse_series(raw_series):
    """Convert a BLS series response into chronologically sorted lists.

    The API returns entries like:
        {"year": "2020", "period": "M04", "value": "14.7", ...}

    We convert these into two parallel lists:
        dates  = [datetime(2020, 4, 1), ...]
        values = [14.7, ...]

    Args:
        raw_series: One series dict from the response; its "data" key holds
            the list of entries shown above.

    Returns:
        A (dates, values) tuple of two equal-length lists, ordered oldest
        first. Entries that are not monthly observations (for example the
        "M13" annual average) or whose value is not a number are left out.
    """
    points = []
    for entry in raw_series["data"]:
        period = entry["period"]  # e.g. "M01" (January) through "M12" (December)

        # Only monthly periods are plotted; skip quarterly/annual/etc. codes.
        if not period.startswith("M"):
            continue

        month = int(period[1:])  # "M04" -> 4

        # The API sometimes includes annual averages coded as "M13" — skip
        # those, along with anything else that is not a real calendar month.
        if not 1 <= month <= 12:
            continue

        # A month with no published figure can carry a non-numeric placeholder
        # (such as "-") instead of a number. Skip it rather than letting one
        # missing observation abort the whole run.
        try:
            value = float(entry["value"])
        except ValueError:
            continue

        # Create a datetime for the 1st of that month, paired with its value.
        points.append((datetime(int(entry["year"]), month, 1), value))

    # IMPORTANT: matplotlib expects chronological order. The API normally
    # returns data newest-first; sorting by date (rather than just reversing)
    # gives the right order no matter how the input happens to be arranged.
    points.sort(key=lambda point: point[0])

    dates = [when for when, _ in points]
    values = [value for _, value in points]
    return dates, values


# ==========================================================================
# MAIN DATA-FETCHING LOOP
# ==========================================================================

# Accumulator for everything we download, keyed by series ID. Each series
# gets its own pair of parallel lists that grow with every API call:
#   {"LNS14000000": {"dates": [datetime, ...], "values": [float, ...]},
#    "CUUR0000SA0":  {"dates": [datetime, ...], "values": [float, ...]}}
all_data = {}
for sid in SERIES:
    all_data[sid] = {"dates": [], "values": []}

print("Fetching data from BLS (this may take a moment)...")
for first_year, last_year in YEAR_RANGES:
    response = fetch_bls(SERIES, first_year, last_year)

    # Bail out right away if the API reports anything other than success;
    # there is nothing useful to plot from a failed call.
    succeeded = response["status"] == "REQUEST_SUCCEEDED"
    if not succeeded:
        print(f"API error: {response.get('message', 'unknown')}")
        raise SystemExit(1)

    # The response holds one entry per requested series ID. Parse each entry
    # and concatenate its points onto the matching accumulator lists
    # (list += list extends in place, just like .extend()).
    for chunk in response["Results"]["series"]:
        sid = chunk["seriesID"]
        chunk_dates, chunk_values = parse_series(chunk)
        bucket = all_data[sid]
        bucket["dates"] += chunk_dates
        bucket["values"] += chunk_values

# Report how many monthly observations were collected for each series.
unemp_months = len(all_data["LNS14000000"]["dates"])
cpi_months = len(all_data["CUUR0000SA0"]["dates"])
print(f"  Unemployment Rate: {unemp_months} months")
print(f"  CPI-U All Items:   {cpi_months} months")

# ==========================================================================
# PLOTTING
#
# The core matplotlib pattern:
#   1. Create a figure (the window) and one or more axes (individual plots)
#   2. Call methods on each axis to draw lines, shading, labels, etc.
#   3. Call plt.show() to display the interactive window
# ==========================================================================

# plt.subplots() creates the figure and axes in one call.
#   2, 1         = 2 rows, 1 column (two plots stacked vertically)
#   figsize      = width and height in inches
#   sharex=True  = both panels share the same x-axis, so zooming one zooms both
#
# It returns:
#   fig          = the overall figure (the window itself)
#   (ax1, ax2)   = the two individual axes (plots) — we unpack them from a tuple
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 7), sharex=True)

# fig.suptitle() sets a "super title" for the whole figure, above both panels.
# This is different from ax.set_title(), which titles a single panel.
fig.suptitle("U.S. Bureau of Labor Statistics — Key Indicators (2005–2025)",
             fontsize=14, fontweight="bold")

# -- Unemployment Rate (top panel) --
unemp = all_data["LNS14000000"]

# ax1.plot() draws a line chart. Arguments:
#   First arg  = x values (dates)
#   Second arg = y values (unemployment %)
#   color      = any CSS color name or hex code (e.g. "#4682B4")
#   linewidth  = thickness of the line in points
ax1.plot(unemp["dates"], unemp["values"], color="steelblue", linewidth=1.5)

# fill_between() shades the area from 0 up to the line.
# alpha controls transparency: 0.0 = invisible, 1.0 = fully opaque.
# 0.15 gives a subtle hint of color under the line.
ax1.fill_between(unemp["dates"], unemp["values"], alpha=0.15, color="steelblue")

ax1.set_ylabel("Unemployment Rate (%)")   # Label for the y-axis
ax1.set_title("Civilian Unemployment Rate (Seasonally Adjusted)")  # Panel title
ax1.grid(True, alpha=0.3)  # Light gridlines (alpha makes them subtle)

# ANNOTATIONS — adding labeled arrows that point to interesting data points.
# This is one of matplotlib's more powerful features for storytelling with data.
for label, target_year, target_month in [("2008 Recession", 2009, 10),
                                          ("COVID-19", 2020, 4)]:
    # Find the index of this specific date in our data. Only the lookup is
    # guarded: list.index() raises ValueError when the date is absent, and we
    # don't want to hide an unrelated ValueError raised by the drawing call.
    try:
        idx = unemp["dates"].index(datetime(target_year, target_month, 1))
    except ValueError:
        continue  # If the exact date isn't in our data, just skip the annotation

    ax1.annotate(
        label,                          # The text to display
        xy=(unemp["dates"][idx],        # Where the arrow POINTS TO (data coordinates)
            unemp["values"][idx]),
        xytext=(30, 10),                # Where to place the text, relative to xy
        textcoords="offset points",     # xytext is an offset in points (1/72 inch), not data units
        arrowprops=dict(                # Style the arrow
            arrowstyle="->",            #   "->": line with arrowhead
            color="gray"),
        fontsize=9,
        color="dimgray",
    )

# -- Consumer Price Index (bottom panel) --
# Same pattern as above: plot the line, shade under it, add labels.
cpi = all_data["CUUR0000SA0"]
ax2.plot(cpi["dates"], cpi["values"], color="darkorange", linewidth=1.5)
ax2.fill_between(cpi["dates"], cpi["values"], alpha=0.15, color="darkorange")
ax2.set_ylabel("CPI-U Index (1982–84 = 100)")
ax2.set_title("Consumer Price Index — All Urban Consumers, All Items")
ax2.grid(True, alpha=0.3)

# DATE AXIS FORMATTING — uses the matplotlib.dates (mdates) module.
# Because we set sharex=True above, we only need to configure the x-axis on the
# bottom panel (ax2) — the top panel (ax1) inherits the same settings.
#
# YearLocator(2)       = place a tick mark every 2 years
# DateFormatter("%Y")  = display just the four-digit year (e.g. "2010", "2012")
ax2.xaxis.set_major_locator(mdates.YearLocator(2))
ax2.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
ax2.set_xlabel("Year")

# tight_layout() automatically adjusts padding between and around panels so that
# titles, labels, and tick marks don't overlap. Always call this before show().
plt.tight_layout()

# show() opens the interactive window. The script pauses here until you close it.
# Alternative: plt.savefig("chart.png") saves to a file instead of displaying.
plt.show()