-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathcps.html
More file actions
389 lines (313 loc) · 27.7 KB
/
cps.html
File metadata and controls
389 lines (313 loc) · 27.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>CPS Microdata Guide | BD Economics</title>
<link rel="preconnect" href="https://cdnjs.cloudflare.com" crossorigin>
<link rel="stylesheet" href="style.css">
<meta name="description" content="Python tutorial: Working with Current Population Survey microdata for labor market analysis.">
<meta name="keywords" content="Current Population Survey, CPS, CPS microdata, CPS python, Current Population Survey Python, IMF API, Economics Dashboard, Trade Network, Trade Networks, Macroeconomics Dashboard, Macroeconomic Dashboard, Markets Dashboard, Market Dashboard, U.S. Economy, U.S. Economy Dashboard, US economy, US economy dashboard, US economy charts, US economy charts pdf, NetworkX trade, international trade networks, network analysis of trade, Census Bureau CPS Python, Census Bureau CPS Pandas, BLS CPS Pandas, BLS CPS Python">
<meta name="author" content="Brian Dew">
<link rel="canonical" href="https://bd-econ.com/cps.html">
<!-- Open Graph -->
<meta property="og:title" content="CPS Microdata Guide | BD Economics">
<meta property="og:description" content="Python tutorial: Working with Current Population Survey microdata for labor market analysis.">
<meta property="og:url" content="https://bd-econ.com/cps.html">
<meta property="og:type" content="article">
<meta property="og:image" content="https://bd-econ.com/images/01_bdlogo.png">
<!-- Twitter Card -->
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="CPS Microdata Guide | BD Economics">
<meta name="twitter:description" content="Python tutorial: Working with Current Population Survey microdata for labor market analysis.">
<meta name="twitter:image" content="https://bd-econ.com/images/01_bdlogo.png">
<link rel="apple-touch-icon" sizes="180x180" href="favicon/apple-icon-180x180.png">
<link rel="icon" type="image/png" sizes="32x32" href="favicon/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="favicon/favicon-16x16.png">
<link rel="manifest" href="favicon/manifest.json">
<meta name="theme-color" content="#ffffff">
<script>
(function() {
  // Runs inline in <head>, so data-theme is set on <html> before <body> is parsed.
  // A stored 'theme' value wins; otherwise fall back to the OS dark-mode preference.
  let saved = null;
  try {
    // Accessing localStorage throws a SecurityError when storage is blocked
    // (e.g. cookies/site data disabled); treat that as "nothing saved".
    saved = localStorage.getItem('theme');
  } catch (err) {
    saved = null;
  }
  if (saved) {
    document.documentElement.setAttribute('data-theme', saved);
  } else if (window.matchMedia('(prefers-color-scheme: dark)').matches) {
    document.documentElement.setAttribute('data-theme', 'dark');
  }
})();
</script>
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "TechArticle",
"headline": "CPS Microdata Python Tutorial",
"description": "Python tutorial: Working with Current Population Survey microdata for labor market analysis.",
"author": {
"@type": "Person",
"name": "Brian Dew"
},
"publisher": {
"@type": "Organization",
"name": "BD Economics",
"url": "https://bd-econ.com"
},
"mainEntityOfPage": "https://bd-econ.com/cps.html",
"datePublished": "2018-03-10",
"dateModified": "2026-01-17"
}
</script>
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "BreadcrumbList",
"itemListElement": [
{
"@type": "ListItem",
"position": 1,
"name": "Guides",
"item": "https://bd-econ.com/python.html"
},
{
"@type": "ListItem",
"position": 2,
"name": "CPS Microdata",
"item": "https://bd-econ.com/cps.html"
}
]
}
</script>
<!-- Google tag (gtag.js) -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-PGVF5S620Y"></script>
<script>
// Reuse an existing dataLayer queue if one is already defined, otherwise start an empty one.
window.dataLayer = window.dataLayer || [];
// Queue each call by pushing the raw `arguments` object (not a copied array) onto dataLayer.
function gtag(){dataLayer.push(arguments);}
// Queue a 'js' entry carrying the current timestamp.
gtag('js', new Date());
// Queue a 'config' entry for the same ID that the gtag.js loader script above is requested with.
gtag('config', 'G-PGVF5S620Y');
</script>
</head>
<body class="page-cps">
<a href="#main" class="skip-link">Skip to main content</a>
<header>
<nav aria-label="Main navigation">
<ul class="site-nav" id="menu">
<li class="nav-main"> <a href="index.html"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 139 17" fill="currentColor" class="brand-logo" role="img" aria-label="BD Economics"> <g transform="translate(0,1.5) scale(2.64,2.59)"> <rect x="0" y="0" width="1" height="5"/> <rect x="0" y="4" width="4" height="1"/> <rect x="2" y="2" width="2" height="1"/> <rect x="3" y="2" width="1" height="2"/> <rect x="8" y="0" width="1" height="5"/> <rect x="5" y="4" width="4" height="1"/> <rect x="5" y="2" width="2" height="1"/> <rect x="5" y="2" width="1" height="2"/> </g> <g transform="translate(27.7,15) scale(0.01857,-0.01857)"> <path transform="translate(0,0)" d="M94 0V700H518V622H178V390H512V312H178V78H524V0Z"/> <path transform="translate(672,0)" d="M310 -14Q195 -14 129.5 59.5Q64 133 64 272V428Q64 563 129.5 638.5Q195 714 310 714Q390 714 445.0 680.0Q500 646 528.0 588.5Q556 531 556 460V448H472V460Q471 506 454.5 546.0Q438 586 402.5 611.0Q367 636 310 636Q229 636 188.5 577.0Q148 518 148 422V278Q148 176 188.5 120.0Q229 64 310 64Q367 64 403.0 89.0Q439 114 455.5 154.0Q472 194 472 240V252H556V240Q556 169 528.0 111.5Q500 54 445.0 20.0Q390 -14 310 -14Z"/> <path transform="translate(1344,0)" d="M306 -14Q191 -14 125.5 59.5Q60 133 60 272V428Q60 563 125.5 638.5Q191 714 306 714Q422 714 487.0 638.5Q552 563 552 428V272Q552 133 487.0 59.5Q422 -14 306 -14ZM306 64Q387 64 427.5 120.0Q468 176 468 278V422Q468 518 427.5 577.0Q387 636 306 636Q225 636 184.5 577.0Q144 518 144 422V278Q144 176 184.5 120.0Q225 64 306 64Z"/> <path transform="translate(2016,0)" d="M73 0V700H241L443 36H455V700H539V0H371L169 664H157V0Z"/> <path transform="translate(2688,0)" d="M306 -14Q191 -14 125.5 59.5Q60 133 60 272V428Q60 563 125.5 638.5Q191 714 306 714Q422 714 487.0 638.5Q552 563 552 428V272Q552 133 487.0 59.5Q422 -14 306 -14ZM306 64Q387 64 427.5 120.0Q468 176 468 278V422Q468 518 427.5 577.0Q387 636 306 636Q225 636 184.5 577.0Q144 518 144 422V278Q144 176 184.5 120.0Q225 64 306 64Z"/> <path transform="translate(3360,0)" 
d="M46 0V700H206L300 36H312L406 700H566V0H488V664H476L382 0H230L136 664H124V0Z"/> <path transform="translate(4032,0)" d="M84 0V78H264V622H84V700H528V622H348V78H528V0Z"/> <path transform="translate(4704,0)" d="M310 -14Q195 -14 129.5 59.5Q64 133 64 272V428Q64 563 129.5 638.5Q195 714 310 714Q390 714 445.0 680.0Q500 646 528.0 588.5Q556 531 556 460V448H472V460Q471 506 454.5 546.0Q438 586 402.5 611.0Q367 636 310 636Q229 636 188.5 577.0Q148 518 148 422V278Q148 176 188.5 120.0Q229 64 310 64Q367 64 403.0 89.0Q439 114 455.5 154.0Q472 194 472 240V252H556V240Q556 169 528.0 111.5Q500 54 445.0 20.0Q390 -14 310 -14Z"/> <path transform="translate(5376,0)" d="M320 -14Q230 -14 169.0 19.5Q108 53 76.5 111.0Q45 169 45 242V272H129V248Q129 157 180.0 110.5Q231 64 320 64Q398 64 437.5 99.0Q477 134 477 190V196Q477 251 436.5 280.0Q396 309 305 322Q200 337 137.5 381.5Q75 426 75 516V528Q75 583 104.5 624.5Q134 666 186.0 690.0Q238 714 306 714Q385 714 440.5 685.0Q496 656 525.5 608.5Q555 561 555 504V462H471V498Q471 544 448.0 574.5Q425 605 387.0 620.5Q349 636 305 636Q267 636 233.5 623.0Q200 610 179.5 585.5Q159 561 159 525V519Q159 461 206.0 434.5Q253 408 347 394Q457 378 509.0 330.0Q561 282 561 202V190Q561 99 499.5 42.5Q438 -14 320 -14Z"/> </g> </svg></a> </li>
<li><a href="about.html">About</a> </li>
<li><a href="https://briandew.wordpress.com" target="_blank" rel="noopener">Blog <svg class="icon icon-external" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><path d="M15 3h6v6M10 14 21 3"/><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"/></svg><span class="sr-only"> (opens in new tab)</span></a> </li>
<li><a href="python.html" class="active" aria-current="page">Guides <span class="nav-arrow">↓</span></a>
<ul class="hidden">
<li><a href="getstarted.html">Setup</a></li>
<li><a href="imfapi1.html">IMF API</a></li>
<li><a href="blsapi.html">BLS API</a></li>
<li><a href="beaapi.html">BEA API</a></li>
<li><a href="censusapi.html">Census API</a></li>
<li><a href="treasuryapi.html">Treasury API</a></li>
<li><a href="cps.html">CPS Microdata</a></li>
</ul>
</li>
<li>
<a href="reports.html">Reports <span class="nav-arrow">↓</span></a>
<ul class="hidden">
<li><a href="chartbook.html">US Chartbook</a></li>
<li><a href="indicators.html">Economic Indicators</a></li>
<li><a href="gdpm.html">Monthly GDP</a></li>
<li><a href="imfweo.html">IMF WEO</a></li>
<li><a href="calendar.html">Release Calendar</a></li>
</ul>
</li>
<li><button class="theme-toggle" onclick="toggleTheme()" aria-label="Toggle dark mode"><span id="theme-icon"><svg class="icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><path d="M12 3a6 6 0 0 0 9 9 9 9 0 1 1-9-9Z"/></svg></span></button></li>
<li class="icon">
<button type="button" onclick="responsiveNav()" aria-label="Toggle navigation menu" aria-expanded="false">☰</button>
</li>
</ul>
</nav>
</header>
<div class="page-strip accent-teal">
<picture><source srcset="images/cps_strip.webp" type="image/webp"><img decoding="async" fetchpriority="high" src="images/cps_strip.jpg" alt="" aria-hidden="true" class="page-strip-img" width="1600" height="200"></picture>
</div>
<div class="page-title">
<h1>CPS Microdata</h1>
</div><!-- .page-title -->
<main id="main">
<section>
<article class="prose">
<div class="tutorial-meta">
<span>Updated <time datetime="2026-01-17">Jan 2026</time></span>
<span class="meta-sep">·</span>
<span class="trail-badge trail-advanced">◆◆ Advanced</span>
<span class="meta-sep">·</span>
<button class="tutorial-share" title="Copy link" aria-label="Copy link"><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M10 13a5 5 0 0 0 7.54.54l3-3a5 5 0 0 0-7.07-7.07l-1.72 1.71"/><path d="M14 11a5 5 0 0 0-7.54-.54l-3 3a5 5 0 0 0 7.07 7.07l1.71-1.71"/></svg><span class="share-label">Link</span></button>
</div>
<h2>Current Population Survey Microdata with Python</h2>
<p>January 2026</p>
<p>This tutorial shows two ways to read CPS microdata with Python:</p>
<ul>
<li><strong>CSV method</strong> - simplest approach, recommended for quick access to single months of recent data</li>
<li><strong>Struct method</strong> - fastest approach, recommended for processing many months of data</li>
</ul>
<p class="callout accent-teal">Note: <a href="https://cps.ipums.org/cps/">IPUMS</a> provides a user-friendly interface for downloading CPS data (and other surveys). IPUMS handles the complexity of variable selection and file formatting, making it an excellent alternative to working with raw Census files directly.</p>
<p>See also: Tom Augspurger's <a href="https://github.com/TomAugspurger/pycps">pycps</a> (archived) and his four-part blog series (<a href="https://tomaugspurger.net/posts/tackling-the-cps/">1</a>, <a href="https://tomaugspurger.net/posts/tackling-the-cps-2/">2</a>, <a href="https://tomaugspurger.net/posts/tackling-the-cps-3/">3</a>, <a href="https://tomaugspurger.net/posts/tackling-the-cps-4/">4</a>) as resources for working with CPS microdata in Python.</p>
<p>The <a href="https://www.census.gov/data/datasets/time-series/demo/cps/cps-basic.html">Census Basic Monthly CPS page</a> contains the microdata files (in both CSV and fixed-width format), along with data dictionaries identifying each variable name, location, value range, and whether it applies to a restricted sample.</p>
<hr class="section-bar accent-teal">
<h3>Method 1: CSV files (recommended for single months)</h3>
<p>Census publishes CSV files for recent months of CPS data. This is the simplest way to load CPS microdata and is useful for quick access to a limited amount of recent data. Download the CSV file from the <a href="https://www.census.gov/data/datasets/time-series/demo/cps/cps-basic.html">Census CPS page</a>.</p>
<p>Note that CSV files are larger than the fixed-width format files, and are not available for all years.</p>
<span class="label step-label accent-teal">Read and filter the data</span>
<p>This example calculates the employment-to-population ratio for women age 25 to 54 in December 2025. First, we read only the columns we need from the CSV file. This speeds up the code and uses less memory than reading the entire file. We then filter to our population of interest: women (<code>pesex == 2</code>) between ages 25 and 54.</p>
<p>In[1]:</p>
<pre><code class="python">import pandas as pd
import numpy as np
# Read selected columns and query for women age 25 to 54
columns = ['prtage', 'pesex', 'prempnot', 'pwcmpwgt']
df = (pd.read_csv('dec25pub.csv', usecols=columns).dropna()
.query('pesex == 2 and 25 &lt;= prtage &lt;= 54'))</code></pre>
<span class="label step-label accent-teal">Calculate the weighted employment rate</span>
<p>The CPS is a sample survey, so each observation represents many people in the population. The <code>pwcmpwgt</code> variable is the person-level composite weight, which tells us how many people each survey respondent represents. We create an indicator variable for employment (<code>prempnot == 1</code> means employed) and then calculate the weighted average to get the employment rate.</p>
<p>In[2]:</p>
<pre><code class="python"># Identify employed portion of group as 1 & the rest as 0
empl = np.where(df['prempnot'] == 1, 1, 0)
# Take sample weighted average of employed portion of group
epop = np.average(empl, weights=df['pwcmpwgt'])
# Print out the result to check against LNU02300062
print(f'December 2025: {epop*100:.1f}%')</code></pre>
<pre>December 2025: 75.4%</pre>
<p>This result matches the <a href="https://data.bls.gov/timeseries/LNU02300062">BLS published value</a> for December 2025.</p>
<hr class="section-bar accent-teal">
<h3>Method 2: Struct method (fastest for many months)</h3>
<p>If you are processing decades of monthly data, the struct method is the fastest approach. This method reads the fixed-width format files directly, where each variable occupies a specific position in each row of data.</p>
<p>Download the data dictionary (e.g., <code>January_2017_Record_Layout.txt</code>) and the microdata file (e.g., <code>apr17pub.dat</code>) from the <a href="https://www.census.gov/data/datasets/time-series/demo/cps/cps-basic.html">Census CPS page</a>. This example calculates the same employment-to-population ratio for women age 25 to 54, but for April 2017.</p>
<p>In[3]:</p>
<pre><code class="python"># Import relevant libraries
import re, struct
import pandas as pd
import numpy as np</code></pre>
<span class="label step-label accent-teal">Parse the data dictionary</span>
<p>The data dictionary file describes how to read the fixed-width format CPS microdata files. It tells us where each variable is located in the raw data. We manually identify four variables of interest: <code>PRTAGE</code> for age, <code>PESEX</code> for sex, <code>PREMPNOT</code> for employment status, and <code>PWCMPWGT</code> for the person-level composite weight.</p>
<p>In[4]:</p>
<pre><code class="python"># Read data dictionary text file
data_dict = open('January_2017_Record_Layout.txt').read()
# Manually list out the IDs for series of interest
var_names = ['PRTAGE', 'PESEX', 'PREMPNOT', 'PWCMPWGT']</code></pre>
<p>The data dictionary text file follows a pattern that makes it machine readable. We use a regular expression to extract the variable name, length, and start/end positions. The start location is adjusted by -1 for Python's zero-based indexing. The width is stored as a string ending in <code>s</code>, which is the struct format code for a fixed-length byte string (for example, <code>2s</code> reads two bytes).</p>
<p>Note that data dictionaries change over time and don't follow a consistent format, so the regex pattern may need adjustment for different years.</p>
<p>In[5]:</p>
<pre><code class="python"># Regular expression matching series name and data dict pattern
# Raw f-string (rf'...') so \s, \d, \n, and \t reach the regex engine unaltered
p = rf'\n({"|".join(var_names)})\s+(\d+)\s+.*?\t+.*?(\d\d*).*?(\d\d+)'
# Dictionary of variable name: [start, end, and length + 's']
d = {s[0]: [int(s[2])-1, int(s[3]), f'{s[1]}s']
for s in re.findall(p, data_dict)}
print(d)</code></pre>
<pre>{'PRTAGE': [121, 123, '2s'], 'PESEX': [128, 130, '2s'], 'PREMPNOT': [392, 394, '2s'], 'PWCMPWGT': [845, 855, '10s']}</pre>
<span class="label step-label accent-teal">Build the struct format string</span>
<p>Python's <code>struct</code> module can efficiently parse binary data using a format string. The format string specifies which characters to keep and which to skip. For example, <code>121x</code> means skip 121 characters, while <code>2s</code> means keep the next 2 characters as a string. By chaining these together, we can extract just the variables we need from each row.</p>
<p>In[6]:</p>
<pre><code class="python"># Lists of variable starts, ends, and lengths
start, end, width = zip(*d.values())
# Create list of which characters to skip in each row
skip = ([f'{s - e}x' for s, e in zip(start, [0] + list(end[:-1]))])
# Create format string by joining skip and variable segments
unpack_fmt = ''.join([j for i in zip(skip, width) for j in i])
print(unpack_fmt)
# Struct can interpret row bytes with the format string
unpacker = struct.Struct(unpack_fmt).unpack_from</code></pre>
<pre>121x2s5x2s262x2s451x10s</pre>
<p>Reading this format string: skip 121 characters, keep 2 (age), skip 5, keep 2 (sex), skip 262, keep 2 (employment status), skip 451, keep 10 (weight).</p>
<span class="label step-label accent-teal">Understanding fixed-width format</span>
<p>To see what the raw data looks like, here is the first line of the microdata file:</p>
<p>In[7]:</p>
<pre><code class="python">print(open('apr17pub.dat').readline())</code></pre>
<pre style="white-space: pre-wrap; word-break: break-all;">000110116792163 42017 120100-1 1 1-1 115-1-1-1 15049796 1 2 1 7 2 0 205011 2 1 1-1-1-1 36 01 338600001103000 -1-1 1-1420 1 2 1 2-1 243 1-1 9-1 1-1 1 1 1 2 1 2 57 57 57 1 0 0 1 1 1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1 2-150-1-1 50-1-1-1-1 2-1 2-150-1 50-1-1 2 5 5-1 2 3 5 2-1-1-1-1-1 -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1 -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1 1-121 1 1 1 6-1-1-1 -1-1-1 1 2-1-1-1-1 1 2 1 6 4 -1-1 4 3 3 1 2 4-1-1 6-138-114-1 1 9-1 3-1 2 1 1 1 0-1-1-1-1 -1 -1 -1 -10-1 -10-1-1 -1 -10-1-1-1-1-1-1-1-1-1 2-1-1 2 15049796 22986106 0 16044411 15280235 0 0 1-1-1-1 0 0 1 0-1 050 0 0 0 0 1 0 0 0-1-1-1 1 0 0-1 1 1 0 1 0 1 1 0 1 1 1 0 1 0 1 1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1 0 0 0-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1 0 1 1 3865 1-1-1-1-1-1-1 1 1 1-1-1-1 1573071277704210 -1 -114-1-1-1-1-1 0-1-1-1-1-15050 1 1 1 2 2 2 2 2 2 2 0 0 0 0 0 0 0-1-1-1-1-1 1 1 1202020 A</pre>
<p>If we skip the first 121 characters and keep the next two, we find <code>42</code>, which is the age of the person in the first row of the microdata.</p>
<span class="label step-label accent-teal">Read the raw microdata</span>
<p>We open the raw CPS microdata file in binary mode and read all lines. For each row, we check if the sample weight is positive (meaning the observation should be included), then apply the unpacker to extract just the four variables we need. The result is a list of lists, where each inner list contains the values for one person.</p>
<p>In[8]:</p>
<pre><code class="python"># Open file (read as binary) and read lines into "raw_data"
raw_data = open('apr17pub.dat', 'rb').readlines()
wgt = d['PWCMPWGT'] # Location of sample weight variable
# Unpack and store data of interest if sample weight > 0
data = [[*map(int, unpacker(row))] for row in raw_data
if int(row[wgt[0]:wgt[1]]) > 0]
print(data[:5])</code></pre>
<pre>[[42, 1, 1, 15730712], [26, 2, 1, 14582612], [25, 2, 1, 20672047], [42, 2, 4, 15492377], [47, 1, 1, 18155638]]</pre>
<span class="label step-label accent-teal">Create pandas dataframe</span>
<p>We convert the list of lists to a pandas DataFrame for easier filtering and analysis. The DataFrame is filtered to women (<code>PESEX == 2</code>) between ages 25 and 54. The sample weights have four implied decimal places in the raw data, so we divide by 10,000 to get the actual weight values.</p>
<p>In[9]:</p>
<pre><code class="python"># Pandas dataframe of women age 25 to 54
df = (pd.DataFrame(data, columns=d.keys())
.query('PESEX == 2 and 25 &lt;= PRTAGE &lt;= 54')
.assign(PWCMPWGT = lambda x: x['PWCMPWGT'] / 10000))
print(df.head().to_string(index=False))</code></pre>
<pre>PRTAGE PESEX PREMPNOT PWCMPWGT
26 2 1 1458.2612
25 2 1 2067.2047
42 2 4 1549.2377
49 2 1 1633.0038
26 2 1 1611.2316</pre>
<span class="label step-label accent-teal">Calculate the weighted employment rate</span>
<p>As with the CSV method, we create an indicator variable for employment (<code>PREMPNOT == 1</code> means employed) and calculate the weighted average using the composite weight. The result matches the <a href="https://data.bls.gov/timeseries/LNU02300062">BLS published value</a> for April 2017.</p>
<p>In[10]:</p>
<pre><code class="python"># Identify employed portion of group as 1 & the rest as 0
empl = np.where(df['PREMPNOT'] == 1, 1, 0)
# Take sample weighted average of employed portion of group
epop = np.average(empl, weights=df['PWCMPWGT'])
# Print out the result to check against LNU02300062
print(f'April 2017: {epop*100:.1f}%')</code></pre>
<pre>April 2017: 72.3%</pre>
<hr class="section-bar accent-teal">
<h3>Scaling up</h3>
<p>These examples can be scaled up to work with multiple years of monthly data. For a project creating harmonized partial CPS extracts, see <a href="https://github.com/bdecon/econ_data/tree/master/bd_CPS">here</a>.</p>
<hr class="section-bar accent-teal">
<h3>About the CPS</h3>
<p>The CPS was initially deployed in 1940 to give a more accurate unemployment rate estimate, and it is still the source of the official unemployment rate. The CPS is a monthly survey of around 65,000 households. Each selected household is surveyed up to eight times. Interviewers collect basic demographic and employment information in the first three interview months, then ask additional detailed wage questions in the fourth interview. The household is then not surveyed for eight months, after which it is interviewed for four more months, with the detailed wage questions asked again in the fourth of those interviews.</p>
<p>The CPS is not a simple random sample, but a multi-stage stratified sample. In the first stage, each state and DC is divided into "primary sampling units" (PSUs), and a sample of PSUs is selected. In the second stage, a sample of housing units is drawn from the selected PSUs.</p>
<p>There are also months where each household receives supplemental questions on a topic of interest. The largest such "CPS supplement", conducted each March, is the Annual Social and Economic Supplement. The sample size for this supplement is expanded, and the respondents are asked questions about various sources of income, and about the quality of their jobs (for example, health insurance benefits). Other supplements cover topics like job tenure, or computer and internet use.</p>
<p>The CPS is a joint product of the U.S. Census Bureau and the Bureau of Labor Statistics.</p>
</article>
<div class="subfooter" data-hub="guides" data-current="cps.html" style="--accent-color: var(--color-card-teal)"></div>
</section>
</main>
<footer>
<div class="footer-sitemap">
<div>
<h4><a href="reports.html">Data</a></h4>
<ul>
<li><a href="chartbook.html">US Chartbook</a></li>
<li><a href="indicators.html">Economic Indicators</a></li>
<li><a href="gdpm.html">Monthly GDP</a></li>
<li><a href="imfweo.html">WEO Forecasts</a></li>
</ul>
</div>
<div>
<h4><a href="python.html">Guides</a></h4>
<ul>
<li><a href="getstarted.html">Setup</a></li>
<li><a href="imfapi1.html">IMF API</a></li>
<li><a href="blsapi.html">BLS API</a></li>
<li><a href="censusapi.html">Census API</a></li>
</ul>
</div>
<div>
<h4><a href="about.html">About</a></h4>
<ul>
<li><a href="about.html">About BD Economics</a></li>
<li><a href="https://briandew.wordpress.com" target="_blank" rel="noopener">Blog</a></li>
<li><a href="https://github.com/bdecon/" target="_blank" rel="noopener">GitHub</a></li>
</ul>
</div>
</div>
<div class="footer-bottom">
<div class="footer-left">
<p><time datetime="2026">2026</time>, by Brian Dew</p>
</div>
<nav class="footer-right" aria-label="Social links">
<a href="https://github.com/bdecon/" aria-label="GitHub"><svg class="icon" viewBox="0 0 16 16" fill="currentColor" aria-hidden="true"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27s1.36.09 2 .27c1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.01 8.01 0 0 0 16 8c0-4.42-3.58-8-8-8z"/></svg></a>
<a href="https://www.linkedin.com/in/brian-dew-5788a386/" aria-label="LinkedIn"><svg class="icon" viewBox="0 0 16 16" fill="currentColor" aria-hidden="true"><path d="M0 1.146C0 .513.526 0 1.175 0h13.65C15.474 0 16 .513 16 1.146v13.708c0 .633-.526 1.146-1.175 1.146H1.175C.526 16 0 15.487 0 14.854zm4.943 12.248V6.169H2.542v7.225zm-1.2-8.212c.837 0 1.358-.554 1.358-1.248-.016-.709-.52-1.248-1.342-1.248S1.4 3.226 1.4 3.934c0 .694.521 1.248 1.327 1.248zm4.908 8.212V9.359c0-.216.016-.432.08-.586.173-.431.568-.878 1.232-.878.869 0 1.216.662 1.216 1.634v3.865h2.401V9.25c0-2.22-1.184-3.252-2.764-3.252-1.274 0-1.845.7-2.165 1.193v.025h-.016a.3.3 0 0 1 .016-.025V6.169h-2.4c.03.678 0 7.225 0 7.225z"/></svg></a>
<a href="https://twitter.com/bd_econ" aria-label="Twitter"><svg class="icon" viewBox="0 0 16 16" fill="currentColor" aria-hidden="true"><path d="M5.026 15c6.038 0 9.341-5.003 9.341-9.334q.002-.211-.006-.422A6.7 6.7 0 0 0 16 3.542a6.7 6.7 0 0 1-1.889.518 3.3 3.3 0 0 0 1.447-1.817 6.5 6.5 0 0 1-2.087.793A3.286 3.286 0 0 0 7.875 6.03a9.32 9.32 0 0 1-6.767-3.429 3.29 3.29 0 0 0 1.018 4.382A3.3 3.3 0 0 1 .64 6.575v.045a3.29 3.29 0 0 0 2.632 3.218 3.2 3.2 0 0 1-.865.115 3 3 0 0 1-.614-.057 3.28 3.28 0 0 0 3.067 2.277A6.6 6.6 0 0 1 .78 13.58a6 6 0 0 1-.78-.045A9.34 9.34 0 0 0 5.026 15"/></svg></a>
<a href="https://briandew.wordpress.com/" target="_blank" rel="noopener" aria-label="WordPress Blog"><svg class="icon" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><path d="M21.469 6.825c.84 1.537 1.318 3.3 1.318 5.175 0 3.979-2.156 7.456-5.363 9.325l3.295-9.527c.615-1.54.82-2.771.82-3.864 0-.405-.026-.78-.07-1.11m-7.981.105c.647-.03 1.232-.105 1.232-.105.582-.075.514-.93-.067-.899 0 0-1.755.135-2.88.135-1.064 0-2.85-.15-2.85-.15-.585-.03-.661.855-.075.885 0 0 .54.061 1.125.09l1.68 4.605-2.37 7.08L5.354 6.9c.649-.03 1.234-.1 1.234-.1.585-.075.516-.93-.065-.896 0 0-1.746.138-2.874.138-.2 0-.438-.008-.69-.015C4.911 3.15 8.235 1.215 12 1.215c2.809 0 5.365 1.072 7.286 2.833-.046-.003-.091-.009-.141-.009-1.06 0-1.812.923-1.812 1.914 0 .89.513 1.643 1.06 2.531.411.72.89 1.643.89 2.977 0 .915-.354 1.994-.821 3.479l-1.075 3.585-3.9-11.61.001.014zM12 22.784c-1.059 0-2.081-.153-3.048-.437l3.237-9.406 3.315 9.087c.024.053.05.101.078.149-1.12.393-2.325.609-3.582.609M1.211 12c0-1.564.336-3.05.935-4.39L7.29 21.709C3.694 19.96 1.212 16.271 1.211 12M12 0C5.385 0 0 5.385 0 12s5.385 12 12 12 12-5.385 12-12S18.615 0 12 0"/></svg></a>
</nav>
</div>
</footer>
<script src="scripts/nav.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.11.1/highlight.min.js"></script>
<script>hljs.highlightAll();</script>
</body>
</html>