From 8f413a9d7d65453391018f80319de8c928137b63 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Wed, 14 Jan 2026 23:44:35 +0300 Subject: [PATCH 1/5] feat: District breakdowns --- .../calculate_economy_comparison.py | 119 ++++++++++++++++++ .../macro/single/calculate_single_economy.py | 29 +++++ 2 files changed, 148 insertions(+) diff --git a/policyengine/outputs/macro/comparison/calculate_economy_comparison.py b/policyengine/outputs/macro/comparison/calculate_economy_comparison.py index dd0123a4..8b80dfc5 100644 --- a/policyengine/outputs/macro/comparison/calculate_economy_comparison.py +++ b/policyengine/outputs/macro/comparison/calculate_economy_comparison.py @@ -845,6 +845,120 @@ def uk_local_authority_breakdown( return UKLocalAuthorityBreakdownWithValues(**output) +# US Congressional District Breakdown Models + +# State FIPS to abbreviation mapping +STATE_FIPS_TO_ABBREV = { + 1: "AL", 2: "AK", 4: "AZ", 5: "AR", 6: "CA", 8: "CO", 9: "CT", 10: "DE", + 11: "DC", 12: "FL", 13: "GA", 15: "HI", 16: "ID", 17: "IL", 18: "IN", + 19: "IA", 20: "KS", 21: "KY", 22: "LA", 23: "ME", 24: "MD", 25: "MA", + 26: "MI", 27: "MN", 28: "MS", 29: "MO", 30: "MT", 31: "NE", 32: "NV", + 33: "NH", 34: "NJ", 35: "NM", 36: "NY", 37: "NC", 38: "ND", 39: "OH", + 40: "OK", 41: "OR", 42: "PA", 44: "RI", 45: "SC", 46: "SD", 47: "TN", + 48: "TX", 49: "UT", 50: "VT", 51: "VA", 53: "WA", 54: "WV", 55: "WI", + 56: "WY", 72: "PR", +} + + +def geoid_to_district_name(geoid: int) -> str: + """Convert congressional district geoid (SSDD format) to name like 'GA-05'.""" + state_fips = geoid // 100 + district_num = geoid % 100 + state_abbrev = STATE_FIPS_TO_ABBREV.get(state_fips, f"S{state_fips}") + return f"{state_abbrev}-{district_num:02d}" + + +class USCongressionalDistrictImpact(BaseModel): + district: str # e.g., "GA-05" + average_household_income_change: float + relative_household_income_change: float + + +class USCongressionalDistrictBreakdownWithValues(BaseModel): + districts: List[USCongressionalDistrictImpact] + + +USCongressionalDistrictBreakdown = USCongressionalDistrictBreakdownWithValues | None + + +def us_congressional_district_breakdown( + baseline: SingleEconomy, reform: SingleEconomy, country_id: str +) -> USCongressionalDistrictBreakdown: + """Break down results by US congressional district using household geoids. + + This function groups households by their congressional_district_geoid and + computes aggregate income changes per district. Only works for US simulations + that have district assignments (typically state-level datasets). + + Args: + baseline: Baseline economy with household-level data + reform: Reform economy with household-level data + country_id: Country identifier (must be "us") + + Returns: + District-level breakdown or None if not applicable + """ + if country_id != "us": + return None + + if baseline.congressional_district_geoid is None: + return None + + # Group households by district + from collections import defaultdict + + district_indices: dict[int, list[int]] = defaultdict(list) + for i, geoid in enumerate(baseline.congressional_district_geoid): + if geoid > 0: # Filter out 0 (unassigned) + district_indices[geoid].append(i) + + if not district_indices: + return None + + districts: list[USCongressionalDistrictImpact] = [] + + # Calculate district-level impacts + for geoid, indices in district_indices.items(): + district_name = geoid_to_district_name(geoid) + + # Extract household data for this district + weights = [baseline.household_weight[i] for i in indices] + baseline_incomes = [baseline.household_net_income[i] for i in indices] + reform_incomes = [reform.household_net_income[i] for i in indices] + + baseline_income = MicroSeries(baseline_incomes, weights=weights) + reform_income = MicroSeries(reform_incomes, weights=weights) + + total_households = baseline_income.count() + + if total_households == 0 or baseline_income.sum() == 0: + continue + + average_household_income_change = ( + reform_income.sum() - baseline_income.sum() + ) / total_households + + relative_household_income_change = ( + reform_income.sum() / baseline_income.sum() - 1 + ) + + districts.append( + USCongressionalDistrictImpact( + district=district_name, + average_household_income_change=float(average_household_income_change), + relative_household_income_change=float(relative_household_income_change), + ) + ) + + if not districts: + return None + + # Sort by district name for consistent ordering + districts.sort(key=lambda d: d.district) + + return USCongressionalDistrictBreakdownWithValues(districts=districts) + + class CliffImpactInSimulation(BaseModel): cliff_gap: float cliff_share: float @@ -873,6 +987,7 @@ class EconomyComparison(BaseModel): labor_supply_response: LaborSupplyResponse constituency_impact: UKConstituencyBreakdown local_authority_impact: UKLocalAuthorityBreakdown + congressional_district_impact: USCongressionalDistrictBreakdown # US only cliff_impact: CliffImpact | None @@ -906,6 +1021,9 @@ def calculate_economy_comparison( local_authority_impact_data: UKLocalAuthorityBreakdown = ( uk_local_authority_breakdown(baseline, reform, country_id) ) + congressional_district_impact_data: USCongressionalDistrictBreakdown = ( + us_congressional_district_breakdown(baseline, reform, country_id) + ) wealth_decile_impact_data = wealth_decile_impact( baseline, reform, country_id ) @@ -945,5 +1063,6 @@ def calculate_economy_comparison( labor_supply_response=labor_supply_response_data, constituency_impact=constituency_impact_data, local_authority_impact=local_authority_impact_data, + congressional_district_impact=congressional_district_impact_data, cliff_impact=cliff_impact, ) diff --git a/policyengine/outputs/macro/single/calculate_single_economy.py b/policyengine/outputs/macro/single/calculate_single_economy.py index 34e5ee1d..7999c63e 100644 --- a/policyengine/outputs/macro/single/calculate_single_economy.py +++ b/policyengine/outputs/macro/single/calculate_single_economy.py @@ -53,6 +53,7 @@ class SingleEconomy(BaseModel): programs: Dict[str, float] | None cliff_gap: float | None = None cliff_share: float | None = None + congressional_district_geoid: List[int] | None = None # US only: SSDD format @dataclass @@ -342,6 +343,28 @@ def calculate_cliffs(self): cliff_share=cliff_share, ) + def calculate_congressional_district_geoid(self) -> List[int] | None: + """Calculate congressional district geoid for US households. + + Returns list of geoids in SSDD format (state FIPS * 100 + district number), + or None if not available (non-US or variable doesn't exist). + """ + if self.country_id != "us": + return None + + try: + geoids = ( + self.simulation.calculate("congressional_district_geoid") + .astype(int) + .tolist() + ) + # Check if we have any non-zero values (0 means unassigned) + if all(g == 0 for g in geoids): + return None + return geoids + except Exception: + return None + class CliffImpactInSimulation(BaseModel): cliff_gap: float @@ -411,6 +434,11 @@ def calculate_single_economy( cliff_gap = None cliff_share = None + # US congressional district geoids + congressional_district_geoid = ( + task_manager.calculate_congressional_district_geoid() + ) + return SingleEconomy( **{ "total_net_income": total_net_income, @@ -447,5 +475,6 @@ def calculate_single_economy( "programs": uk_programs, "cliff_gap": cliff_gap if include_cliffs else None, "cliff_share": cliff_share if include_cliffs else None, + "congressional_district_geoid": congressional_district_geoid, } ) From 5c6443afe202d1b9218eb4e2bf33ed2f5f5ce6be Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Wed, 14 Jan 2026 23:57:58 +0300 Subject: [PATCH 2/5] test: Add tests --- tests/conftest.py | 14 + .../test_us_congressional_districts.py | 295 ++++++++++++++++++ tests/fixtures/__init__.py | 12 + tests/fixtures/simulation.py | 74 +++++ 4 files changed, 395 insertions(+) create mode 100644 tests/conftest.py create mode 100644 tests/country/test_us_congressional_districts.py create mode 100644 tests/fixtures/__init__.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..e816468f --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,14 @@ +"""Pytest configuration and shared fixtures.""" + +import pytest + +# Re-export fixtures from fixtures module +from tests.fixtures.simulation import ( + mock_get_default_dataset, + mock_dataset, + mock_simulation_with_cliff_vars, + mock_single_economy_with_ga_districts, + mock_single_economy_with_multi_state_districts, + mock_single_economy_without_districts, + mock_single_economy_with_null_districts, +) diff --git a/tests/country/test_us_congressional_districts.py b/tests/country/test_us_congressional_districts.py new file mode 100644 index 00000000..708f512e --- /dev/null +++ b/tests/country/test_us_congressional_districts.py @@ -0,0 +1,295 @@ +"""Tests for US congressional district breakdown functionality.""" + +import pytest +from tests.fixtures.simulation import create_mock_single_economy +from policyengine.outputs.macro.comparison.calculate_economy_comparison import ( + us_congressional_district_breakdown, + geoid_to_district_name, + USCongressionalDistrictBreakdownWithValues, + USCongressionalDistrictImpact, +) + + +class TestGeoidToDistrictName: + """Tests for the geoid_to_district_name helper function.""" + + def test__given_georgia_district_5_geoid__then_returns_ga_05(self): + # Given + geoid = 1305 # State FIPS 13 (GA) + District 05 + + # When + result = geoid_to_district_name(geoid) + + # Then + assert result == "GA-05" + + def test__given_california_district_12_geoid__then_returns_ca_12(self): + # Given + geoid = 612 # State FIPS 6 (CA) + District 12 + + # When + result = geoid_to_district_name(geoid) + + # Then + assert result == "CA-12" + + def test__given_north_carolina_district_4_geoid__then_returns_nc_04(self): + # Given + geoid = 3704 # State FIPS 37 (NC) + District 04 + + # When + result = geoid_to_district_name(geoid) + + # Then + assert result == "NC-04" + + def test__given_single_digit_district__then_pads_with_zero(self): + # Given + geoid = 101 # State FIPS 1 (AL) + District 01 + + # When + result = geoid_to_district_name(geoid) + + # Then + assert result == "AL-01" + + +class TestUsCongressionalDistrictBreakdown: + """Tests for the us_congressional_district_breakdown function.""" + + def test__given_non_us_country__then_returns_none( + self, mock_single_economy_with_ga_districts + ): + # Given + baseline = mock_single_economy_with_ga_districts + reform = mock_single_economy_with_ga_districts + country_id = "uk" + + # When + result = us_congressional_district_breakdown(baseline, reform, country_id) + + # Then + assert result is None + + def test__given_null_district_geoids__then_returns_none( + self, mock_single_economy_with_null_districts + ): + # Given + baseline = mock_single_economy_with_null_districts + reform = mock_single_economy_with_null_districts + country_id = "us" + + # When + result = us_congressional_district_breakdown(baseline, reform, country_id) + + # Then + assert result is None + + def test__given_all_zero_district_geoids__then_returns_none( + self, mock_single_economy_without_districts + ): + # Given + baseline = mock_single_economy_without_districts + reform = mock_single_economy_without_districts + country_id = "us" + + # When + result = us_congressional_district_breakdown(baseline, reform, country_id) + + # Then + assert result is None + + def test__given_valid_district_data__then_returns_breakdown_with_districts_list( + self, mock_single_economy_with_ga_districts + ): + # Given + baseline = mock_single_economy_with_ga_districts + reform = mock_single_economy_with_ga_districts + country_id = "us" + + # When + result = us_congressional_district_breakdown(baseline, reform, country_id) + + # Then + assert result is not None + assert isinstance(result, USCongressionalDistrictBreakdownWithValues) + assert hasattr(result, "districts") + assert isinstance(result.districts, list) + + def test__given_two_ga_districts__then_returns_two_district_impacts( + self, mock_single_economy_with_ga_districts + ): + # Given + baseline = mock_single_economy_with_ga_districts + reform = mock_single_economy_with_ga_districts + country_id = "us" + + # When + result = us_congressional_district_breakdown(baseline, reform, country_id) + + # Then + assert len(result.districts) == 2 + district_names = [d.district for d in result.districts] + assert "GA-05" in district_names + assert "GA-06" in district_names + + def test__given_districts_from_multiple_states__then_returns_all_districts_sorted( + self, mock_single_economy_with_multi_state_districts + ): + # Given + baseline = mock_single_economy_with_multi_state_districts + reform = mock_single_economy_with_multi_state_districts + country_id = "us" + + # When + result = us_congressional_district_breakdown(baseline, reform, country_id) + + # Then + assert len(result.districts) == 4 + district_names = [d.district for d in result.districts] + # Should be sorted alphabetically + assert district_names == ["GA-05", "GA-06", "NC-04", "NC-12"] + + def test__given_no_income_change__then_returns_zero_changes( + self, mock_single_economy_with_ga_districts + ): + # Given: baseline and reform are identical + baseline = mock_single_economy_with_ga_districts + reform = mock_single_economy_with_ga_districts + country_id = "us" + + # When + result = us_congressional_district_breakdown(baseline, reform, country_id) + + # Then + for district in result.districts: + assert district.average_household_income_change == 0.0 + assert district.relative_household_income_change == 0.0 + + def test__given_income_increase__then_returns_positive_changes(self): + # Given: reform has higher incomes than baseline + baseline = create_mock_single_economy( + household_net_income=[50000.0, 60000.0, 70000.0], + household_weight=[1000.0, 1000.0, 1000.0], + congressional_district_geoid=[1305, 1305, 1305], + ) + reform = create_mock_single_economy( + household_net_income=[51000.0, 61000.0, 71000.0], + household_weight=[1000.0, 1000.0, 1000.0], + congressional_district_geoid=[1305, 1305, 1305], + ) + country_id = "us" + + # When + result = us_congressional_district_breakdown(baseline, reform, country_id) + + # Then + assert len(result.districts) == 1 + district = result.districts[0] + assert district.district == "GA-05" + assert district.average_household_income_change == 1000.0 + assert district.relative_household_income_change > 0 + + def test__given_income_decrease__then_returns_negative_changes(self): + # Given: reform has lower incomes than baseline + baseline = create_mock_single_economy( + household_net_income=[50000.0, 60000.0, 70000.0], + household_weight=[1000.0, 1000.0, 1000.0], + congressional_district_geoid=[1305, 1305, 1305], + ) + reform = create_mock_single_economy( + household_net_income=[49000.0, 59000.0, 69000.0], + household_weight=[1000.0, 1000.0, 1000.0], + congressional_district_geoid=[1305, 1305, 1305], + ) + country_id = "us" + + # When + result = us_congressional_district_breakdown(baseline, reform, country_id) + + # Then + assert len(result.districts) == 1 + district = result.districts[0] + assert district.district == "GA-05" + assert district.average_household_income_change == -1000.0 + assert district.relative_household_income_change < 0 + + def test__given_weighted_households__then_calculates_weighted_averages(self): + # Given: households with different weights + baseline = create_mock_single_economy( + household_net_income=[50000.0, 100000.0], + household_weight=[3000.0, 1000.0], # First household has 3x weight + congressional_district_geoid=[1305, 1305], + ) + reform = create_mock_single_economy( + household_net_income=[51000.0, 101000.0], + household_weight=[3000.0, 1000.0], + congressional_district_geoid=[1305, 1305], + ) + country_id = "us" + + # When + result = us_congressional_district_breakdown(baseline, reform, country_id) + + # Then + district = result.districts[0] + # Weighted sum of income change: (3000*1000 + 1000*1000) = 4,000,000 + # Total households: 3000 + 1000 = 4000 + # Average change: 4,000,000 / 4000 = 1000 + assert district.average_household_income_change == 1000.0 + + def test__given_district_impact__then_has_required_fields( + self, mock_single_economy_with_ga_districts + ): + # Given + baseline = mock_single_economy_with_ga_districts + reform = mock_single_economy_with_ga_districts + country_id = "us" + + # When + result = us_congressional_district_breakdown(baseline, reform, country_id) + + # Then + for district in result.districts: + assert isinstance(district, USCongressionalDistrictImpact) + assert hasattr(district, "district") + assert hasattr(district, "average_household_income_change") + assert hasattr(district, "relative_household_income_change") + assert isinstance(district.district, str) + assert isinstance(district.average_household_income_change, float) + assert isinstance(district.relative_household_income_change, float) + + +class TestCongressionalDistrictGeoidExtraction: + """Tests for congressional_district_geoid extraction in SingleEconomy.""" + + def test__given_us_simulation_with_state_dataset__then_geoid_is_extracted(self): + """Integration test: verify geoid extraction works with real simulation. + + Note: This test requires network access to download state dataset. + Skip if running in isolated environment. + """ + pytest.importorskip("policyengine_us") + + from policyengine import Simulation + + # Given: A US state simulation (GA has district assignments) + sim = Simulation( + scope="macro", + country="us", + region="state/GA", + time_period=2025, + ) + + # When + result = sim.calculate_single_economy() + + # Then + assert result.congressional_district_geoid is not None + assert len(result.congressional_district_geoid) > 0 + # All geoids should be in Georgia (FIPS 13xx) + non_zero_geoids = [g for g in result.congressional_district_geoid if g > 0] + assert len(non_zero_geoids) > 0 + for geoid in non_zero_geoids: + state_fips = geoid // 100 + assert state_fips == 13, f"Expected GA (13), got state FIPS {state_fips}" diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py new file mode 100644 index 00000000..131d59d6 --- /dev/null +++ b/tests/fixtures/__init__.py @@ -0,0 +1,12 @@ +"""Test fixtures for policyengine tests.""" + +from tests.fixtures.simulation import ( + create_mock_single_economy, + mock_get_default_dataset, + mock_dataset, + mock_simulation_with_cliff_vars, + mock_single_economy_with_ga_districts, + mock_single_economy_with_multi_state_districts, + mock_single_economy_without_districts, + mock_single_economy_with_null_districts, +) diff --git a/tests/fixtures/simulation.py b/tests/fixtures/simulation.py index 8e0f7700..9469f2f1 100644 --- a/tests/fixtures/simulation.py +++ b/tests/fixtures/simulation.py @@ -73,3 +73,77 @@ def mock_simulation_with_cliff_vars(): "is_adult": Mock(sum=Mock(return_value=80.0)), }[var] return mock_sim + + +def create_mock_single_economy( + household_net_income: list[float], + household_weight: list[float], + congressional_district_geoid: list[int] | None = None, +): + """Create a mock SingleEconomy with specified household data. + + Args: + household_net_income: List of household net incomes + household_weight: List of household weights + congressional_district_geoid: List of district geoids (SSDD format) or None + + Returns: + Mock SingleEconomy object with the specified data + """ + mock_economy = Mock() + mock_economy.household_net_income = household_net_income + mock_economy.household_weight = household_weight + mock_economy.congressional_district_geoid = congressional_district_geoid + return mock_economy + + +@pytest.fixture +def mock_single_economy_with_ga_districts(): + """Mock SingleEconomy with Georgia congressional district data. + + Creates 6 households across 2 districts: + - GA-05 (geoid 1305): 3 households + - GA-06 (geoid 1306): 3 households + """ + return create_mock_single_economy( + household_net_income=[50000.0, 60000.0, 70000.0, 80000.0, 90000.0, 100000.0], + household_weight=[1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0], + congressional_district_geoid=[1305, 1305, 1305, 1306, 1306, 1306], + ) + + +@pytest.fixture +def mock_single_economy_with_multi_state_districts(): + """Mock SingleEconomy with districts from multiple states. + + Creates 8 households across 4 districts in 2 states: + - GA-05 (geoid 1305): 2 households + - GA-06 (geoid 1306): 2 households + - NC-04 (geoid 3704): 2 households + - NC-12 (geoid 3712): 2 households + """ + return create_mock_single_economy( + household_net_income=[50000.0, 60000.0, 70000.0, 80000.0, 40000.0, 45000.0, 55000.0, 65000.0], + household_weight=[1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0], + congressional_district_geoid=[1305, 1305, 1306, 1306, 3704, 3704, 3712, 3712], + ) + + +@pytest.fixture +def mock_single_economy_without_districts(): + """Mock SingleEconomy with no congressional district data (all zeros).""" + return create_mock_single_economy( + household_net_income=[50000.0, 60000.0, 70000.0], + household_weight=[1000.0, 1000.0, 1000.0], + congressional_district_geoid=[0, 0, 0], + ) + + +@pytest.fixture +def mock_single_economy_with_null_districts(): + """Mock SingleEconomy with None congressional district data.""" + return create_mock_single_economy( + household_net_income=[50000.0, 60000.0, 70000.0], + household_weight=[1000.0, 1000.0, 1000.0], + congressional_district_geoid=None, + ) From a9d364d969c8048ffb39bf8cd76a1cf14d9cea10 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Thu, 15 Jan 2026 00:16:46 +0300 Subject: [PATCH 3/5] feat: Add congressional district breakdowns to state simulations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add congressional_district_geoid to SingleEconomy model - Add us_congressional_district_breakdown() function mirroring UK constituency logic - Add USCongressionalDistrictImpact and USCongressionalDistrictBreakdownWithValues models - Add congressional_district_impact field to EconomyComparison - Add comprehensive tests following given-when-then naming pattern - Add mock fixtures for district testing The district-level calculations use identical formulas to UK parliamentary constituencies: - average_household_income_change = (reform.sum() - baseline.sum()) / count() - relative_household_income_change = reform.sum() / baseline.sum() - 1 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../calculate_economy_comparison.py | 72 ++++++++++++++++--- .../macro/single/calculate_single_economy.py | 4 +- .../test_us_congressional_districts.py | 60 ++++++++++++---- tests/fixtures/simulation.py | 42 +++++++++-- 4 files changed, 147 insertions(+), 31 deletions(-) diff --git a/policyengine/outputs/macro/comparison/calculate_economy_comparison.py b/policyengine/outputs/macro/comparison/calculate_economy_comparison.py index 8b80dfc5..e7797ff9 100644 --- a/policyengine/outputs/macro/comparison/calculate_economy_comparison.py +++ b/policyengine/outputs/macro/comparison/calculate_economy_comparison.py @@ -849,14 +849,58 @@ def uk_local_authority_breakdown( # State FIPS to abbreviation mapping STATE_FIPS_TO_ABBREV = { - 1: "AL", 2: "AK", 4: "AZ", 5: "AR", 6: "CA", 8: "CO", 9: "CT", 10: "DE", - 11: "DC", 12: "FL", 13: "GA", 15: "HI", 16: "ID", 17: "IL", 18: "IN", - 19: "IA", 20: "KS", 21: "KY", 22: "LA", 23: "ME", 24: "MD", 25: "MA", - 26: "MI", 27: "MN", 28: "MS", 29: "MO", 30: "MT", 31: "NE", 32: "NV", - 33: "NH", 34: "NJ", 35: "NM", 36: "NY", 37: "NC", 38: "ND", 39: "OH", - 40: "OK", 41: "OR", 42: "PA", 44: "RI", 45: "SC", 46: "SD", 47: "TN", - 48: "TX", 49: "UT", 50: "VT", 51: "VA", 53: "WA", 54: "WV", 55: "WI", - 56: "WY", 72: "PR", + 1: "AL", + 2: "AK", + 4: "AZ", + 5: "AR", + 6: "CA", + 8: "CO", + 9: "CT", + 10: "DE", + 11: "DC", + 12: "FL", + 13: "GA", + 15: "HI", + 16: "ID", + 17: "IL", + 18: "IN", + 19: "IA", + 20: "KS", + 21: "KY", + 22: "LA", + 23: "ME", + 24: "MD", + 25: "MA", + 26: "MI", + 27: "MN", + 28: "MS", + 29: "MO", + 30: "MT", + 31: "NE", + 32: "NV", + 33: "NH", + 34: "NJ", + 35: "NM", + 36: "NY", + 37: "NC", + 38: "ND", + 39: "OH", + 40: "OK", + 41: "OR", + 42: "PA", + 44: "RI", + 45: "SC", + 46: "SD", + 47: "TN", + 48: "TX", + 49: "UT", + 50: "VT", + 51: "VA", + 53: "WA", + 54: "WV", + 55: "WI", + 56: "WY", + 72: "PR", } @@ -878,7 +922,9 @@ class USCongressionalDistrictBreakdownWithValues(BaseModel): districts: List[USCongressionalDistrictImpact] -USCongressionalDistrictBreakdown = USCongressionalDistrictBreakdownWithValues | None +USCongressionalDistrictBreakdown = ( + USCongressionalDistrictBreakdownWithValues | None +) def us_congressional_district_breakdown( @@ -945,8 +991,12 @@ def us_congressional_district_breakdown( districts.append( USCongressionalDistrictImpact( district=district_name, - average_household_income_change=float(average_household_income_change), - relative_household_income_change=float(relative_household_income_change), + average_household_income_change=float( + average_household_income_change + ), + relative_household_income_change=float( + relative_household_income_change + ), ) ) diff --git a/policyengine/outputs/macro/single/calculate_single_economy.py b/policyengine/outputs/macro/single/calculate_single_economy.py index 7999c63e..c8f05034 100644 --- a/policyengine/outputs/macro/single/calculate_single_economy.py +++ b/policyengine/outputs/macro/single/calculate_single_economy.py @@ -53,7 +53,9 @@ class SingleEconomy(BaseModel): programs: Dict[str, float] | None cliff_gap: float | None = None cliff_share: float | None = None - congressional_district_geoid: List[int] | None = None # US only: SSDD format + congressional_district_geoid: List[int] | None = ( + None # US only: SSDD format + ) @dataclass diff --git a/tests/country/test_us_congressional_districts.py b/tests/country/test_us_congressional_districts.py index 708f512e..25fd463a 100644 --- a/tests/country/test_us_congressional_districts.py +++ b/tests/country/test_us_congressional_districts.py @@ -66,7 +66,9 @@ def test__given_non_us_country__then_returns_none( country_id = "uk" # When - result = us_congressional_district_breakdown(baseline, reform, country_id) + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) # Then assert result is None @@ -80,7 +82,9 @@ def test__given_null_district_geoids__then_returns_none( country_id = "us" # When - result = us_congressional_district_breakdown(baseline, reform, country_id) + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) # Then assert result is None @@ -94,7 +98,9 @@ def test__given_all_zero_district_geoids__then_returns_none( country_id = "us" # When - result = us_congressional_district_breakdown(baseline, reform, country_id) + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) # Then assert result is None @@ -108,7 +114,9 @@ def test__given_valid_district_data__then_returns_breakdown_with_districts_list( country_id = "us" # When - result = us_congressional_district_breakdown(baseline, reform, country_id) + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) # Then assert result is not None @@ -125,7 +133,9 @@ def test__given_two_ga_districts__then_returns_two_district_impacts( country_id = "us" # When - result = us_congressional_district_breakdown(baseline, reform, country_id) + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) # Then assert len(result.districts) == 2 @@ -142,7 +152,9 @@ def test__given_districts_from_multiple_states__then_returns_all_districts_sorte country_id = "us" # When - result = us_congressional_district_breakdown(baseline, reform, country_id) + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) # Then assert len(result.districts) == 4 @@ -159,7 +171,9 @@ def test__given_no_income_change__then_returns_zero_changes( country_id = "us" # When - result = us_congressional_district_breakdown(baseline, reform, country_id) + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) # Then for district in result.districts: @@ -181,7 +195,9 @@ def test__given_income_increase__then_returns_positive_changes(self): country_id = "us" # When - result = us_congressional_district_breakdown(baseline, reform, country_id) + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) # Then assert len(result.districts) == 1 @@ -205,7 +221,9 @@ def test__given_income_decrease__then_returns_negative_changes(self): country_id = "us" # When - result = us_congressional_district_breakdown(baseline, reform, country_id) + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) # Then assert len(result.districts) == 1 @@ -214,7 +232,9 @@ def test__given_income_decrease__then_returns_negative_changes(self): assert district.average_household_income_change == -1000.0 assert district.relative_household_income_change < 0 - def test__given_weighted_households__then_calculates_weighted_averages(self): + def test__given_weighted_households__then_calculates_weighted_averages( + self, + ): # Given: households with different weights baseline = create_mock_single_economy( household_net_income=[50000.0, 100000.0], @@ -229,7 +249,9 @@ def test__given_weighted_households__then_calculates_weighted_averages(self): country_id = "us" # When - result = us_congressional_district_breakdown(baseline, reform, country_id) + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) # Then district = result.districts[0] @@ -247,7 +269,9 @@ def test__given_district_impact__then_has_required_fields( country_id = "us" # When - result = us_congressional_district_breakdown(baseline, reform, country_id) + result = us_congressional_district_breakdown( + baseline, reform, country_id + ) # Then for district in result.districts: @@ -263,7 +287,9 @@ def test__given_district_impact__then_has_required_fields( class TestCongressionalDistrictGeoidExtraction: """Tests for congressional_district_geoid extraction in SingleEconomy.""" - def test__given_us_simulation_with_state_dataset__then_geoid_is_extracted(self): + def test__given_us_simulation_with_state_dataset__then_geoid_is_extracted( + self, + ): """Integration test: verify geoid extraction works with real simulation. Note: This test requires network access to download state dataset. @@ -288,8 +314,12 @@ def test__given_us_simulation_with_state_dataset__then_geoid_is_extracted(self): assert result.congressional_district_geoid is not None assert len(result.congressional_district_geoid) > 0 # All geoids should be in Georgia (FIPS 13xx) - non_zero_geoids = [g for g in result.congressional_district_geoid if g > 0] + non_zero_geoids = [ + g for g in result.congressional_district_geoid if g > 0 + ] assert len(non_zero_geoids) > 0 for geoid in non_zero_geoids: state_fips = geoid // 100 - assert state_fips == 13, f"Expected GA (13), got state FIPS {state_fips}" + assert ( + state_fips == 13 + ), f"Expected GA (13), got state FIPS {state_fips}" diff --git a/tests/fixtures/simulation.py b/tests/fixtures/simulation.py index 9469f2f1..d2361fef 100644 --- a/tests/fixtures/simulation.py +++ b/tests/fixtures/simulation.py @@ -106,7 +106,14 @@ def mock_single_economy_with_ga_districts(): - GA-06 (geoid 1306): 3 households """ return create_mock_single_economy( - household_net_income=[50000.0, 60000.0, 70000.0, 80000.0, 90000.0, 100000.0], + household_net_income=[ + 50000.0, + 60000.0, + 70000.0, + 80000.0, + 90000.0, + 100000.0, + ], household_weight=[1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0], congressional_district_geoid=[1305, 1305, 1305, 1306, 1306, 1306], ) @@ -123,9 +130,36 @@ def mock_single_economy_with_multi_state_districts(): - NC-12 (geoid 3712): 2 households """ return create_mock_single_economy( - household_net_income=[50000.0, 60000.0, 70000.0, 80000.0, 40000.0, 45000.0, 55000.0, 65000.0], - household_weight=[1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0], - congressional_district_geoid=[1305, 1305, 1306, 1306, 3704, 3704, 3712, 3712], + household_net_income=[ + 50000.0, + 60000.0, + 70000.0, + 80000.0, + 40000.0, + 45000.0, + 55000.0, + 65000.0, + ], + household_weight=[ + 1000.0, + 1000.0, + 1000.0, + 1000.0, + 1000.0, + 1000.0, + 1000.0, + 1000.0, + ], + congressional_district_geoid=[ + 1305, + 1305, + 1306, + 1306, + 3704, + 3704, + 3712, + 3712, + ], ) From 7a3d1c77b4a817ceeab383865ee19c8f092ded0b Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Thu, 15 Jan 2026 00:24:12 +0300 Subject: [PATCH 4/5] refactor: Move geography utilities to policyengine/utils/geography.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move STATE_FIPS_TO_ABBREV mapping to utils/geography.py - Move geoid_to_district_name() function to utils/geography.py - Update imports in calculate_economy_comparison.py and tests 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../calculate_economy_comparison.py | 68 +---------------- policyengine/utils/geography.py | 73 +++++++++++++++++++ .../test_us_congressional_districts.py | 2 +- 3 files changed, 78 insertions(+), 65 deletions(-) create mode 100644 policyengine/utils/geography.py diff --git a/policyengine/outputs/macro/comparison/calculate_economy_comparison.py b/policyengine/outputs/macro/comparison/calculate_economy_comparison.py index e7797ff9..04c7081a 100644 --- a/policyengine/outputs/macro/comparison/calculate_economy_comparison.py +++ b/policyengine/outputs/macro/comparison/calculate_economy_comparison.py @@ -10,6 +10,10 @@ from policyengine.outputs.macro.single.calculate_single_economy import ( SingleEconomy, ) +from policyengine.utils.geography import ( + STATE_FIPS_TO_ABBREV, + geoid_to_district_name, +) from typing import List, Dict, Optional import logging @@ -847,70 +851,6 @@ def uk_local_authority_breakdown( # US Congressional District Breakdown Models -# State FIPS to abbreviation mapping -STATE_FIPS_TO_ABBREV = { - 1: "AL", - 2: "AK", - 4: "AZ", - 5: "AR", - 6: "CA", - 8: "CO", - 9: "CT", - 10: "DE", - 11: "DC", - 12: "FL", - 13: "GA", - 15: "HI", - 16: "ID", - 17: "IL", - 18: "IN", - 19: "IA", - 20: "KS", - 21: "KY", - 22: "LA", - 23: "ME", - 24: "MD", - 25: "MA", - 26: "MI", - 27: "MN", - 28: "MS", - 29: "MO", - 30: "MT", - 31: "NE", - 32: "NV", - 33: "NH", - 34: "NJ", - 35: "NM", - 36: "NY", - 37: "NC", - 38: "ND", - 39: "OH", - 40: "OK", - 41: "OR", - 42: "PA", - 44: "RI", - 45: "SC", - 46: "SD", - 47: "TN", - 48: "TX", - 49: "UT", - 50: "VT", - 51: "VA", - 53: "WA", - 54: "WV", - 55: "WI", - 56: "WY", - 72: "PR", -} - - -def geoid_to_district_name(geoid: int) -> str: - """Convert congressional district geoid (SSDD format) to name like 'GA-05'.""" - state_fips = geoid // 100 - district_num = geoid % 100 - state_abbrev = STATE_FIPS_TO_ABBREV.get(state_fips, f"S{state_fips}") - return f"{state_abbrev}-{district_num:02d}" - class USCongressionalDistrictImpact(BaseModel): district: str # e.g., "GA-05" diff --git a/policyengine/utils/geography.py b/policyengine/utils/geography.py new file mode 100644 index 00000000..1baccd9e --- /dev/null +++ b/policyengine/utils/geography.py @@ -0,0 +1,73 @@ +"""Geographic utilities and constants for PolicyEngine.""" + +# US State FIPS codes to two-letter abbreviation mapping +STATE_FIPS_TO_ABBREV = { + 1: "AL", + 2: "AK", + 4: "AZ", + 5: "AR", + 6: "CA", + 8: "CO", + 9: "CT", + 10: "DE", + 11: "DC", + 12: "FL", + 13: "GA", + 15: "HI", + 16: "ID", + 17: "IL", + 18: "IN", + 19: "IA", + 20: "KS", + 21: "KY", + 22: "LA", + 23: "ME", + 24: "MD", + 25: "MA", + 26: "MI", + 27: "MN", + 28: "MS", + 29: "MO", + 30: "MT", + 31: "NE", + 32: "NV", + 33: "NH", + 34: "NJ", + 35: "NM", + 36: "NY", + 37: "NC", + 38: "ND", + 39: "OH", + 40: "OK", + 41: "OR", + 42: "PA", + 44: "RI", + 45: "SC", + 46: "SD", + 47: "TN", + 48: "TX", + 49: "UT", + 50: "VT", + 51: "VA", + 53: "WA", + 54: "WV", + 55: "WI", + 56: "WY", + 72: "PR", +} + + +def geoid_to_district_name(geoid: int) -> str: + """Convert congressional district geoid (SSDD format) to name like 'GA-05'. + + Args: + geoid: Congressional district geoid in SSDD format where SS is the + state FIPS code and DD is the district number. + + Returns: + District name in format "XX-DD" (e.g., "GA-05", "CA-12"). + """ + state_fips = geoid // 100 + district_num = geoid % 100 + state_abbrev = STATE_FIPS_TO_ABBREV.get(state_fips, f"S{state_fips}") + return f"{state_abbrev}-{district_num:02d}" diff --git a/tests/country/test_us_congressional_districts.py b/tests/country/test_us_congressional_districts.py index 25fd463a..76937485 100644 --- a/tests/country/test_us_congressional_districts.py +++ b/tests/country/test_us_congressional_districts.py @@ -4,10 +4,10 @@ from tests.fixtures.simulation import create_mock_single_economy from policyengine.outputs.macro.comparison.calculate_economy_comparison import ( us_congressional_district_breakdown, - geoid_to_district_name, USCongressionalDistrictBreakdownWithValues, USCongressionalDistrictImpact, ) +from policyengine.utils.geography import geoid_to_district_name class TestGeoidToDistrictName: From 191dfe79c93018ae45da3fda9c97b0ffee2f2ed9 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Thu, 15 Jan 2026 01:09:46 +0300 Subject: [PATCH 5/5] chore: Add changelog entry for congressional district breakdowns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- changelog_entry.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..714008fd 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,6 @@ +- bump: minor + changes: + added: + - Congressional district breakdowns for US state-level simulations + - New `congressional_district_impact` field in `EconomyComparison` with district-level `average_household_income_change` and `relative_household_income_change` + - Geography utilities module (`policyengine/utils/geography.py`) with `STATE_FIPS_TO_ABBREV` mapping and `geoid_to_district_name()` helper