From 69bd31814364e8bad53f20069deac79bca1205d0 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 10 Feb 2026 14:58:11 +0100 Subject: [PATCH 1/8] feat: Region filtering and calculations --- src/policyengine/core/__init__.py | 3 + src/policyengine/core/region.py | 201 ++++++++++ .../core/tax_benefit_model_version.py | 19 + src/policyengine/countries/__init__.py | 9 + src/policyengine/countries/uk/__init__.py | 5 + src/policyengine/countries/uk/regions.py | 176 +++++++++ src/policyengine/countries/us/__init__.py | 5 + .../countries/us/data/__init__.py | 18 + .../countries/us/data/districts.py | 64 ++++ src/policyengine/countries/us/data/places.py | 346 ++++++++++++++++++ src/policyengine/countries/us/data/states.py | 59 +++ src/policyengine/countries/us/regions.py | 106 ++++++ .../tax_benefit_models/uk/model.py | 5 + .../tax_benefit_models/us/model.py | 5 + tests/fixtures/region_fixtures.py | 127 +++++++ tests/test_models.py | 30 ++ tests/test_region.py | 246 +++++++++++++ tests/test_uk_regions.py | 227 ++++++++++++ tests/test_us_regions.py | 252 +++++++++++++ uv.lock | 22 +- 20 files changed, 1914 insertions(+), 11 deletions(-) create mode 100644 src/policyengine/core/region.py create mode 100644 src/policyengine/countries/__init__.py create mode 100644 src/policyengine/countries/uk/__init__.py create mode 100644 src/policyengine/countries/uk/regions.py create mode 100644 src/policyengine/countries/us/__init__.py create mode 100644 src/policyengine/countries/us/data/__init__.py create mode 100644 src/policyengine/countries/us/data/districts.py create mode 100644 src/policyengine/countries/us/data/places.py create mode 100644 src/policyengine/countries/us/data/states.py create mode 100644 src/policyengine/countries/us/regions.py create mode 100644 tests/fixtures/region_fixtures.py create mode 100644 tests/test_region.py create mode 100644 tests/test_uk_regions.py create mode 100644 tests/test_us_regions.py diff --git a/src/policyengine/core/__init__.py 
b/src/policyengine/core/__init__.py index 630620a0..fdd250ea 100644 --- a/src/policyengine/core/__init__.py +++ b/src/policyengine/core/__init__.py @@ -8,6 +8,9 @@ from .parameter import Parameter as Parameter from .parameter_value import ParameterValue as ParameterValue from .policy import Policy as Policy +from .region import Region as Region +from .region import RegionRegistry as RegionRegistry +from .region import RegionType as RegionType from .simulation import Simulation as Simulation from .tax_benefit_model import TaxBenefitModel as TaxBenefitModel from .tax_benefit_model_version import ( diff --git a/src/policyengine/core/region.py b/src/policyengine/core/region.py new file mode 100644 index 00000000..3208b35e --- /dev/null +++ b/src/policyengine/core/region.py @@ -0,0 +1,201 @@ +"""Region definitions for geographic simulations. + +This module provides the Region and RegionRegistry classes for defining +geographic regions that a tax-benefit model supports. Regions can have: +1. A dedicated dataset (e.g., US states, congressional districts) +2. Filter from a parent region's dataset (e.g., US places/cities, UK countries) +""" + +from typing import Literal + +from pydantic import BaseModel, Field, PrivateAttr + +# Region type literals for US and UK +USRegionType = Literal["national", "state", "congressional_district", "place"] +UKRegionType = Literal["national", "country", "constituency", "local_authority"] +RegionType = USRegionType | UKRegionType + + +class Region(BaseModel): + """Geographic region for tax-benefit simulations. + + Regions can either have: + 1. A dedicated dataset (dataset_path is set, requires_filter is False) + 2. 
Filter from a parent region's dataset (requires_filter is True) + + The unique identifier is the code field, which uses a prefixed format: + - National: "us", "uk" + - State: "state/ca", "state/ny" + - Congressional District: "congressional_district/CA-01" + - Place: "place/NJ-57000" + - UK Country: "country/england" + - Constituency: "constituency/Sheffield Central" + - Local Authority: "local_authority/E09000001" + """ + + # Core identification + code: str = Field( + ..., + description="Unique region code with type prefix (e.g., 'state/ca', 'place/NJ-57000')", + ) + label: str = Field(..., description="Human-readable label (e.g., 'California')") + region_type: RegionType = Field( + ..., description="Type of region (e.g., 'state', 'place')" + ) + + # Hierarchy + parent_code: str | None = Field( + default=None, + description="Code of parent region (e.g., 'us' for states, 'state/nj' for places in New Jersey)", + ) + + # Dataset configuration + dataset_path: str | None = Field( + default=None, + description="GCS path to dedicated dataset (e.g., 'gs://policyengine-us-data/states/CA.h5')", + ) + + # Filtering configuration (for regions that filter from parent datasets) + requires_filter: bool = Field( + default=False, + description="True if this region filters from a parent dataset rather than having its own", + ) + filter_field: str | None = Field( + default=None, + description="Dataset field to filter on (e.g., 'place_fips', 'country')", + ) + filter_value: str | None = Field( + default=None, + description="Value to match when filtering (defaults to code suffix if not set)", + ) + + # Metadata (primarily for US congressional districts) + state_code: str | None = Field( + default=None, description="Two-letter state code (e.g., 'CA', 'NJ')" + ) + state_name: str | None = Field( + default=None, description="Full state name (e.g., 'California', 'New Jersey')" + ) + + def __hash__(self) -> int: + """Hash by code for use in sets and dict keys.""" + return hash(self.code) + 
+ def __eq__(self, other: object) -> bool: + """Equality by code.""" + if not isinstance(other, Region): + return False + return self.code == other.code + + +class RegionRegistry(BaseModel): + """Registry of all regions for a country model. + + Provides indexed lookups for regions by code and type. + Indices are rebuilt automatically after initialization. + """ + + country_id: str = Field(..., description="Country identifier (e.g., 'us', 'uk')") + regions: list[Region] = Field(default_factory=list) + + # Private indexed lookups (excluded from serialization) + _by_code: dict[str, Region] = PrivateAttr(default_factory=dict) + _by_type: dict[str, list[Region]] = PrivateAttr(default_factory=dict) + + def model_post_init(self, __context: object) -> None: + """Build lookup indices after initialization.""" + self._rebuild_indices() + + def _rebuild_indices(self) -> None: + """Rebuild all lookup indices from the regions list.""" + self._by_code = {} + self._by_type = {} + + for region in self.regions: + # Index by code + self._by_code[region.code] = region + + # Index by type + if region.region_type not in self._by_type: + self._by_type[region.region_type] = [] + self._by_type[region.region_type].append(region) + + def add_region(self, region: Region) -> None: + """Add a region to the registry and update indices.""" + self.regions.append(region) + self._by_code[region.code] = region + if region.region_type not in self._by_type: + self._by_type[region.region_type] = [] + self._by_type[region.region_type].append(region) + + def get(self, code: str) -> Region | None: + """Get a region by its code. + + Args: + code: Region code (e.g., 'state/ca', 'place/NJ-57000') + + Returns: + The Region if found, None otherwise + """ + return self._by_code.get(code) + + def get_by_type(self, region_type: str) -> list[Region]: + """Get all regions of a given type. 
+ + Args: + region_type: Type to filter by (e.g., 'state', 'place') + + Returns: + List of regions with the given type + """ + return self._by_type.get(region_type, []) + + def get_national(self) -> Region | None: + """Get the national-level region. + + Returns: + The national Region if found, None otherwise + """ + national = self.get_by_type("national") + return national[0] if national else None + + def get_children(self, parent_code: str) -> list[Region]: + """Get all regions with a given parent code. + + Args: + parent_code: Parent region code to filter by + + Returns: + List of regions with the given parent + """ + return [r for r in self.regions if r.parent_code == parent_code] + + def get_dataset_regions(self) -> list[Region]: + """Get all regions that have dedicated datasets. + + Returns: + List of regions with dataset_path set and requires_filter False + """ + return [ + r for r in self.regions if r.dataset_path is not None and not r.requires_filter + ] + + def get_filter_regions(self) -> list[Region]: + """Get all regions that require filtering from parent datasets. 
+ + Returns: + List of regions with requires_filter True + """ + return [r for r in self.regions if r.requires_filter] + + def __len__(self) -> int: + """Return the number of regions in the registry.""" + return len(self.regions) + + def __iter__(self): + """Iterate over regions.""" + return iter(self.regions) + + def __contains__(self, code: str) -> bool: + """Check if a region code exists in the registry.""" + return code in self._by_code diff --git a/src/policyengine/core/tax_benefit_model_version.py b/src/policyengine/core/tax_benefit_model_version.py index be9d5af3..e74f82c1 100644 --- a/src/policyengine/core/tax_benefit_model_version.py +++ b/src/policyengine/core/tax_benefit_model_version.py @@ -9,6 +9,7 @@ if TYPE_CHECKING: from .parameter import Parameter from .parameter_value import ParameterValue + from .region import Region, RegionRegistry from .simulation import Simulation from .variable import Variable @@ -25,6 +26,11 @@ class TaxBenefitModelVersion(BaseModel): variables: list["Variable"] = Field(default_factory=list) parameters: list["Parameter"] = Field(default_factory=list) + # Region registry for geographic simulations + region_registry: "RegionRegistry | None" = Field( + default=None, description="Registry of supported geographic regions" + ) + @property def parameter_values(self) -> list["ParameterValue"]: """Aggregate all parameter values from all parameters.""" @@ -83,6 +89,19 @@ def get_variable(self, name: str) -> "Variable": f"Variable '{name}' not found in {self.model.id} version {self.version}" ) + def get_region(self, code: str) -> "Region | None": + """Get a region by its code. 
+ + Args: + code: Region code (e.g., 'state/ca', 'place/NJ-57000') + + Returns: + The Region if found, None if not found or no region registry + """ + if self.region_registry is None: + return None + return self.region_registry.get(code) + def __repr__(self) -> str: # Give the id and version, and the number of variables, parameters, parameter values return f"" diff --git a/src/policyengine/countries/__init__.py b/src/policyengine/countries/__init__.py new file mode 100644 index 00000000..3f647fd9 --- /dev/null +++ b/src/policyengine/countries/__init__.py @@ -0,0 +1,9 @@ +"""Country-specific region definitions. + +This package contains region registries for each supported country. +""" + +from .uk.regions import uk_region_registry +from .us.regions import us_region_registry + +__all__ = ["us_region_registry", "uk_region_registry"] diff --git a/src/policyengine/countries/uk/__init__.py b/src/policyengine/countries/uk/__init__.py new file mode 100644 index 00000000..b2c255d3 --- /dev/null +++ b/src/policyengine/countries/uk/__init__.py @@ -0,0 +1,5 @@ +"""UK country-specific region definitions.""" + +from .regions import uk_region_registry + +__all__ = ["uk_region_registry"] diff --git a/src/policyengine/countries/uk/regions.py b/src/policyengine/countries/uk/regions.py new file mode 100644 index 00000000..5e551755 --- /dev/null +++ b/src/policyengine/countries/uk/regions.py @@ -0,0 +1,176 @@ +"""UK region definitions. + +This module defines all UK geographic regions: +- National (1) +- Countries (4: England, Scotland, Wales, Northern Ireland) +- Constituencies (loaded from CSV at runtime) +- Local Authorities (loaded from CSV at runtime) + +Note: Constituencies and local authorities use weight adjustment rather than +data filtering. They modify household_weight based on pre-computed weights +from H5 files stored in GCS. 
+""" + +from pathlib import Path +from typing import TYPE_CHECKING + +from policyengine.core.region import Region, RegionRegistry + +if TYPE_CHECKING: + import pandas as pd + +UK_DATA_BUCKET = "gs://policyengine-uk-data-private" + +# UK countries +UK_COUNTRIES = { + "england": "England", + "scotland": "Scotland", + "wales": "Wales", + "northern_ireland": "Northern Ireland", +} + + +def _load_constituencies_from_csv() -> list[dict]: + """Load UK constituency data from CSV. + + Constituencies are loaded from: + gs://policyengine-uk-data-private/constituencies_2024.csv + + Returns: + List of dicts with 'code' and 'name' keys + """ + try: + from policyengine_core.tools.google_cloud import download + except ImportError: + # If policyengine_core is not available, return empty list + return [] + + try: + csv_path = download( + gcs_bucket="policyengine-uk-data-private", + gcs_key="constituencies_2024.csv", + ) + import pandas as pd + + df = pd.read_csv(csv_path) + return [{"code": row["code"], "name": row["name"]} for _, row in df.iterrows()] + except Exception: + # If download fails, return empty list + return [] + + +def _load_local_authorities_from_csv() -> list[dict]: + """Load UK local authority data from CSV. 
+ + Local authorities are loaded from: + gs://policyengine-uk-data-private/local_authorities_2021.csv + + Returns: + List of dicts with 'code' and 'name' keys + """ + try: + from policyengine_core.tools.google_cloud import download + except ImportError: + # If policyengine_core is not available, return empty list + return [] + + try: + csv_path = download( + gcs_bucket="policyengine-uk-data-private", + gcs_key="local_authorities_2021.csv", + ) + import pandas as pd + + df = pd.read_csv(csv_path) + return [{"code": row["code"], "name": row["name"]} for _, row in df.iterrows()] + except Exception: + # If download fails, return empty list + return [] + + +def build_uk_region_registry( + include_constituencies: bool = False, + include_local_authorities: bool = False, +) -> RegionRegistry: + """Build the UK region registry. + + Args: + include_constituencies: If True, load and include constituencies from CSV. + Defaults to False to avoid GCS dependency at import time. + include_local_authorities: If True, load and include local authorities from CSV. + Defaults to False to avoid GCS dependency at import time. + + Returns: + RegionRegistry containing: + - 1 national region + - 4 country regions + - Optionally: constituencies (if include_constituencies=True) + - Optionally: local authorities (if include_local_authorities=True) + """ + regions: list[Region] = [] + + # 1. National region (has dedicated dataset) + regions.append( + Region( + code="uk", + label="United Kingdom", + region_type="national", + dataset_path=f"{UK_DATA_BUCKET}/enhanced_frs_2023_24.h5", + ) + ) + + # 2. Country regions (filter from national by 'country' variable) + for code, name in UK_COUNTRIES.items(): + regions.append( + Region( + code=f"country/{code}", + label=name, + region_type="country", + parent_code="uk", + requires_filter=True, + filter_field="country", + filter_value=code.upper(), + ) + ) + + # 3. 
Constituencies (optional, loaded from CSV) + # Note: These use weight adjustment, not data filtering + if include_constituencies: + constituencies = _load_constituencies_from_csv() + for const in constituencies: + regions.append( + Region( + code=f"constituency/{const['code']}", + label=const["name"], + region_type="constituency", + parent_code="uk", + requires_filter=True, + filter_field="household_weight", # Uses weight adjustment + filter_value=const["code"], + ) + ) + + # 4. Local Authorities (optional, loaded from CSV) + # Note: These use weight adjustment, not data filtering + if include_local_authorities: + local_authorities = _load_local_authorities_from_csv() + for la in local_authorities: + regions.append( + Region( + code=f"local_authority/{la['code']}", + label=la["name"], + region_type="local_authority", + parent_code="uk", + requires_filter=True, + filter_field="household_weight", # Uses weight adjustment + filter_value=la["code"], + ) + ) + + return RegionRegistry(country_id="uk", regions=regions) + + +# Default registry with just core regions (national + countries) +# To get full registry with constituencies/LAs, call: +# build_uk_region_registry(include_constituencies=True, include_local_authorities=True) +uk_region_registry = build_uk_region_registry() diff --git a/src/policyengine/countries/us/__init__.py b/src/policyengine/countries/us/__init__.py new file mode 100644 index 00000000..68592459 --- /dev/null +++ b/src/policyengine/countries/us/__init__.py @@ -0,0 +1,5 @@ +"""US country-specific region definitions.""" + +from .regions import us_region_registry + +__all__ = ["us_region_registry"] diff --git a/src/policyengine/countries/us/data/__init__.py b/src/policyengine/countries/us/data/__init__.py new file mode 100644 index 00000000..fb833b64 --- /dev/null +++ b/src/policyengine/countries/us/data/__init__.py @@ -0,0 +1,18 @@ +"""US geographic data definitions. 
+ +This module provides static data for US geographic regions: +- states.py: State abbreviations and full names +- districts.py: Congressional district counts by state +- places.py: US Census places (cities/towns over 100K population) +""" + +from .districts import AT_LARGE_STATES, DISTRICT_COUNTS +from .places import US_PLACES +from .states import US_STATES + +__all__ = [ + "US_STATES", + "DISTRICT_COUNTS", + "AT_LARGE_STATES", + "US_PLACES", +] diff --git a/src/policyengine/countries/us/data/districts.py b/src/policyengine/countries/us/data/districts.py new file mode 100644 index 00000000..e77d5e62 --- /dev/null +++ b/src/policyengine/countries/us/data/districts.py @@ -0,0 +1,64 @@ +"""US congressional district definitions. + +Based on 2020 Census apportionment. +Total: 435 voting representatives + 1 DC non-voting delegate = 436 +""" + +# Congressional district counts by state (2020 Census apportionment) +# States with 1 district are "at-large" +DISTRICT_COUNTS: dict[str, int] = { + "AL": 7, + "AK": 1, + "AZ": 9, + "AR": 4, + "CA": 52, + "CO": 8, + "CT": 5, + "DE": 1, + "DC": 1, # Non-voting delegate + "FL": 28, + "GA": 14, + "HI": 2, + "ID": 2, + "IL": 17, + "IN": 9, + "IA": 4, + "KS": 4, + "KY": 6, + "LA": 6, + "ME": 2, + "MD": 8, + "MA": 9, + "MI": 13, + "MN": 8, + "MS": 4, + "MO": 8, + "MT": 2, + "NE": 3, + "NV": 4, + "NH": 2, + "NJ": 12, + "NM": 3, + "NY": 26, + "NC": 14, + "ND": 1, + "OH": 15, + "OK": 5, + "OR": 6, + "PA": 17, + "RI": 2, + "SC": 7, + "SD": 1, + "TN": 9, + "TX": 38, + "UT": 4, + "VT": 1, + "VA": 11, + "WA": 10, + "WV": 2, + "WI": 8, + "WY": 1, +} + +# States with at-large congressional districts (single representative) +AT_LARGE_STATES: set[str] = {"AK", "DE", "DC", "ND", "SD", "VT", "WY"} diff --git a/src/policyengine/countries/us/data/places.py b/src/policyengine/countries/us/data/places.py new file mode 100644 index 00000000..f5367eca --- /dev/null +++ b/src/policyengine/countries/us/data/places.py @@ -0,0 +1,346 @@ +"""US Census places 
with population over 100,000. + +Source: US Census Bureau Population Estimates 2023 +Synced with policyengine-app-v2 main branch. +""" + +# US cities/places with population over 100K (from Census data) +# These filter from their parent state's dataset using place_fips +# Total: 333 places +US_PLACES: list[dict[str, str]] = [ + {"fips": "03000", "name": "Anchorage", "state": "AK", "state_name": "Alaska"}, + {"fips": "07000", "name": "Birmingham", "state": "AL", "state_name": "Alabama"}, + {"fips": "37000", "name": "Huntsville", "state": "AL", "state_name": "Alabama"}, + {"fips": "50000", "name": "Mobile", "state": "AL", "state_name": "Alabama"}, + {"fips": "51000", "name": "Montgomery", "state": "AL", "state_name": "Alabama"}, + {"fips": "77256", "name": "Tuscaloosa", "state": "AL", "state_name": "Alabama"}, + {"fips": "23290", "name": "Fayetteville", "state": "AR", "state_name": "Arkansas"}, + {"fips": "41000", "name": "Little Rock", "state": "AR", "state_name": "Arkansas"}, + {"fips": "07940", "name": "Buckeye", "state": "AZ", "state_name": "Arizona"}, + {"fips": "12000", "name": "Chandler", "state": "AZ", "state_name": "Arizona"}, + {"fips": "27400", "name": "Gilbert", "state": "AZ", "state_name": "Arizona"}, + {"fips": "27820", "name": "Glendale", "state": "AZ", "state_name": "Arizona"}, + {"fips": "28380", "name": "Goodyear", "state": "AZ", "state_name": "Arizona"}, + {"fips": "46000", "name": "Mesa", "state": "AZ", "state_name": "Arizona"}, + {"fips": "54050", "name": "Peoria", "state": "AZ", "state_name": "Arizona"}, + {"fips": "55000", "name": "Phoenix", "state": "AZ", "state_name": "Arizona"}, + {"fips": "65000", "name": "Scottsdale", "state": "AZ", "state_name": "Arizona"}, + {"fips": "71510", "name": "Surprise", "state": "AZ", "state_name": "Arizona"}, + {"fips": "73000", "name": "Tempe", "state": "AZ", "state_name": "Arizona"}, + {"fips": "77000", "name": "Tucson", "state": "AZ", "state_name": "Arizona"}, + {"fips": "85540", "name": "Yuma", "state": 
"AZ", "state_name": "Arizona"}, + {"fips": "02000", "name": "Anaheim", "state": "CA", "state_name": "California"}, + {"fips": "02252", "name": "Antioch", "state": "CA", "state_name": "California"}, + {"fips": "03526", "name": "Bakersfield", "state": "CA", "state_name": "California"}, + {"fips": "06000", "name": "Berkeley", "state": "CA", "state_name": "California"}, + {"fips": "08954", "name": "Burbank", "state": "CA", "state_name": "California"}, + {"fips": "11194", "name": "Carlsbad", "state": "CA", "state_name": "California"}, + {"fips": "13014", "name": "Chico", "state": "CA", "state_name": "California"}, + {"fips": "13392", "name": "Chula Vista", "state": "CA", "state_name": "California"}, + {"fips": "14218", "name": "Clovis", "state": "CA", "state_name": "California"}, + {"fips": "16000", "name": "Concord", "state": "CA", "state_name": "California"}, + {"fips": "16350", "name": "Corona", "state": "CA", "state_name": "California"}, + {"fips": "16532", "name": "Costa Mesa", "state": "CA", "state_name": "California"}, + {"fips": "19766", "name": "Downey", "state": "CA", "state_name": "California"}, + {"fips": "21712", "name": "El Cajon", "state": "CA", "state_name": "California"}, + {"fips": "22230", "name": "El Monte", "state": "CA", "state_name": "California"}, + {"fips": "22020", "name": "Elk Grove", "state": "CA", "state_name": "California"}, + {"fips": "22804", "name": "Escondido", "state": "CA", "state_name": "California"}, + {"fips": "23182", "name": "Fairfield", "state": "CA", "state_name": "California"}, + {"fips": "24680", "name": "Fontana", "state": "CA", "state_name": "California"}, + {"fips": "26000", "name": "Fremont", "state": "CA", "state_name": "California"}, + {"fips": "27000", "name": "Fresno", "state": "CA", "state_name": "California"}, + {"fips": "28000", "name": "Fullerton", "state": "CA", "state_name": "California"}, + {"fips": "29000", "name": "Garden Grove", "state": "CA", "state_name": "California"}, + {"fips": "30000", "name": 
"Glendale", "state": "CA", "state_name": "California"}, + {"fips": "33000", "name": "Hayward", "state": "CA", "state_name": "California"}, + {"fips": "33434", "name": "Hesperia", "state": "CA", "state_name": "California"}, + {"fips": "36000", "name": "Huntington Beach", "state": "CA", "state_name": "California"}, + {"fips": "36546", "name": "Inglewood", "state": "CA", "state_name": "California"}, + {"fips": "36770", "name": "Irvine", "state": "CA", "state_name": "California"}, + {"fips": "37692", "name": "Jurupa Valley", "state": "CA", "state_name": "California"}, + {"fips": "40130", "name": "Lancaster", "state": "CA", "state_name": "California"}, + {"fips": "43000", "name": "Long Beach", "state": "CA", "state_name": "California"}, + {"fips": "44000", "name": "Los Angeles", "state": "CA", "state_name": "California"}, + {"fips": "46842", "name": "Menifee", "state": "CA", "state_name": "California"}, + {"fips": "48354", "name": "Modesto", "state": "CA", "state_name": "California"}, + {"fips": "49270", "name": "Moreno Valley", "state": "CA", "state_name": "California"}, + {"fips": "50076", "name": "Murrieta", "state": "CA", "state_name": "California"}, + {"fips": "53000", "name": "Oakland", "state": "CA", "state_name": "California"}, + {"fips": "53322", "name": "Oceanside", "state": "CA", "state_name": "California"}, + {"fips": "53896", "name": "Ontario", "state": "CA", "state_name": "California"}, + {"fips": "53980", "name": "Orange", "state": "CA", "state_name": "California"}, + {"fips": "54652", "name": "Oxnard", "state": "CA", "state_name": "California"}, + {"fips": "55156", "name": "Palmdale", "state": "CA", "state_name": "California"}, + {"fips": "56000", "name": "Pasadena", "state": "CA", "state_name": "California"}, + {"fips": "58072", "name": "Pomona", "state": "CA", "state_name": "California"}, + {"fips": "59451", "name": "Rancho Cucamonga", "state": "CA", "state_name": "California"}, + {"fips": "60466", "name": "Rialto", "state": "CA", "state_name": 
"California"}, + {"fips": "60620", "name": "Richmond", "state": "CA", "state_name": "California"}, + {"fips": "62000", "name": "Riverside", "state": "CA", "state_name": "California"}, + {"fips": "62938", "name": "Roseville", "state": "CA", "state_name": "California"}, + {"fips": "64000", "name": "Sacramento", "state": "CA", "state_name": "California"}, + {"fips": "64224", "name": "Salinas", "state": "CA", "state_name": "California"}, + {"fips": "65000", "name": "San Bernardino", "state": "CA", "state_name": "California"}, + {"fips": "66000", "name": "San Diego", "state": "CA", "state_name": "California"}, + {"fips": "67000", "name": "San Francisco", "state": "CA", "state_name": "California"}, + {"fips": "68000", "name": "San Jose", "state": "CA", "state_name": "California"}, + {"fips": "68252", "name": "San Mateo", "state": "CA", "state_name": "California"}, + {"fips": "69000", "name": "Santa Ana", "state": "CA", "state_name": "California"}, + {"fips": "69084", "name": "Santa Clara", "state": "CA", "state_name": "California"}, + {"fips": "69088", "name": "Santa Clarita", "state": "CA", "state_name": "California"}, + {"fips": "69196", "name": "Santa Maria", "state": "CA", "state_name": "California"}, + {"fips": "70098", "name": "Santa Rosa", "state": "CA", "state_name": "California"}, + {"fips": "72016", "name": "Simi Valley", "state": "CA", "state_name": "California"}, + {"fips": "75000", "name": "Stockton", "state": "CA", "state_name": "California"}, + {"fips": "77000", "name": "Sunnyvale", "state": "CA", "state_name": "California"}, + {"fips": "78120", "name": "Temecula", "state": "CA", "state_name": "California"}, + {"fips": "78582", "name": "Thousand Oaks", "state": "CA", "state_name": "California"}, + {"fips": "80000", "name": "Torrance", "state": "CA", "state_name": "California"}, + {"fips": "81554", "name": "Vacaville", "state": "CA", "state_name": "California"}, + {"fips": "81666", "name": "Vallejo", "state": "CA", "state_name": "California"}, + {"fips": 
"65042", "name": "Ventura", "state": "CA", "state_name": "California"}, + {"fips": "82590", "name": "Victorville", "state": "CA", "state_name": "California"}, + {"fips": "82954", "name": "Visalia", "state": "CA", "state_name": "California"}, + {"fips": "84200", "name": "West Covina", "state": "CA", "state_name": "California"}, + {"fips": "03455", "name": "Arvada", "state": "CO", "state_name": "Colorado"}, + {"fips": "04000", "name": "Aurora", "state": "CO", "state_name": "Colorado"}, + {"fips": "07850", "name": "Boulder", "state": "CO", "state_name": "Colorado"}, + {"fips": "12815", "name": "Centennial", "state": "CO", "state_name": "Colorado"}, + {"fips": "16000", "name": "Colorado Springs", "state": "CO", "state_name": "Colorado"}, + {"fips": "20000", "name": "Denver", "state": "CO", "state_name": "Colorado"}, + {"fips": "27425", "name": "Fort Collins", "state": "CO", "state_name": "Colorado"}, + {"fips": "32155", "name": "Greeley", "state": "CO", "state_name": "Colorado"}, + {"fips": "43000", "name": "Lakewood", "state": "CO", "state_name": "Colorado"}, + {"fips": "62000", "name": "Pueblo", "state": "CO", "state_name": "Colorado"}, + {"fips": "77290", "name": "Thornton", "state": "CO", "state_name": "Colorado"}, + {"fips": "83835", "name": "Westminster", "state": "CO", "state_name": "Colorado"}, + {"fips": "08000", "name": "Bridgeport", "state": "CT", "state_name": "Connecticut"}, + {"fips": "37000", "name": "Hartford", "state": "CT", "state_name": "Connecticut"}, + {"fips": "52000", "name": "New Haven", "state": "CT", "state_name": "Connecticut"}, + {"fips": "73000", "name": "Stamford", "state": "CT", "state_name": "Connecticut"}, + {"fips": "80000", "name": "Waterbury", "state": "CT", "state_name": "Connecticut"}, + {"fips": "50000", "name": "Washington", "state": "DC", "state_name": "District of Columbia"}, + {"fips": "10275", "name": "Cape Coral", "state": "FL", "state_name": "Florida"}, + {"fips": "12875", "name": "Clearwater", "state": "FL", "state_name": 
"Florida"}, + {"fips": "14400", "name": "Coral Springs", "state": "FL", "state_name": "Florida"}, + {"fips": "16475", "name": "Davie", "state": "FL", "state_name": "Florida"}, + {"fips": "24000", "name": "Fort Lauderdale", "state": "FL", "state_name": "Florida"}, + {"fips": "25175", "name": "Gainesville", "state": "FL", "state_name": "Florida"}, + {"fips": "30000", "name": "Hialeah", "state": "FL", "state_name": "Florida"}, + {"fips": "32000", "name": "Hollywood", "state": "FL", "state_name": "Florida"}, + {"fips": "35000", "name": "Jacksonville", "state": "FL", "state_name": "Florida"}, + {"fips": "38250", "name": "Lakeland", "state": "FL", "state_name": "Florida"}, + {"fips": "45060", "name": "Miami Gardens", "state": "FL", "state_name": "Florida"}, + {"fips": "45000", "name": "Miami", "state": "FL", "state_name": "Florida"}, + {"fips": "45975", "name": "Miramar", "state": "FL", "state_name": "Florida"}, + {"fips": "53000", "name": "Orlando", "state": "FL", "state_name": "Florida"}, + {"fips": "54000", "name": "Palm Bay", "state": "FL", "state_name": "Florida"}, + {"fips": "54200", "name": "Palm Coast", "state": "FL", "state_name": "Florida"}, + {"fips": "55775", "name": "Pembroke Pines", "state": "FL", "state_name": "Florida"}, + {"fips": "58050", "name": "Pompano Beach", "state": "FL", "state_name": "Florida"}, + {"fips": "58715", "name": "Port St. Lucie", "state": "FL", "state_name": "Florida"}, + {"fips": "63000", "name": "St. 
Petersburg", "state": "FL", "state_name": "Florida"}, + {"fips": "70600", "name": "Tallahassee", "state": "FL", "state_name": "Florida"}, + {"fips": "71000", "name": "Tampa", "state": "FL", "state_name": "Florida"}, + {"fips": "76600", "name": "West Palm Beach", "state": "FL", "state_name": "Florida"}, + {"fips": "03440", "name": "Athens-Clarke County", "state": "GA", "state_name": "Georgia"}, + {"fips": "04000", "name": "Atlanta", "state": "GA", "state_name": "Georgia"}, + {"fips": "04204", "name": "Augusta-Richmond County", "state": "GA", "state_name": "Georgia"}, + {"fips": "19000", "name": "Columbus", "state": "GA", "state_name": "Georgia"}, + {"fips": "49008", "name": "Macon-Bibb County", "state": "GA", "state_name": "Georgia"}, + {"fips": "68516", "name": "Sandy Springs", "state": "GA", "state_name": "Georgia"}, + {"fips": "69000", "name": "Savannah", "state": "GA", "state_name": "Georgia"}, + {"fips": "72122", "name": "South Fulton", "state": "GA", "state_name": "Georgia"}, + {"fips": "71550", "name": "Urban Honolulu", "state": "HI", "state_name": "Hawaii"}, + {"fips": "12000", "name": "Cedar Rapids", "state": "IA", "state_name": "Iowa"}, + {"fips": "19000", "name": "Davenport", "state": "IA", "state_name": "Iowa"}, + {"fips": "21000", "name": "Des Moines", "state": "IA", "state_name": "Iowa"}, + {"fips": "08830", "name": "Boise City", "state": "ID", "state_name": "Idaho"}, + {"fips": "52120", "name": "Meridian", "state": "ID", "state_name": "Idaho"}, + {"fips": "56260", "name": "Nampa", "state": "ID", "state_name": "Idaho"}, + {"fips": "03012", "name": "Aurora", "state": "IL", "state_name": "Illinois"}, + {"fips": "14000", "name": "Chicago", "state": "IL", "state_name": "Illinois"}, + {"fips": "23074", "name": "Elgin", "state": "IL", "state_name": "Illinois"}, + {"fips": "38570", "name": "Joliet", "state": "IL", "state_name": "Illinois"}, + {"fips": "51622", "name": "Naperville", "state": "IL", "state_name": "Illinois"}, + {"fips": "59000", "name": 
"Peoria", "state": "IL", "state_name": "Illinois"}, + {"fips": "65000", "name": "Rockford", "state": "IL", "state_name": "Illinois"}, + {"fips": "72000", "name": "Springfield", "state": "IL", "state_name": "Illinois"}, + {"fips": "10342", "name": "Carmel", "state": "IN", "state_name": "Indiana"}, + {"fips": "22000", "name": "Evansville", "state": "IN", "state_name": "Indiana"}, + {"fips": "23278", "name": "Fishers", "state": "IN", "state_name": "Indiana"}, + {"fips": "25000", "name": "Fort Wayne", "state": "IN", "state_name": "Indiana"}, + {"fips": "36003", "name": "Indianapolis", "state": "IN", "state_name": "Indiana"}, + {"fips": "71000", "name": "South Bend", "state": "IN", "state_name": "Indiana"}, + {"fips": "36000", "name": "Kansas City", "state": "KS", "state_name": "Kansas"}, + {"fips": "52575", "name": "Olathe", "state": "KS", "state_name": "Kansas"}, + {"fips": "53775", "name": "Overland Park", "state": "KS", "state_name": "Kansas"}, + {"fips": "71000", "name": "Topeka", "state": "KS", "state_name": "Kansas"}, + {"fips": "79000", "name": "Wichita", "state": "KS", "state_name": "Kansas"}, + {"fips": "46027", "name": "Lexington-Fayette", "state": "KY", "state_name": "Kentucky"}, + {"fips": "48006", "name": "Louisville/Jefferson County", "state": "KY", "state_name": "Kentucky"}, + {"fips": "05000", "name": "Baton Rouge", "state": "LA", "state_name": "Louisiana"}, + {"fips": "40735", "name": "Lafayette", "state": "LA", "state_name": "Louisiana"}, + {"fips": "55000", "name": "New Orleans", "state": "LA", "state_name": "Louisiana"}, + {"fips": "70000", "name": "Shreveport", "state": "LA", "state_name": "Louisiana"}, + {"fips": "07000", "name": "Boston", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "09000", "name": "Brockton", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "11000", "name": "Cambridge", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "37000", "name": "Lowell", "state": "MA", "state_name": "Massachusetts"}, + 
{"fips": "37490", "name": "Lynn", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "45000", "name": "New Bedford", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "55745", "name": "Quincy", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "67000", "name": "Springfield", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "82000", "name": "Worcester", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "04000", "name": "Baltimore", "state": "MD", "state_name": "Maryland"}, + {"fips": "03000", "name": "Ann Arbor", "state": "MI", "state_name": "Michigan"}, + {"fips": "21000", "name": "Dearborn", "state": "MI", "state_name": "Michigan"}, + {"fips": "22000", "name": "Detroit", "state": "MI", "state_name": "Michigan"}, + {"fips": "34000", "name": "Grand Rapids", "state": "MI", "state_name": "Michigan"}, + {"fips": "46000", "name": "Lansing", "state": "MI", "state_name": "Michigan"}, + {"fips": "76460", "name": "Sterling Heights", "state": "MI", "state_name": "Michigan"}, + {"fips": "84000", "name": "Warren", "state": "MI", "state_name": "Michigan"}, + {"fips": "43000", "name": "Minneapolis", "state": "MN", "state_name": "Minnesota"}, + {"fips": "54880", "name": "Rochester", "state": "MN", "state_name": "Minnesota"}, + {"fips": "58000", "name": "St. Paul", "state": "MN", "state_name": "Minnesota"}, + {"fips": "15670", "name": "Columbia", "state": "MO", "state_name": "Missouri"}, + {"fips": "35000", "name": "Independence", "state": "MO", "state_name": "Missouri"}, + {"fips": "38000", "name": "Kansas City", "state": "MO", "state_name": "Missouri"}, + {"fips": "41348", "name": "Lee's Summit", "state": "MO", "state_name": "Missouri"}, + {"fips": "70000", "name": "Springfield", "state": "MO", "state_name": "Missouri"}, + {"fips": "65000", "name": "St. 
Louis", "state": "MO", "state_name": "Missouri"}, + {"fips": "36000", "name": "Jackson", "state": "MS", "state_name": "Mississippi"}, + {"fips": "06550", "name": "Billings", "state": "MT", "state_name": "Montana"}, + {"fips": "10740", "name": "Cary", "state": "NC", "state_name": "North Carolina"}, + {"fips": "12000", "name": "Charlotte", "state": "NC", "state_name": "North Carolina"}, + {"fips": "14100", "name": "Concord", "state": "NC", "state_name": "North Carolina"}, + {"fips": "19000", "name": "Durham", "state": "NC", "state_name": "North Carolina"}, + {"fips": "22920", "name": "Fayetteville", "state": "NC", "state_name": "North Carolina"}, + {"fips": "28000", "name": "Greensboro", "state": "NC", "state_name": "North Carolina"}, + {"fips": "31400", "name": "High Point", "state": "NC", "state_name": "North Carolina"}, + {"fips": "55000", "name": "Raleigh", "state": "NC", "state_name": "North Carolina"}, + {"fips": "74440", "name": "Wilmington", "state": "NC", "state_name": "North Carolina"}, + {"fips": "75000", "name": "Winston-Salem", "state": "NC", "state_name": "North Carolina"}, + {"fips": "25700", "name": "Fargo", "state": "ND", "state_name": "North Dakota"}, + {"fips": "28000", "name": "Lincoln", "state": "NE", "state_name": "Nebraska"}, + {"fips": "37000", "name": "Omaha", "state": "NE", "state_name": "Nebraska"}, + {"fips": "45140", "name": "Manchester", "state": "NH", "state_name": "New Hampshire"}, + {"fips": "21000", "name": "Elizabeth", "state": "NJ", "state_name": "New Jersey"}, + {"fips": "36000", "name": "Jersey City", "state": "NJ", "state_name": "New Jersey"}, + {"fips": "51000", "name": "Newark", "state": "NJ", "state_name": "New Jersey"}, + {"fips": "57000", "name": "Paterson", "state": "NJ", "state_name": "New Jersey"}, + {"fips": "02000", "name": "Albuquerque", "state": "NM", "state_name": "New Mexico"}, + {"fips": "39380", "name": "Las Cruces", "state": "NM", "state_name": "New Mexico"}, + {"fips": "63460", "name": "Rio Rancho", "state": 
"NM", "state_name": "New Mexico"}, + {"fips": "31900", "name": "Henderson", "state": "NV", "state_name": "Nevada"}, + {"fips": "40000", "name": "Las Vegas", "state": "NV", "state_name": "Nevada"}, + {"fips": "51800", "name": "North Las Vegas", "state": "NV", "state_name": "Nevada"}, + {"fips": "60600", "name": "Reno", "state": "NV", "state_name": "Nevada"}, + {"fips": "68400", "name": "Sparks", "state": "NV", "state_name": "Nevada"}, + {"fips": "01000", "name": "Albany", "state": "NY", "state_name": "New York"}, + {"fips": "11000", "name": "Buffalo", "state": "NY", "state_name": "New York"}, + {"fips": "51000", "name": "New York City", "state": "NY", "state_name": "New York"}, + {"fips": "63000", "name": "Rochester", "state": "NY", "state_name": "New York"}, + {"fips": "73000", "name": "Syracuse", "state": "NY", "state_name": "New York"}, + {"fips": "84000", "name": "Yonkers", "state": "NY", "state_name": "New York"}, + {"fips": "01000", "name": "Akron", "state": "OH", "state_name": "Ohio"}, + {"fips": "15000", "name": "Cincinnati", "state": "OH", "state_name": "Ohio"}, + {"fips": "16000", "name": "Cleveland", "state": "OH", "state_name": "Ohio"}, + {"fips": "18000", "name": "Columbus", "state": "OH", "state_name": "Ohio"}, + {"fips": "21000", "name": "Dayton", "state": "OH", "state_name": "Ohio"}, + {"fips": "77000", "name": "Toledo", "state": "OH", "state_name": "Ohio"}, + {"fips": "09050", "name": "Broken Arrow", "state": "OK", "state_name": "Oklahoma"}, + {"fips": "52500", "name": "Norman", "state": "OK", "state_name": "Oklahoma"}, + {"fips": "55000", "name": "Oklahoma City", "state": "OK", "state_name": "Oklahoma"}, + {"fips": "75000", "name": "Tulsa", "state": "OK", "state_name": "Oklahoma"}, + {"fips": "05800", "name": "Bend", "state": "OR", "state_name": "Oregon"}, + {"fips": "23850", "name": "Eugene", "state": "OR", "state_name": "Oregon"}, + {"fips": "31250", "name": "Gresham", "state": "OR", "state_name": "Oregon"}, + {"fips": "34100", "name": 
"Hillsboro", "state": "OR", "state_name": "Oregon"}, + {"fips": "59000", "name": "Portland", "state": "OR", "state_name": "Oregon"}, + {"fips": "64900", "name": "Salem", "state": "OR", "state_name": "Oregon"}, + {"fips": "02000", "name": "Allentown", "state": "PA", "state_name": "Pennsylvania"}, + {"fips": "60000", "name": "Philadelphia", "state": "PA", "state_name": "Pennsylvania"}, + {"fips": "61000", "name": "Pittsburgh", "state": "PA", "state_name": "Pennsylvania"}, + {"fips": "59000", "name": "Providence", "state": "RI", "state_name": "Rhode Island"}, + {"fips": "13330", "name": "Charleston", "state": "SC", "state_name": "South Carolina"}, + {"fips": "16000", "name": "Columbia", "state": "SC", "state_name": "South Carolina"}, + {"fips": "50875", "name": "North Charleston", "state": "SC", "state_name": "South Carolina"}, + {"fips": "59020", "name": "Sioux Falls", "state": "SD", "state_name": "South Dakota"}, + {"fips": "14000", "name": "Chattanooga", "state": "TN", "state_name": "Tennessee"}, + {"fips": "15160", "name": "Clarksville", "state": "TN", "state_name": "Tennessee"}, + {"fips": "40000", "name": "Knoxville", "state": "TN", "state_name": "Tennessee"}, + {"fips": "48000", "name": "Memphis", "state": "TN", "state_name": "Tennessee"}, + {"fips": "51560", "name": "Murfreesboro", "state": "TN", "state_name": "Tennessee"}, + +# Extracted 332 places + {"fips": "52006", "name": "Nashville-Davidson", "state": "TN", "state_name": "Tennessee"}, + {"fips": "01000", "name": "Abilene", "state": "TX", "state_name": "Texas"}, + {"fips": "01924", "name": "Allen", "state": "TX", "state_name": "Texas"}, + {"fips": "03000", "name": "Amarillo", "state": "TX", "state_name": "Texas"}, + {"fips": "04000", "name": "Arlington", "state": "TX", "state_name": "Texas"}, + {"fips": "05000", "name": "Austin", "state": "TX", "state_name": "Texas"}, + {"fips": "07000", "name": "Beaumont", "state": "TX", "state_name": "Texas"}, + {"fips": "10768", "name": "Brownsville", "state": "TX", 
"state_name": "Texas"}, + {"fips": "13024", "name": "Carrollton", "state": "TX", "state_name": "Texas"}, + {"fips": "15976", "name": "College Station", "state": "TX", "state_name": "Texas"}, + {"fips": "16432", "name": "Conroe", "state": "TX", "state_name": "Texas"}, + {"fips": "17000", "name": "Corpus Christi", "state": "TX", "state_name": "Texas"}, + {"fips": "19000", "name": "Dallas", "state": "TX", "state_name": "Texas"}, + {"fips": "19972", "name": "Denton", "state": "TX", "state_name": "Texas"}, + {"fips": "22660", "name": "Edinburg", "state": "TX", "state_name": "Texas"}, + {"fips": "24000", "name": "El Paso", "state": "TX", "state_name": "Texas"}, + {"fips": "27000", "name": "Fort Worth", "state": "TX", "state_name": "Texas"}, + {"fips": "27684", "name": "Frisco", "state": "TX", "state_name": "Texas"}, + {"fips": "29000", "name": "Garland", "state": "TX", "state_name": "Texas"}, + {"fips": "30464", "name": "Grand Prairie", "state": "TX", "state_name": "Texas"}, + {"fips": "35000", "name": "Houston", "state": "TX", "state_name": "Texas"}, + {"fips": "37000", "name": "Irving", "state": "TX", "state_name": "Texas"}, + {"fips": "39148", "name": "Killeen", "state": "TX", "state_name": "Texas"}, + {"fips": "41464", "name": "Laredo", "state": "TX", "state_name": "Texas"}, + {"fips": "41980", "name": "League City", "state": "TX", "state_name": "Texas"}, + {"fips": "42508", "name": "Lewisville", "state": "TX", "state_name": "Texas"}, + {"fips": "45000", "name": "Lubbock", "state": "TX", "state_name": "Texas"}, + {"fips": "45384", "name": "McAllen", "state": "TX", "state_name": "Texas"}, + {"fips": "45744", "name": "McKinney", "state": "TX", "state_name": "Texas"}, + {"fips": "47892", "name": "Mesquite", "state": "TX", "state_name": "Texas"}, + {"fips": "48072", "name": "Midland", "state": "TX", "state_name": "Texas"}, + {"fips": "50820", "name": "New Braunfels", "state": "TX", "state_name": "Texas"}, + {"fips": "53388", "name": "Odessa", "state": "TX", "state_name": 
"Texas"}, + {"fips": "56000", "name": "Pasadena", "state": "TX", "state_name": "Texas"}, + {"fips": "56348", "name": "Pearland", "state": "TX", "state_name": "Texas"}, + {"fips": "58016", "name": "Plano", "state": "TX", "state_name": "Texas"}, + {"fips": "61796", "name": "Richardson", "state": "TX", "state_name": "Texas"}, + {"fips": "63500", "name": "Round Rock", "state": "TX", "state_name": "Texas"}, + {"fips": "65000", "name": "San Antonio", "state": "TX", "state_name": "Texas"}, + {"fips": "70808", "name": "Sugar Land", "state": "TX", "state_name": "Texas"}, + {"fips": "74144", "name": "Tyler", "state": "TX", "state_name": "Texas"}, + {"fips": "76000", "name": "Waco", "state": "TX", "state_name": "Texas"}, + {"fips": "79000", "name": "Wichita Falls", "state": "TX", "state_name": "Texas"}, + {"fips": "62470", "name": "Provo", "state": "UT", "state_name": "Utah"}, + {"fips": "67000", "name": "Salt Lake City", "state": "UT", "state_name": "Utah"}, + {"fips": "65330", "name": "St. George", "state": "UT", "state_name": "Utah"}, + {"fips": "82950", "name": "West Jordan", "state": "UT", "state_name": "Utah"}, + {"fips": "83470", "name": "West Valley City", "state": "UT", "state_name": "Utah"}, + {"fips": "01000", "name": "Alexandria", "state": "VA", "state_name": "Virginia"}, + {"fips": "16000", "name": "Chesapeake", "state": "VA", "state_name": "Virginia"}, + {"fips": "35000", "name": "Hampton", "state": "VA", "state_name": "Virginia"}, + {"fips": "56000", "name": "Newport News", "state": "VA", "state_name": "Virginia"}, + {"fips": "57000", "name": "Norfolk", "state": "VA", "state_name": "Virginia"}, + {"fips": "67000", "name": "Richmond", "state": "VA", "state_name": "Virginia"}, + {"fips": "76432", "name": "Suffolk", "state": "VA", "state_name": "Virginia"}, + {"fips": "82000", "name": "Virginia Beach", "state": "VA", "state_name": "Virginia"}, + {"fips": "05210", "name": "Bellevue", "state": "WA", "state_name": "Washington"}, + {"fips": "22640", "name": "Everett", 
"state": "WA", "state_name": "Washington"}, + {"fips": "35415", "name": "Kent", "state": "WA", "state_name": "Washington"}, + {"fips": "57745", "name": "Renton", "state": "WA", "state_name": "Washington"}, + {"fips": "63000", "name": "Seattle", "state": "WA", "state_name": "Washington"}, + {"fips": "67167", "name": "Spokane Valley", "state": "WA", "state_name": "Washington"}, + {"fips": "67000", "name": "Spokane", "state": "WA", "state_name": "Washington"}, + {"fips": "70000", "name": "Tacoma", "state": "WA", "state_name": "Washington"}, + {"fips": "74060", "name": "Vancouver", "state": "WA", "state_name": "Washington"}, + {"fips": "31000", "name": "Green Bay", "state": "WI", "state_name": "Wisconsin"}, + {"fips": "48000", "name": "Madison", "state": "WI", "state_name": "Wisconsin"}, + {"fips": "53000", "name": "Milwaukee", "state": "WI", "state_name": "Wisconsin"}, +] diff --git a/src/policyengine/countries/us/data/states.py b/src/policyengine/countries/us/data/states.py new file mode 100644 index 00000000..1309201b --- /dev/null +++ b/src/policyengine/countries/us/data/states.py @@ -0,0 +1,59 @@ +"""US state definitions. + +All 50 states plus District of Columbia. 
+""" + +# All US states and territories with their full names +US_STATES: dict[str, str] = { + "AL": "Alabama", + "AK": "Alaska", + "AZ": "Arizona", + "AR": "Arkansas", + "CA": "California", + "CO": "Colorado", + "CT": "Connecticut", + "DE": "Delaware", + "DC": "District of Columbia", + "FL": "Florida", + "GA": "Georgia", + "HI": "Hawaii", + "ID": "Idaho", + "IL": "Illinois", + "IN": "Indiana", + "IA": "Iowa", + "KS": "Kansas", + "KY": "Kentucky", + "LA": "Louisiana", + "ME": "Maine", + "MD": "Maryland", + "MA": "Massachusetts", + "MI": "Michigan", + "MN": "Minnesota", + "MS": "Mississippi", + "MO": "Missouri", + "MT": "Montana", + "NE": "Nebraska", + "NV": "Nevada", + "NH": "New Hampshire", + "NJ": "New Jersey", + "NM": "New Mexico", + "NY": "New York", + "NC": "North Carolina", + "ND": "North Dakota", + "OH": "Ohio", + "OK": "Oklahoma", + "OR": "Oregon", + "PA": "Pennsylvania", + "RI": "Rhode Island", + "SC": "South Carolina", + "SD": "South Dakota", + "TN": "Tennessee", + "TX": "Texas", + "UT": "Utah", + "VT": "Vermont", + "VA": "Virginia", + "WA": "Washington", + "WV": "West Virginia", + "WI": "Wisconsin", + "WY": "Wyoming", +} diff --git a/src/policyengine/countries/us/regions.py b/src/policyengine/countries/us/regions.py new file mode 100644 index 00000000..6320578e --- /dev/null +++ b/src/policyengine/countries/us/regions.py @@ -0,0 +1,106 @@ +"""US region registry builder. 
+ +This module builds the complete US region registry from the data definitions +in the data/ subdirectory: +- data/states.py: State definitions +- data/districts.py: Congressional district counts +- data/places.py: Census places over 100K population +""" + +from policyengine.core.region import Region, RegionRegistry + +from .data import AT_LARGE_STATES, DISTRICT_COUNTS, US_PLACES, US_STATES + +US_DATA_BUCKET = "gs://policyengine-us-data" + + +def _ordinal(n: int) -> str: + """Return ordinal suffix for a number (1st, 2nd, 3rd, etc.).""" + if 11 <= n % 100 <= 13: + return f"{n}th" + return f"{n}" + {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th") + + +def build_us_region_registry() -> RegionRegistry: + """Build the complete US region registry. + + Returns: + RegionRegistry containing: + - 1 national region + - 51 state regions (50 states + DC) + - 436 congressional district regions (435 + DC delegate) + - 333 place/city regions (Census places over 100K population) + """ + regions: list[Region] = [] + + # 1. National region (has dedicated dataset) + regions.append( + Region( + code="us", + label="United States", + region_type="national", + dataset_path=f"{US_DATA_BUCKET}/enhanced_cps_2024.h5", + ) + ) + + # 2. State regions (each has dedicated dataset) + for abbrev, name in US_STATES.items(): + regions.append( + Region( + code=f"state/{abbrev.lower()}", + label=name, + region_type="state", + parent_code="us", + dataset_path=f"{US_DATA_BUCKET}/states/{abbrev}.h5", + state_code=abbrev, + state_name=name, + ) + ) + + # 3. 
Congressional district regions (each has dedicated dataset) + for state_abbrev, count in DISTRICT_COUNTS.items(): + state_name = US_STATES[state_abbrev] + for i in range(1, count + 1): + district_code = f"{state_abbrev}-{i:02d}" + + # Create appropriate label + if state_abbrev in AT_LARGE_STATES: + label = f"{state_name}'s at-large congressional district" + else: + label = f"{state_name}'s {_ordinal(i)} congressional district" + + regions.append( + Region( + code=f"congressional_district/{district_code}", + label=label, + region_type="congressional_district", + parent_code=f"state/{state_abbrev.lower()}", + dataset_path=f"{US_DATA_BUCKET}/districts/{district_code}.h5", + state_code=state_abbrev, + state_name=state_name, + ) + ) + + # 4. Place/city regions (filter from state datasets) + for place in US_PLACES: + state_abbrev = place["state"] + fips = place["fips"] + regions.append( + Region( + code=f"place/{state_abbrev}-{fips}", + label=place["name"], + region_type="place", + parent_code=f"state/{state_abbrev.lower()}", + requires_filter=True, + filter_field="place_fips", + filter_value=fips, + state_code=state_abbrev, + state_name=place["state_name"], + ) + ) + + return RegionRegistry(country_id="us", regions=regions) + + +# Singleton instance for import +us_region_registry = build_us_region_registry() diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index fac5b91f..71cf78dc 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -123,6 +123,11 @@ def __init__(self, **kwargs: dict): from policyengine_core.enums import Enum from policyengine_uk.system import system + # Attach region registry + from policyengine.countries.uk.regions import uk_region_registry + + self.region_registry = uk_region_registry + self.id = f"{self.model.id}@{self.version}" for var_obj in system.variables.values(): diff --git 
a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py index 487e4d51..b5191a19 100644 --- a/src/policyengine/tax_benefit_models/us/model.py +++ b/src/policyengine/tax_benefit_models/us/model.py @@ -116,6 +116,11 @@ def __init__(self, **kwargs: dict): from policyengine_core.enums import Enum from policyengine_us.system import system + # Attach region registry + from policyengine.countries.us.regions import us_region_registry + + self.region_registry = us_region_registry + self.id = f"{self.model.id}@{self.version}" for var_obj in system.variables.values(): diff --git a/tests/fixtures/region_fixtures.py b/tests/fixtures/region_fixtures.py new file mode 100644 index 00000000..ca1adfe2 --- /dev/null +++ b/tests/fixtures/region_fixtures.py @@ -0,0 +1,127 @@ +"""Fixtures for Region and RegionRegistry tests.""" + +import pytest + +from policyengine.core.region import Region, RegionRegistry + + +def create_national_region( + country_code: str = "us", + label: str = "United States", + dataset_path: str = "gs://policyengine-us-data/enhanced_cps_2024.h5", +) -> Region: + """Create a national region.""" + return Region( + code=country_code, + label=label, + region_type="national", + dataset_path=dataset_path, + ) + + +def create_state_region( + state_code: str, + state_name: str, + parent_code: str = "us", + bucket: str = "gs://policyengine-us-data", +) -> Region: + """Create a state region with dedicated dataset.""" + return Region( + code=f"state/{state_code.lower()}", + label=state_name, + region_type="state", + parent_code=parent_code, + dataset_path=f"{bucket}/states/{state_code}.h5", + state_code=state_code, + state_name=state_name, + ) + + +def create_place_region( + state_code: str, + fips: str, + name: str, + state_name: str, +) -> Region: + """Create a place region that filters from parent state.""" + return Region( + code=f"place/{state_code}-{fips}", + label=name, + region_type="place", + 
parent_code=f"state/{state_code.lower()}", + requires_filter=True, + filter_field="place_fips", + filter_value=fips, + state_code=state_code, + state_name=state_name, + ) + + +def create_sample_us_registry() -> RegionRegistry: + """Create a minimal US-like registry for testing. + + Contains: + - 1 national region (US) + - 2 state regions (CA, NY) + - 1 place region (Los Angeles) + """ + return RegionRegistry( + country_id="us", + regions=[ + create_national_region(), + create_state_region("CA", "California"), + create_state_region("NY", "New York"), + create_place_region("CA", "44000", "Los Angeles city", "California"), + ], + ) + + +# Pre-built fixtures for common test scenarios + +NATIONAL_US = create_national_region() + +STATE_CALIFORNIA = create_state_region("CA", "California") + +STATE_NEW_YORK = create_state_region("NY", "New York") + +PLACE_LOS_ANGELES = create_place_region("CA", "44000", "Los Angeles city", "California") + +SIMPLE_REGION = Region( + code="state/ca", + label="California", + region_type="state", +) + +REGION_WITH_DATASET = Region( + code="state/ca", + label="California", + region_type="state", + parent_code="us", + dataset_path="gs://policyengine-us-data/states/CA.h5", + state_code="CA", + state_name="California", +) + +FILTER_REGION = Region( + code="place/NJ-57000", + label="Paterson", + region_type="place", + parent_code="state/nj", + requires_filter=True, + filter_field="place_fips", + filter_value="57000", + state_code="NJ", + state_name="New Jersey", +) + + +@pytest.fixture +def sample_registry() -> RegionRegistry: + """Pytest fixture for a sample US-like registry.""" + return create_sample_us_registry() + + +@pytest.fixture +def empty_registry() -> RegionRegistry: + """Pytest fixture for an empty registry.""" + return RegionRegistry(country_id="test", regions=[]) diff --git a/tests/test_models.py b/tests/test_models.py index 3132abdf..e5b4484e 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -9,6 +9,21 @@ class 
TestUKModel: """Tests for PolicyEngine UK model.""" + def test_has_region_registry(self): + """UK model should have a region registry attached.""" + assert uk_latest.region_registry is not None + assert uk_latest.region_registry.country_id == "uk" + + def test_can_get_region_by_code(self): + """UK model should be able to look up regions by code.""" + uk = uk_latest.get_region("uk") + assert uk is not None + assert uk.label == "United Kingdom" + + england = uk_latest.get_region("country/england") + assert england is not None + assert england.label == "England" + def test_has_hundreds_of_parameters(self): """UK model should have hundreds of parameters.""" assert len(uk_latest.parameters) >= 100 @@ -65,6 +80,21 @@ def test__given_bracket_label__then_follows_expected_format(self): class TestUSModel: """Tests for PolicyEngine US model.""" + def test_has_region_registry(self): + """US model should have a region registry attached.""" + assert us_latest.region_registry is not None + assert us_latest.region_registry.country_id == "us" + + def test_can_get_region_by_code(self): + """US model should be able to look up regions by code.""" + us = us_latest.get_region("us") + assert us is not None + assert us.label == "United States" + + ca = us_latest.get_region("state/ca") + assert ca is not None + assert ca.label == "California" + def test_has_hundreds_of_parameters(self): """US model should have hundreds of parameters.""" assert len(us_latest.parameters) >= 100 diff --git a/tests/test_region.py b/tests/test_region.py new file mode 100644 index 00000000..6669ec1c --- /dev/null +++ b/tests/test_region.py @@ -0,0 +1,246 @@ +"""Tests for Region and RegionRegistry classes.""" + +import pytest + +from policyengine.core.region import Region, RegionRegistry + +from tests.fixtures.region_fixtures import ( + FILTER_REGION, + REGION_WITH_DATASET, + SIMPLE_REGION, + create_sample_us_registry, + create_state_region, + sample_registry, +) + + +class TestRegion: + """Tests for the Region 
class.""" + + def test__given_required_fields__then_region_created(self): + """Given: Required fields (code, label, region_type) + When: Creating a Region + Then: Region is created with those values + """ + # Given + code = "state/ca" + label = "California" + region_type = "state" + + # When + region = Region(code=code, label=label, region_type=region_type) + + # Then + assert region.code == code + assert region.label == label + assert region.region_type == region_type + + def test__given_dataset_path__then_region_has_dedicated_dataset(self): + """Given: Region with dataset_path specified + When: Creating the Region + Then: Region has dataset_path and requires_filter is False + """ + # Given (using fixture) + region = REGION_WITH_DATASET + + # Then + assert region.dataset_path == "gs://policyengine-us-data/states/CA.h5" + assert region.parent_code == "us" + assert region.state_code == "CA" + assert not region.requires_filter + + def test__given_filter_configuration__then_region_requires_filter(self): + """Given: Region with requires_filter=True and filter fields + When: Creating the Region + Then: Region is configured for filtering from parent + """ + # Given (using fixture) + region = FILTER_REGION + + # Then + assert region.requires_filter is True + assert region.filter_field == "place_fips" + assert region.filter_value == "57000" + + def test__given_same_codes__then_regions_are_equal(self): + """Given: Two regions with the same code + When: Comparing them + Then: They are equal regardless of other fields + """ + # Given + region1 = Region(code="state/ca", label="California", region_type="state") + region2 = Region(code="state/ca", label="California (different)", region_type="state") + region3 = Region(code="state/ny", label="New York", region_type="state") + + # Then + assert region1 == region2 + assert region1 != region3 + + def test__given_region__then_can_use_as_dict_key_or_in_set(self): + """Given: Multiple regions + When: Using them in sets or as dict keys 
+ Then: Regions with same code are deduplicated + """ + # Given + region1 = Region(code="state/ca", label="California", region_type="state") + region2 = Region(code="state/ca", label="California (duplicate)", region_type="state") + region3 = Region(code="state/ny", label="New York", region_type="state") + + # When + region_set = {region1, region2, region3} + region_dict = {region1: "first", region3: "third"} + + # Then + assert len(region_set) == 2 # region1 and region2 are same + assert region_dict[region2] == "first" # region2 == region1 + + +class TestRegionRegistry: + """Tests for the RegionRegistry class.""" + + def test__given_registry_with_regions__then_length_is_correct(self, sample_registry): + """Given: Registry with 4 regions + When: Checking length + Then: Length is 4 + """ + # Then + assert len(sample_registry) == 4 + + def test__given_registry__then_can_iterate_over_regions(self, sample_registry): + """Given: Registry with regions + When: Iterating + Then: All region codes are accessible + """ + # When + codes = [r.code for r in sample_registry] + + # Then + assert "us" in codes + assert "state/ca" in codes + assert "place/CA-44000" in codes + + def test__given_existing_code__then_code_is_in_registry(self, sample_registry): + """Given: Registry with state/ca + When: Checking if code exists + Then: Returns True for existing, False for missing + """ + # Then + assert "state/ca" in sample_registry + assert "state/tx" not in sample_registry + + def test__given_valid_code__then_get_returns_region(self, sample_registry): + """Given: Registry with state/ca + When: Getting by code + Then: Returns Region for existing, None for missing + """ + # When + ca = sample_registry.get("state/ca") + missing = sample_registry.get("state/tx") + + # Then + assert ca is not None + assert ca.label == "California" + assert missing is None + + def test__given_type__then_get_by_type_returns_matching_regions(self, sample_registry): + """Given: Registry with 2 states and 1 place 
+ When: Getting by type + Then: Returns correct regions for each type + """ + # When + states = sample_registry.get_by_type("state") + places = sample_registry.get_by_type("place") + counties = sample_registry.get_by_type("county") + + # Then + assert len(states) == 2 + assert all(r.region_type == "state" for r in states) + assert len(places) == 1 + assert counties == [] + + def test__given_registry__then_get_national_returns_national_region(self, sample_registry): + """Given: Registry with national region + When: Getting national + Then: Returns the national region + """ + # When + national = sample_registry.get_national() + + # Then + assert national is not None + assert national.code == "us" + assert national.region_type == "national" + + def test__given_parent_code__then_get_children_returns_child_regions(self, sample_registry): + """Given: Registry with states under "us" + When: Getting children of "us" + Then: Returns state regions + """ + # When + us_children = sample_registry.get_children("us") + ca_children = sample_registry.get_children("state/ca") + + # Then + assert len(us_children) == 2 # CA and NY states + assert len(ca_children) == 1 # Los Angeles place + assert ca_children[0].code == "place/CA-44000" + + def test__given_registry__then_get_dataset_regions_returns_regions_with_datasets( + self, sample_registry + ): + """Given: Registry with 3 dataset regions (US, CA, NY) + When: Getting dataset regions + Then: Returns only regions with dataset_path and no filter + """ + # When + dataset_regions = sample_registry.get_dataset_regions() + + # Then + assert len(dataset_regions) == 3 # us, ca, ny + assert all(r.dataset_path is not None for r in dataset_regions) + assert all(not r.requires_filter for r in dataset_regions) + + def test__given_registry__then_get_filter_regions_returns_regions_requiring_filter( + self, sample_registry + ): + """Given: Registry with 1 filter region (Los Angeles) + When: Getting filter regions + Then: Returns only regions with 
requires_filter=True + """ + # When + filter_regions = sample_registry.get_filter_regions() + + # Then + assert len(filter_regions) == 1 + assert filter_regions[0].code == "place/CA-44000" + + def test__given_registry__then_can_add_region_dynamically(self, sample_registry): + """Given: Registry with 4 regions + When: Adding a new region + Then: Registry contains 5 regions and new region is indexed + """ + # Given + new_region = create_state_region("TX", "Texas") + + # When + sample_registry.add_region(new_region) + + # Then + assert len(sample_registry) == 5 + assert "state/tx" in sample_registry + assert sample_registry.get("state/tx").label == "Texas" + assert len(sample_registry.get_by_type("state")) == 3 + + def test__given_empty_registry__then_lookups_return_empty_results(self): + """Given: Empty registry + When: Performing lookups + Then: Returns empty results without errors + """ + # Given + registry = RegionRegistry(country_id="test", regions=[]) + + # Then + assert len(registry) == 0 + assert registry.get("anything") is None + assert registry.get_national() is None + assert registry.get_by_type("state") == [] diff --git a/tests/test_uk_regions.py b/tests/test_uk_regions.py new file mode 100644 index 00000000..b13026f0 --- /dev/null +++ b/tests/test_uk_regions.py @@ -0,0 +1,227 @@ +"""Tests for UK region definitions.""" + +from policyengine.countries.uk.regions import ( + UK_COUNTRIES, + UK_DATA_BUCKET, + build_uk_region_registry, + uk_region_registry, +) + + +class TestUKCountries: + """Tests for UK country definitions.""" + + def test__given_uk_countries__then_has_four_entries(self): + """Given: UK_COUNTRIES dictionary + When: Checking length + Then: Contains 4 countries + """ + # Then + assert len(UK_COUNTRIES) == 4 + + def test__given_uk_countries__then_all_countries_present(self): + """Given: UK_COUNTRIES dictionary + When: Checking for countries + Then: England, Scotland, Wales, NI are all present + """ + # Then + assert "england" in UK_COUNTRIES + 
assert "scotland" in UK_COUNTRIES + assert "wales" in UK_COUNTRIES + assert "northern_ireland" in UK_COUNTRIES + + def test__given_uk_countries__then_labels_capitalized(self): + """Given: UK_COUNTRIES dictionary + When: Checking labels + Then: Labels are properly capitalized + """ + # Then + assert UK_COUNTRIES["england"] == "England" + assert UK_COUNTRIES["scotland"] == "Scotland" + assert UK_COUNTRIES["wales"] == "Wales" + assert UK_COUNTRIES["northern_ireland"] == "Northern Ireland" + + +class TestUKRegionRegistry: + """Tests for the UK region registry.""" + + def test__given_uk_registry__then_country_id_is_uk(self): + """Given: UK region registry + When: Checking country_id + Then: Value is "uk" + """ + # Then + assert uk_region_registry.country_id == "uk" + + def test__given_uk_registry__then_has_national_region(self): + """Given: UK region registry + When: Getting national region + Then: Returns UK with correct dataset path + """ + # When + national = uk_region_registry.get_national() + + # Then + assert national is not None + assert national.code == "uk" + assert national.label == "United Kingdom" + assert national.region_type == "national" + assert national.dataset_path == f"{UK_DATA_BUCKET}/enhanced_frs_2023_24.h5" + assert not national.requires_filter + + def test__given_uk_registry__then_has_four_country_regions(self): + """Given: UK region registry + When: Getting country regions + Then: Contains 4 countries + """ + # When + countries = uk_region_registry.get_by_type("country") + + # Then + assert len(countries) == 4 + + def test__given_england_region__then_filters_from_national(self): + """Given: England country region + When: Checking its properties + Then: Filters from national with country field + """ + # When + england = uk_region_registry.get("country/england") + + # Then + assert england is not None + assert england.label == "England" + assert england.region_type == "country" + assert england.parent_code == "uk" + assert england.requires_filter + 
assert england.filter_field == "country" + assert england.filter_value == "ENGLAND" + assert england.dataset_path is None + + def test__given_scotland_region__then_filters_from_national(self): + """Given: Scotland country region + When: Checking its properties + Then: Filters from national with correct value + """ + # When + scotland = uk_region_registry.get("country/scotland") + + # Then + assert scotland is not None + assert scotland.label == "Scotland" + assert scotland.requires_filter + assert scotland.filter_value == "SCOTLAND" + + def test__given_wales_region__then_filters_from_national(self): + """Given: Wales country region + When: Checking its properties + Then: Filters from national with correct value + """ + # When + wales = uk_region_registry.get("country/wales") + + # Then + assert wales is not None + assert wales.label == "Wales" + assert wales.requires_filter + assert wales.filter_value == "WALES" + + def test__given_northern_ireland_region__then_filters_from_national(self): + """Given: Northern Ireland country region + When: Checking its properties + Then: Filters from national with correct value + """ + # When + ni = uk_region_registry.get("country/northern_ireland") + + # Then + assert ni is not None + assert ni.label == "Northern Ireland" + assert ni.requires_filter + assert ni.filter_value == "NORTHERN_IRELAND" + + def test__given_uk_national__then_children_are_countries(self): + """Given: UK national region + When: Getting its children + Then: All children are country regions + """ + # When + uk_children = uk_region_registry.get_children("uk") + + # Then + assert len(uk_children) == 4 + assert all(c.region_type == "country" for c in uk_children) + + def test__given_uk_registry__then_only_national_has_dataset(self): + """Given: UK region registry + When: Getting dataset regions + Then: Only national has dedicated dataset + """ + # When + dataset_regions = uk_region_registry.get_dataset_regions() + + # Then + assert len(dataset_regions) == 1 + 
assert dataset_regions[0].code == "uk" + + def test__given_uk_registry__then_filter_regions_are_countries(self): + """Given: UK region registry + When: Getting filter regions + Then: All 4 countries require filter + """ + # When + filter_regions = uk_region_registry.get_filter_regions() + + # Then + assert len(filter_regions) == 4 + assert all(r.region_type == "country" for r in filter_regions) + + def test__given_default_registry__then_has_5_regions(self): + """Given: Default UK registry + When: Counting regions + Then: Contains 1 national + 4 countries = 5 + """ + # Then + assert len(uk_region_registry) == 5 + + +class TestUKRegionRegistryBuilder: + """Tests for UK registry builder with optional regions.""" + + def test__given_builder_without_optional_regions__then_returns_5_regions(self): + """Given: build_uk_region_registry with optional regions disabled + When: Building registry + Then: Returns 5 base regions only + """ + # When + registry = build_uk_region_registry( + include_constituencies=False, + include_local_authorities=False, + ) + + # Then + assert len(registry) == 5 # national + 4 countries + + def test__given_builder__then_accepts_include_constituencies_flag(self): + """Given: build_uk_region_registry + When: Passing include_constituencies=False + Then: Returns registry without constituencies + """ + # When + registry = build_uk_region_registry(include_constituencies=False) + + # Then + assert registry is not None + assert len(registry.get_by_type("constituency")) == 0 + + def test__given_builder__then_accepts_include_local_authorities_flag(self): + """Given: build_uk_region_registry + When: Passing include_local_authorities=False + Then: Returns registry without local authorities + """ + # When + registry = build_uk_region_registry(include_local_authorities=False) + + # Then + assert registry is not None + assert len(registry.get_by_type("local_authority")) == 0 diff --git a/tests/test_us_regions.py b/tests/test_us_regions.py new file mode 100644 
index 00000000..5f7d39b7 --- /dev/null +++ b/tests/test_us_regions.py @@ -0,0 +1,252 @@ +"""Tests for US region definitions.""" + +from policyengine.countries.us.data import AT_LARGE_STATES, DISTRICT_COUNTS, US_PLACES, US_STATES +from policyengine.countries.us.regions import US_DATA_BUCKET, us_region_registry + + +class TestUSStates: + """Tests for US state definitions.""" + + def test__given_us_states_dict__then_has_51_entries(self): + """Given: US_STATES dictionary + When: Checking length + Then: Contains 50 states + DC = 51 entries + """ + # Then + assert len(US_STATES) == 51 + + def test__given_us_states__then_includes_dc(self): + """Given: US_STATES dictionary + When: Looking for DC + Then: DC is present with full name + """ + # Then + assert "DC" in US_STATES + assert US_STATES["DC"] == "District of Columbia" + + def test__given_us_states__then_includes_major_states(self): + """Given: US_STATES dictionary + When: Checking for major states + Then: CA, TX, NY, FL are present + """ + # Then + assert "CA" in US_STATES + assert "TX" in US_STATES + assert "NY" in US_STATES + assert "FL" in US_STATES + + +class TestUSDistrictCounts: + """Tests for congressional district counts.""" + + def test__given_district_counts__then_every_state_has_count(self): + """Given: DISTRICT_COUNTS dictionary + When: Checking against US_STATES + Then: Every state has a district count + """ + # When/Then + for state in US_STATES: + assert state in DISTRICT_COUNTS, f"Missing district count for {state}" + + def test__given_district_counts__then_total_is_436(self): + """Given: DISTRICT_COUNTS dictionary + When: Summing all values + Then: Total is 435 voting + 1 DC delegate = 436 + """ + # When + total = sum(DISTRICT_COUNTS.values()) + + # Then + assert total == 436 + + def test__given_district_counts__then_dc_has_one(self): + """Given: DISTRICT_COUNTS for DC + When: Checking value + Then: DC has 1 at-large delegate + """ + # Then + assert DISTRICT_COUNTS["DC"] == 1 + + def 
test__given_district_counts__then_large_states_have_many(self): + """Given: DISTRICT_COUNTS for large states + When: Checking CA and TX + Then: CA >= 50, TX >= 35 (based on 2020 census) + """ + # Then + assert DISTRICT_COUNTS["CA"] >= 50 # CA has 52 + assert DISTRICT_COUNTS["TX"] >= 35 # TX has 38 + + +class TestUSRegionRegistry: + """Tests for the US region registry.""" + + def test__given_us_registry__then_country_id_is_us(self): + """Given: US region registry + When: Checking country_id + Then: Value is "us" + """ + # Then + assert us_region_registry.country_id == "us" + + def test__given_us_registry__then_has_national_region(self): + """Given: US region registry + When: Getting national region + Then: Returns US with correct dataset path + """ + # When + national = us_region_registry.get_national() + + # Then + assert national is not None + assert national.code == "us" + assert national.label == "United States" + assert national.region_type == "national" + assert national.dataset_path == f"{US_DATA_BUCKET}/enhanced_cps_2024.h5" + + def test__given_us_registry__then_has_51_states(self): + """Given: US region registry + When: Getting state regions + Then: Contains 51 states (including DC) + """ + # When + states = us_region_registry.get_by_type("state") + + # Then + assert len(states) == 51 + + def test__given_california_region__then_has_correct_format(self): + """Given: California state region + When: Checking its properties + Then: Has correct code, label, dataset path, and metadata + """ + # When + ca = us_region_registry.get("state/ca") + + # Then + assert ca is not None + assert ca.label == "California" + assert ca.region_type == "state" + assert ca.parent_code == "us" + assert ca.dataset_path == f"{US_DATA_BUCKET}/states/CA.h5" + assert ca.state_code == "CA" + assert ca.state_name == "California" + assert not ca.requires_filter + + def test__given_us_registry__then_has_436_congressional_districts(self): + """Given: US region registry + When: Getting 
congressional district regions + Then: Contains 436 districts + """ + # When + districts = us_region_registry.get_by_type("congressional_district") + + # Then + assert len(districts) == 436 + + def test__given_ca_first_district__then_has_correct_format(self): + """Given: California's 1st congressional district + When: Checking its properties + Then: Has correct code, label, and dataset path + """ + # When + ca01 = us_region_registry.get("congressional_district/CA-01") + + # Then + assert ca01 is not None + assert "California" in ca01.label + assert "1st" in ca01.label.lower() or "1 " in ca01.label + assert ca01.region_type == "congressional_district" + assert ca01.parent_code == "state/ca" + assert ca01.dataset_path == f"{US_DATA_BUCKET}/districts/CA-01.h5" + assert ca01.state_code == "CA" + assert not ca01.requires_filter + + def test__given_dc_district__then_is_at_large(self): + """Given: DC's congressional district + When: Checking its properties + Then: Is labeled as at-large + """ + # When + dc_al = us_region_registry.get("congressional_district/DC-01") + + # Then + assert dc_al is not None + assert dc_al.label == "District of Columbia's at-large congressional district" + assert dc_al.parent_code == "state/dc" + + def test__given_us_registry__then_has_places(self): + """Given: US region registry + When: Getting place regions + Then: Contains 100+ large cities + """ + # When + places = us_region_registry.get_by_type("place") + + # Then + assert len(places) >= 100 + + def test__given_los_angeles_region__then_has_correct_format(self): + """Given: Los Angeles place region + When: Checking its properties + Then: Requires filter with place_fips field + """ + # When + la = us_region_registry.get("place/CA-44000") + + # Then + assert la is not None + assert "Los Angeles" in la.label + assert la.region_type == "place" + assert la.parent_code == "state/ca" + assert la.requires_filter + assert la.filter_field == "place_fips" + assert la.filter_value == "44000" + assert 
la.state_code == "CA" + assert la.dataset_path is None # No dedicated dataset + + def test__given_california__then_children_include_districts_and_places(self): + """Given: California state region + When: Getting its children + Then: Includes all 52 districts and 10+ places + """ + # When + ca_children = us_region_registry.get_children("state/ca") + district_children = [c for c in ca_children if c.region_type == "congressional_district"] + place_children = [c for c in ca_children if c.region_type == "place"] + + # Then + assert len(district_children) == DISTRICT_COUNTS["CA"] + assert len(place_children) >= 10 # CA has many large cities + + def test__given_us_registry__then_dataset_regions_is_488(self): + """Given: US region registry + When: Getting regions with datasets + Then: Returns 1 national + 51 states + 436 districts = 488 + """ + # When + dataset_regions = us_region_registry.get_dataset_regions() + + # Then + assert len(dataset_regions) == 488 + + def test__given_us_registry__then_filter_regions_are_all_places(self): + """Given: US region registry + When: Getting regions requiring filter + Then: All are place regions + """ + # When + filter_regions = us_region_registry.get_filter_regions() + + # Then + assert all(r.region_type == "place" for r in filter_regions) + + def test__given_us_registry__then_total_exceeds_588(self): + """Given: US region registry + When: Counting all regions + Then: Total is at least 488 (dataset) + 100 (places) + """ + # When + total = len(us_region_registry) + + # Then + assert total >= 488 + 100 diff --git a/uv.lock b/uv.lock index 55fe0b9c..8cf942d2 100644 --- a/uv.lock +++ b/uv.lock @@ -775,15 +775,15 @@ wheels = [ [[package]] name = "microdf-python" -version = "1.0.2" +version = "1.2.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, { name = "pandas" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/77/25/55c2b0495ae4c3142d61f1283d675494aac4c254e40ecf1ea4b337a051c7/microdf_python-1.0.2.tar.gz", hash = "sha256:5c845974d485598a7002c151f58ec7438e94c04954fc8fdea9238265e7bf02f5", size = 14826, upload-time = "2025-07-24T12:21:08.17Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/96/6f9f37f79f2c6440d91036a7bf8111dd4b983c577a7e96d45bf3ca4171f3/microdf_python-1.2.1.tar.gz", hash = "sha256:d4f58e4e0c21decd0c6d425b115db8acc72751c558f48d2a1c3a6619f168a94a", size = 19641, upload-time = "2026-01-25T13:40:57.147Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9c/1a/aac40a7e58de4133a9cc7630913a8b8e6c76326288b168cbb47f7714c4fd/microdf_python-1.0.2-py3-none-any.whl", hash = "sha256:f7883785e4557d1c8822dbf0d69d7eeab9399f8e67a9bdb716f74554c7580ae7", size = 15823, upload-time = "2025-07-24T12:21:07.356Z" }, + { url = "https://files.pythonhosted.org/packages/cd/2e/375ab71f8d91b691597247b186a4d7b156d2ed975dfb00450e560beae747/microdf_python-1.2.1-py3-none-any.whl", hash = "sha256:3c3d318a82cba7db0ef5a72e8a73a6072fe0bc7a9cb59b1eac01a26ee8c82e7c", size = 20879, upload-time = "2026-01-25T13:40:55.877Z" }, ] [[package]] @@ -1080,7 +1080,7 @@ wheels = [ [[package]] name = "policyengine" -version = "3.1.14" +version = "3.1.16" source = { editable = "." 
} dependencies = [ { name = "microdf-python" }, @@ -1124,12 +1124,12 @@ requires-dist = [ { name = "furo", marker = "extra == 'dev'" }, { name = "itables", marker = "extra == 'dev'" }, { name = "jupyter-book", marker = "extra == 'dev'" }, - { name = "microdf-python" }, + { name = "microdf-python", specifier = ">=1.2.1" }, { name = "pandas", specifier = ">=2.0.0" }, { name = "plotly", specifier = ">=5.0.0" }, - { name = "policyengine-core", marker = "extra == 'dev'", specifier = ">=3.10" }, - { name = "policyengine-core", marker = "extra == 'uk'", specifier = ">=3.10" }, - { name = "policyengine-core", marker = "extra == 'us'", specifier = ">=3.10" }, + { name = "policyengine-core", marker = "extra == 'dev'", specifier = ">=3.23.6" }, + { name = "policyengine-core", marker = "extra == 'uk'", specifier = ">=3.23.6" }, + { name = "policyengine-core", marker = "extra == 'us'", specifier = ">=3.23.6" }, { name = "policyengine-uk", marker = "extra == 'dev'", specifier = ">=2.51.0" }, { name = "policyengine-uk", marker = "extra == 'uk'", specifier = ">=2.51.0" }, { name = "policyengine-us", marker = "extra == 'dev'", specifier = ">=1.213.1" }, @@ -1146,7 +1146,7 @@ provides-extras = ["uk", "us", "dev"] [[package]] name = "policyengine-core" -version = "3.20.0" +version = "3.23.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dpath" }, @@ -1166,9 +1166,9 @@ dependencies = [ { name = "standard-imghdr" }, { name = "wheel" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d3/d7/cd4ae165221b3d5630a5c95e6df0a10be06d461b6545552a5f4a11c40907/policyengine_core-3.20.0.tar.gz", hash = "sha256:10c428467c8629861986f356f7f13ff8bf23ec907961779cf9f6add63f147fdf", size = 159655, upload-time = "2025-08-12T15:54:35.437Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5d/de/5bc5b02626703ea7d288c84c474ec51e823aa726d55ebabafe7c85e7285f/policyengine_core-3.23.6.tar.gz", hash = 
"sha256:81bb4057f5d6380f2d7f1af2fe4932bd3bd37fdfda7b841f7ee38b30aa5cc8e6", size = 163499, upload-time = "2026-01-25T14:04:43.233Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/01/47/9cd4a2cfd675c5267dc905b2f23313b37df734f94f65490ca083422db39f/policyengine_core-3.20.0-py3-none-any.whl", hash = "sha256:c802edf10059242f7c03b54f7e8c78703ae053defcfe4ec75c677ed0714c07a6", size = 220871, upload-time = "2025-08-12T15:54:33.799Z" }, + { url = "https://files.pythonhosted.org/packages/82/7a/b47b239fb0a85a36b36b47e7665db981800fcac3384aeec6dadf92a9e548/policyengine_core-3.23.6-py3-none-any.whl", hash = "sha256:f0834107335de6f2452d39e53db7a72a57088ed26d3703a4c4eaded55a4e7bce", size = 225309, upload-time = "2026-01-25T14:04:41.844Z" }, ] [[package]] From 0c7e9eab7d2ab0205b0f1ee5f519c0b76a5194e0 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 10 Feb 2026 17:00:40 +0100 Subject: [PATCH 2/8] style: Fix linting and formatting issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove unused imports - Sort imports correctly - Format code with ruff 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/policyengine/core/region.py | 19 +- src/policyengine/countries/uk/regions.py | 13 +- src/policyengine/countries/us/data/places.py | 2061 +++++++++++++++--- src/policyengine/utils/parameter_labels.py | 11 +- tests/fixtures/region_fixtures.py | 8 +- tests/test_pandas3_compatibility.py | 1 + tests/test_parameter_labels.py | 50 +- tests/test_region.py | 62 +- tests/test_uk_regions.py | 9 +- tests/test_us_regions.py | 28 +- 10 files changed, 1913 insertions(+), 349 deletions(-) diff --git a/src/policyengine/core/region.py b/src/policyengine/core/region.py index 3208b35e..36cbc71e 100644 --- a/src/policyengine/core/region.py +++ b/src/policyengine/core/region.py @@ -12,7 +12,9 @@ # Region type literals for US and UK USRegionType = Literal["national", "state", 
"congressional_district", "place"] -UKRegionType = Literal["national", "country", "constituency", "local_authority"] +UKRegionType = Literal[ + "national", "country", "constituency", "local_authority" +] RegionType = USRegionType | UKRegionType @@ -38,7 +40,9 @@ class Region(BaseModel): ..., description="Unique region code with type prefix (e.g., 'state/ca', 'place/NJ-57000')", ) - label: str = Field(..., description="Human-readable label (e.g., 'California')") + label: str = Field( + ..., description="Human-readable label (e.g., 'California')" + ) region_type: RegionType = Field( ..., description="Type of region (e.g., 'state', 'place')" ) @@ -74,7 +78,8 @@ class Region(BaseModel): default=None, description="Two-letter state code (e.g., 'CA', 'NJ')" ) state_name: str | None = Field( - default=None, description="Full state name (e.g., 'California', 'New Jersey')" + default=None, + description="Full state name (e.g., 'California', 'New Jersey')", ) def __hash__(self) -> int: @@ -95,7 +100,9 @@ class RegionRegistry(BaseModel): Indices are rebuilt automatically after initialization. 
""" - country_id: str = Field(..., description="Country identifier (e.g., 'us', 'uk')") + country_id: str = Field( + ..., description="Country identifier (e.g., 'us', 'uk')" + ) regions: list[Region] = Field(default_factory=list) # Private indexed lookups (excluded from serialization) @@ -177,7 +184,9 @@ def get_dataset_regions(self) -> list[Region]: List of regions with dataset_path set and requires_filter False """ return [ - r for r in self.regions if r.dataset_path is not None and not r.requires_filter + r + for r in self.regions + if r.dataset_path is not None and not r.requires_filter ] def get_filter_regions(self) -> list[Region]: diff --git a/src/policyengine/countries/uk/regions.py b/src/policyengine/countries/uk/regions.py index 5e551755..340a29e8 100644 --- a/src/policyengine/countries/uk/regions.py +++ b/src/policyengine/countries/uk/regions.py @@ -11,13 +11,12 @@ from H5 files stored in GCS. """ -from pathlib import Path from typing import TYPE_CHECKING from policyengine.core.region import Region, RegionRegistry if TYPE_CHECKING: - import pandas as pd + pass UK_DATA_BUCKET = "gs://policyengine-uk-data-private" @@ -53,7 +52,10 @@ def _load_constituencies_from_csv() -> list[dict]: import pandas as pd df = pd.read_csv(csv_path) - return [{"code": row["code"], "name": row["name"]} for _, row in df.iterrows()] + return [ + {"code": row["code"], "name": row["name"]} + for _, row in df.iterrows() + ] except Exception: # If download fails, return empty list return [] @@ -82,7 +84,10 @@ def _load_local_authorities_from_csv() -> list[dict]: import pandas as pd df = pd.read_csv(csv_path) - return [{"code": row["code"], "name": row["name"]} for _, row in df.iterrows()] + return [ + {"code": row["code"], "name": row["name"]} + for _, row in df.iterrows() + ] except Exception: # If download fails, return empty list return [] diff --git a/src/policyengine/countries/us/data/places.py b/src/policyengine/countries/us/data/places.py index f5367eca..a5fe632f 100644 --- 
a/src/policyengine/countries/us/data/places.py +++ b/src/policyengine/countries/us/data/places.py @@ -8,339 +8,1808 @@ # These filter from their parent state's dataset using place_fips # Total: 333 places US_PLACES: list[dict[str, str]] = [ - {"fips": "03000", "name": "Anchorage", "state": "AK", "state_name": "Alaska"}, - {"fips": "07000", "name": "Birmingham", "state": "AL", "state_name": "Alabama"}, - {"fips": "37000", "name": "Huntsville", "state": "AL", "state_name": "Alabama"}, - {"fips": "50000", "name": "Mobile", "state": "AL", "state_name": "Alabama"}, - {"fips": "51000", "name": "Montgomery", "state": "AL", "state_name": "Alabama"}, - {"fips": "77256", "name": "Tuscaloosa", "state": "AL", "state_name": "Alabama"}, - {"fips": "23290", "name": "Fayetteville", "state": "AR", "state_name": "Arkansas"}, - {"fips": "41000", "name": "Little Rock", "state": "AR", "state_name": "Arkansas"}, - {"fips": "07940", "name": "Buckeye", "state": "AZ", "state_name": "Arizona"}, - {"fips": "12000", "name": "Chandler", "state": "AZ", "state_name": "Arizona"}, - {"fips": "27400", "name": "Gilbert", "state": "AZ", "state_name": "Arizona"}, - {"fips": "27820", "name": "Glendale", "state": "AZ", "state_name": "Arizona"}, - {"fips": "28380", "name": "Goodyear", "state": "AZ", "state_name": "Arizona"}, + { + "fips": "03000", + "name": "Anchorage", + "state": "AK", + "state_name": "Alaska", + }, + { + "fips": "07000", + "name": "Birmingham", + "state": "AL", + "state_name": "Alabama", + }, + { + "fips": "37000", + "name": "Huntsville", + "state": "AL", + "state_name": "Alabama", + }, + { + "fips": "50000", + "name": "Mobile", + "state": "AL", + "state_name": "Alabama", + }, + { + "fips": "51000", + "name": "Montgomery", + "state": "AL", + "state_name": "Alabama", + }, + { + "fips": "77256", + "name": "Tuscaloosa", + "state": "AL", + "state_name": "Alabama", + }, + { + "fips": "23290", + "name": "Fayetteville", + "state": "AR", + "state_name": "Arkansas", + }, + { + "fips": "41000", 
+ "name": "Little Rock", + "state": "AR", + "state_name": "Arkansas", + }, + { + "fips": "07940", + "name": "Buckeye", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "12000", + "name": "Chandler", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "27400", + "name": "Gilbert", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "27820", + "name": "Glendale", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "28380", + "name": "Goodyear", + "state": "AZ", + "state_name": "Arizona", + }, {"fips": "46000", "name": "Mesa", "state": "AZ", "state_name": "Arizona"}, - {"fips": "54050", "name": "Peoria", "state": "AZ", "state_name": "Arizona"}, - {"fips": "55000", "name": "Phoenix", "state": "AZ", "state_name": "Arizona"}, - {"fips": "65000", "name": "Scottsdale", "state": "AZ", "state_name": "Arizona"}, - {"fips": "71510", "name": "Surprise", "state": "AZ", "state_name": "Arizona"}, + { + "fips": "54050", + "name": "Peoria", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "55000", + "name": "Phoenix", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "65000", + "name": "Scottsdale", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "71510", + "name": "Surprise", + "state": "AZ", + "state_name": "Arizona", + }, {"fips": "73000", "name": "Tempe", "state": "AZ", "state_name": "Arizona"}, - {"fips": "77000", "name": "Tucson", "state": "AZ", "state_name": "Arizona"}, + { + "fips": "77000", + "name": "Tucson", + "state": "AZ", + "state_name": "Arizona", + }, {"fips": "85540", "name": "Yuma", "state": "AZ", "state_name": "Arizona"}, - {"fips": "02000", "name": "Anaheim", "state": "CA", "state_name": "California"}, - {"fips": "02252", "name": "Antioch", "state": "CA", "state_name": "California"}, - {"fips": "03526", "name": "Bakersfield", "state": "CA", "state_name": "California"}, - {"fips": "06000", "name": "Berkeley", "state": "CA", "state_name": "California"}, - {"fips": "08954", 
"name": "Burbank", "state": "CA", "state_name": "California"}, - {"fips": "11194", "name": "Carlsbad", "state": "CA", "state_name": "California"}, - {"fips": "13014", "name": "Chico", "state": "CA", "state_name": "California"}, - {"fips": "13392", "name": "Chula Vista", "state": "CA", "state_name": "California"}, - {"fips": "14218", "name": "Clovis", "state": "CA", "state_name": "California"}, - {"fips": "16000", "name": "Concord", "state": "CA", "state_name": "California"}, - {"fips": "16350", "name": "Corona", "state": "CA", "state_name": "California"}, - {"fips": "16532", "name": "Costa Mesa", "state": "CA", "state_name": "California"}, - {"fips": "19766", "name": "Downey", "state": "CA", "state_name": "California"}, - {"fips": "21712", "name": "El Cajon", "state": "CA", "state_name": "California"}, - {"fips": "22230", "name": "El Monte", "state": "CA", "state_name": "California"}, - {"fips": "22020", "name": "Elk Grove", "state": "CA", "state_name": "California"}, - {"fips": "22804", "name": "Escondido", "state": "CA", "state_name": "California"}, - {"fips": "23182", "name": "Fairfield", "state": "CA", "state_name": "California"}, - {"fips": "24680", "name": "Fontana", "state": "CA", "state_name": "California"}, - {"fips": "26000", "name": "Fremont", "state": "CA", "state_name": "California"}, - {"fips": "27000", "name": "Fresno", "state": "CA", "state_name": "California"}, - {"fips": "28000", "name": "Fullerton", "state": "CA", "state_name": "California"}, - {"fips": "29000", "name": "Garden Grove", "state": "CA", "state_name": "California"}, - {"fips": "30000", "name": "Glendale", "state": "CA", "state_name": "California"}, - {"fips": "33000", "name": "Hayward", "state": "CA", "state_name": "California"}, - {"fips": "33434", "name": "Hesperia", "state": "CA", "state_name": "California"}, - {"fips": "36000", "name": "Huntington Beach", "state": "CA", "state_name": "California"}, - {"fips": "36546", "name": "Inglewood", "state": "CA", "state_name": 
"California"}, - {"fips": "36770", "name": "Irvine", "state": "CA", "state_name": "California"}, - {"fips": "37692", "name": "Jurupa Valley", "state": "CA", "state_name": "California"}, - {"fips": "40130", "name": "Lancaster", "state": "CA", "state_name": "California"}, - {"fips": "43000", "name": "Long Beach", "state": "CA", "state_name": "California"}, - {"fips": "44000", "name": "Los Angeles", "state": "CA", "state_name": "California"}, - {"fips": "46842", "name": "Menifee", "state": "CA", "state_name": "California"}, - {"fips": "48354", "name": "Modesto", "state": "CA", "state_name": "California"}, - {"fips": "49270", "name": "Moreno Valley", "state": "CA", "state_name": "California"}, - {"fips": "50076", "name": "Murrieta", "state": "CA", "state_name": "California"}, - {"fips": "53000", "name": "Oakland", "state": "CA", "state_name": "California"}, - {"fips": "53322", "name": "Oceanside", "state": "CA", "state_name": "California"}, - {"fips": "53896", "name": "Ontario", "state": "CA", "state_name": "California"}, - {"fips": "53980", "name": "Orange", "state": "CA", "state_name": "California"}, - {"fips": "54652", "name": "Oxnard", "state": "CA", "state_name": "California"}, - {"fips": "55156", "name": "Palmdale", "state": "CA", "state_name": "California"}, - {"fips": "56000", "name": "Pasadena", "state": "CA", "state_name": "California"}, - {"fips": "58072", "name": "Pomona", "state": "CA", "state_name": "California"}, - {"fips": "59451", "name": "Rancho Cucamonga", "state": "CA", "state_name": "California"}, - {"fips": "60466", "name": "Rialto", "state": "CA", "state_name": "California"}, - {"fips": "60620", "name": "Richmond", "state": "CA", "state_name": "California"}, - {"fips": "62000", "name": "Riverside", "state": "CA", "state_name": "California"}, - {"fips": "62938", "name": "Roseville", "state": "CA", "state_name": "California"}, - {"fips": "64000", "name": "Sacramento", "state": "CA", "state_name": "California"}, - {"fips": "64224", "name": 
"Salinas", "state": "CA", "state_name": "California"}, - {"fips": "65000", "name": "San Bernardino", "state": "CA", "state_name": "California"}, - {"fips": "66000", "name": "San Diego", "state": "CA", "state_name": "California"}, - {"fips": "67000", "name": "San Francisco", "state": "CA", "state_name": "California"}, - {"fips": "68000", "name": "San Jose", "state": "CA", "state_name": "California"}, - {"fips": "68252", "name": "San Mateo", "state": "CA", "state_name": "California"}, - {"fips": "69000", "name": "Santa Ana", "state": "CA", "state_name": "California"}, - {"fips": "69084", "name": "Santa Clara", "state": "CA", "state_name": "California"}, - {"fips": "69088", "name": "Santa Clarita", "state": "CA", "state_name": "California"}, - {"fips": "69196", "name": "Santa Maria", "state": "CA", "state_name": "California"}, - {"fips": "70098", "name": "Santa Rosa", "state": "CA", "state_name": "California"}, - {"fips": "72016", "name": "Simi Valley", "state": "CA", "state_name": "California"}, - {"fips": "75000", "name": "Stockton", "state": "CA", "state_name": "California"}, - {"fips": "77000", "name": "Sunnyvale", "state": "CA", "state_name": "California"}, - {"fips": "78120", "name": "Temecula", "state": "CA", "state_name": "California"}, - {"fips": "78582", "name": "Thousand Oaks", "state": "CA", "state_name": "California"}, - {"fips": "80000", "name": "Torrance", "state": "CA", "state_name": "California"}, - {"fips": "81554", "name": "Vacaville", "state": "CA", "state_name": "California"}, - {"fips": "81666", "name": "Vallejo", "state": "CA", "state_name": "California"}, - {"fips": "65042", "name": "Ventura", "state": "CA", "state_name": "California"}, - {"fips": "82590", "name": "Victorville", "state": "CA", "state_name": "California"}, - {"fips": "82954", "name": "Visalia", "state": "CA", "state_name": "California"}, - {"fips": "84200", "name": "West Covina", "state": "CA", "state_name": "California"}, - {"fips": "03455", "name": "Arvada", "state": "CO", 
"state_name": "Colorado"}, - {"fips": "04000", "name": "Aurora", "state": "CO", "state_name": "Colorado"}, - {"fips": "07850", "name": "Boulder", "state": "CO", "state_name": "Colorado"}, - {"fips": "12815", "name": "Centennial", "state": "CO", "state_name": "Colorado"}, - {"fips": "16000", "name": "Colorado Springs", "state": "CO", "state_name": "Colorado"}, - {"fips": "20000", "name": "Denver", "state": "CO", "state_name": "Colorado"}, - {"fips": "27425", "name": "Fort Collins", "state": "CO", "state_name": "Colorado"}, - {"fips": "32155", "name": "Greeley", "state": "CO", "state_name": "Colorado"}, - {"fips": "43000", "name": "Lakewood", "state": "CO", "state_name": "Colorado"}, - {"fips": "62000", "name": "Pueblo", "state": "CO", "state_name": "Colorado"}, - {"fips": "77290", "name": "Thornton", "state": "CO", "state_name": "Colorado"}, - {"fips": "83835", "name": "Westminster", "state": "CO", "state_name": "Colorado"}, - {"fips": "08000", "name": "Bridgeport", "state": "CT", "state_name": "Connecticut"}, - {"fips": "37000", "name": "Hartford", "state": "CT", "state_name": "Connecticut"}, - {"fips": "52000", "name": "New Haven", "state": "CT", "state_name": "Connecticut"}, - {"fips": "73000", "name": "Stamford", "state": "CT", "state_name": "Connecticut"}, - {"fips": "80000", "name": "Waterbury", "state": "CT", "state_name": "Connecticut"}, - {"fips": "50000", "name": "Washington", "state": "DC", "state_name": "District of Columbia"}, - {"fips": "10275", "name": "Cape Coral", "state": "FL", "state_name": "Florida"}, - {"fips": "12875", "name": "Clearwater", "state": "FL", "state_name": "Florida"}, - {"fips": "14400", "name": "Coral Springs", "state": "FL", "state_name": "Florida"}, + { + "fips": "02000", + "name": "Anaheim", + "state": "CA", + "state_name": "California", + }, + { + "fips": "02252", + "name": "Antioch", + "state": "CA", + "state_name": "California", + }, + { + "fips": "03526", + "name": "Bakersfield", + "state": "CA", + "state_name": 
"California", + }, + { + "fips": "06000", + "name": "Berkeley", + "state": "CA", + "state_name": "California", + }, + { + "fips": "08954", + "name": "Burbank", + "state": "CA", + "state_name": "California", + }, + { + "fips": "11194", + "name": "Carlsbad", + "state": "CA", + "state_name": "California", + }, + { + "fips": "13014", + "name": "Chico", + "state": "CA", + "state_name": "California", + }, + { + "fips": "13392", + "name": "Chula Vista", + "state": "CA", + "state_name": "California", + }, + { + "fips": "14218", + "name": "Clovis", + "state": "CA", + "state_name": "California", + }, + { + "fips": "16000", + "name": "Concord", + "state": "CA", + "state_name": "California", + }, + { + "fips": "16350", + "name": "Corona", + "state": "CA", + "state_name": "California", + }, + { + "fips": "16532", + "name": "Costa Mesa", + "state": "CA", + "state_name": "California", + }, + { + "fips": "19766", + "name": "Downey", + "state": "CA", + "state_name": "California", + }, + { + "fips": "21712", + "name": "El Cajon", + "state": "CA", + "state_name": "California", + }, + { + "fips": "22230", + "name": "El Monte", + "state": "CA", + "state_name": "California", + }, + { + "fips": "22020", + "name": "Elk Grove", + "state": "CA", + "state_name": "California", + }, + { + "fips": "22804", + "name": "Escondido", + "state": "CA", + "state_name": "California", + }, + { + "fips": "23182", + "name": "Fairfield", + "state": "CA", + "state_name": "California", + }, + { + "fips": "24680", + "name": "Fontana", + "state": "CA", + "state_name": "California", + }, + { + "fips": "26000", + "name": "Fremont", + "state": "CA", + "state_name": "California", + }, + { + "fips": "27000", + "name": "Fresno", + "state": "CA", + "state_name": "California", + }, + { + "fips": "28000", + "name": "Fullerton", + "state": "CA", + "state_name": "California", + }, + { + "fips": "29000", + "name": "Garden Grove", + "state": "CA", + "state_name": "California", + }, + { + "fips": "30000", + "name": 
"Glendale", + "state": "CA", + "state_name": "California", + }, + { + "fips": "33000", + "name": "Hayward", + "state": "CA", + "state_name": "California", + }, + { + "fips": "33434", + "name": "Hesperia", + "state": "CA", + "state_name": "California", + }, + { + "fips": "36000", + "name": "Huntington Beach", + "state": "CA", + "state_name": "California", + }, + { + "fips": "36546", + "name": "Inglewood", + "state": "CA", + "state_name": "California", + }, + { + "fips": "36770", + "name": "Irvine", + "state": "CA", + "state_name": "California", + }, + { + "fips": "37692", + "name": "Jurupa Valley", + "state": "CA", + "state_name": "California", + }, + { + "fips": "40130", + "name": "Lancaster", + "state": "CA", + "state_name": "California", + }, + { + "fips": "43000", + "name": "Long Beach", + "state": "CA", + "state_name": "California", + }, + { + "fips": "44000", + "name": "Los Angeles", + "state": "CA", + "state_name": "California", + }, + { + "fips": "46842", + "name": "Menifee", + "state": "CA", + "state_name": "California", + }, + { + "fips": "48354", + "name": "Modesto", + "state": "CA", + "state_name": "California", + }, + { + "fips": "49270", + "name": "Moreno Valley", + "state": "CA", + "state_name": "California", + }, + { + "fips": "50076", + "name": "Murrieta", + "state": "CA", + "state_name": "California", + }, + { + "fips": "53000", + "name": "Oakland", + "state": "CA", + "state_name": "California", + }, + { + "fips": "53322", + "name": "Oceanside", + "state": "CA", + "state_name": "California", + }, + { + "fips": "53896", + "name": "Ontario", + "state": "CA", + "state_name": "California", + }, + { + "fips": "53980", + "name": "Orange", + "state": "CA", + "state_name": "California", + }, + { + "fips": "54652", + "name": "Oxnard", + "state": "CA", + "state_name": "California", + }, + { + "fips": "55156", + "name": "Palmdale", + "state": "CA", + "state_name": "California", + }, + { + "fips": "56000", + "name": "Pasadena", + "state": "CA", + "state_name": 
"California", + }, + { + "fips": "58072", + "name": "Pomona", + "state": "CA", + "state_name": "California", + }, + { + "fips": "59451", + "name": "Rancho Cucamonga", + "state": "CA", + "state_name": "California", + }, + { + "fips": "60466", + "name": "Rialto", + "state": "CA", + "state_name": "California", + }, + { + "fips": "60620", + "name": "Richmond", + "state": "CA", + "state_name": "California", + }, + { + "fips": "62000", + "name": "Riverside", + "state": "CA", + "state_name": "California", + }, + { + "fips": "62938", + "name": "Roseville", + "state": "CA", + "state_name": "California", + }, + { + "fips": "64000", + "name": "Sacramento", + "state": "CA", + "state_name": "California", + }, + { + "fips": "64224", + "name": "Salinas", + "state": "CA", + "state_name": "California", + }, + { + "fips": "65000", + "name": "San Bernardino", + "state": "CA", + "state_name": "California", + }, + { + "fips": "66000", + "name": "San Diego", + "state": "CA", + "state_name": "California", + }, + { + "fips": "67000", + "name": "San Francisco", + "state": "CA", + "state_name": "California", + }, + { + "fips": "68000", + "name": "San Jose", + "state": "CA", + "state_name": "California", + }, + { + "fips": "68252", + "name": "San Mateo", + "state": "CA", + "state_name": "California", + }, + { + "fips": "69000", + "name": "Santa Ana", + "state": "CA", + "state_name": "California", + }, + { + "fips": "69084", + "name": "Santa Clara", + "state": "CA", + "state_name": "California", + }, + { + "fips": "69088", + "name": "Santa Clarita", + "state": "CA", + "state_name": "California", + }, + { + "fips": "69196", + "name": "Santa Maria", + "state": "CA", + "state_name": "California", + }, + { + "fips": "70098", + "name": "Santa Rosa", + "state": "CA", + "state_name": "California", + }, + { + "fips": "72016", + "name": "Simi Valley", + "state": "CA", + "state_name": "California", + }, + { + "fips": "75000", + "name": "Stockton", + "state": "CA", + "state_name": "California", + }, + { 
+ "fips": "77000", + "name": "Sunnyvale", + "state": "CA", + "state_name": "California", + }, + { + "fips": "78120", + "name": "Temecula", + "state": "CA", + "state_name": "California", + }, + { + "fips": "78582", + "name": "Thousand Oaks", + "state": "CA", + "state_name": "California", + }, + { + "fips": "80000", + "name": "Torrance", + "state": "CA", + "state_name": "California", + }, + { + "fips": "81554", + "name": "Vacaville", + "state": "CA", + "state_name": "California", + }, + { + "fips": "81666", + "name": "Vallejo", + "state": "CA", + "state_name": "California", + }, + { + "fips": "65042", + "name": "Ventura", + "state": "CA", + "state_name": "California", + }, + { + "fips": "82590", + "name": "Victorville", + "state": "CA", + "state_name": "California", + }, + { + "fips": "82954", + "name": "Visalia", + "state": "CA", + "state_name": "California", + }, + { + "fips": "84200", + "name": "West Covina", + "state": "CA", + "state_name": "California", + }, + { + "fips": "03455", + "name": "Arvada", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "04000", + "name": "Aurora", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "07850", + "name": "Boulder", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "12815", + "name": "Centennial", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "16000", + "name": "Colorado Springs", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "20000", + "name": "Denver", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "27425", + "name": "Fort Collins", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "32155", + "name": "Greeley", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "43000", + "name": "Lakewood", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "62000", + "name": "Pueblo", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "77290", + "name": "Thornton", + "state": "CO", + 
"state_name": "Colorado", + }, + { + "fips": "83835", + "name": "Westminster", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "08000", + "name": "Bridgeport", + "state": "CT", + "state_name": "Connecticut", + }, + { + "fips": "37000", + "name": "Hartford", + "state": "CT", + "state_name": "Connecticut", + }, + { + "fips": "52000", + "name": "New Haven", + "state": "CT", + "state_name": "Connecticut", + }, + { + "fips": "73000", + "name": "Stamford", + "state": "CT", + "state_name": "Connecticut", + }, + { + "fips": "80000", + "name": "Waterbury", + "state": "CT", + "state_name": "Connecticut", + }, + { + "fips": "50000", + "name": "Washington", + "state": "DC", + "state_name": "District of Columbia", + }, + { + "fips": "10275", + "name": "Cape Coral", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "12875", + "name": "Clearwater", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "14400", + "name": "Coral Springs", + "state": "FL", + "state_name": "Florida", + }, {"fips": "16475", "name": "Davie", "state": "FL", "state_name": "Florida"}, - {"fips": "24000", "name": "Fort Lauderdale", "state": "FL", "state_name": "Florida"}, - {"fips": "25175", "name": "Gainesville", "state": "FL", "state_name": "Florida"}, - {"fips": "30000", "name": "Hialeah", "state": "FL", "state_name": "Florida"}, - {"fips": "32000", "name": "Hollywood", "state": "FL", "state_name": "Florida"}, - {"fips": "35000", "name": "Jacksonville", "state": "FL", "state_name": "Florida"}, - {"fips": "38250", "name": "Lakeland", "state": "FL", "state_name": "Florida"}, - {"fips": "45060", "name": "Miami Gardens", "state": "FL", "state_name": "Florida"}, + { + "fips": "24000", + "name": "Fort Lauderdale", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "25175", + "name": "Gainesville", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "30000", + "name": "Hialeah", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "32000", 
+ "name": "Hollywood", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "35000", + "name": "Jacksonville", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "38250", + "name": "Lakeland", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "45060", + "name": "Miami Gardens", + "state": "FL", + "state_name": "Florida", + }, {"fips": "45000", "name": "Miami", "state": "FL", "state_name": "Florida"}, - {"fips": "45975", "name": "Miramar", "state": "FL", "state_name": "Florida"}, - {"fips": "53000", "name": "Orlando", "state": "FL", "state_name": "Florida"}, - {"fips": "54000", "name": "Palm Bay", "state": "FL", "state_name": "Florida"}, - {"fips": "54200", "name": "Palm Coast", "state": "FL", "state_name": "Florida"}, - {"fips": "55775", "name": "Pembroke Pines", "state": "FL", "state_name": "Florida"}, - {"fips": "58050", "name": "Pompano Beach", "state": "FL", "state_name": "Florida"}, - {"fips": "58715", "name": "Port St. Lucie", "state": "FL", "state_name": "Florida"}, - {"fips": "63000", "name": "St. Petersburg", "state": "FL", "state_name": "Florida"}, - {"fips": "70600", "name": "Tallahassee", "state": "FL", "state_name": "Florida"}, + { + "fips": "45975", + "name": "Miramar", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "53000", + "name": "Orlando", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "54000", + "name": "Palm Bay", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "54200", + "name": "Palm Coast", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "55775", + "name": "Pembroke Pines", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "58050", + "name": "Pompano Beach", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "58715", + "name": "Port St. Lucie", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "63000", + "name": "St. 
Petersburg", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "70600", + "name": "Tallahassee", + "state": "FL", + "state_name": "Florida", + }, {"fips": "71000", "name": "Tampa", "state": "FL", "state_name": "Florida"}, - {"fips": "76600", "name": "West Palm Beach", "state": "FL", "state_name": "Florida"}, - {"fips": "03440", "name": "Athens-Clarke County", "state": "GA", "state_name": "Georgia"}, - {"fips": "04000", "name": "Atlanta", "state": "GA", "state_name": "Georgia"}, - {"fips": "04204", "name": "Augusta-Richmond County", "state": "GA", "state_name": "Georgia"}, - {"fips": "19000", "name": "Columbus", "state": "GA", "state_name": "Georgia"}, - {"fips": "49008", "name": "Macon-Bibb County", "state": "GA", "state_name": "Georgia"}, - {"fips": "68516", "name": "Sandy Springs", "state": "GA", "state_name": "Georgia"}, - {"fips": "69000", "name": "Savannah", "state": "GA", "state_name": "Georgia"}, - {"fips": "72122", "name": "South Fulton", "state": "GA", "state_name": "Georgia"}, - {"fips": "71550", "name": "Urban Honolulu", "state": "HI", "state_name": "Hawaii"}, - {"fips": "12000", "name": "Cedar Rapids", "state": "IA", "state_name": "Iowa"}, - {"fips": "19000", "name": "Davenport", "state": "IA", "state_name": "Iowa"}, - {"fips": "21000", "name": "Des Moines", "state": "IA", "state_name": "Iowa"}, - {"fips": "08830", "name": "Boise City", "state": "ID", "state_name": "Idaho"}, - {"fips": "52120", "name": "Meridian", "state": "ID", "state_name": "Idaho"}, + { + "fips": "76600", + "name": "West Palm Beach", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "03440", + "name": "Athens-Clarke County", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "04000", + "name": "Atlanta", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "04204", + "name": "Augusta-Richmond County", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "19000", + "name": "Columbus", + "state": "GA", + "state_name": "Georgia", 
+ }, + { + "fips": "49008", + "name": "Macon-Bibb County", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "68516", + "name": "Sandy Springs", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "69000", + "name": "Savannah", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "72122", + "name": "South Fulton", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "71550", + "name": "Urban Honolulu", + "state": "HI", + "state_name": "Hawaii", + }, + { + "fips": "12000", + "name": "Cedar Rapids", + "state": "IA", + "state_name": "Iowa", + }, + { + "fips": "19000", + "name": "Davenport", + "state": "IA", + "state_name": "Iowa", + }, + { + "fips": "21000", + "name": "Des Moines", + "state": "IA", + "state_name": "Iowa", + }, + { + "fips": "08830", + "name": "Boise City", + "state": "ID", + "state_name": "Idaho", + }, + { + "fips": "52120", + "name": "Meridian", + "state": "ID", + "state_name": "Idaho", + }, {"fips": "56260", "name": "Nampa", "state": "ID", "state_name": "Idaho"}, - {"fips": "03012", "name": "Aurora", "state": "IL", "state_name": "Illinois"}, - {"fips": "14000", "name": "Chicago", "state": "IL", "state_name": "Illinois"}, - {"fips": "23074", "name": "Elgin", "state": "IL", "state_name": "Illinois"}, - {"fips": "38570", "name": "Joliet", "state": "IL", "state_name": "Illinois"}, - {"fips": "51622", "name": "Naperville", "state": "IL", "state_name": "Illinois"}, - {"fips": "59000", "name": "Peoria", "state": "IL", "state_name": "Illinois"}, - {"fips": "65000", "name": "Rockford", "state": "IL", "state_name": "Illinois"}, - {"fips": "72000", "name": "Springfield", "state": "IL", "state_name": "Illinois"}, - {"fips": "10342", "name": "Carmel", "state": "IN", "state_name": "Indiana"}, - {"fips": "22000", "name": "Evansville", "state": "IN", "state_name": "Indiana"}, - {"fips": "23278", "name": "Fishers", "state": "IN", "state_name": "Indiana"}, - {"fips": "25000", "name": "Fort Wayne", "state": "IN", 
"state_name": "Indiana"}, - {"fips": "36003", "name": "Indianapolis", "state": "IN", "state_name": "Indiana"}, - {"fips": "71000", "name": "South Bend", "state": "IN", "state_name": "Indiana"}, - {"fips": "36000", "name": "Kansas City", "state": "KS", "state_name": "Kansas"}, + { + "fips": "03012", + "name": "Aurora", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "14000", + "name": "Chicago", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "23074", + "name": "Elgin", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "38570", + "name": "Joliet", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "51622", + "name": "Naperville", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "59000", + "name": "Peoria", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "65000", + "name": "Rockford", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "72000", + "name": "Springfield", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "10342", + "name": "Carmel", + "state": "IN", + "state_name": "Indiana", + }, + { + "fips": "22000", + "name": "Evansville", + "state": "IN", + "state_name": "Indiana", + }, + { + "fips": "23278", + "name": "Fishers", + "state": "IN", + "state_name": "Indiana", + }, + { + "fips": "25000", + "name": "Fort Wayne", + "state": "IN", + "state_name": "Indiana", + }, + { + "fips": "36003", + "name": "Indianapolis", + "state": "IN", + "state_name": "Indiana", + }, + { + "fips": "71000", + "name": "South Bend", + "state": "IN", + "state_name": "Indiana", + }, + { + "fips": "36000", + "name": "Kansas City", + "state": "KS", + "state_name": "Kansas", + }, {"fips": "52575", "name": "Olathe", "state": "KS", "state_name": "Kansas"}, - {"fips": "53775", "name": "Overland Park", "state": "KS", "state_name": "Kansas"}, + { + "fips": "53775", + "name": "Overland Park", + "state": "KS", + "state_name": "Kansas", + }, {"fips": "71000", "name": "Topeka", 
"state": "KS", "state_name": "Kansas"}, - {"fips": "79000", "name": "Wichita", "state": "KS", "state_name": "Kansas"}, - {"fips": "46027", "name": "Lexington-Fayette", "state": "KY", "state_name": "Kentucky"}, - {"fips": "48006", "name": "Louisville/Jefferson County", "state": "KY", "state_name": "Kentucky"}, - {"fips": "05000", "name": "Baton Rouge", "state": "LA", "state_name": "Louisiana"}, - {"fips": "40735", "name": "Lafayette", "state": "LA", "state_name": "Louisiana"}, - {"fips": "55000", "name": "New Orleans", "state": "LA", "state_name": "Louisiana"}, - {"fips": "70000", "name": "Shreveport", "state": "LA", "state_name": "Louisiana"}, - {"fips": "07000", "name": "Boston", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "09000", "name": "Brockton", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "11000", "name": "Cambridge", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "37000", "name": "Lowell", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "37490", "name": "Lynn", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "45000", "name": "New Bedford", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "55745", "name": "Quincy", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "67000", "name": "Springfield", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "82000", "name": "Worcester", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "04000", "name": "Baltimore", "state": "MD", "state_name": "Maryland"}, - {"fips": "03000", "name": "Ann Arbor", "state": "MI", "state_name": "Michigan"}, - {"fips": "21000", "name": "Dearborn", "state": "MI", "state_name": "Michigan"}, - {"fips": "22000", "name": "Detroit", "state": "MI", "state_name": "Michigan"}, - {"fips": "34000", "name": "Grand Rapids", "state": "MI", "state_name": "Michigan"}, - {"fips": "46000", "name": "Lansing", "state": "MI", "state_name": "Michigan"}, - {"fips": "76460", "name": "Sterling Heights", "state": "MI", 
"state_name": "Michigan"}, - {"fips": "84000", "name": "Warren", "state": "MI", "state_name": "Michigan"}, - {"fips": "43000", "name": "Minneapolis", "state": "MN", "state_name": "Minnesota"}, - {"fips": "54880", "name": "Rochester", "state": "MN", "state_name": "Minnesota"}, - {"fips": "58000", "name": "St. Paul", "state": "MN", "state_name": "Minnesota"}, - {"fips": "15670", "name": "Columbia", "state": "MO", "state_name": "Missouri"}, - {"fips": "35000", "name": "Independence", "state": "MO", "state_name": "Missouri"}, - {"fips": "38000", "name": "Kansas City", "state": "MO", "state_name": "Missouri"}, - {"fips": "41348", "name": "Lee's Summit", "state": "MO", "state_name": "Missouri"}, - {"fips": "70000", "name": "Springfield", "state": "MO", "state_name": "Missouri"}, - {"fips": "65000", "name": "St. Louis", "state": "MO", "state_name": "Missouri"}, - {"fips": "36000", "name": "Jackson", "state": "MS", "state_name": "Mississippi"}, - {"fips": "06550", "name": "Billings", "state": "MT", "state_name": "Montana"}, - {"fips": "10740", "name": "Cary", "state": "NC", "state_name": "North Carolina"}, - {"fips": "12000", "name": "Charlotte", "state": "NC", "state_name": "North Carolina"}, - {"fips": "14100", "name": "Concord", "state": "NC", "state_name": "North Carolina"}, - {"fips": "19000", "name": "Durham", "state": "NC", "state_name": "North Carolina"}, - {"fips": "22920", "name": "Fayetteville", "state": "NC", "state_name": "North Carolina"}, - {"fips": "28000", "name": "Greensboro", "state": "NC", "state_name": "North Carolina"}, - {"fips": "31400", "name": "High Point", "state": "NC", "state_name": "North Carolina"}, - {"fips": "55000", "name": "Raleigh", "state": "NC", "state_name": "North Carolina"}, - {"fips": "74440", "name": "Wilmington", "state": "NC", "state_name": "North Carolina"}, - {"fips": "75000", "name": "Winston-Salem", "state": "NC", "state_name": "North Carolina"}, - {"fips": "25700", "name": "Fargo", "state": "ND", "state_name": "North 
Dakota"}, - {"fips": "28000", "name": "Lincoln", "state": "NE", "state_name": "Nebraska"}, - {"fips": "37000", "name": "Omaha", "state": "NE", "state_name": "Nebraska"}, - {"fips": "45140", "name": "Manchester", "state": "NH", "state_name": "New Hampshire"}, - {"fips": "21000", "name": "Elizabeth", "state": "NJ", "state_name": "New Jersey"}, - {"fips": "36000", "name": "Jersey City", "state": "NJ", "state_name": "New Jersey"}, - {"fips": "51000", "name": "Newark", "state": "NJ", "state_name": "New Jersey"}, - {"fips": "57000", "name": "Paterson", "state": "NJ", "state_name": "New Jersey"}, - {"fips": "02000", "name": "Albuquerque", "state": "NM", "state_name": "New Mexico"}, - {"fips": "39380", "name": "Las Cruces", "state": "NM", "state_name": "New Mexico"}, - {"fips": "63460", "name": "Rio Rancho", "state": "NM", "state_name": "New Mexico"}, - {"fips": "31900", "name": "Henderson", "state": "NV", "state_name": "Nevada"}, - {"fips": "40000", "name": "Las Vegas", "state": "NV", "state_name": "Nevada"}, - {"fips": "51800", "name": "North Las Vegas", "state": "NV", "state_name": "Nevada"}, + { + "fips": "79000", + "name": "Wichita", + "state": "KS", + "state_name": "Kansas", + }, + { + "fips": "46027", + "name": "Lexington-Fayette", + "state": "KY", + "state_name": "Kentucky", + }, + { + "fips": "48006", + "name": "Louisville/Jefferson County", + "state": "KY", + "state_name": "Kentucky", + }, + { + "fips": "05000", + "name": "Baton Rouge", + "state": "LA", + "state_name": "Louisiana", + }, + { + "fips": "40735", + "name": "Lafayette", + "state": "LA", + "state_name": "Louisiana", + }, + { + "fips": "55000", + "name": "New Orleans", + "state": "LA", + "state_name": "Louisiana", + }, + { + "fips": "70000", + "name": "Shreveport", + "state": "LA", + "state_name": "Louisiana", + }, + { + "fips": "07000", + "name": "Boston", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "09000", + "name": "Brockton", + "state": "MA", + "state_name": 
"Massachusetts", + }, + { + "fips": "11000", + "name": "Cambridge", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "37000", + "name": "Lowell", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "37490", + "name": "Lynn", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "45000", + "name": "New Bedford", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "55745", + "name": "Quincy", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "67000", + "name": "Springfield", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "82000", + "name": "Worcester", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "04000", + "name": "Baltimore", + "state": "MD", + "state_name": "Maryland", + }, + { + "fips": "03000", + "name": "Ann Arbor", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "21000", + "name": "Dearborn", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "22000", + "name": "Detroit", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "34000", + "name": "Grand Rapids", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "46000", + "name": "Lansing", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "76460", + "name": "Sterling Heights", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "84000", + "name": "Warren", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "43000", + "name": "Minneapolis", + "state": "MN", + "state_name": "Minnesota", + }, + { + "fips": "54880", + "name": "Rochester", + "state": "MN", + "state_name": "Minnesota", + }, + { + "fips": "58000", + "name": "St. 
Paul", + "state": "MN", + "state_name": "Minnesota", + }, + { + "fips": "15670", + "name": "Columbia", + "state": "MO", + "state_name": "Missouri", + }, + { + "fips": "35000", + "name": "Independence", + "state": "MO", + "state_name": "Missouri", + }, + { + "fips": "38000", + "name": "Kansas City", + "state": "MO", + "state_name": "Missouri", + }, + { + "fips": "41348", + "name": "Lee's Summit", + "state": "MO", + "state_name": "Missouri", + }, + { + "fips": "70000", + "name": "Springfield", + "state": "MO", + "state_name": "Missouri", + }, + { + "fips": "65000", + "name": "St. Louis", + "state": "MO", + "state_name": "Missouri", + }, + { + "fips": "36000", + "name": "Jackson", + "state": "MS", + "state_name": "Mississippi", + }, + { + "fips": "06550", + "name": "Billings", + "state": "MT", + "state_name": "Montana", + }, + { + "fips": "10740", + "name": "Cary", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "12000", + "name": "Charlotte", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "14100", + "name": "Concord", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "19000", + "name": "Durham", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "22920", + "name": "Fayetteville", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "28000", + "name": "Greensboro", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "31400", + "name": "High Point", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "55000", + "name": "Raleigh", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "74440", + "name": "Wilmington", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "75000", + "name": "Winston-Salem", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "25700", + "name": "Fargo", + "state": "ND", + "state_name": "North Dakota", + }, + { + "fips": "28000", + "name": "Lincoln", + 
"state": "NE", + "state_name": "Nebraska", + }, + { + "fips": "37000", + "name": "Omaha", + "state": "NE", + "state_name": "Nebraska", + }, + { + "fips": "45140", + "name": "Manchester", + "state": "NH", + "state_name": "New Hampshire", + }, + { + "fips": "21000", + "name": "Elizabeth", + "state": "NJ", + "state_name": "New Jersey", + }, + { + "fips": "36000", + "name": "Jersey City", + "state": "NJ", + "state_name": "New Jersey", + }, + { + "fips": "51000", + "name": "Newark", + "state": "NJ", + "state_name": "New Jersey", + }, + { + "fips": "57000", + "name": "Paterson", + "state": "NJ", + "state_name": "New Jersey", + }, + { + "fips": "02000", + "name": "Albuquerque", + "state": "NM", + "state_name": "New Mexico", + }, + { + "fips": "39380", + "name": "Las Cruces", + "state": "NM", + "state_name": "New Mexico", + }, + { + "fips": "63460", + "name": "Rio Rancho", + "state": "NM", + "state_name": "New Mexico", + }, + { + "fips": "31900", + "name": "Henderson", + "state": "NV", + "state_name": "Nevada", + }, + { + "fips": "40000", + "name": "Las Vegas", + "state": "NV", + "state_name": "Nevada", + }, + { + "fips": "51800", + "name": "North Las Vegas", + "state": "NV", + "state_name": "Nevada", + }, {"fips": "60600", "name": "Reno", "state": "NV", "state_name": "Nevada"}, {"fips": "68400", "name": "Sparks", "state": "NV", "state_name": "Nevada"}, - {"fips": "01000", "name": "Albany", "state": "NY", "state_name": "New York"}, - {"fips": "11000", "name": "Buffalo", "state": "NY", "state_name": "New York"}, - {"fips": "51000", "name": "New York City", "state": "NY", "state_name": "New York"}, - {"fips": "63000", "name": "Rochester", "state": "NY", "state_name": "New York"}, - {"fips": "73000", "name": "Syracuse", "state": "NY", "state_name": "New York"}, - {"fips": "84000", "name": "Yonkers", "state": "NY", "state_name": "New York"}, + { + "fips": "01000", + "name": "Albany", + "state": "NY", + "state_name": "New York", + }, + { + "fips": "11000", + "name": "Buffalo", 
+ "state": "NY", + "state_name": "New York", + }, + { + "fips": "51000", + "name": "New York City", + "state": "NY", + "state_name": "New York", + }, + { + "fips": "63000", + "name": "Rochester", + "state": "NY", + "state_name": "New York", + }, + { + "fips": "73000", + "name": "Syracuse", + "state": "NY", + "state_name": "New York", + }, + { + "fips": "84000", + "name": "Yonkers", + "state": "NY", + "state_name": "New York", + }, {"fips": "01000", "name": "Akron", "state": "OH", "state_name": "Ohio"}, - {"fips": "15000", "name": "Cincinnati", "state": "OH", "state_name": "Ohio"}, - {"fips": "16000", "name": "Cleveland", "state": "OH", "state_name": "Ohio"}, + { + "fips": "15000", + "name": "Cincinnati", + "state": "OH", + "state_name": "Ohio", + }, + { + "fips": "16000", + "name": "Cleveland", + "state": "OH", + "state_name": "Ohio", + }, {"fips": "18000", "name": "Columbus", "state": "OH", "state_name": "Ohio"}, {"fips": "21000", "name": "Dayton", "state": "OH", "state_name": "Ohio"}, {"fips": "77000", "name": "Toledo", "state": "OH", "state_name": "Ohio"}, - {"fips": "09050", "name": "Broken Arrow", "state": "OK", "state_name": "Oklahoma"}, - {"fips": "52500", "name": "Norman", "state": "OK", "state_name": "Oklahoma"}, - {"fips": "55000", "name": "Oklahoma City", "state": "OK", "state_name": "Oklahoma"}, - {"fips": "75000", "name": "Tulsa", "state": "OK", "state_name": "Oklahoma"}, + { + "fips": "09050", + "name": "Broken Arrow", + "state": "OK", + "state_name": "Oklahoma", + }, + { + "fips": "52500", + "name": "Norman", + "state": "OK", + "state_name": "Oklahoma", + }, + { + "fips": "55000", + "name": "Oklahoma City", + "state": "OK", + "state_name": "Oklahoma", + }, + { + "fips": "75000", + "name": "Tulsa", + "state": "OK", + "state_name": "Oklahoma", + }, {"fips": "05800", "name": "Bend", "state": "OR", "state_name": "Oregon"}, {"fips": "23850", "name": "Eugene", "state": "OR", "state_name": "Oregon"}, - {"fips": "31250", "name": "Gresham", "state": "OR", 
"state_name": "Oregon"}, - {"fips": "34100", "name": "Hillsboro", "state": "OR", "state_name": "Oregon"}, - {"fips": "59000", "name": "Portland", "state": "OR", "state_name": "Oregon"}, + { + "fips": "31250", + "name": "Gresham", + "state": "OR", + "state_name": "Oregon", + }, + { + "fips": "34100", + "name": "Hillsboro", + "state": "OR", + "state_name": "Oregon", + }, + { + "fips": "59000", + "name": "Portland", + "state": "OR", + "state_name": "Oregon", + }, {"fips": "64900", "name": "Salem", "state": "OR", "state_name": "Oregon"}, - {"fips": "02000", "name": "Allentown", "state": "PA", "state_name": "Pennsylvania"}, - {"fips": "60000", "name": "Philadelphia", "state": "PA", "state_name": "Pennsylvania"}, - {"fips": "61000", "name": "Pittsburgh", "state": "PA", "state_name": "Pennsylvania"}, - {"fips": "59000", "name": "Providence", "state": "RI", "state_name": "Rhode Island"}, - {"fips": "13330", "name": "Charleston", "state": "SC", "state_name": "South Carolina"}, - {"fips": "16000", "name": "Columbia", "state": "SC", "state_name": "South Carolina"}, - {"fips": "50875", "name": "North Charleston", "state": "SC", "state_name": "South Carolina"}, - {"fips": "59020", "name": "Sioux Falls", "state": "SD", "state_name": "South Dakota"}, - {"fips": "14000", "name": "Chattanooga", "state": "TN", "state_name": "Tennessee"}, - {"fips": "15160", "name": "Clarksville", "state": "TN", "state_name": "Tennessee"}, - {"fips": "40000", "name": "Knoxville", "state": "TN", "state_name": "Tennessee"}, - {"fips": "48000", "name": "Memphis", "state": "TN", "state_name": "Tennessee"}, - {"fips": "51560", "name": "Murfreesboro", "state": "TN", "state_name": "Tennessee"}, - -# Extracted 332 places - {"fips": "52006", "name": "Nashville-Davidson", "state": "TN", "state_name": "Tennessee"}, + { + "fips": "02000", + "name": "Allentown", + "state": "PA", + "state_name": "Pennsylvania", + }, + { + "fips": "60000", + "name": "Philadelphia", + "state": "PA", + "state_name": "Pennsylvania", + 
}, + { + "fips": "61000", + "name": "Pittsburgh", + "state": "PA", + "state_name": "Pennsylvania", + }, + { + "fips": "59000", + "name": "Providence", + "state": "RI", + "state_name": "Rhode Island", + }, + { + "fips": "13330", + "name": "Charleston", + "state": "SC", + "state_name": "South Carolina", + }, + { + "fips": "16000", + "name": "Columbia", + "state": "SC", + "state_name": "South Carolina", + }, + { + "fips": "50875", + "name": "North Charleston", + "state": "SC", + "state_name": "South Carolina", + }, + { + "fips": "59020", + "name": "Sioux Falls", + "state": "SD", + "state_name": "South Dakota", + }, + { + "fips": "14000", + "name": "Chattanooga", + "state": "TN", + "state_name": "Tennessee", + }, + { + "fips": "15160", + "name": "Clarksville", + "state": "TN", + "state_name": "Tennessee", + }, + { + "fips": "40000", + "name": "Knoxville", + "state": "TN", + "state_name": "Tennessee", + }, + { + "fips": "48000", + "name": "Memphis", + "state": "TN", + "state_name": "Tennessee", + }, + { + "fips": "51560", + "name": "Murfreesboro", + "state": "TN", + "state_name": "Tennessee", + }, + # Extracted 332 places + { + "fips": "52006", + "name": "Nashville-Davidson", + "state": "TN", + "state_name": "Tennessee", + }, {"fips": "01000", "name": "Abilene", "state": "TX", "state_name": "Texas"}, {"fips": "01924", "name": "Allen", "state": "TX", "state_name": "Texas"}, - {"fips": "03000", "name": "Amarillo", "state": "TX", "state_name": "Texas"}, - {"fips": "04000", "name": "Arlington", "state": "TX", "state_name": "Texas"}, + { + "fips": "03000", + "name": "Amarillo", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "04000", + "name": "Arlington", + "state": "TX", + "state_name": "Texas", + }, {"fips": "05000", "name": "Austin", "state": "TX", "state_name": "Texas"}, - {"fips": "07000", "name": "Beaumont", "state": "TX", "state_name": "Texas"}, - {"fips": "10768", "name": "Brownsville", "state": "TX", "state_name": "Texas"}, - {"fips": "13024", "name": 
"Carrollton", "state": "TX", "state_name": "Texas"}, - {"fips": "15976", "name": "College Station", "state": "TX", "state_name": "Texas"}, + { + "fips": "07000", + "name": "Beaumont", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "10768", + "name": "Brownsville", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "13024", + "name": "Carrollton", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "15976", + "name": "College Station", + "state": "TX", + "state_name": "Texas", + }, {"fips": "16432", "name": "Conroe", "state": "TX", "state_name": "Texas"}, - {"fips": "17000", "name": "Corpus Christi", "state": "TX", "state_name": "Texas"}, + { + "fips": "17000", + "name": "Corpus Christi", + "state": "TX", + "state_name": "Texas", + }, {"fips": "19000", "name": "Dallas", "state": "TX", "state_name": "Texas"}, {"fips": "19972", "name": "Denton", "state": "TX", "state_name": "Texas"}, - {"fips": "22660", "name": "Edinburg", "state": "TX", "state_name": "Texas"}, + { + "fips": "22660", + "name": "Edinburg", + "state": "TX", + "state_name": "Texas", + }, {"fips": "24000", "name": "El Paso", "state": "TX", "state_name": "Texas"}, - {"fips": "27000", "name": "Fort Worth", "state": "TX", "state_name": "Texas"}, + { + "fips": "27000", + "name": "Fort Worth", + "state": "TX", + "state_name": "Texas", + }, {"fips": "27684", "name": "Frisco", "state": "TX", "state_name": "Texas"}, {"fips": "29000", "name": "Garland", "state": "TX", "state_name": "Texas"}, - {"fips": "30464", "name": "Grand Prairie", "state": "TX", "state_name": "Texas"}, + { + "fips": "30464", + "name": "Grand Prairie", + "state": "TX", + "state_name": "Texas", + }, {"fips": "35000", "name": "Houston", "state": "TX", "state_name": "Texas"}, {"fips": "37000", "name": "Irving", "state": "TX", "state_name": "Texas"}, {"fips": "39148", "name": "Killeen", "state": "TX", "state_name": "Texas"}, {"fips": "41464", "name": "Laredo", "state": "TX", "state_name": "Texas"}, - {"fips": 
"41980", "name": "League City", "state": "TX", "state_name": "Texas"}, - {"fips": "42508", "name": "Lewisville", "state": "TX", "state_name": "Texas"}, + { + "fips": "41980", + "name": "League City", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "42508", + "name": "Lewisville", + "state": "TX", + "state_name": "Texas", + }, {"fips": "45000", "name": "Lubbock", "state": "TX", "state_name": "Texas"}, {"fips": "45384", "name": "McAllen", "state": "TX", "state_name": "Texas"}, - {"fips": "45744", "name": "McKinney", "state": "TX", "state_name": "Texas"}, - {"fips": "47892", "name": "Mesquite", "state": "TX", "state_name": "Texas"}, + { + "fips": "45744", + "name": "McKinney", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "47892", + "name": "Mesquite", + "state": "TX", + "state_name": "Texas", + }, {"fips": "48072", "name": "Midland", "state": "TX", "state_name": "Texas"}, - {"fips": "50820", "name": "New Braunfels", "state": "TX", "state_name": "Texas"}, + { + "fips": "50820", + "name": "New Braunfels", + "state": "TX", + "state_name": "Texas", + }, {"fips": "53388", "name": "Odessa", "state": "TX", "state_name": "Texas"}, - {"fips": "56000", "name": "Pasadena", "state": "TX", "state_name": "Texas"}, - {"fips": "56348", "name": "Pearland", "state": "TX", "state_name": "Texas"}, + { + "fips": "56000", + "name": "Pasadena", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "56348", + "name": "Pearland", + "state": "TX", + "state_name": "Texas", + }, {"fips": "58016", "name": "Plano", "state": "TX", "state_name": "Texas"}, - {"fips": "61796", "name": "Richardson", "state": "TX", "state_name": "Texas"}, - {"fips": "63500", "name": "Round Rock", "state": "TX", "state_name": "Texas"}, - {"fips": "65000", "name": "San Antonio", "state": "TX", "state_name": "Texas"}, - {"fips": "70808", "name": "Sugar Land", "state": "TX", "state_name": "Texas"}, + { + "fips": "61796", + "name": "Richardson", + "state": "TX", + "state_name": "Texas", + 
}, + { + "fips": "63500", + "name": "Round Rock", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "65000", + "name": "San Antonio", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "70808", + "name": "Sugar Land", + "state": "TX", + "state_name": "Texas", + }, {"fips": "74144", "name": "Tyler", "state": "TX", "state_name": "Texas"}, {"fips": "76000", "name": "Waco", "state": "TX", "state_name": "Texas"}, - {"fips": "79000", "name": "Wichita Falls", "state": "TX", "state_name": "Texas"}, + { + "fips": "79000", + "name": "Wichita Falls", + "state": "TX", + "state_name": "Texas", + }, {"fips": "62470", "name": "Provo", "state": "UT", "state_name": "Utah"}, - {"fips": "67000", "name": "Salt Lake City", "state": "UT", "state_name": "Utah"}, - {"fips": "65330", "name": "St. George", "state": "UT", "state_name": "Utah"}, - {"fips": "82950", "name": "West Jordan", "state": "UT", "state_name": "Utah"}, - {"fips": "83470", "name": "West Valley City", "state": "UT", "state_name": "Utah"}, - {"fips": "01000", "name": "Alexandria", "state": "VA", "state_name": "Virginia"}, - {"fips": "16000", "name": "Chesapeake", "state": "VA", "state_name": "Virginia"}, - {"fips": "35000", "name": "Hampton", "state": "VA", "state_name": "Virginia"}, - {"fips": "56000", "name": "Newport News", "state": "VA", "state_name": "Virginia"}, - {"fips": "57000", "name": "Norfolk", "state": "VA", "state_name": "Virginia"}, - {"fips": "67000", "name": "Richmond", "state": "VA", "state_name": "Virginia"}, - {"fips": "76432", "name": "Suffolk", "state": "VA", "state_name": "Virginia"}, - {"fips": "82000", "name": "Virginia Beach", "state": "VA", "state_name": "Virginia"}, - {"fips": "05210", "name": "Bellevue", "state": "WA", "state_name": "Washington"}, - {"fips": "22640", "name": "Everett", "state": "WA", "state_name": "Washington"}, - {"fips": "35415", "name": "Kent", "state": "WA", "state_name": "Washington"}, - {"fips": "57745", "name": "Renton", "state": "WA", "state_name": 
"Washington"}, - {"fips": "63000", "name": "Seattle", "state": "WA", "state_name": "Washington"}, - {"fips": "67167", "name": "Spokane Valley", "state": "WA", "state_name": "Washington"}, - {"fips": "67000", "name": "Spokane", "state": "WA", "state_name": "Washington"}, - {"fips": "70000", "name": "Tacoma", "state": "WA", "state_name": "Washington"}, - {"fips": "74060", "name": "Vancouver", "state": "WA", "state_name": "Washington"}, - {"fips": "31000", "name": "Green Bay", "state": "WI", "state_name": "Wisconsin"}, - {"fips": "48000", "name": "Madison", "state": "WI", "state_name": "Wisconsin"}, - {"fips": "53000", "name": "Milwaukee", "state": "WI", "state_name": "Wisconsin"}, + { + "fips": "67000", + "name": "Salt Lake City", + "state": "UT", + "state_name": "Utah", + }, + { + "fips": "65330", + "name": "St. George", + "state": "UT", + "state_name": "Utah", + }, + { + "fips": "82950", + "name": "West Jordan", + "state": "UT", + "state_name": "Utah", + }, + { + "fips": "83470", + "name": "West Valley City", + "state": "UT", + "state_name": "Utah", + }, + { + "fips": "01000", + "name": "Alexandria", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "16000", + "name": "Chesapeake", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "35000", + "name": "Hampton", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "56000", + "name": "Newport News", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "57000", + "name": "Norfolk", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "67000", + "name": "Richmond", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "76432", + "name": "Suffolk", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "82000", + "name": "Virginia Beach", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "05210", + "name": "Bellevue", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "22640", + "name": "Everett", + "state": "WA", 
+ "state_name": "Washington", + }, + { + "fips": "35415", + "name": "Kent", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "57745", + "name": "Renton", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "63000", + "name": "Seattle", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "67167", + "name": "Spokane Valley", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "67000", + "name": "Spokane", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "70000", + "name": "Tacoma", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "74060", + "name": "Vancouver", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "31000", + "name": "Green Bay", + "state": "WI", + "state_name": "Wisconsin", + }, + { + "fips": "48000", + "name": "Madison", + "state": "WI", + "state_name": "Wisconsin", + }, + { + "fips": "53000", + "name": "Milwaukee", + "state": "WI", + "state_name": "Wisconsin", + }, ] diff --git a/src/policyengine/utils/parameter_labels.py b/src/policyengine/utils/parameter_labels.py index 2fd3e25c..6a574be8 100644 --- a/src/policyengine/utils/parameter_labels.py +++ b/src/policyengine/utils/parameter_labels.py @@ -81,9 +81,7 @@ def _generate_breakdown_label(param_node, system, breakdown_parent=None): breakdown_labels = breakdown_parent.metadata.get("breakdown_labels", []) # Collect dimension values from breakdown parent to param_node - dimension_values = _collect_dimension_values( - param_node, breakdown_parent - ) + dimension_values = _collect_dimension_values(param_node, breakdown_parent) if not dimension_values: return None @@ -146,7 +144,12 @@ def _format_dimension_value(value, var_name, dim_label, system): str: Formatted dimension value """ # First, try to get enum display value - if var_name and isinstance(var_name, str) and not var_name.startswith("range(") and not var_name.startswith("list("): + if ( + var_name + and isinstance(var_name, str) + and not 
var_name.startswith("range(") + and not var_name.startswith("list(") + ): var = system.variables.get(var_name) if var and hasattr(var, "possible_values") and var.possible_values: try: diff --git a/tests/fixtures/region_fixtures.py b/tests/fixtures/region_fixtures.py index ca1adfe2..d08fb6d3 100644 --- a/tests/fixtures/region_fixtures.py +++ b/tests/fixtures/region_fixtures.py @@ -71,7 +71,9 @@ def create_sample_us_registry() -> RegionRegistry: create_national_region(), create_state_region("CA", "California"), create_state_region("NY", "New York"), - create_place_region("CA", "44000", "Los Angeles city", "California"), + create_place_region( + "CA", "44000", "Los Angeles city", "California" + ), ], ) @@ -84,7 +86,9 @@ def create_sample_us_registry() -> RegionRegistry: STATE_NEW_YORK = create_state_region("NY", "New York") -PLACE_LOS_ANGELES = create_place_region("CA", "44000", "Los Angeles city", "California") +PLACE_LOS_ANGELES = create_place_region( + "CA", "44000", "Los Angeles city", "California" +) SIMPLE_REGION = Region( code="state/ca", diff --git a/tests/test_pandas3_compatibility.py b/tests/test_pandas3_compatibility.py index 93fecd4d..98481aa1 100644 --- a/tests/test_pandas3_compatibility.py +++ b/tests/test_pandas3_compatibility.py @@ -1,4 +1,5 @@ """Test pandas 3.0 compatibility with enum encoding.""" + import pandas as pd from policyengine_core.enums import Enum diff --git a/tests/test_parameter_labels.py b/tests/test_parameter_labels.py index f0584419..4e66fc0a 100644 --- a/tests/test_parameter_labels.py +++ b/tests/test_parameter_labels.py @@ -435,7 +435,9 @@ def test__given_single_level_breakdown_with_enum__then_generates_label_with_enum result = generate_label_for_parameter(param, system, scale_lookup) # Then: Label uses enum display value - assert result == "Tax exemption by filing status (Married filing jointly)" + assert ( + result == "Tax exemption by filing status (Married filing jointly)" + ) def 
test__given_single_level_breakdown_without_enum__then_generates_label_with_raw_key( self, @@ -642,7 +644,10 @@ def test__given_nested_breakdown_with_enum_and_range__then_generates_full_label( # Then # Without snap_region enum in system, uses breakdown_label for first dimension too - assert result == "SNAP max allotment (SNAP region CONTIGUOUS_US, Household size 1)" + assert ( + result + == "SNAP max allotment (SNAP region CONTIGUOUS_US, Household size 1)" + ) def test__given_breakdown_labels_for_range__then_includes_semantic_label( self, @@ -720,7 +725,10 @@ def test__given_three_level_nesting__then_generates_all_dimensions(self): result = generate_label_for_parameter(param, system, scale_lookup) # Then - assert result == "State sales tax (CA, Income bracket 3, Exemption count 5)" + assert ( + result + == "State sales tax (CA, Income bracket 3, Exemption count 5)" + ) def test__given_missing_breakdown_labels__then_uses_raw_values(self): # Given @@ -785,9 +793,13 @@ def test__given_enum_range_enum_nesting__then_formats_each_correctly(self): result = generate_label_for_parameter(param, system, scale_lookup) # Then: Enum values use display names, range uses breakdown_label - assert result == "Earned income credit (CA, Number of children 2, Single)" + assert ( + result == "Earned income credit (CA, Number of children 2, Single)" + ) - def test__given_range_enum_range_nesting__then_formats_each_correctly(self): + def test__given_range_enum_range_nesting__then_formats_each_correctly( + self, + ): # Given: range -> enum -> range nesting breakdown_parent = create_mock_parent_node( name="gov.childcare.subsidy", @@ -821,13 +833,18 @@ def test__given_range_enum_range_nesting__then_formats_each_correctly(self): == "Childcare subsidy (Age group 2, Head of household, Household size 5)" ) - def test__given_partial_breakdown_labels__then_uses_labels_where_available(self): + def test__given_partial_breakdown_labels__then_uses_labels_where_available( + self, + ): # Given: 
breakdown_labels list shorter than breakdown list breakdown_parent = create_mock_parent_node( name="gov.benefits.utility", label="Utility allowance", breakdown=["area_code", "range(1, 20)", "housing_type"], - breakdown_labels=["Area", "Household size"], # Missing label for housing_type + breakdown_labels=[ + "Area", + "Household size", + ], # Missing label for housing_type ) level1 = create_mock_parent_node( name="gov.benefits.utility.AREA_1", @@ -848,7 +865,10 @@ def test__given_partial_breakdown_labels__then_uses_labels_where_available(self) result = generate_label_for_parameter(param, system, scale_lookup) # Then: Uses breakdown_labels where available, raw value for missing label - assert result == "Utility allowance (Area AREA_1, Household size 3, RENTER)" + assert ( + result + == "Utility allowance (Area AREA_1, Household size 3, RENTER)" + ) def test__given_four_level_nesting_with_mixed_types__then_generates_all_dimensions( self, @@ -857,8 +877,18 @@ def test__given_four_level_nesting_with_mixed_types__then_generates_all_dimensio breakdown_parent = create_mock_parent_node( name="gov.irs.deductions.sales_tax", label="State sales tax deduction", - breakdown=["state_code", "filing_status", "range(1, 7)", "range(1, 20)"], - breakdown_labels=["State", "Filing status", "Exemption count", "Income bracket"], + breakdown=[ + "state_code", + "filing_status", + "range(1, 7)", + "range(1, 20)", + ], + breakdown_labels=[ + "State", + "Filing status", + "Exemption count", + "Income bracket", + ], ) level1 = create_mock_parent_node( name="gov.irs.deductions.sales_tax.NY", diff --git a/tests/test_region.py b/tests/test_region.py index 6669ec1c..bc7ee0f6 100644 --- a/tests/test_region.py +++ b/tests/test_region.py @@ -1,16 +1,10 @@ """Tests for Region and RegionRegistry classes.""" -import pytest - from policyengine.core.region import Region, RegionRegistry - from tests.fixtures.region_fixtures import ( FILTER_REGION, REGION_WITH_DATASET, - SIMPLE_REGION, - 
create_sample_us_registry, create_state_region, - sample_registry, ) @@ -68,9 +62,17 @@ def test__given_same_codes__then_regions_are_equal(self): Then: They are equal regardless of other fields """ # Given - region1 = Region(code="state/ca", label="California", region_type="state") - region2 = Region(code="state/ca", label="California (different)", region_type="state") - region3 = Region(code="state/ny", label="New York", region_type="state") + region1 = Region( + code="state/ca", label="California", region_type="state" + ) + region2 = Region( + code="state/ca", + label="California (different)", + region_type="state", + ) + region3 = Region( + code="state/ny", label="New York", region_type="state" + ) # Then assert region1 == region2 @@ -82,9 +84,17 @@ def test__given_region__then_can_use_as_dict_key_or_in_set(self): Then: Regions with same code are deduplicated """ # Given - region1 = Region(code="state/ca", label="California", region_type="state") - region2 = Region(code="state/ca", label="California (duplicate)", region_type="state") - region3 = Region(code="state/ny", label="New York", region_type="state") + region1 = Region( + code="state/ca", label="California", region_type="state" + ) + region2 = Region( + code="state/ca", + label="California (duplicate)", + region_type="state", + ) + region3 = Region( + code="state/ny", label="New York", region_type="state" + ) # When region_set = {region1, region2, region3} @@ -98,7 +108,9 @@ def test__given_region__then_can_use_as_dict_key_or_in_set(self): class TestRegionRegistry: """Tests for the RegionRegistry class.""" - def test__given_registry_with_regions__then_length_is_correct(self, sample_registry): + def test__given_registry_with_regions__then_length_is_correct( + self, sample_registry + ): """Given: Registry with 4 regions When: Checking length Then: Length is 4 @@ -106,7 +118,9 @@ def test__given_registry_with_regions__then_length_is_correct(self, sample_regis # Then assert len(sample_registry) == 4 - def 
test__given_registry__then_can_iterate_over_regions(self, sample_registry): + def test__given_registry__then_can_iterate_over_regions( + self, sample_registry + ): """Given: Registry with regions When: Iterating Then: All region codes are accessible @@ -119,7 +133,9 @@ def test__given_registry__then_can_iterate_over_regions(self, sample_registry): assert "state/ca" in codes assert "place/CA-44000" in codes - def test__given_existing_code__then_code_is_in_registry(self, sample_registry): + def test__given_existing_code__then_code_is_in_registry( + self, sample_registry + ): """Given: Registry with state/ca When: Checking if code exists Then: Returns True for existing, False for missing @@ -142,7 +158,9 @@ def test__given_valid_code__then_get_returns_region(self, sample_registry): assert ca.label == "California" assert missing is None - def test__given_type__then_get_by_type_returns_matching_regions(self, sample_registry): + def test__given_type__then_get_by_type_returns_matching_regions( + self, sample_registry + ): """Given: Registry with 2 states and 1 place When: Getting by type Then: Returns correct regions for each type @@ -158,7 +176,9 @@ def test__given_type__then_get_by_type_returns_matching_regions(self, sample_reg assert len(places) == 1 assert counties == [] - def test__given_registry__then_get_national_returns_national_region(self, sample_registry): + def test__given_registry__then_get_national_returns_national_region( + self, sample_registry + ): """Given: Registry with national region When: Getting national Then: Returns the national region @@ -171,7 +191,9 @@ def test__given_registry__then_get_national_returns_national_region(self, sample assert national.code == "us" assert national.region_type == "national" - def test__given_parent_code__then_get_children_returns_child_regions(self, sample_registry): + def test__given_parent_code__then_get_children_returns_child_regions( + self, sample_registry + ): """Given: Registry with states under "us" When: 
Getting children of "us" Then: Returns state regions @@ -214,7 +236,9 @@ def test__given_registry__then_get_filter_regions_returns_regions_requiring_filt assert len(filter_regions) == 1 assert filter_regions[0].code == "place/CA-44000" - def test__given_registry__then_can_add_region_dynamically(self, sample_registry): + def test__given_registry__then_can_add_region_dynamically( + self, sample_registry + ): """Given: Registry with 4 regions When: Adding a new region Then: Registry contains 5 regions and new region is indexed diff --git a/tests/test_uk_regions.py b/tests/test_uk_regions.py index b13026f0..cbaa5328 100644 --- a/tests/test_uk_regions.py +++ b/tests/test_uk_regions.py @@ -66,7 +66,10 @@ def test__given_uk_registry__then_has_national_region(self): assert national.code == "uk" assert national.label == "United Kingdom" assert national.region_type == "national" - assert national.dataset_path == f"{UK_DATA_BUCKET}/enhanced_frs_2023_24.h5" + assert ( + national.dataset_path + == f"{UK_DATA_BUCKET}/enhanced_frs_2023_24.h5" + ) assert not national.requires_filter def test__given_uk_registry__then_has_four_country_regions(self): @@ -188,7 +191,9 @@ def test__given_default_registry__then_has_5_regions(self): class TestUKRegionRegistryBuilder: """Tests for UK registry builder with optional regions.""" - def test__given_builder_without_optional_regions__then_returns_5_regions(self): + def test__given_builder_without_optional_regions__then_returns_5_regions( + self, + ): """Given: build_uk_region_registry with optional regions disabled When: Building registry Then: Returns 5 base regions only diff --git a/tests/test_us_regions.py b/tests/test_us_regions.py index 5f7d39b7..54149305 100644 --- a/tests/test_us_regions.py +++ b/tests/test_us_regions.py @@ -1,7 +1,10 @@ """Tests for US region definitions.""" -from policyengine.countries.us.data import AT_LARGE_STATES, DISTRICT_COUNTS, US_PLACES, US_STATES -from policyengine.countries.us.regions import US_DATA_BUCKET, 
us_region_registry +from policyengine.countries.us.data import DISTRICT_COUNTS, US_STATES +from policyengine.countries.us.regions import ( + US_DATA_BUCKET, + us_region_registry, +) class TestUSStates: @@ -46,7 +49,9 @@ def test__given_district_counts__then_every_state_has_count(self): """ # When/Then for state in US_STATES: - assert state in DISTRICT_COUNTS, f"Missing district count for {state}" + assert state in DISTRICT_COUNTS, ( + f"Missing district count for {state}" + ) def test__given_district_counts__then_total_is_436(self): """Given: DISTRICT_COUNTS dictionary @@ -101,7 +106,9 @@ def test__given_us_registry__then_has_national_region(self): assert national.code == "us" assert national.label == "United States" assert national.region_type == "national" - assert national.dataset_path == f"{US_DATA_BUCKET}/enhanced_cps_2024.h5" + assert ( + national.dataset_path == f"{US_DATA_BUCKET}/enhanced_cps_2024.h5" + ) def test__given_us_registry__then_has_51_states(self): """Given: US region registry @@ -171,7 +178,10 @@ def test__given_dc_district__then_is_at_large(self): # Then assert dc_al is not None - assert dc_al.label == "District of Columbia's at-large congressional district" + assert ( + dc_al.label + == "District of Columbia's at-large congressional district" + ) assert dc_al.parent_code == "state/dc" def test__given_us_registry__then_has_places(self): @@ -204,14 +214,18 @@ def test__given_los_angeles_region__then_has_correct_format(self): assert la.state_code == "CA" assert la.dataset_path is None # No dedicated dataset - def test__given_california__then_children_include_districts_and_places(self): + def test__given_california__then_children_include_districts_and_places( + self, + ): """Given: California state region When: Getting its children Then: Includes all 52 districts and 10+ places """ # When ca_children = us_region_registry.get_children("state/ca") - district_children = [c for c in ca_children if c.region_type == "congressional_district"] + 
district_children = [ + c for c in ca_children if c.region_type == "congressional_district" + ] place_children = [c for c in ca_children if c.region_type == "place"] # Then From cf799e502168978f1f47f5bd73f221f1778e407d Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 10 Feb 2026 18:19:35 +0100 Subject: [PATCH 3/8] fix: Add conftest.py for pytest fixture discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sample_registry fixture was not being discovered by pytest after linting removed unused imports. Moving fixture imports to conftest.py is the standard pytest pattern for shared fixtures. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- tests/conftest.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..40c6f69b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,7 @@ +"""Pytest configuration and shared fixtures.""" + +# Import fixtures from fixtures module so pytest can discover them +from tests.fixtures.region_fixtures import ( # noqa: F401 + empty_registry, + sample_registry, +) From 65ff6de0a38050672675f6c63ed5b41b571af3c6 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 10 Feb 2026 19:50:13 +0100 Subject: [PATCH 4/8] feat: Add regional dataset filtering support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add filter_field and filter_value parameters to Simulation class - Add _build_entity_relationships() to US and UK models for mapping persons to all containing entities - Add _filter_dataset_by_household_variable() to filter datasets while preserving entity integrity - Apply filtering in run() method when filter parameters are set This enables filtering datasets by household-level variables like place_fips (US) or country (UK) for regional analysis. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/policyengine/core/simulation.py | 10 + .../tax_benefit_models/uk/model.py | 144 +++++++++++++ .../tax_benefit_models/us/model.py | 189 ++++++++++++++++++ 3 files changed, 343 insertions(+) diff --git a/src/policyengine/core/simulation.py b/src/policyengine/core/simulation.py index 0bbc4ccc..d3208be8 100644 --- a/src/policyengine/core/simulation.py +++ b/src/policyengine/core/simulation.py @@ -21,6 +21,16 @@ class Simulation(BaseModel): dynamic: Dynamic | None = None dataset: Dataset = None + # Regional filtering parameters + filter_field: str | None = Field( + default=None, + description="Household-level variable to filter dataset by (e.g., 'place_fips', 'country')", + ) + filter_value: str | None = Field( + default=None, + description="Value to match when filtering (e.g., '44000', 'ENGLAND')", + ) + tax_benefit_model_version: TaxBenefitModelVersion = None output_dataset: Dataset | None = None diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index 71cf78dc..dedbdc29 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -182,6 +182,143 @@ def __init__(self, **kwargs: dict): ) self.add_parameter(parameter) + def _build_entity_relationships( + self, dataset: PolicyEngineUKDataset + ) -> pd.DataFrame: + """Build a DataFrame mapping each person to their containing entities. + + Creates an explicit relationship map between persons and all entity + types (benunit, household). This enables filtering at any entity + level while preserving the integrity of all related entities. + + Args: + dataset: The dataset to extract relationships from. + + Returns: + A DataFrame indexed by person with columns for each entity ID. 
+ """ + person_data = pd.DataFrame(dataset.data.person) + + # Determine column naming convention + benunit_id_col = ( + "person_benunit_id" + if "person_benunit_id" in person_data.columns + else "benunit_id" + ) + household_id_col = ( + "person_household_id" + if "person_household_id" in person_data.columns + else "household_id" + ) + + entity_rel = pd.DataFrame( + { + "person_id": person_data["person_id"].values, + "benunit_id": person_data[benunit_id_col].values, + "household_id": person_data[household_id_col].values, + } + ) + + return entity_rel + + def _filter_dataset_by_household_variable( + self, + dataset: PolicyEngineUKDataset, + variable_name: str, + variable_value: str, + ) -> PolicyEngineUKDataset: + """Filter a dataset to only include households where a variable matches. + + Uses the entity relationship approach: builds an explicit map of all + entity relationships, filters at the household level, and keeps all + persons in matching households to preserve entity integrity. + + Args: + dataset: The dataset to filter. + variable_name: The name of the household-level variable to filter on. + variable_value: The value to match. Handles both str and bytes encoding. + + Returns: + A new filtered dataset containing only matching households. + """ + # Build entity relationships + entity_rel = self._build_entity_relationships(dataset) + + # Get household-level variable values + household_data = pd.DataFrame(dataset.data.household) + + if variable_name not in household_data.columns: + raise ValueError( + f"Variable '{variable_name}' not found in household data. 
" + f"Available columns: {list(household_data.columns)}" + ) + + hh_values = household_data[variable_name].values + hh_ids = household_data["household_id"].values + + # Create mask for matching households, handling bytes encoding + if isinstance(variable_value, str): + hh_mask = (hh_values == variable_value) | ( + hh_values == variable_value.encode() + ) + else: + hh_mask = hh_values == variable_value + + matching_hh_ids = set(hh_ids[hh_mask]) + + if len(matching_hh_ids) == 0: + raise ValueError( + f"No households found matching {variable_name}={variable_value}" + ) + + # Filter entity_rel to persons in matching households + person_mask = entity_rel["household_id"].isin(matching_hh_ids) + filtered_entity_rel = entity_rel[person_mask] + + # Get the filtered entity IDs + filtered_person_ids = set(filtered_entity_rel["person_id"]) + filtered_household_ids = matching_hh_ids + filtered_benunit_ids = set(filtered_entity_rel["benunit_id"]) + + # Filter each entity DataFrame + person_df = pd.DataFrame(dataset.data.person) + household_df = pd.DataFrame(dataset.data.household) + benunit_df = pd.DataFrame(dataset.data.benunit) + + filtered_person = person_df[ + person_df["person_id"].isin(filtered_person_ids) + ] + filtered_household = household_df[ + household_df["household_id"].isin(filtered_household_ids) + ] + filtered_benunit = benunit_df[ + benunit_df["benunit_id"].isin(filtered_benunit_ids) + ] + + # Create filtered dataset + return PolicyEngineUKDataset( + id=dataset.id + f"_filtered_{variable_name}_{variable_value}", + name=dataset.name, + description=f"{dataset.description} (filtered: {variable_name}={variable_value})", + filepath=dataset.filepath, + year=dataset.year, + is_output_dataset=dataset.is_output_dataset, + data=UKYearData( + person=MicroDataFrame( + filtered_person.reset_index(drop=True), + weights="person_weight", + ), + benunit=MicroDataFrame( + filtered_benunit.reset_index(drop=True), + weights="benunit_weight", + ), + household=MicroDataFrame( + 
filtered_household.reset_index(drop=True), + weights="household_weight", + ), + ), + ) + def run(self, simulation: "Simulation") -> "Simulation": from policyengine_uk import Microsimulation from policyengine_uk.data import UKSingleYearDataset @@ -194,6 +331,13 @@ def run(self, simulation: "Simulation") -> "Simulation": dataset = simulation.dataset dataset.load() + + # Apply regional filtering if specified + if simulation.filter_field and simulation.filter_value: + dataset = self._filter_dataset_by_household_variable( + dataset, simulation.filter_field, simulation.filter_value + ) + input_data = UKSingleYearDataset( person=dataset.data.person, benunit=dataset.data.benunit, diff --git a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py index b5191a19..1860c5c0 100644 --- a/src/policyengine/tax_benefit_models/us/model.py +++ b/src/policyengine/tax_benefit_models/us/model.py @@ -175,6 +175,189 @@ def __init__(self, **kwargs: dict): ) self.add_parameter(parameter) + def _build_entity_relationships( + self, dataset: PolicyEngineUSDataset + ) -> pd.DataFrame: + """Build a DataFrame mapping each person to their containing entities. + + Creates an explicit relationship map between persons and all entity + types (household, tax_unit, spm_unit, family, marital_unit). This + enables filtering at any entity level while preserving the integrity + of all related entities. + + Args: + dataset: The dataset to extract relationships from. + + Returns: + A DataFrame indexed by person with columns for each entity ID. 
+ """ + person_data = pd.DataFrame(dataset.data.person) + + # Determine column naming convention + household_id_col = ( + "person_household_id" + if "person_household_id" in person_data.columns + else "household_id" + ) + tax_unit_id_col = ( + "person_tax_unit_id" + if "person_tax_unit_id" in person_data.columns + else "tax_unit_id" + ) + spm_unit_id_col = ( + "person_spm_unit_id" + if "person_spm_unit_id" in person_data.columns + else "spm_unit_id" + ) + family_id_col = ( + "person_family_id" + if "person_family_id" in person_data.columns + else "family_id" + ) + marital_unit_id_col = ( + "person_marital_unit_id" + if "person_marital_unit_id" in person_data.columns + else "marital_unit_id" + ) + + entity_rel = pd.DataFrame( + { + "person_id": person_data["person_id"].values, + "household_id": person_data[household_id_col].values, + "tax_unit_id": person_data[tax_unit_id_col].values, + "spm_unit_id": person_data[spm_unit_id_col].values, + "family_id": person_data[family_id_col].values, + "marital_unit_id": person_data[marital_unit_id_col].values, + } + ) + + return entity_rel + + def _filter_dataset_by_household_variable( + self, + dataset: PolicyEngineUSDataset, + variable_name: str, + variable_value: str, + ) -> PolicyEngineUSDataset: + """Filter a dataset to only include households where a variable matches. + + Uses the entity relationship approach: builds an explicit map of all + entity relationships, filters at the household level, and keeps all + persons in matching households to preserve entity integrity. + + Args: + dataset: The dataset to filter. + variable_name: The name of the household-level variable to filter on. + variable_value: The value to match. Handles both str and bytes encoding. + + Returns: + A new filtered dataset containing only matching households. 
+ """ + # Build entity relationships + entity_rel = self._build_entity_relationships(dataset) + + # Get household-level variable values + household_data = pd.DataFrame(dataset.data.household) + + if variable_name not in household_data.columns: + raise ValueError( + f"Variable '{variable_name}' not found in household data. " + f"Available columns: {list(household_data.columns)}" + ) + + hh_values = household_data[variable_name].values + hh_ids = household_data["household_id"].values + + # Create mask for matching households, handling bytes encoding + if isinstance(variable_value, str): + hh_mask = (hh_values == variable_value) | ( + hh_values == variable_value.encode() + ) + else: + hh_mask = hh_values == variable_value + + matching_hh_ids = set(hh_ids[hh_mask]) + + if len(matching_hh_ids) == 0: + raise ValueError( + f"No households found matching {variable_name}={variable_value}" + ) + + # Filter entity_rel to persons in matching households + person_mask = entity_rel["household_id"].isin(matching_hh_ids) + filtered_entity_rel = entity_rel[person_mask] + + # Get the filtered entity IDs + filtered_person_ids = set(filtered_entity_rel["person_id"]) + filtered_household_ids = matching_hh_ids + filtered_tax_unit_ids = set(filtered_entity_rel["tax_unit_id"]) + filtered_spm_unit_ids = set(filtered_entity_rel["spm_unit_id"]) + filtered_family_ids = set(filtered_entity_rel["family_id"]) + filtered_marital_unit_ids = set(filtered_entity_rel["marital_unit_id"]) + + # Filter each entity DataFrame + person_df = pd.DataFrame(dataset.data.person) + household_df = pd.DataFrame(dataset.data.household) + tax_unit_df = pd.DataFrame(dataset.data.tax_unit) + spm_unit_df = pd.DataFrame(dataset.data.spm_unit) + family_df = pd.DataFrame(dataset.data.family) + marital_unit_df = pd.DataFrame(dataset.data.marital_unit) + + filtered_person = person_df[ + person_df["person_id"].isin(filtered_person_ids) + ] + filtered_household = household_df[ + 
household_df["household_id"].isin(filtered_household_ids) + ] + filtered_tax_unit = tax_unit_df[ + tax_unit_df["tax_unit_id"].isin(filtered_tax_unit_ids) + ] + filtered_spm_unit = spm_unit_df[ + spm_unit_df["spm_unit_id"].isin(filtered_spm_unit_ids) + ] + filtered_family = family_df[ + family_df["family_id"].isin(filtered_family_ids) + ] + filtered_marital_unit = marital_unit_df[ + marital_unit_df["marital_unit_id"].isin(filtered_marital_unit_ids) + ] + + # Create filtered dataset + return PolicyEngineUSDataset( + id=dataset.id + f"_filtered_{variable_name}_{variable_value}", + name=dataset.name, + description=f"{dataset.description} (filtered: {variable_name}={variable_value})", + filepath=dataset.filepath, + year=dataset.year, + is_output_dataset=dataset.is_output_dataset, + data=USYearData( + person=MicroDataFrame( + filtered_person.reset_index(drop=True), + weights="person_weight", + ), + household=MicroDataFrame( + filtered_household.reset_index(drop=True), + weights="household_weight", + ), + tax_unit=MicroDataFrame( + filtered_tax_unit.reset_index(drop=True), + weights="tax_unit_weight", + ), + spm_unit=MicroDataFrame( + filtered_spm_unit.reset_index(drop=True), + weights="spm_unit_weight", + ), + family=MicroDataFrame( + filtered_family.reset_index(drop=True), + weights="family_weight", + ), + marital_unit=MicroDataFrame( + filtered_marital_unit.reset_index(drop=True), + weights="marital_unit_weight", + ), + ), + ) + def run(self, simulation: "Simulation") -> "Simulation": from policyengine_us import Microsimulation from policyengine_us.system import system @@ -188,6 +371,12 @@ def run(self, simulation: "Simulation") -> "Simulation": dataset = simulation.dataset dataset.load() + # Apply regional filtering if specified + if simulation.filter_field and simulation.filter_value: + dataset = self._filter_dataset_by_household_variable( + dataset, simulation.filter_field, simulation.filter_value + ) + # Build simulation from entity IDs using PolicyEngine Core 
pattern microsim = Microsimulation() self._build_simulation_from_dataset(microsim, dataset, system) From ab42f8a4baca4e189957d923fa4a1b9a8e60fce5 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 10 Feb 2026 20:11:28 +0100 Subject: [PATCH 5/8] test: Add unit tests for dataset filtering functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add filtering_fixtures.py with US and UK test datasets - Add 18 unit tests for _build_entity_relationships and _filter_dataset_by_household_variable methods - Tests follow given-when-then pattern - All tests pass 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- tests/conftest.py | 4 + tests/fixtures/filtering_fixtures.py | 165 ++++++++++ tests/test_filtering.py | 439 +++++++++++++++++++++++++++ 3 files changed, 608 insertions(+) create mode 100644 tests/fixtures/filtering_fixtures.py create mode 100644 tests/test_filtering.py diff --git a/tests/conftest.py b/tests/conftest.py index 40c6f69b..a54a3d79 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,10 @@ """Pytest configuration and shared fixtures.""" # Import fixtures from fixtures module so pytest can discover them +from tests.fixtures.filtering_fixtures import ( # noqa: F401 + uk_test_dataset, + us_test_dataset, +) from tests.fixtures.region_fixtures import ( # noqa: F401 empty_registry, sample_registry, diff --git a/tests/fixtures/filtering_fixtures.py b/tests/fixtures/filtering_fixtures.py new file mode 100644 index 00000000..074f6655 --- /dev/null +++ b/tests/fixtures/filtering_fixtures.py @@ -0,0 +1,165 @@ +"""Fixtures for testing dataset filtering functionality.""" + +import pandas as pd +import pytest +from microdf import MicroDataFrame + +from policyengine.tax_benefit_models.uk.datasets import ( + PolicyEngineUKDataset, + UKYearData, +) +from policyengine.tax_benefit_models.us.datasets import ( + PolicyEngineUSDataset, + USYearData, +) + + +def 
create_us_test_dataset() -> PolicyEngineUSDataset: + """Create a minimal US dataset for filtering tests. + + Creates a dataset with 6 persons across 3 households: + - Household 1 (place_fips="44000"): 2 persons + - Household 2 (place_fips="44000"): 2 persons + - Household 3 (place_fips="57000"): 2 persons + """ + # Person data - 6 persons across 3 households + person_data = pd.DataFrame( + { + "person_id": [1, 2, 3, 4, 5, 6], + "household_id": [1, 1, 2, 2, 3, 3], + "tax_unit_id": [1, 1, 2, 2, 3, 3], + "spm_unit_id": [1, 1, 2, 2, 3, 3], + "family_id": [1, 1, 2, 2, 3, 3], + "marital_unit_id": [1, 1, 2, 2, 3, 3], + "person_weight": [1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0], + "age": [35, 30, 45, 40, 25, 28], + } + ) + + # Household data - 3 households, 2 in place 44000, 1 in place 57000 + household_data = pd.DataFrame( + { + "household_id": [1, 2, 3], + "household_weight": [1000.0, 1000.0, 1000.0], + "place_fips": ["44000", "44000", "57000"], + "state_fips": [6, 6, 34], # CA, CA, NJ + } + ) + + # Tax unit data + tax_unit_data = pd.DataFrame( + { + "tax_unit_id": [1, 2, 3], + "tax_unit_weight": [1000.0, 1000.0, 1000.0], + } + ) + + # SPM unit data + spm_unit_data = pd.DataFrame( + { + "spm_unit_id": [1, 2, 3], + "spm_unit_weight": [1000.0, 1000.0, 1000.0], + } + ) + + # Family data + family_data = pd.DataFrame( + { + "family_id": [1, 2, 3], + "family_weight": [1000.0, 1000.0, 1000.0], + } + ) + + # Marital unit data + marital_unit_data = pd.DataFrame( + { + "marital_unit_id": [1, 2, 3], + "marital_unit_weight": [1000.0, 1000.0, 1000.0], + } + ) + + return PolicyEngineUSDataset( + id="test_us_dataset", + name="Test US Dataset", + description="Test dataset for filtering", + filepath="/tmp/test_us.h5", + year=2024, + is_output_dataset=False, + data=USYearData( + person=MicroDataFrame(person_data, weights="person_weight"), + household=MicroDataFrame( + household_data, weights="household_weight" + ), + tax_unit=MicroDataFrame(tax_unit_data, 
weights="tax_unit_weight"), + spm_unit=MicroDataFrame(spm_unit_data, weights="spm_unit_weight"), + family=MicroDataFrame(family_data, weights="family_weight"), + marital_unit=MicroDataFrame( + marital_unit_data, weights="marital_unit_weight" + ), + ), + ) + + +def create_uk_test_dataset() -> PolicyEngineUKDataset: + """Create a minimal UK dataset for filtering tests. + + Creates a dataset with 6 persons across 3 households: + - Household 1 (country="ENGLAND"): 2 persons + - Household 2 (country="ENGLAND"): 2 persons + - Household 3 (country="SCOTLAND"): 2 persons + """ + # Person data - 6 persons across 3 households + person_data = pd.DataFrame( + { + "person_id": [1, 2, 3, 4, 5, 6], + "benunit_id": [1, 1, 2, 2, 3, 3], + "household_id": [1, 1, 2, 2, 3, 3], + "person_weight": [1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0], + "age": [35, 30, 45, 40, 25, 28], + } + ) + + # Benunit data + benunit_data = pd.DataFrame( + { + "benunit_id": [1, 2, 3], + "benunit_weight": [1000.0, 1000.0, 1000.0], + } + ) + + # Household data - 3 households, 2 in England, 1 in Scotland + household_data = pd.DataFrame( + { + "household_id": [1, 2, 3], + "household_weight": [1000.0, 1000.0, 1000.0], + "country": ["ENGLAND", "ENGLAND", "SCOTLAND"], + } + ) + + return PolicyEngineUKDataset( + id="test_uk_dataset", + name="Test UK Dataset", + description="Test dataset for filtering", + filepath="/tmp/test_uk.h5", + year=2024, + is_output_dataset=False, + data=UKYearData( + person=MicroDataFrame(person_data, weights="person_weight"), + benunit=MicroDataFrame(benunit_data, weights="benunit_weight"), + household=MicroDataFrame( + household_data, weights="household_weight" + ), + ), + ) + + +@pytest.fixture +def us_test_dataset() -> PolicyEngineUSDataset: + """Pytest fixture for US test dataset.""" + return create_us_test_dataset() + + +@pytest.fixture +def uk_test_dataset() -> PolicyEngineUKDataset: + """Pytest fixture for UK test dataset.""" + return create_uk_test_dataset() diff --git 
a/tests/test_filtering.py b/tests/test_filtering.py new file mode 100644 index 00000000..54c5c9af --- /dev/null +++ b/tests/test_filtering.py @@ -0,0 +1,439 @@ +"""Tests for dataset filtering functionality. + +Tests the _build_entity_relationships and _filter_dataset_by_household_variable +methods in both US and UK models. +""" + +import pandas as pd +import pytest + +from policyengine.core.simulation import Simulation + + +class TestSimulationFilterParameters: + """Tests for Simulation filter_field and filter_value parameters.""" + + def test__given_no_filter_params__then_simulation_has_none_values(self): + """Given: Simulation created without filter parameters + When: Accessing filter_field and filter_value + Then: Both are None + """ + # When + simulation = Simulation() + + # Then + assert simulation.filter_field is None + assert simulation.filter_value is None + + def test__given_filter_params__then_simulation_stores_them(self): + """Given: Simulation created with filter parameters + When: Accessing filter_field and filter_value + Then: Values are stored correctly + """ + # When + simulation = Simulation( + filter_field="place_fips", + filter_value="44000", + ) + + # Then + assert simulation.filter_field == "place_fips" + assert simulation.filter_value == "44000" + + +class TestUSBuildEntityRelationships: + """Tests for US model _build_entity_relationships method.""" + + def test__given_us_dataset__then_entity_relationships_has_all_columns( + self, us_test_dataset + ): + """Given: US dataset with persons and entities + When: Building entity relationships + Then: DataFrame has all entity ID columns + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + entity_rel = model._build_entity_relationships(us_test_dataset) + + # Then + expected_columns = { + "person_id", + "household_id", + "tax_unit_id", + "spm_unit_id", + "family_id", + 
"marital_unit_id", + } + assert set(entity_rel.columns) == expected_columns + + def test__given_us_dataset__then_entity_relationships_has_correct_row_count( + self, us_test_dataset + ): + """Given: US dataset with 6 persons + When: Building entity relationships + Then: DataFrame has 6 rows (one per person) + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + entity_rel = model._build_entity_relationships(us_test_dataset) + + # Then + assert len(entity_rel) == 6 + + def test__given_us_dataset__then_entity_relationships_preserves_mappings( + self, us_test_dataset + ): + """Given: US dataset where persons 1,2 belong to household 1 + When: Building entity relationships + Then: Mappings are preserved correctly + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + entity_rel = model._build_entity_relationships(us_test_dataset) + + # Then + person_1_row = entity_rel[entity_rel["person_id"] == 1].iloc[0] + assert person_1_row["household_id"] == 1 + assert person_1_row["tax_unit_id"] == 1 + + +class TestUSFilterDatasetByHouseholdVariable: + """Tests for US model _filter_dataset_by_household_variable method.""" + + def test__given_filter_by_place_fips__then_returns_matching_households( + self, us_test_dataset + ): + """Given: US dataset with households in places 44000 and 57000 + When: Filtering by place_fips=44000 + Then: Returns only households in place 44000 + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + us_test_dataset, + variable_name="place_fips", + variable_value="44000", + ) + + # Then + household_df = pd.DataFrame(filtered.data.household) 
+ assert len(household_df) == 2 + assert all(household_df["place_fips"] == "44000") + + def test__given_filter_by_place_fips__then_preserves_related_persons( + self, us_test_dataset + ): + """Given: US dataset with 4 persons in place 44000 + When: Filtering by place_fips=44000 + Then: Returns all 4 persons in matching households + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + us_test_dataset, + variable_name="place_fips", + variable_value="44000", + ) + + # Then + person_df = pd.DataFrame(filtered.data.person) + assert len(person_df) == 4 + assert set(person_df["person_id"]) == {1, 2, 3, 4} + + def test__given_filter_by_place_fips__then_preserves_related_entities( + self, us_test_dataset + ): + """Given: US dataset with 2 tax units in place 44000 + When: Filtering by place_fips=44000 + Then: Returns all related entities (tax_unit, spm_unit, etc.) 
+ """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + us_test_dataset, + variable_name="place_fips", + variable_value="44000", + ) + + # Then + assert len(pd.DataFrame(filtered.data.tax_unit)) == 2 + assert len(pd.DataFrame(filtered.data.spm_unit)) == 2 + assert len(pd.DataFrame(filtered.data.family)) == 2 + assert len(pd.DataFrame(filtered.data.marital_unit)) == 2 + + def test__given_no_matching_households__then_raises_value_error( + self, us_test_dataset + ): + """Given: US dataset with no households matching filter + When: Filtering by place_fips=99999 + Then: Raises ValueError + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # Then + with pytest.raises(ValueError, match="No households found"): + model._filter_dataset_by_household_variable( + us_test_dataset, + variable_name="place_fips", + variable_value="99999", + ) + + def test__given_invalid_variable_name__then_raises_value_error( + self, us_test_dataset + ): + """Given: US dataset + When: Filtering by non-existent variable + Then: Raises ValueError with helpful message + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # Then + with pytest.raises(ValueError, match="not found in household data"): + model._filter_dataset_by_household_variable( + us_test_dataset, + variable_name="nonexistent_var", + variable_value="value", + ) + + def test__given_filtered_dataset__then_has_updated_metadata( + self, us_test_dataset + ): + """Given: US dataset + When: Filtering by place_fips + Then: Filtered dataset has updated id and description + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + 
PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + us_test_dataset, + variable_name="place_fips", + variable_value="44000", + ) + + # Then + assert "filtered" in filtered.id + assert "place_fips=44000" in filtered.description + + +class TestUKBuildEntityRelationships: + """Tests for UK model _build_entity_relationships method.""" + + def test__given_uk_dataset__then_entity_relationships_has_all_columns( + self, uk_test_dataset + ): + """Given: UK dataset with persons and entities + When: Building entity relationships + Then: DataFrame has all entity ID columns + """ + # Given + from policyengine.tax_benefit_models.uk.model import ( + PolicyEngineUKLatest, + ) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # When + entity_rel = model._build_entity_relationships(uk_test_dataset) + + # Then + expected_columns = {"person_id", "benunit_id", "household_id"} + assert set(entity_rel.columns) == expected_columns + + def test__given_uk_dataset__then_entity_relationships_has_correct_row_count( + self, uk_test_dataset + ): + """Given: UK dataset with 6 persons + When: Building entity relationships + Then: DataFrame has 6 rows (one per person) + """ + # Given + from policyengine.tax_benefit_models.uk.model import ( + PolicyEngineUKLatest, + ) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # When + entity_rel = model._build_entity_relationships(uk_test_dataset) + + # Then + assert len(entity_rel) == 6 + + +class TestUKFilterDatasetByHouseholdVariable: + """Tests for UK model _filter_dataset_by_household_variable method.""" + + def test__given_filter_by_country__then_returns_matching_households( + self, uk_test_dataset + ): + """Given: UK dataset with households in England and Scotland + When: Filtering by country=ENGLAND + Then: Returns only households in England + """ + # Given + from policyengine.tax_benefit_models.uk.model import 
( + PolicyEngineUKLatest, + ) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + uk_test_dataset, + variable_name="country", + variable_value="ENGLAND", + ) + + # Then + household_df = pd.DataFrame(filtered.data.household) + assert len(household_df) == 2 + assert all(household_df["country"] == "ENGLAND") + + def test__given_filter_by_country__then_preserves_related_persons( + self, uk_test_dataset + ): + """Given: UK dataset with 4 persons in England + When: Filtering by country=ENGLAND + Then: Returns all 4 persons in matching households + """ + # Given + from policyengine.tax_benefit_models.uk.model import ( + PolicyEngineUKLatest, + ) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + uk_test_dataset, + variable_name="country", + variable_value="ENGLAND", + ) + + # Then + person_df = pd.DataFrame(filtered.data.person) + assert len(person_df) == 4 + assert set(person_df["person_id"]) == {1, 2, 3, 4} + + def test__given_filter_by_country__then_preserves_related_benunits( + self, uk_test_dataset + ): + """Given: UK dataset with 2 benunits in England + When: Filtering by country=ENGLAND + Then: Returns all related benunits + """ + # Given + from policyengine.tax_benefit_models.uk.model import ( + PolicyEngineUKLatest, + ) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + uk_test_dataset, + variable_name="country", + variable_value="ENGLAND", + ) + + # Then + assert len(pd.DataFrame(filtered.data.benunit)) == 2 + + def test__given_no_matching_households__then_raises_value_error( + self, uk_test_dataset + ): + """Given: UK dataset with no households matching filter + When: Filtering by country=WALES + Then: Raises ValueError + """ + # Given + from policyengine.tax_benefit_models.uk.model import ( + PolicyEngineUKLatest, + 
) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # Then + with pytest.raises(ValueError, match="No households found"): + model._filter_dataset_by_household_variable( + uk_test_dataset, + variable_name="country", + variable_value="WALES", + ) + + def test__given_filtered_dataset__then_has_updated_metadata( + self, uk_test_dataset + ): + """Given: UK dataset + When: Filtering by country + Then: Filtered dataset has updated id and description + """ + # Given + from policyengine.tax_benefit_models.uk.model import ( + PolicyEngineUKLatest, + ) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + uk_test_dataset, + variable_name="country", + variable_value="ENGLAND", + ) + + # Then + assert "filtered" in filtered.id + assert "country=ENGLAND" in filtered.description From 096ffeeaac20db964bdca04fb062520a19b6f61b Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Wed, 11 Feb 2026 21:26:35 +0100 Subject: [PATCH 6/8] fix: Apply US reforms at Microsimulation construction time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The US country package uses a shared singleton TaxBenefitSystem, which means p.update() after Microsimulation construction has no effect on calculations. This fix: - Adds reform_dict_from_parameter_values() utility to convert ParameterValue objects to the dict format accepted by Microsimulation - Updates US model.py to build reform dict and pass it at construction time instead of using simulation_modifier (p.update) after - Adds comprehensive unit tests for the utility function and US reform application The UK model continues to use p.update() since policyengine-uk was refactored to give each simulation its own TaxBenefitSystem instance. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../tax_benefit_models/us/model.py | 72 +++-- src/policyengine/utils/parametric_reforms.py | 40 +++ tests/conftest.py | 13 + tests/fixtures/parametric_reforms_fixtures.py | 136 +++++++++ tests/fixtures/us_reform_fixtures.py | 124 +++++++++ tests/test_parametric_reforms.py | 262 ++++++++++++++++++ tests/test_us_reform_application.py | 155 +++++++++++ 7 files changed, 775 insertions(+), 27 deletions(-) create mode 100644 tests/fixtures/parametric_reforms_fixtures.py create mode 100644 tests/fixtures/us_reform_fixtures.py create mode 100644 tests/test_parametric_reforms.py create mode 100644 tests/test_us_reform_application.py diff --git a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py index 1860c5c0..0db59259 100644 --- a/src/policyengine/tax_benefit_models/us/model.py +++ b/src/policyengine/tax_benefit_models/us/model.py @@ -363,7 +363,7 @@ def run(self, simulation: "Simulation") -> "Simulation": from policyengine_us.system import system from policyengine.utils.parametric_reforms import ( - simulation_modifier_from_parameter_values, + reform_dict_from_parameter_values, ) assert isinstance(simulation.dataset, PolicyEngineUSDataset) @@ -377,33 +377,51 @@ def run(self, simulation: "Simulation") -> "Simulation": dataset, simulation.filter_field, simulation.filter_value ) - # Build simulation from entity IDs using PolicyEngine Core pattern - microsim = Microsimulation() - self._build_simulation_from_dataset(microsim, dataset, system) + # Build reform dict from policy and dynamic parameter values + # US requires reforms to be passed at Microsimulation construction time + # (unlike UK which supports p.update() after construction) + reform_dict = None + + # Collect policy reforms + if simulation.policy: + if simulation.policy.simulation_modifier is not None: + # Custom simulation modifier - extract parameter values if 
available + # Fall back to parameter_values if no custom modifier logic needed + if simulation.policy.parameter_values: + reform_dict = reform_dict_from_parameter_values( + simulation.policy.parameter_values + ) + elif simulation.policy.parameter_values: + reform_dict = reform_dict_from_parameter_values( + simulation.policy.parameter_values + ) - # Apply policy reforms - if ( - simulation.policy - and simulation.policy.simulation_modifier is not None - ): - simulation.policy.simulation_modifier(microsim) - elif simulation.policy: - modifier = simulation_modifier_from_parameter_values( - simulation.policy.parameter_values - ) - modifier(microsim) - - # Apply dynamic reforms - if ( - simulation.dynamic - and simulation.dynamic.simulation_modifier is not None - ): - simulation.dynamic.simulation_modifier(microsim) - elif simulation.dynamic: - modifier = simulation_modifier_from_parameter_values( - simulation.dynamic.parameter_values - ) - modifier(microsim) + # Merge dynamic reforms into reform_dict + if simulation.dynamic: + dynamic_reform = None + if simulation.dynamic.simulation_modifier is not None: + if simulation.dynamic.parameter_values: + dynamic_reform = reform_dict_from_parameter_values( + simulation.dynamic.parameter_values + ) + elif simulation.dynamic.parameter_values: + dynamic_reform = reform_dict_from_parameter_values( + simulation.dynamic.parameter_values + ) + + if dynamic_reform: + if reform_dict is None: + reform_dict = dynamic_reform + else: + # Merge dynamic reforms into policy reforms + for param_name, period_values in dynamic_reform.items(): + if param_name not in reform_dict: + reform_dict[param_name] = {} + reform_dict[param_name].update(period_values) + + # Create Microsimulation with reform at construction time + microsim = Microsimulation(reform=reform_dict) + self._build_simulation_from_dataset(microsim, dataset, system) data = { "person": pd.DataFrame(), diff --git a/src/policyengine/utils/parametric_reforms.py 
b/src/policyengine/utils/parametric_reforms.py index 7d7a869a..7a9494a5 100644 --- a/src/policyengine/utils/parametric_reforms.py +++ b/src/policyengine/utils/parametric_reforms.py @@ -5,6 +5,46 @@ from policyengine.core import ParameterValue +def reform_dict_from_parameter_values( + parameter_values: list[ParameterValue], +) -> dict: + """ + Convert a list of ParameterValue objects to a reform dict format. + + This format is accepted by policyengine_us.Microsimulation(reform=...) and + policyengine_uk.Microsimulation(reform=...) at construction time. + + Args: + parameter_values: List of ParameterValue objects to convert. + + Returns: + A dict mapping parameter names to period-value dicts, e.g.: + { + "gov.irs.deductions.standard.amount.SINGLE": { + "2024-01-01": 29200 + } + } + """ + if not parameter_values: + return None + + reform_dict = {} + for pv in parameter_values: + param_name = pv.parameter.name + if param_name not in reform_dict: + reform_dict[param_name] = {} + + # Format the period string + period_str = pv.start_date.strftime("%Y-%m-%d") + if pv.end_date: + # Use period range format: "start.end" + period_str = f"{period_str}.{pv.end_date.strftime('%Y-%m-%d')}" + + reform_dict[param_name][period_str] = pv.value + + return reform_dict + + def simulation_modifier_from_parameter_values( parameter_values: list[ParameterValue], ) -> Callable: diff --git a/tests/conftest.py b/tests/conftest.py index a54a3d79..75e2b55c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,20 @@ uk_test_dataset, us_test_dataset, ) +from tests.fixtures.parametric_reforms_fixtures import ( # noqa: F401 + mock_param_joint, + mock_param_single, + multi_period_param_values, + multiple_different_params, + param_value_with_end_date, + single_param_value, +) from tests.fixtures.region_fixtures import ( # noqa: F401 empty_registry, sample_registry, ) +from tests.fixtures.us_reform_fixtures import ( # noqa: F401 + double_standard_deduction_policy, + high_income_single_filer, 
+ married_couple_with_kids, +) diff --git a/tests/fixtures/parametric_reforms_fixtures.py b/tests/fixtures/parametric_reforms_fixtures.py new file mode 100644 index 00000000..98bc7aa2 --- /dev/null +++ b/tests/fixtures/parametric_reforms_fixtures.py @@ -0,0 +1,136 @@ +"""Fixtures for parametric reforms tests.""" + +from datetime import date +from unittest.mock import MagicMock + +import pytest + +from policyengine.core import Parameter, ParameterValue + + +def create_mock_parameter( + name: str = "gov.test.param", + label: str = "Test Parameter", +) -> Parameter: + """Create a mock Parameter for testing.""" + param = MagicMock(spec=Parameter) + param.name = name + param.label = label + return param + + +def create_parameter_value( + parameter: Parameter, + value: float, + start_date: date, + end_date: date | None = None, +) -> ParameterValue: + """Create a ParameterValue for testing.""" + return ParameterValue( + parameter=parameter, + value=value, + start_date=start_date, + end_date=end_date, + ) + + +# Pre-built fixtures for common test scenarios + +MOCK_PARAM_SINGLE = create_mock_parameter( + name="gov.irs.deductions.standard.amount.SINGLE", + label="Standard Deduction (Single)", +) + +MOCK_PARAM_JOINT = create_mock_parameter( + name="gov.irs.deductions.standard.amount.JOINT", + label="Standard Deduction (Joint)", +) + +MOCK_PARAM_TAX_RATE = create_mock_parameter( + name="gov.irs.income_tax.rates.bracket_1.rate", + label="Tax Rate Bracket 1", +) + +# Single parameter value +SINGLE_PARAM_VALUE = create_parameter_value( + parameter=MOCK_PARAM_SINGLE, + value=29200, + start_date=date(2024, 1, 1), +) + +# Parameter value with end date +PARAM_VALUE_WITH_END_DATE = create_parameter_value( + parameter=MOCK_PARAM_SINGLE, + value=29200, + start_date=date(2024, 1, 1), + end_date=date(2024, 12, 31), +) + +# Multiple parameter values for the same parameter (different periods) +MULTI_PERIOD_PARAM_VALUES = [ + create_parameter_value( + parameter=MOCK_PARAM_SINGLE, + 
value=29200, + start_date=date(2024, 1, 1), + ), + create_parameter_value( + parameter=MOCK_PARAM_SINGLE, + value=30000, + start_date=date(2025, 1, 1), + ), +] + +# Multiple different parameters +MULTIPLE_DIFFERENT_PARAMS = [ + create_parameter_value( + parameter=MOCK_PARAM_SINGLE, + value=29200, + start_date=date(2024, 1, 1), + ), + create_parameter_value( + parameter=MOCK_PARAM_JOINT, + value=58400, + start_date=date(2024, 1, 1), + ), + create_parameter_value( + parameter=MOCK_PARAM_TAX_RATE, + value=0.10, + start_date=date(2024, 1, 1), + ), +] + + +@pytest.fixture +def mock_param_single(): + """Pytest fixture for a mock single filer parameter.""" + return MOCK_PARAM_SINGLE + + +@pytest.fixture +def mock_param_joint(): + """Pytest fixture for a mock joint filer parameter.""" + return MOCK_PARAM_JOINT + + +@pytest.fixture +def single_param_value(): + """Pytest fixture for a single parameter value.""" + return SINGLE_PARAM_VALUE + + +@pytest.fixture +def param_value_with_end_date(): + """Pytest fixture for a parameter value with end date.""" + return PARAM_VALUE_WITH_END_DATE + + +@pytest.fixture +def multi_period_param_values(): + """Pytest fixture for multiple values of the same parameter.""" + return MULTI_PERIOD_PARAM_VALUES + + +@pytest.fixture +def multiple_different_params(): + """Pytest fixture for multiple different parameters.""" + return MULTIPLE_DIFFERENT_PARAMS diff --git a/tests/fixtures/us_reform_fixtures.py b/tests/fixtures/us_reform_fixtures.py new file mode 100644 index 00000000..c52a7aba --- /dev/null +++ b/tests/fixtures/us_reform_fixtures.py @@ -0,0 +1,124 @@ +"""Fixtures for US reform application tests.""" + +from datetime import date + +import pytest + +from policyengine.core import ParameterValue, Policy +from policyengine.tax_benefit_models.us import USHouseholdInput, us_latest + + +def create_standard_deduction_policy( + single_value: float = 29200, + joint_value: float = 58400, + year: int = 2024, +) -> Policy: + """Create a policy that 
sets standard deduction values.""" + std_deduction_single = us_latest.get_parameter( + "gov.irs.deductions.standard.amount.SINGLE" + ) + std_deduction_joint = us_latest.get_parameter( + "gov.irs.deductions.standard.amount.JOINT" + ) + + return Policy( + name=f"Standard Deduction: ${single_value:,.0f} single, ${joint_value:,.0f} joint", + parameter_values=[ + ParameterValue( + parameter=std_deduction_single, + value=single_value, + start_date=date(year, 1, 1), + ), + ParameterValue( + parameter=std_deduction_joint, + value=joint_value, + start_date=date(year, 1, 1), + ), + ], + ) + + +# Pre-built policy fixtures + +DOUBLE_STANDARD_DEDUCTION_POLICY = create_standard_deduction_policy( + single_value=14600 * 2, # Double from $14,600 to $29,200 + joint_value=29200 * 2, # Double from $29,200 to $58,400 +) + +ZERO_STANDARD_DEDUCTION_POLICY = create_standard_deduction_policy( + single_value=0, + joint_value=0, +) + +LARGE_STANDARD_DEDUCTION_POLICY = create_standard_deduction_policy( + single_value=100000, + joint_value=200000, +) + + +# Pre-built household fixtures + +HIGH_INCOME_SINGLE_FILER = USHouseholdInput( + people=[ + { + "age": 35, + "employment_income": 100000, + "is_tax_unit_head": True, + } + ], + tax_unit={"filing_status": "SINGLE"}, + year=2024, +) + +MODERATE_INCOME_SINGLE_FILER = USHouseholdInput( + people=[ + { + "age": 30, + "employment_income": 50000, + "is_tax_unit_head": True, + } + ], + tax_unit={"filing_status": "SINGLE"}, + year=2024, +) + +MARRIED_COUPLE_WITH_KIDS = USHouseholdInput( + people=[ + {"age": 40, "employment_income": 100000, "is_tax_unit_head": True}, + {"age": 38, "employment_income": 50000, "is_tax_unit_spouse": True}, + {"age": 10}, + {"age": 8}, + ], + tax_unit={"filing_status": "JOINT"}, + year=2024, +) + +LOW_INCOME_FAMILY = USHouseholdInput( + people=[ + {"age": 28, "employment_income": 25000, "is_tax_unit_head": True}, + {"age": 5}, + ], + tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"}, + year=2024, +) + + +# Pytest fixtures + + 
+@pytest.fixture +def double_standard_deduction_policy(): + """Pytest fixture for doubled standard deduction policy.""" + return DOUBLE_STANDARD_DEDUCTION_POLICY + + +@pytest.fixture +def high_income_single_filer(): + """Pytest fixture for high income single filer household.""" + return HIGH_INCOME_SINGLE_FILER + + +@pytest.fixture +def married_couple_with_kids(): + """Pytest fixture for married couple with kids household.""" + return MARRIED_COUPLE_WITH_KIDS diff --git a/tests/test_parametric_reforms.py b/tests/test_parametric_reforms.py new file mode 100644 index 00000000..6e328aa3 --- /dev/null +++ b/tests/test_parametric_reforms.py @@ -0,0 +1,262 @@ +"""Tests for parametric reforms utility functions.""" + +from datetime import date + +from policyengine.utils.parametric_reforms import ( + reform_dict_from_parameter_values, + simulation_modifier_from_parameter_values, +) +from tests.fixtures.parametric_reforms_fixtures import ( + MOCK_PARAM_JOINT, + MOCK_PARAM_SINGLE, + MOCK_PARAM_TAX_RATE, + MULTI_PERIOD_PARAM_VALUES, + MULTIPLE_DIFFERENT_PARAMS, + PARAM_VALUE_WITH_END_DATE, + SINGLE_PARAM_VALUE, + create_mock_parameter, + create_parameter_value, +) + + +class TestReformDictFromParameterValues: + """Tests for the reform_dict_from_parameter_values function.""" + + def test__given_none_parameter_values__then_returns_none(self): + """Given: None as parameter_values + When: Calling reform_dict_from_parameter_values + Then: Returns None + """ + # Given + parameter_values = None + + # When + result = reform_dict_from_parameter_values(parameter_values) + + # Then + assert result is None + + def test__given_empty_list__then_returns_none(self): + """Given: Empty list of parameter values + When: Calling reform_dict_from_parameter_values + Then: Returns None + """ + # Given + parameter_values = [] + + # When + result = reform_dict_from_parameter_values(parameter_values) + + # Then + assert result is None + + def 
test__given_single_parameter_value__then_returns_dict_with_one_entry( + self, + ): + """Given: Single parameter value + When: Calling reform_dict_from_parameter_values + Then: Returns dict with parameter name and period-value mapping + """ + # Given + pv = SINGLE_PARAM_VALUE + + # When + result = reform_dict_from_parameter_values([pv]) + + # Then + assert result is not None + assert MOCK_PARAM_SINGLE.name in result + assert "2024-01-01" in result[MOCK_PARAM_SINGLE.name] + assert result[MOCK_PARAM_SINGLE.name]["2024-01-01"] == 29200 + + def test__given_parameter_value_with_end_date__then_uses_period_range_format( + self, + ): + """Given: Parameter value with start_date and end_date + When: Calling reform_dict_from_parameter_values + Then: Returns dict with period range format "start.end" + """ + # Given + pv = PARAM_VALUE_WITH_END_DATE + + # When + result = reform_dict_from_parameter_values([pv]) + + # Then + assert result is not None + param_name = MOCK_PARAM_SINGLE.name + assert param_name in result + # Should use "start.end" format + assert "2024-01-01.2024-12-31" in result[param_name] + assert result[param_name]["2024-01-01.2024-12-31"] == 29200 + + def test__given_multiple_periods_same_parameter__then_includes_all_periods( + self, + ): + """Given: Multiple parameter values for same parameter (different periods) + When: Calling reform_dict_from_parameter_values + Then: Returns dict with all periods for that parameter + """ + # Given + param_values = MULTI_PERIOD_PARAM_VALUES + + # When + result = reform_dict_from_parameter_values(param_values) + + # Then + assert result is not None + param_name = MOCK_PARAM_SINGLE.name + assert param_name in result + assert len(result[param_name]) == 2 + assert result[param_name]["2024-01-01"] == 29200 + assert result[param_name]["2025-01-01"] == 30000 + + def test__given_multiple_different_parameters__then_includes_all_parameters( + self, + ): + """Given: Multiple parameter values for different parameters + When: Calling 
reform_dict_from_parameter_values + Then: Returns dict with all parameters + """ + # Given + param_values = MULTIPLE_DIFFERENT_PARAMS + + # When + result = reform_dict_from_parameter_values(param_values) + + # Then + assert result is not None + assert len(result) == 3 + assert MOCK_PARAM_SINGLE.name in result + assert MOCK_PARAM_JOINT.name in result + assert MOCK_PARAM_TAX_RATE.name in result + assert result[MOCK_PARAM_SINGLE.name]["2024-01-01"] == 29200 + assert result[MOCK_PARAM_JOINT.name]["2024-01-01"] == 58400 + assert result[MOCK_PARAM_TAX_RATE.name]["2024-01-01"] == 0.10 + + def test__given_parameter_value__then_preserves_value_type(self): + """Given: Parameter values with different types (int, float) + When: Calling reform_dict_from_parameter_values + Then: Values preserve their original types + """ + # Given + mock_param = create_mock_parameter("gov.test.rate") + pv_float = create_parameter_value( + parameter=mock_param, + value=0.15, + start_date=date(2024, 1, 1), + ) + + # When + result = reform_dict_from_parameter_values([pv_float]) + + # Then + assert result["gov.test.rate"]["2024-01-01"] == 0.15 + assert isinstance(result["gov.test.rate"]["2024-01-01"], float) + + +class TestSimulationModifierFromParameterValues: + """Tests for the simulation_modifier_from_parameter_values function.""" + + def test__given_empty_list__then_returns_callable(self): + """Given: Empty list of parameter values + When: Calling simulation_modifier_from_parameter_values + Then: Returns a callable function + """ + # Given + parameter_values = [] + + # When + result = simulation_modifier_from_parameter_values(parameter_values) + + # Then + assert callable(result) + + def test__given_parameter_values__then_returns_modifier_function(self): + """Given: List of parameter values + When: Calling simulation_modifier_from_parameter_values + Then: Returns a callable modifier function + """ + # Given + param_values = [SINGLE_PARAM_VALUE] + + # When + result = 
simulation_modifier_from_parameter_values(param_values) + + # Then + assert callable(result) + + def test__given_modifier__then_calls_p_update_for_each_value(self): + """Given: Modifier function from parameter values + When: Calling the modifier with a mock simulation + Then: Calls p.update() for each parameter value + """ + # Given + from unittest.mock import MagicMock + + mock_simulation = MagicMock() + mock_param_node = MagicMock() + mock_simulation.tax_benefit_system.parameters.get_child.return_value = ( + mock_param_node + ) + + param_values = [SINGLE_PARAM_VALUE] + modifier = simulation_modifier_from_parameter_values(param_values) + + # When + modifier(mock_simulation) + + # Then + mock_simulation.tax_benefit_system.parameters.get_child.assert_called_once_with( + MOCK_PARAM_SINGLE.name + ) + mock_param_node.update.assert_called_once() + + def test__given_multiple_values__then_applies_all_updates(self): + """Given: Multiple parameter values + When: Calling the modifier with a mock simulation + Then: Applies updates for all parameter values + """ + # Given + from unittest.mock import MagicMock + + mock_simulation = MagicMock() + mock_param_node = MagicMock() + mock_simulation.tax_benefit_system.parameters.get_child.return_value = ( + mock_param_node + ) + + param_values = MULTIPLE_DIFFERENT_PARAMS + modifier = simulation_modifier_from_parameter_values(param_values) + + # When + modifier(mock_simulation) + + # Then + assert ( + mock_simulation.tax_benefit_system.parameters.get_child.call_count + == 3 + ) + assert mock_param_node.update.call_count == 3 + + def test__given_modifier__then_returns_simulation(self): + """Given: Modifier function + When: Calling with a simulation + Then: Returns the simulation object + """ + # Given + from unittest.mock import MagicMock + + mock_simulation = MagicMock() + mock_param_node = MagicMock() + mock_simulation.tax_benefit_system.parameters.get_child.return_value = ( + mock_param_node + ) + + modifier = 
simulation_modifier_from_parameter_values([SINGLE_PARAM_VALUE]) + + # When + result = modifier(mock_simulation) + + # Then + assert result is mock_simulation diff --git a/tests/test_us_reform_application.py b/tests/test_us_reform_application.py new file mode 100644 index 00000000..d33f615d --- /dev/null +++ b/tests/test_us_reform_application.py @@ -0,0 +1,155 @@ +"""Tests for US reform application via reform_dict at construction time. + +These tests verify that the US model correctly applies reforms by building +a reform dict and passing it to Microsimulation at construction time, +fixing the p.update() bug that exists in the US country package. +""" + +from datetime import date + +from policyengine.core import ParameterValue, Policy +from policyengine.tax_benefit_models.us import ( + USHouseholdInput, + us_latest, +) +from policyengine.tax_benefit_models.us import ( + calculate_household_impact as calculate_us_household_impact, +) +from tests.fixtures.us_reform_fixtures import ( + DOUBLE_STANDARD_DEDUCTION_POLICY, + HIGH_INCOME_SINGLE_FILER, + MARRIED_COUPLE_WITH_KIDS, + create_standard_deduction_policy, +) + + +class TestUSHouseholdReformApplication: + """Tests for US household reform application.""" + + def test__given_baseline_policy__then_returns_baseline_tax(self): + """Given: No policy (baseline) + When: Calculating household impact + Then: Returns baseline tax calculation + """ + # Given + household = HIGH_INCOME_SINGLE_FILER + + # When + result = calculate_us_household_impact(household, policy=None) + + # Then + assert result.tax_unit[0]["income_tax"] > 0 + + def test__given_doubled_standard_deduction__then_tax_is_lower(self): + """Given: Policy that doubles standard deduction + When: Calculating household impact + Then: Income tax is lower than baseline + """ + # Given + household = HIGH_INCOME_SINGLE_FILER + policy = DOUBLE_STANDARD_DEDUCTION_POLICY + + # When + baseline_result = calculate_us_household_impact(household, policy=None) + reform_result = 
calculate_us_household_impact(household, policy=policy) + + # Then + baseline_tax = baseline_result.tax_unit[0]["income_tax"] + reform_tax = reform_result.tax_unit[0]["income_tax"] + + assert reform_tax < baseline_tax, ( + f"Reform tax ({reform_tax}) should be less than baseline ({baseline_tax})" + ) + + def test__given_doubled_standard_deduction__then_tax_reduction_is_significant( + self, + ): + """Given: Policy that doubles standard deduction + When: Calculating household impact for high income household + Then: Tax reduction is at least $1000 (significant impact) + """ + # Given + household = HIGH_INCOME_SINGLE_FILER + policy = DOUBLE_STANDARD_DEDUCTION_POLICY + + # When + baseline_result = calculate_us_household_impact(household, policy=None) + reform_result = calculate_us_household_impact(household, policy=policy) + + # Then + baseline_tax = baseline_result.tax_unit[0]["income_tax"] + reform_tax = reform_result.tax_unit[0]["income_tax"] + tax_reduction = baseline_tax - reform_tax + + assert tax_reduction >= 1000, ( + f"Tax reduction ({tax_reduction}) should be at least $1000" + ) + + def test__given_married_couple__then_joint_deduction_affects_tax(self): + """Given: Married couple with doubled joint standard deduction + When: Calculating household impact + Then: Tax is lower than baseline + """ + # Given + household = MARRIED_COUPLE_WITH_KIDS + policy = DOUBLE_STANDARD_DEDUCTION_POLICY + + # When + baseline_result = calculate_us_household_impact(household, policy=None) + reform_result = calculate_us_household_impact(household, policy=policy) + + # Then + baseline_tax = baseline_result.tax_unit[0]["income_tax"] + reform_tax = reform_result.tax_unit[0]["income_tax"] + + assert reform_tax < baseline_tax, ( + f"Reform tax ({reform_tax}) should be less than baseline ({baseline_tax})" + ) + + def test__given_same_policy_twice__then_results_are_deterministic(self): + """Given: Same policy applied twice + When: Calculating household impact + Then: Results are 
identical (deterministic) + """ + # Given + household = HIGH_INCOME_SINGLE_FILER + policy = DOUBLE_STANDARD_DEDUCTION_POLICY + + # When + result1 = calculate_us_household_impact(household, policy=policy) + result2 = calculate_us_household_impact(household, policy=policy) + + # Then + assert result1.tax_unit[0]["income_tax"] == result2.tax_unit[0]["income_tax"] + + def test__given_custom_deduction_value__then_tax_reflects_value(self): + """Given: Custom standard deduction value + When: Calculating household impact + Then: Tax reflects the custom deduction + """ + # Given + household = HIGH_INCOME_SINGLE_FILER + + # Create policies with different deduction values + small_deduction_policy = create_standard_deduction_policy( + single_value=5000, joint_value=10000 + ) + large_deduction_policy = create_standard_deduction_policy( + single_value=50000, joint_value=100000 + ) + + # When + small_deduction_result = calculate_us_household_impact( + household, policy=small_deduction_policy + ) + large_deduction_result = calculate_us_household_impact( + household, policy=large_deduction_policy + ) + + # Then + small_tax = small_deduction_result.tax_unit[0]["income_tax"] + large_tax = large_deduction_result.tax_unit[0]["income_tax"] + + assert large_tax < small_tax, ( + f"Large deduction tax ({large_tax}) should be less than small deduction ({small_tax})" + ) From 2f52b0337a58ffc47a97235f26d903d4f2074e53 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Mon, 16 Feb 2026 20:40:21 +0100 Subject: [PATCH 7/8] fix: Fix ruff lint errors Remove unused imports in test_us_reform_application.py. Co-Authored-By: Claude Opus 4.6 --- tests/test_us_reform_application.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/test_us_reform_application.py b/tests/test_us_reform_application.py index d33f615d..42657499 100644 --- a/tests/test_us_reform_application.py +++ b/tests/test_us_reform_application.py @@ -5,13 +5,7 @@ fixing the p.update() bug that exists in the US country package. 
""" -from datetime import date -from policyengine.core import ParameterValue, Policy -from policyengine.tax_benefit_models.us import ( - USHouseholdInput, - us_latest, -) from policyengine.tax_benefit_models.us import ( calculate_household_impact as calculate_us_household_impact, ) From ee6b115267892849594545f4e3ed3d03e68891e6 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Mon, 16 Feb 2026 23:58:17 +0100 Subject: [PATCH 8/8] refactor: Extract shared entity utilities and decompose reform building Extract duplicated entity relationship and dataset filtering logic from US and UK model.py into shared utils/entity_utils.py. Decompose inline reform dict construction in US run() into single-purpose functions (build_reform_dict, merge_reform_dicts) in utils/parametric_reforms.py. Co-Authored-By: Claude Opus 4.6 --- .../tax_benefit_models/uk/model.py | 133 ++------ .../tax_benefit_models/us/model.py | 232 ++------------ src/policyengine/utils/entity_utils.py | 127 ++++++++ src/policyengine/utils/parametric_reforms.py | 56 ++++ tests/test_entity_utils.py | 295 ++++++++++++++++++ 5 files changed, 525 insertions(+), 318 deletions(-) create mode 100644 src/policyengine/utils/entity_utils.py create mode 100644 tests/test_entity_utils.py diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index dedbdc29..88ead217 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -13,6 +13,10 @@ TaxBenefitModelVersion, Variable, ) +from policyengine.utils.entity_utils import ( + build_entity_relationships, + filter_dataset_by_household_variable, +) from policyengine.utils.parameter_labels import ( build_scale_lookup, generate_label_for_parameter, @@ -23,6 +27,8 @@ if TYPE_CHECKING: from policyengine.core.simulation import Simulation +UK_GROUP_ENTITIES = ["benunit", "household"] + class PolicyEngineUK(TaxBenefitModel): id: str = "policyengine-uk" @@ -185,41 +191,9 @@ def 
__init__(self, **kwargs: dict): def _build_entity_relationships( self, dataset: PolicyEngineUKDataset ) -> pd.DataFrame: - """Build a DataFrame mapping each person to their containing entities. - - Creates an explicit relationship map between persons and all entity - types (benunit, household). This enables filtering at any entity - level while preserving the integrity of all related entities. - - Args: - dataset: The dataset to extract relationships from. - - Returns: - A DataFrame indexed by person with columns for each entity ID. - """ + """Build a DataFrame mapping each person to their containing entities.""" person_data = pd.DataFrame(dataset.data.person) - - # Determine column naming convention - benunit_id_col = ( - "person_benunit_id" - if "person_benunit_id" in person_data.columns - else "benunit_id" - ) - household_id_col = ( - "person_household_id" - if "person_household_id" in person_data.columns - else "household_id" - ) - - entity_rel = pd.DataFrame( - { - "person_id": person_data["person_id"].values, - "benunit_id": person_data[benunit_id_col].values, - "household_id": person_data[household_id_col].values, - } - ) - - return entity_rel + return build_entity_relationships(person_data, UK_GROUP_ENTITIES) def _filter_dataset_by_household_variable( self, @@ -227,75 +201,13 @@ def _filter_dataset_by_household_variable( variable_name: str, variable_value: str, ) -> PolicyEngineUKDataset: - """Filter a dataset to only include households where a variable matches. - - Uses the entity relationship approach: builds an explicit map of all - entity relationships, filters at the household level, and keeps all - persons in matching households to preserve entity integrity. - - Args: - dataset: The dataset to filter. - variable_name: The name of the household-level variable to filter on. - variable_value: The value to match. Handles both str and bytes encoding. - - Returns: - A new filtered dataset containing only matching households. 
- """ - # Build entity relationships - entity_rel = self._build_entity_relationships(dataset) - - # Get household-level variable values - household_data = pd.DataFrame(dataset.data.household) - - if variable_name not in household_data.columns: - raise ValueError( - f"Variable '{variable_name}' not found in household data. " - f"Available columns: {list(household_data.columns)}" - ) - - hh_values = household_data[variable_name].values - hh_ids = household_data["household_id"].values - - # Create mask for matching households, handling bytes encoding - if isinstance(variable_value, str): - hh_mask = (hh_values == variable_value) | ( - hh_values == variable_value.encode() - ) - else: - hh_mask = hh_values == variable_value - - matching_hh_ids = set(hh_ids[hh_mask]) - - if len(matching_hh_ids) == 0: - raise ValueError( - f"No households found matching {variable_name}={variable_value}" - ) - - # Filter entity_rel to persons in matching households - person_mask = entity_rel["household_id"].isin(matching_hh_ids) - filtered_entity_rel = entity_rel[person_mask] - - # Get the filtered entity IDs - filtered_person_ids = set(filtered_entity_rel["person_id"]) - filtered_household_ids = matching_hh_ids - filtered_benunit_ids = set(filtered_entity_rel["benunit_id"]) - - # Filter each entity DataFrame - person_df = pd.DataFrame(dataset.data.person) - household_df = pd.DataFrame(dataset.data.household) - benunit_df = pd.DataFrame(dataset.data.benunit) - - filtered_person = person_df[ - person_df["person_id"].isin(filtered_person_ids) - ] - filtered_household = household_df[ - household_df["household_id"].isin(filtered_household_ids) - ] - filtered_benunit = benunit_df[ - benunit_df["benunit_id"].isin(filtered_benunit_ids) - ] - - # Create filtered dataset + """Filter a dataset to only include households where a variable matches.""" + filtered = filter_dataset_by_household_variable( + entity_data=dataset.data.entity_data, + group_entities=UK_GROUP_ENTITIES, + 
variable_name=variable_name, + variable_value=variable_value, + ) return PolicyEngineUKDataset( id=dataset.id + f"_filtered_{variable_name}_{variable_value}", name=dataset.name, @@ -304,18 +216,9 @@ def _filter_dataset_by_household_variable( year=dataset.year, is_output_dataset=dataset.is_output_dataset, data=UKYearData( - person=MicroDataFrame( - filtered_person.reset_index(drop=True), - weights="person_weight", - ), - benunit=MicroDataFrame( - filtered_benunit.reset_index(drop=True), - weights="benunit_weight", - ), - household=MicroDataFrame( - filtered_household.reset_index(drop=True), - weights="household_weight", - ), + person=filtered["person"], + benunit=filtered["benunit"], + household=filtered["household"], ), ) diff --git a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py index 0db59259..3c8a5aae 100644 --- a/src/policyengine/tax_benefit_models/us/model.py +++ b/src/policyengine/tax_benefit_models/us/model.py @@ -13,6 +13,10 @@ TaxBenefitModelVersion, Variable, ) +from policyengine.utils.entity_utils import ( + build_entity_relationships, + filter_dataset_by_household_variable, +) from policyengine.utils.parameter_labels import ( build_scale_lookup, generate_label_for_parameter, @@ -23,6 +27,8 @@ if TYPE_CHECKING: from policyengine.core.simulation import Simulation +US_GROUP_ENTITIES = ["household", "tax_unit", "spm_unit", "family", "marital_unit"] + class PolicyEngineUS(TaxBenefitModel): id: str = "policyengine-us" @@ -178,60 +184,9 @@ def __init__(self, **kwargs: dict): def _build_entity_relationships( self, dataset: PolicyEngineUSDataset ) -> pd.DataFrame: - """Build a DataFrame mapping each person to their containing entities. - - Creates an explicit relationship map between persons and all entity - types (household, tax_unit, spm_unit, family, marital_unit). This - enables filtering at any entity level while preserving the integrity - of all related entities. 
- - Args: - dataset: The dataset to extract relationships from. - - Returns: - A DataFrame indexed by person with columns for each entity ID. - """ + """Build a DataFrame mapping each person to their containing entities.""" person_data = pd.DataFrame(dataset.data.person) - - # Determine column naming convention - household_id_col = ( - "person_household_id" - if "person_household_id" in person_data.columns - else "household_id" - ) - tax_unit_id_col = ( - "person_tax_unit_id" - if "person_tax_unit_id" in person_data.columns - else "tax_unit_id" - ) - spm_unit_id_col = ( - "person_spm_unit_id" - if "person_spm_unit_id" in person_data.columns - else "spm_unit_id" - ) - family_id_col = ( - "person_family_id" - if "person_family_id" in person_data.columns - else "family_id" - ) - marital_unit_id_col = ( - "person_marital_unit_id" - if "person_marital_unit_id" in person_data.columns - else "marital_unit_id" - ) - - entity_rel = pd.DataFrame( - { - "person_id": person_data["person_id"].values, - "household_id": person_data[household_id_col].values, - "tax_unit_id": person_data[tax_unit_id_col].values, - "spm_unit_id": person_data[spm_unit_id_col].values, - "family_id": person_data[family_id_col].values, - "marital_unit_id": person_data[marital_unit_id_col].values, - } - ) - - return entity_rel + return build_entity_relationships(person_data, US_GROUP_ENTITIES) def _filter_dataset_by_household_variable( self, @@ -239,90 +194,13 @@ def _filter_dataset_by_household_variable( variable_name: str, variable_value: str, ) -> PolicyEngineUSDataset: - """Filter a dataset to only include households where a variable matches. - - Uses the entity relationship approach: builds an explicit map of all - entity relationships, filters at the household level, and keeps all - persons in matching households to preserve entity integrity. - - Args: - dataset: The dataset to filter. - variable_name: The name of the household-level variable to filter on. - variable_value: The value to match. 
Handles both str and bytes encoding. - - Returns: - A new filtered dataset containing only matching households. - """ - # Build entity relationships - entity_rel = self._build_entity_relationships(dataset) - - # Get household-level variable values - household_data = pd.DataFrame(dataset.data.household) - - if variable_name not in household_data.columns: - raise ValueError( - f"Variable '{variable_name}' not found in household data. " - f"Available columns: {list(household_data.columns)}" - ) - - hh_values = household_data[variable_name].values - hh_ids = household_data["household_id"].values - - # Create mask for matching households, handling bytes encoding - if isinstance(variable_value, str): - hh_mask = (hh_values == variable_value) | ( - hh_values == variable_value.encode() - ) - else: - hh_mask = hh_values == variable_value - - matching_hh_ids = set(hh_ids[hh_mask]) - - if len(matching_hh_ids) == 0: - raise ValueError( - f"No households found matching {variable_name}={variable_value}" - ) - - # Filter entity_rel to persons in matching households - person_mask = entity_rel["household_id"].isin(matching_hh_ids) - filtered_entity_rel = entity_rel[person_mask] - - # Get the filtered entity IDs - filtered_person_ids = set(filtered_entity_rel["person_id"]) - filtered_household_ids = matching_hh_ids - filtered_tax_unit_ids = set(filtered_entity_rel["tax_unit_id"]) - filtered_spm_unit_ids = set(filtered_entity_rel["spm_unit_id"]) - filtered_family_ids = set(filtered_entity_rel["family_id"]) - filtered_marital_unit_ids = set(filtered_entity_rel["marital_unit_id"]) - - # Filter each entity DataFrame - person_df = pd.DataFrame(dataset.data.person) - household_df = pd.DataFrame(dataset.data.household) - tax_unit_df = pd.DataFrame(dataset.data.tax_unit) - spm_unit_df = pd.DataFrame(dataset.data.spm_unit) - family_df = pd.DataFrame(dataset.data.family) - marital_unit_df = pd.DataFrame(dataset.data.marital_unit) - - filtered_person = person_df[ - 
person_df["person_id"].isin(filtered_person_ids) - ] - filtered_household = household_df[ - household_df["household_id"].isin(filtered_household_ids) - ] - filtered_tax_unit = tax_unit_df[ - tax_unit_df["tax_unit_id"].isin(filtered_tax_unit_ids) - ] - filtered_spm_unit = spm_unit_df[ - spm_unit_df["spm_unit_id"].isin(filtered_spm_unit_ids) - ] - filtered_family = family_df[ - family_df["family_id"].isin(filtered_family_ids) - ] - filtered_marital_unit = marital_unit_df[ - marital_unit_df["marital_unit_id"].isin(filtered_marital_unit_ids) - ] - - # Create filtered dataset + """Filter a dataset to only include households where a variable matches.""" + filtered = filter_dataset_by_household_variable( + entity_data=dataset.data.entity_data, + group_entities=US_GROUP_ENTITIES, + variable_name=variable_name, + variable_value=variable_value, + ) return PolicyEngineUSDataset( id=dataset.id + f"_filtered_{variable_name}_{variable_value}", name=dataset.name, @@ -331,30 +209,12 @@ def _filter_dataset_by_household_variable( year=dataset.year, is_output_dataset=dataset.is_output_dataset, data=USYearData( - person=MicroDataFrame( - filtered_person.reset_index(drop=True), - weights="person_weight", - ), - household=MicroDataFrame( - filtered_household.reset_index(drop=True), - weights="household_weight", - ), - tax_unit=MicroDataFrame( - filtered_tax_unit.reset_index(drop=True), - weights="tax_unit_weight", - ), - spm_unit=MicroDataFrame( - filtered_spm_unit.reset_index(drop=True), - weights="spm_unit_weight", - ), - family=MicroDataFrame( - filtered_family.reset_index(drop=True), - weights="family_weight", - ), - marital_unit=MicroDataFrame( - filtered_marital_unit.reset_index(drop=True), - weights="marital_unit_weight", - ), + person=filtered["person"], + marital_unit=filtered["marital_unit"], + family=filtered["family"], + spm_unit=filtered["spm_unit"], + tax_unit=filtered["tax_unit"], + household=filtered["household"], ), ) @@ -363,7 +223,8 @@ def run(self, simulation: 
"Simulation") -> "Simulation": from policyengine_us.system import system from policyengine.utils.parametric_reforms import ( - reform_dict_from_parameter_values, + build_reform_dict, + merge_reform_dicts, ) assert isinstance(simulation.dataset, PolicyEngineUSDataset) @@ -377,47 +238,12 @@ def run(self, simulation: "Simulation") -> "Simulation": dataset, simulation.filter_field, simulation.filter_value ) - # Build reform dict from policy and dynamic parameter values - # US requires reforms to be passed at Microsimulation construction time - # (unlike UK which supports p.update() after construction) - reform_dict = None - - # Collect policy reforms - if simulation.policy: - if simulation.policy.simulation_modifier is not None: - # Custom simulation modifier - extract parameter values if available - # Fall back to parameter_values if no custom modifier logic needed - if simulation.policy.parameter_values: - reform_dict = reform_dict_from_parameter_values( - simulation.policy.parameter_values - ) - elif simulation.policy.parameter_values: - reform_dict = reform_dict_from_parameter_values( - simulation.policy.parameter_values - ) - - # Merge dynamic reforms into reform_dict - if simulation.dynamic: - dynamic_reform = None - if simulation.dynamic.simulation_modifier is not None: - if simulation.dynamic.parameter_values: - dynamic_reform = reform_dict_from_parameter_values( - simulation.dynamic.parameter_values - ) - elif simulation.dynamic.parameter_values: - dynamic_reform = reform_dict_from_parameter_values( - simulation.dynamic.parameter_values - ) - - if dynamic_reform: - if reform_dict is None: - reform_dict = dynamic_reform - else: - # Merge dynamic reforms into policy reforms - for param_name, period_values in dynamic_reform.items(): - if param_name not in reform_dict: - reform_dict[param_name] = {} - reform_dict[param_name].update(period_values) + # Build reform dict from policy and dynamic parameter values. 
+ # US requires reforms at Microsimulation construction time + # (unlike UK which supports p.update() after construction). + policy_reform = build_reform_dict(simulation.policy) + dynamic_reform = build_reform_dict(simulation.dynamic) + reform_dict = merge_reform_dicts(policy_reform, dynamic_reform) # Create Microsimulation with reform at construction time microsim = Microsimulation(reform=reform_dict) diff --git a/src/policyengine/utils/entity_utils.py b/src/policyengine/utils/entity_utils.py new file mode 100644 index 00000000..fdbcc092 --- /dev/null +++ b/src/policyengine/utils/entity_utils.py @@ -0,0 +1,127 @@ +"""Shared utilities for entity relationship building and dataset filtering.""" + +import pandas as pd +from microdf import MicroDataFrame + + +def _resolve_id_column( + person_data: pd.DataFrame, entity_name: str +) -> str: + """Resolve the ID column name for a group entity in person data. + + Tries `person_{entity}_id` first (standard convention), falls back + to `{entity}_id` (custom datasets). + """ + prefixed = f"person_{entity_name}_id" + bare = f"{entity_name}_id" + if prefixed in person_data.columns: + return prefixed + return bare + + +def build_entity_relationships( + person_data: pd.DataFrame, + group_entities: list[str], +) -> pd.DataFrame: + """Build a DataFrame mapping each person to their containing entities. + + Creates an explicit relationship map between persons and all specified + group entity types. This enables filtering at any entity level while + preserving the integrity of all related entities. + + Args: + person_data: DataFrame of person-level data with ID columns. + group_entities: List of group entity names (e.g., ["household", "tax_unit"]). + + Returns: + A DataFrame with person_id and one {entity}_id column per group entity. 
+ """ + columns = {"person_id": person_data["person_id"].values} + for entity in group_entities: + id_col = _resolve_id_column(person_data, entity) + columns[f"{entity}_id"] = person_data[id_col].values + return pd.DataFrame(columns) + + +def filter_dataset_by_household_variable( + entity_data: dict[str, MicroDataFrame], + group_entities: list[str], + variable_name: str, + variable_value: str, +) -> dict[str, MicroDataFrame]: + """Filter dataset entities to only include households where a variable matches. + + Uses an entity relationship approach: builds an explicit map of all + entity relationships, filters at the household level, and keeps all + persons in matching households to preserve entity integrity. + + Args: + entity_data: Dict mapping entity names to their MicroDataFrames + (from YearData.entity_data). + group_entities: List of group entity names for this country. + variable_name: The household-level variable to filter on. + variable_value: The value to match. Handles both str and bytes encoding. + + Returns: + A dict mapping entity names to filtered MicroDataFrames. + + Raises: + ValueError: If variable_name is not found or no households match. + """ + person_data = pd.DataFrame(entity_data["person"]) + household_data = pd.DataFrame(entity_data["household"]) + + if variable_name not in household_data.columns: + raise ValueError( + f"Variable '{variable_name}' not found in household data. 
" + f"Available columns: {list(household_data.columns)}" + ) + + # Build entity relationships + entity_rel = build_entity_relationships(person_data, group_entities) + + # Find matching household IDs + hh_values = household_data[variable_name].values + hh_ids = household_data["household_id"].values + + if isinstance(variable_value, str): + hh_mask = (hh_values == variable_value) | ( + hh_values == variable_value.encode() + ) + else: + hh_mask = hh_values == variable_value + + matching_hh_ids = set(hh_ids[hh_mask]) + + if len(matching_hh_ids) == 0: + raise ValueError( + f"No households found matching {variable_name}={variable_value}" + ) + + # Filter persons to those in matching households + person_mask = entity_rel["household_id"].isin(matching_hh_ids) + filtered_rel = entity_rel[person_mask] + + # Collect filtered IDs for each entity + filtered_ids = {"person": set(filtered_rel["person_id"])} + for entity in group_entities: + filtered_ids[entity] = set(filtered_rel[f"{entity}_id"]) + + # Filter each entity DataFrame + result = {} + for entity_name, mdf in entity_data.items(): + df = pd.DataFrame(mdf) + id_col = f"{entity_name}_id" + if entity_name in filtered_ids and id_col in df.columns: + filtered_df = df[df[id_col].isin(filtered_ids[entity_name])] + else: + filtered_df = df + + weight_col = f"{entity_name}_weight" + weights = weight_col if weight_col in filtered_df.columns else None + result[entity_name] = MicroDataFrame( + filtered_df.reset_index(drop=True), + weights=weights, + ) + + return result diff --git a/src/policyengine/utils/parametric_reforms.py b/src/policyengine/utils/parametric_reforms.py index 7a9494a5..4176037a 100644 --- a/src/policyengine/utils/parametric_reforms.py +++ b/src/policyengine/utils/parametric_reforms.py @@ -1,9 +1,16 @@ +from __future__ import annotations + from collections.abc import Callable +from typing import TYPE_CHECKING from policyengine_core.periods import period from policyengine.core import ParameterValue +if 
TYPE_CHECKING: + from policyengine.core.dynamic import Dynamic + from policyengine.core.policy import Policy + def reform_dict_from_parameter_values( parameter_values: list[ParameterValue], @@ -77,3 +84,52 @@ def modifier(simulation): return simulation return modifier + + +def build_reform_dict(policy_or_dynamic: Policy | Dynamic | None) -> dict | None: + """Extract a reform dict from a Policy or Dynamic object. + + If the object has parameter_values, converts them to reform dict format. + Returns None if the object is None or has no parameter values. + + Args: + policy_or_dynamic: A Policy or Dynamic object, or None. + + Returns: + A reform dict suitable for Microsimulation(reform=...), or None. + """ + if policy_or_dynamic is None: + return None + if policy_or_dynamic.parameter_values: + return reform_dict_from_parameter_values( + policy_or_dynamic.parameter_values + ) + return None + + +def merge_reform_dicts( + base: dict | None, override: dict | None +) -> dict | None: + """Merge two reform dicts, with override values taking precedence. + + Either or both dicts can be None. When both have entries for the same + parameter, period-level values from override replace those in base. + + Args: + base: The base reform dict (e.g., from policy). + override: The override reform dict (e.g., from dynamic). + + Returns: + The merged reform dict, or None if both inputs are None. 
+ """ + if base is None: + return override + if override is None: + return base + + merged = {k: dict(v) for k, v in base.items()} + for param_name, period_values in override.items(): + if param_name not in merged: + merged[param_name] = {} + merged[param_name].update(period_values) + return merged diff --git a/tests/test_entity_utils.py b/tests/test_entity_utils.py new file mode 100644 index 00000000..20c7b3ce --- /dev/null +++ b/tests/test_entity_utils.py @@ -0,0 +1,295 @@ +"""Tests for shared entity utilities and reform dict helpers.""" + +import pandas as pd +import pytest +from microdf import MicroDataFrame + +from policyengine.utils.entity_utils import ( + build_entity_relationships, + filter_dataset_by_household_variable, +) +from policyengine.utils.parametric_reforms import ( + build_reform_dict, + merge_reform_dicts, +) + + +class TestBuildEntityRelationships: + """Tests for the shared build_entity_relationships function.""" + + def test__given_us_style_entities__then_returns_all_columns( + self, us_test_dataset + ): + """Given: Person data with 5 group entities (US style) + When: Building entity relationships + Then: DataFrame has person_id + all 5 entity ID columns + """ + person_data = pd.DataFrame(us_test_dataset.data.person) + group_entities = [ + "household", + "tax_unit", + "spm_unit", + "family", + "marital_unit", + ] + + result = build_entity_relationships(person_data, group_entities) + + expected_columns = { + "person_id", + "household_id", + "tax_unit_id", + "spm_unit_id", + "family_id", + "marital_unit_id", + } + assert set(result.columns) == expected_columns + + def test__given_uk_style_entities__then_returns_all_columns( + self, uk_test_dataset + ): + """Given: Person data with 2 group entities (UK style) + When: Building entity relationships + Then: DataFrame has person_id + 2 entity ID columns + """ + person_data = pd.DataFrame(uk_test_dataset.data.person) + group_entities = ["benunit", "household"] + + result = 
build_entity_relationships(person_data, group_entities)
+
+        expected_columns = {"person_id", "benunit_id", "household_id"}
+        assert set(result.columns) == expected_columns
+
+    def test__given_6_persons__then_returns_6_rows(self, us_test_dataset):
+        """Given: Dataset with 6 persons
+        When: Building entity relationships
+        Then: Result has 6 rows
+        """
+        person_data = pd.DataFrame(us_test_dataset.data.person)
+
+        result = build_entity_relationships(
+            person_data, ["household", "tax_unit"]
+        )
+
+        assert len(result) == 6
+
+    def test__given_prefixed_columns__then_resolves_correctly(self):
+        """Given: Person data with person_household_id naming convention
+        When: Building entity relationships
+        Then: Correctly maps to household_id in result
+        """
+        person_data = pd.DataFrame(
+            {
+                "person_id": [1, 2],
+                "person_household_id": [10, 20],
+            }
+        )
+
+        result = build_entity_relationships(person_data, ["household"])
+
+        assert list(result["household_id"]) == [10, 20]
+
+    def test__given_bare_columns__then_resolves_correctly(self):
+        """Given: Person data with household_id naming convention (no prefix)
+        When: Building entity relationships
+        Then: Correctly maps to household_id in result
+        """
+        person_data = pd.DataFrame(
+            {
+                "person_id": [1, 2],
+                "household_id": [10, 20],
+            }
+        )
+
+        result = build_entity_relationships(person_data, ["household"])
+
+        assert list(result["household_id"]) == [10, 20]
+
+
+class TestFilterDatasetByHouseholdVariable:
+    """Tests for the shared filter_dataset_by_household_variable function."""
+
+    def test__given_matching_value__then_returns_filtered_entities(self):
+        """Given: Dataset with 2 places
+        When: Filtering by place="A"
+        Then: Returns only matching households and related persons
+        """
+        entity_data = {
+            "person": MicroDataFrame(
+                pd.DataFrame(
+                    {
+                        "person_id": [1, 2, 3],
+                        "household_id": [1, 1, 2],
+                        "person_weight": [1.0, 1.0, 1.0],
+                    }
+                ),
+                weights="person_weight",
+            ),
+            "household": MicroDataFrame(
+                pd.DataFrame(
+ { + "household_id": [1, 2], + "household_weight": [1.0, 1.0], + "place": ["A", "B"], + } + ), + weights="household_weight", + ), + } + + result = filter_dataset_by_household_variable( + entity_data=entity_data, + group_entities=["household"], + variable_name="place", + variable_value="A", + ) + + assert len(pd.DataFrame(result["person"])) == 2 + assert len(pd.DataFrame(result["household"])) == 1 + + def test__given_no_match__then_raises_value_error(self): + """Given: Dataset with no matching households + When: Filtering + Then: Raises ValueError + """ + entity_data = { + "person": MicroDataFrame( + pd.DataFrame( + { + "person_id": [1], + "household_id": [1], + "person_weight": [1.0], + } + ), + weights="person_weight", + ), + "household": MicroDataFrame( + pd.DataFrame( + { + "household_id": [1], + "household_weight": [1.0], + "place": ["A"], + } + ), + weights="household_weight", + ), + } + + with pytest.raises(ValueError, match="No households found"): + filter_dataset_by_household_variable( + entity_data=entity_data, + group_entities=["household"], + variable_name="place", + variable_value="Z", + ) + + def test__given_missing_variable__then_raises_value_error(self): + """Given: Dataset without the filter variable + When: Filtering + Then: Raises ValueError + """ + entity_data = { + "person": MicroDataFrame( + pd.DataFrame( + { + "person_id": [1], + "household_id": [1], + "person_weight": [1.0], + } + ), + weights="person_weight", + ), + "household": MicroDataFrame( + pd.DataFrame( + { + "household_id": [1], + "household_weight": [1.0], + } + ), + weights="household_weight", + ), + } + + with pytest.raises(ValueError, match="not found in household data"): + filter_dataset_by_household_variable( + entity_data=entity_data, + group_entities=["household"], + variable_name="nonexistent", + variable_value="x", + ) + + +class TestBuildReformDict: + """Tests for build_reform_dict helper.""" + + def test__given_none__then_returns_none(self): + assert 
build_reform_dict(None) is None + + def test__given_no_parameter_values__then_returns_none(self): + from unittest.mock import MagicMock + + obj = MagicMock() + obj.parameter_values = [] + assert build_reform_dict(obj) is None + + def test__given_parameter_values__then_returns_reform_dict(self): + from datetime import datetime + from unittest.mock import MagicMock + + param = MagicMock() + param.name = "gov.test.param" + + pv = MagicMock() + pv.parameter = param + pv.value = 1000 + pv.start_date = datetime(2024, 1, 1) + pv.end_date = None + + obj = MagicMock() + obj.parameter_values = [pv] + + result = build_reform_dict(obj) + + assert result == {"gov.test.param": {"2024-01-01": 1000}} + + +class TestMergeReformDicts: + """Tests for merge_reform_dicts helper.""" + + def test__given_both_none__then_returns_none(self): + assert merge_reform_dicts(None, None) is None + + def test__given_base_none__then_returns_override(self): + override = {"param": {"2024-01-01": 100}} + assert merge_reform_dicts(None, override) is override + + def test__given_override_none__then_returns_base(self): + base = {"param": {"2024-01-01": 100}} + assert merge_reform_dicts(base, None) is base + + def test__given_both_dicts__then_merges_correctly(self): + base = {"param_a": {"2024-01-01": 100}} + override = {"param_b": {"2024-01-01": 200}} + + result = merge_reform_dicts(base, override) + + assert result == { + "param_a": {"2024-01-01": 100}, + "param_b": {"2024-01-01": 200}, + } + + def test__given_overlapping_params__then_override_wins(self): + base = {"param": {"2024-01-01": 100}} + override = {"param": {"2024-01-01": 999}} + + result = merge_reform_dicts(base, override) + + assert result == {"param": {"2024-01-01": 999}} + + def test__given_merge__then_does_not_mutate_base(self): + base = {"param": {"2024-01-01": 100}} + override = {"param": {"2024-01-01": 999}} + + merge_reform_dicts(base, override) + + assert base == {"param": {"2024-01-01": 100}}