From 69bd31814364e8bad53f20069deac79bca1205d0 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 10 Feb 2026 14:58:11 +0100 Subject: [PATCH 1/8] feat: Region filtering and calculations --- src/policyengine/core/__init__.py | 3 + src/policyengine/core/region.py | 201 ++++++++++ .../core/tax_benefit_model_version.py | 19 + src/policyengine/countries/__init__.py | 9 + src/policyengine/countries/uk/__init__.py | 5 + src/policyengine/countries/uk/regions.py | 176 +++++++++ src/policyengine/countries/us/__init__.py | 5 + .../countries/us/data/__init__.py | 18 + .../countries/us/data/districts.py | 64 ++++ src/policyengine/countries/us/data/places.py | 346 ++++++++++++++++++ src/policyengine/countries/us/data/states.py | 59 +++ src/policyengine/countries/us/regions.py | 106 ++++++ .../tax_benefit_models/uk/model.py | 5 + .../tax_benefit_models/us/model.py | 5 + tests/fixtures/region_fixtures.py | 127 +++++++ tests/test_models.py | 30 ++ tests/test_region.py | 246 +++++++++++++ tests/test_uk_regions.py | 227 ++++++++++++ tests/test_us_regions.py | 252 +++++++++++++ uv.lock | 22 +- 20 files changed, 1914 insertions(+), 11 deletions(-) create mode 100644 src/policyengine/core/region.py create mode 100644 src/policyengine/countries/__init__.py create mode 100644 src/policyengine/countries/uk/__init__.py create mode 100644 src/policyengine/countries/uk/regions.py create mode 100644 src/policyengine/countries/us/__init__.py create mode 100644 src/policyengine/countries/us/data/__init__.py create mode 100644 src/policyengine/countries/us/data/districts.py create mode 100644 src/policyengine/countries/us/data/places.py create mode 100644 src/policyengine/countries/us/data/states.py create mode 100644 src/policyengine/countries/us/regions.py create mode 100644 tests/fixtures/region_fixtures.py create mode 100644 tests/test_region.py create mode 100644 tests/test_uk_regions.py create mode 100644 tests/test_us_regions.py diff --git a/src/policyengine/core/__init__.py 
b/src/policyengine/core/__init__.py index 630620a0..fdd250ea 100644 --- a/src/policyengine/core/__init__.py +++ b/src/policyengine/core/__init__.py @@ -8,6 +8,9 @@ from .parameter import Parameter as Parameter from .parameter_value import ParameterValue as ParameterValue from .policy import Policy as Policy +from .region import Region as Region +from .region import RegionRegistry as RegionRegistry +from .region import RegionType as RegionType from .simulation import Simulation as Simulation from .tax_benefit_model import TaxBenefitModel as TaxBenefitModel from .tax_benefit_model_version import ( diff --git a/src/policyengine/core/region.py b/src/policyengine/core/region.py new file mode 100644 index 00000000..3208b35e --- /dev/null +++ b/src/policyengine/core/region.py @@ -0,0 +1,201 @@ +"""Region definitions for geographic simulations. + +This module provides the Region and RegionRegistry classes for defining +geographic regions that a tax-benefit model supports. Regions can have: +1. A dedicated dataset (e.g., US states, congressional districts) +2. Filter from a parent region's dataset (e.g., US places/cities, UK countries) +""" + +from typing import Literal + +from pydantic import BaseModel, Field, PrivateAttr + +# Region type literals for US and UK +USRegionType = Literal["national", "state", "congressional_district", "place"] +UKRegionType = Literal["national", "country", "constituency", "local_authority"] +RegionType = USRegionType | UKRegionType + + +class Region(BaseModel): + """Geographic region for tax-benefit simulations. + + Regions can either have: + 1. A dedicated dataset (dataset_path is set, requires_filter is False) + 2. 
Filter from a parent region's dataset (requires_filter is True) + + The unique identifier is the code field, which uses a prefixed format: + - National: "us", "uk" + - State: "state/ca", "state/ny" + - Congressional District: "congressional_district/CA-01" + - Place: "place/NJ-57000" + - UK Country: "country/england" + - Constituency: "constituency/Sheffield Central" + - Local Authority: "local_authority/E09000001" + """ + + # Core identification + code: str = Field( + ..., + description="Unique region code with type prefix (e.g., 'state/ca', 'place/NJ-57000')", + ) + label: str = Field(..., description="Human-readable label (e.g., 'California')") + region_type: RegionType = Field( + ..., description="Type of region (e.g., 'state', 'place')" + ) + + # Hierarchy + parent_code: str | None = Field( + default=None, + description="Code of parent region (e.g., 'us' for states, 'state/nj' for places in New Jersey)", + ) + + # Dataset configuration + dataset_path: str | None = Field( + default=None, + description="GCS path to dedicated dataset (e.g., 'gs://policyengine-us-data/states/CA.h5')", + ) + + # Filtering configuration (for regions that filter from parent datasets) + requires_filter: bool = Field( + default=False, + description="True if this region filters from a parent dataset rather than having its own", + ) + filter_field: str | None = Field( + default=None, + description="Dataset field to filter on (e.g., 'place_fips', 'country')", + ) + filter_value: str | None = Field( + default=None, + description="Value to match when filtering (defaults to code suffix if not set)", + ) + + # Metadata (primarily for US congressional districts) + state_code: str | None = Field( + default=None, description="Two-letter state code (e.g., 'CA', 'NJ')" + ) + state_name: str | None = Field( + default=None, description="Full state name (e.g., 'California', 'New Jersey')" + ) + + def __hash__(self) -> int: + """Hash by code for use in sets and dict keys.""" + return hash(self.code) + 
+ def __eq__(self, other: object) -> bool: + """Equality by code.""" + if not isinstance(other, Region): + return False + return self.code == other.code + + +class RegionRegistry(BaseModel): + """Registry of all regions for a country model. + + Provides indexed lookups for regions by code and type. + Indices are rebuilt automatically after initialization. + """ + + country_id: str = Field(..., description="Country identifier (e.g., 'us', 'uk')") + regions: list[Region] = Field(default_factory=list) + + # Private indexed lookups (excluded from serialization) + _by_code: dict[str, Region] = PrivateAttr(default_factory=dict) + _by_type: dict[str, list[Region]] = PrivateAttr(default_factory=dict) + + def model_post_init(self, __context: object) -> None: + """Build lookup indices after initialization.""" + self._rebuild_indices() + + def _rebuild_indices(self) -> None: + """Rebuild all lookup indices from the regions list.""" + self._by_code = {} + self._by_type = {} + + for region in self.regions: + # Index by code + self._by_code[region.code] = region + + # Index by type + if region.region_type not in self._by_type: + self._by_type[region.region_type] = [] + self._by_type[region.region_type].append(region) + + def add_region(self, region: Region) -> None: + """Add a region to the registry and update indices.""" + self.regions.append(region) + self._by_code[region.code] = region + if region.region_type not in self._by_type: + self._by_type[region.region_type] = [] + self._by_type[region.region_type].append(region) + + def get(self, code: str) -> Region | None: + """Get a region by its code. + + Args: + code: Region code (e.g., 'state/ca', 'place/NJ-57000') + + Returns: + The Region if found, None otherwise + """ + return self._by_code.get(code) + + def get_by_type(self, region_type: str) -> list[Region]: + """Get all regions of a given type. 
+ + Args: + region_type: Type to filter by (e.g., 'state', 'place') + + Returns: + List of regions with the given type + """ + return self._by_type.get(region_type, []) + + def get_national(self) -> Region | None: + """Get the national-level region. + + Returns: + The national Region if found, None otherwise + """ + national = self.get_by_type("national") + return national[0] if national else None + + def get_children(self, parent_code: str) -> list[Region]: + """Get all regions with a given parent code. + + Args: + parent_code: Parent region code to filter by + + Returns: + List of regions with the given parent + """ + return [r for r in self.regions if r.parent_code == parent_code] + + def get_dataset_regions(self) -> list[Region]: + """Get all regions that have dedicated datasets. + + Returns: + List of regions with dataset_path set and requires_filter False + """ + return [ + r for r in self.regions if r.dataset_path is not None and not r.requires_filter + ] + + def get_filter_regions(self) -> list[Region]: + """Get all regions that require filtering from parent datasets. 
+ + Returns: + List of regions with requires_filter True + """ + return [r for r in self.regions if r.requires_filter] + + def __len__(self) -> int: + """Return the number of regions in the registry.""" + return len(self.regions) + + def __iter__(self): + """Iterate over regions.""" + return iter(self.regions) + + def __contains__(self, code: str) -> bool: + """Check if a region code exists in the registry.""" + return code in self._by_code diff --git a/src/policyengine/core/tax_benefit_model_version.py b/src/policyengine/core/tax_benefit_model_version.py index be9d5af3..e74f82c1 100644 --- a/src/policyengine/core/tax_benefit_model_version.py +++ b/src/policyengine/core/tax_benefit_model_version.py @@ -9,6 +9,7 @@ if TYPE_CHECKING: from .parameter import Parameter from .parameter_value import ParameterValue + from .region import Region, RegionRegistry from .simulation import Simulation from .variable import Variable @@ -25,6 +26,11 @@ class TaxBenefitModelVersion(BaseModel): variables: list["Variable"] = Field(default_factory=list) parameters: list["Parameter"] = Field(default_factory=list) + # Region registry for geographic simulations + region_registry: "RegionRegistry | None" = Field( + default=None, description="Registry of supported geographic regions" + ) + @property def parameter_values(self) -> list["ParameterValue"]: """Aggregate all parameter values from all parameters.""" @@ -83,6 +89,19 @@ def get_variable(self, name: str) -> "Variable": f"Variable '{name}' not found in {self.model.id} version {self.version}" ) + def get_region(self, code: str) -> "Region | None": + """Get a region by its code. 
+ + Args: + code: Region code (e.g., 'state/ca', 'place/NJ-57000') + + Returns: + The Region if found, None if not found or no region registry + """ + if self.region_registry is None: + return None + return self.region_registry.get(code) + def __repr__(self) -> str: # Give the id and version, and the number of variables, parameters, parameter values return f"" diff --git a/src/policyengine/countries/__init__.py b/src/policyengine/countries/__init__.py new file mode 100644 index 00000000..3f647fd9 --- /dev/null +++ b/src/policyengine/countries/__init__.py @@ -0,0 +1,9 @@ +"""Country-specific region definitions. + +This package contains region registries for each supported country. +""" + +from .uk.regions import uk_region_registry +from .us.regions import us_region_registry + +__all__ = ["us_region_registry", "uk_region_registry"] diff --git a/src/policyengine/countries/uk/__init__.py b/src/policyengine/countries/uk/__init__.py new file mode 100644 index 00000000..b2c255d3 --- /dev/null +++ b/src/policyengine/countries/uk/__init__.py @@ -0,0 +1,5 @@ +"""UK country-specific region definitions.""" + +from .regions import uk_region_registry + +__all__ = ["uk_region_registry"] diff --git a/src/policyengine/countries/uk/regions.py b/src/policyengine/countries/uk/regions.py new file mode 100644 index 00000000..5e551755 --- /dev/null +++ b/src/policyengine/countries/uk/regions.py @@ -0,0 +1,176 @@ +"""UK region definitions. + +This module defines all UK geographic regions: +- National (1) +- Countries (4: England, Scotland, Wales, Northern Ireland) +- Constituencies (loaded from CSV at runtime) +- Local Authorities (loaded from CSV at runtime) + +Note: Constituencies and local authorities use weight adjustment rather than +data filtering. They modify household_weight based on pre-computed weights +from H5 files stored in GCS. 
+""" + +from pathlib import Path +from typing import TYPE_CHECKING + +from policyengine.core.region import Region, RegionRegistry + +if TYPE_CHECKING: + import pandas as pd + +UK_DATA_BUCKET = "gs://policyengine-uk-data-private" + +# UK countries +UK_COUNTRIES = { + "england": "England", + "scotland": "Scotland", + "wales": "Wales", + "northern_ireland": "Northern Ireland", +} + + +def _load_constituencies_from_csv() -> list[dict]: + """Load UK constituency data from CSV. + + Constituencies are loaded from: + gs://policyengine-uk-data-private/constituencies_2024.csv + + Returns: + List of dicts with 'code' and 'name' keys + """ + try: + from policyengine_core.tools.google_cloud import download + except ImportError: + # If policyengine_core is not available, return empty list + return [] + + try: + csv_path = download( + gcs_bucket="policyengine-uk-data-private", + gcs_key="constituencies_2024.csv", + ) + import pandas as pd + + df = pd.read_csv(csv_path) + return [{"code": row["code"], "name": row["name"]} for _, row in df.iterrows()] + except Exception: + # If download fails, return empty list + return [] + + +def _load_local_authorities_from_csv() -> list[dict]: + """Load UK local authority data from CSV. 
+ + Local authorities are loaded from: + gs://policyengine-uk-data-private/local_authorities_2021.csv + + Returns: + List of dicts with 'code' and 'name' keys + """ + try: + from policyengine_core.tools.google_cloud import download + except ImportError: + # If policyengine_core is not available, return empty list + return [] + + try: + csv_path = download( + gcs_bucket="policyengine-uk-data-private", + gcs_key="local_authorities_2021.csv", + ) + import pandas as pd + + df = pd.read_csv(csv_path) + return [{"code": row["code"], "name": row["name"]} for _, row in df.iterrows()] + except Exception: + # If download fails, return empty list + return [] + + +def build_uk_region_registry( + include_constituencies: bool = False, + include_local_authorities: bool = False, +) -> RegionRegistry: + """Build the UK region registry. + + Args: + include_constituencies: If True, load and include constituencies from CSV. + Defaults to False to avoid GCS dependency at import time. + include_local_authorities: If True, load and include local authorities from CSV. + Defaults to False to avoid GCS dependency at import time. + + Returns: + RegionRegistry containing: + - 1 national region + - 4 country regions + - Optionally: constituencies (if include_constituencies=True) + - Optionally: local authorities (if include_local_authorities=True) + """ + regions: list[Region] = [] + + # 1. National region (has dedicated dataset) + regions.append( + Region( + code="uk", + label="United Kingdom", + region_type="national", + dataset_path=f"{UK_DATA_BUCKET}/enhanced_frs_2023_24.h5", + ) + ) + + # 2. Country regions (filter from national by 'country' variable) + for code, name in UK_COUNTRIES.items(): + regions.append( + Region( + code=f"country/{code}", + label=name, + region_type="country", + parent_code="uk", + requires_filter=True, + filter_field="country", + filter_value=code.upper(), + ) + ) + + # 3. 
Constituencies (optional, loaded from CSV) + # Note: These use weight adjustment, not data filtering + if include_constituencies: + constituencies = _load_constituencies_from_csv() + for const in constituencies: + regions.append( + Region( + code=f"constituency/{const['code']}", + label=const["name"], + region_type="constituency", + parent_code="uk", + requires_filter=True, + filter_field="household_weight", # Uses weight adjustment + filter_value=const["code"], + ) + ) + + # 4. Local Authorities (optional, loaded from CSV) + # Note: These use weight adjustment, not data filtering + if include_local_authorities: + local_authorities = _load_local_authorities_from_csv() + for la in local_authorities: + regions.append( + Region( + code=f"local_authority/{la['code']}", + label=la["name"], + region_type="local_authority", + parent_code="uk", + requires_filter=True, + filter_field="household_weight", # Uses weight adjustment + filter_value=la["code"], + ) + ) + + return RegionRegistry(country_id="uk", regions=regions) + + +# Default registry with just core regions (national + countries) +# To get full registry with constituencies/LAs, call: +# build_uk_region_registry(include_constituencies=True, include_local_authorities=True) +uk_region_registry = build_uk_region_registry() diff --git a/src/policyengine/countries/us/__init__.py b/src/policyengine/countries/us/__init__.py new file mode 100644 index 00000000..68592459 --- /dev/null +++ b/src/policyengine/countries/us/__init__.py @@ -0,0 +1,5 @@ +"""US country-specific region definitions.""" + +from .regions import us_region_registry + +__all__ = ["us_region_registry"] diff --git a/src/policyengine/countries/us/data/__init__.py b/src/policyengine/countries/us/data/__init__.py new file mode 100644 index 00000000..fb833b64 --- /dev/null +++ b/src/policyengine/countries/us/data/__init__.py @@ -0,0 +1,18 @@ +"""US geographic data definitions. 
+ +This module provides static data for US geographic regions: +- states.py: State abbreviations and full names +- districts.py: Congressional district counts by state +- places.py: US Census places (cities/towns over 100K population) +""" + +from .districts import AT_LARGE_STATES, DISTRICT_COUNTS +from .places import US_PLACES +from .states import US_STATES + +__all__ = [ + "US_STATES", + "DISTRICT_COUNTS", + "AT_LARGE_STATES", + "US_PLACES", +] diff --git a/src/policyengine/countries/us/data/districts.py b/src/policyengine/countries/us/data/districts.py new file mode 100644 index 00000000..e77d5e62 --- /dev/null +++ b/src/policyengine/countries/us/data/districts.py @@ -0,0 +1,64 @@ +"""US congressional district definitions. + +Based on 2020 Census apportionment. +Total: 435 voting representatives + 1 DC non-voting delegate = 436 +""" + +# Congressional district counts by state (2020 Census apportionment) +# States with 1 district are "at-large" +DISTRICT_COUNTS: dict[str, int] = { + "AL": 7, + "AK": 1, + "AZ": 9, + "AR": 4, + "CA": 52, + "CO": 8, + "CT": 5, + "DE": 1, + "DC": 1, # Non-voting delegate + "FL": 28, + "GA": 14, + "HI": 2, + "ID": 2, + "IL": 17, + "IN": 9, + "IA": 4, + "KS": 4, + "KY": 6, + "LA": 6, + "ME": 2, + "MD": 8, + "MA": 9, + "MI": 13, + "MN": 8, + "MS": 4, + "MO": 8, + "MT": 2, + "NE": 3, + "NV": 4, + "NH": 2, + "NJ": 12, + "NM": 3, + "NY": 26, + "NC": 14, + "ND": 1, + "OH": 15, + "OK": 5, + "OR": 6, + "PA": 17, + "RI": 2, + "SC": 7, + "SD": 1, + "TN": 9, + "TX": 38, + "UT": 4, + "VT": 1, + "VA": 11, + "WA": 10, + "WV": 2, + "WI": 8, + "WY": 1, +} + +# States with at-large congressional districts (single representative) +AT_LARGE_STATES: set[str] = {"AK", "DE", "DC", "ND", "SD", "VT", "WY"} diff --git a/src/policyengine/countries/us/data/places.py b/src/policyengine/countries/us/data/places.py new file mode 100644 index 00000000..f5367eca --- /dev/null +++ b/src/policyengine/countries/us/data/places.py @@ -0,0 +1,346 @@ +"""US Census places 
with population over 100,000. + +Source: US Census Bureau Population Estimates 2023 +Synced with policyengine-app-v2 main branch. +""" + +# US cities/places with population over 100K (from Census data) +# These filter from their parent state's dataset using place_fips +# Total: 333 places +US_PLACES: list[dict[str, str]] = [ + {"fips": "03000", "name": "Anchorage", "state": "AK", "state_name": "Alaska"}, + {"fips": "07000", "name": "Birmingham", "state": "AL", "state_name": "Alabama"}, + {"fips": "37000", "name": "Huntsville", "state": "AL", "state_name": "Alabama"}, + {"fips": "50000", "name": "Mobile", "state": "AL", "state_name": "Alabama"}, + {"fips": "51000", "name": "Montgomery", "state": "AL", "state_name": "Alabama"}, + {"fips": "77256", "name": "Tuscaloosa", "state": "AL", "state_name": "Alabama"}, + {"fips": "23290", "name": "Fayetteville", "state": "AR", "state_name": "Arkansas"}, + {"fips": "41000", "name": "Little Rock", "state": "AR", "state_name": "Arkansas"}, + {"fips": "07940", "name": "Buckeye", "state": "AZ", "state_name": "Arizona"}, + {"fips": "12000", "name": "Chandler", "state": "AZ", "state_name": "Arizona"}, + {"fips": "27400", "name": "Gilbert", "state": "AZ", "state_name": "Arizona"}, + {"fips": "27820", "name": "Glendale", "state": "AZ", "state_name": "Arizona"}, + {"fips": "28380", "name": "Goodyear", "state": "AZ", "state_name": "Arizona"}, + {"fips": "46000", "name": "Mesa", "state": "AZ", "state_name": "Arizona"}, + {"fips": "54050", "name": "Peoria", "state": "AZ", "state_name": "Arizona"}, + {"fips": "55000", "name": "Phoenix", "state": "AZ", "state_name": "Arizona"}, + {"fips": "65000", "name": "Scottsdale", "state": "AZ", "state_name": "Arizona"}, + {"fips": "71510", "name": "Surprise", "state": "AZ", "state_name": "Arizona"}, + {"fips": "73000", "name": "Tempe", "state": "AZ", "state_name": "Arizona"}, + {"fips": "77000", "name": "Tucson", "state": "AZ", "state_name": "Arizona"}, + {"fips": "85540", "name": "Yuma", "state": 
"AZ", "state_name": "Arizona"}, + {"fips": "02000", "name": "Anaheim", "state": "CA", "state_name": "California"}, + {"fips": "02252", "name": "Antioch", "state": "CA", "state_name": "California"}, + {"fips": "03526", "name": "Bakersfield", "state": "CA", "state_name": "California"}, + {"fips": "06000", "name": "Berkeley", "state": "CA", "state_name": "California"}, + {"fips": "08954", "name": "Burbank", "state": "CA", "state_name": "California"}, + {"fips": "11194", "name": "Carlsbad", "state": "CA", "state_name": "California"}, + {"fips": "13014", "name": "Chico", "state": "CA", "state_name": "California"}, + {"fips": "13392", "name": "Chula Vista", "state": "CA", "state_name": "California"}, + {"fips": "14218", "name": "Clovis", "state": "CA", "state_name": "California"}, + {"fips": "16000", "name": "Concord", "state": "CA", "state_name": "California"}, + {"fips": "16350", "name": "Corona", "state": "CA", "state_name": "California"}, + {"fips": "16532", "name": "Costa Mesa", "state": "CA", "state_name": "California"}, + {"fips": "19766", "name": "Downey", "state": "CA", "state_name": "California"}, + {"fips": "21712", "name": "El Cajon", "state": "CA", "state_name": "California"}, + {"fips": "22230", "name": "El Monte", "state": "CA", "state_name": "California"}, + {"fips": "22020", "name": "Elk Grove", "state": "CA", "state_name": "California"}, + {"fips": "22804", "name": "Escondido", "state": "CA", "state_name": "California"}, + {"fips": "23182", "name": "Fairfield", "state": "CA", "state_name": "California"}, + {"fips": "24680", "name": "Fontana", "state": "CA", "state_name": "California"}, + {"fips": "26000", "name": "Fremont", "state": "CA", "state_name": "California"}, + {"fips": "27000", "name": "Fresno", "state": "CA", "state_name": "California"}, + {"fips": "28000", "name": "Fullerton", "state": "CA", "state_name": "California"}, + {"fips": "29000", "name": "Garden Grove", "state": "CA", "state_name": "California"}, + {"fips": "30000", "name": 
"Glendale", "state": "CA", "state_name": "California"}, + {"fips": "33000", "name": "Hayward", "state": "CA", "state_name": "California"}, + {"fips": "33434", "name": "Hesperia", "state": "CA", "state_name": "California"}, + {"fips": "36000", "name": "Huntington Beach", "state": "CA", "state_name": "California"}, + {"fips": "36546", "name": "Inglewood", "state": "CA", "state_name": "California"}, + {"fips": "36770", "name": "Irvine", "state": "CA", "state_name": "California"}, + {"fips": "37692", "name": "Jurupa Valley", "state": "CA", "state_name": "California"}, + {"fips": "40130", "name": "Lancaster", "state": "CA", "state_name": "California"}, + {"fips": "43000", "name": "Long Beach", "state": "CA", "state_name": "California"}, + {"fips": "44000", "name": "Los Angeles", "state": "CA", "state_name": "California"}, + {"fips": "46842", "name": "Menifee", "state": "CA", "state_name": "California"}, + {"fips": "48354", "name": "Modesto", "state": "CA", "state_name": "California"}, + {"fips": "49270", "name": "Moreno Valley", "state": "CA", "state_name": "California"}, + {"fips": "50076", "name": "Murrieta", "state": "CA", "state_name": "California"}, + {"fips": "53000", "name": "Oakland", "state": "CA", "state_name": "California"}, + {"fips": "53322", "name": "Oceanside", "state": "CA", "state_name": "California"}, + {"fips": "53896", "name": "Ontario", "state": "CA", "state_name": "California"}, + {"fips": "53980", "name": "Orange", "state": "CA", "state_name": "California"}, + {"fips": "54652", "name": "Oxnard", "state": "CA", "state_name": "California"}, + {"fips": "55156", "name": "Palmdale", "state": "CA", "state_name": "California"}, + {"fips": "56000", "name": "Pasadena", "state": "CA", "state_name": "California"}, + {"fips": "58072", "name": "Pomona", "state": "CA", "state_name": "California"}, + {"fips": "59451", "name": "Rancho Cucamonga", "state": "CA", "state_name": "California"}, + {"fips": "60466", "name": "Rialto", "state": "CA", "state_name": 
"California"}, + {"fips": "60620", "name": "Richmond", "state": "CA", "state_name": "California"}, + {"fips": "62000", "name": "Riverside", "state": "CA", "state_name": "California"}, + {"fips": "62938", "name": "Roseville", "state": "CA", "state_name": "California"}, + {"fips": "64000", "name": "Sacramento", "state": "CA", "state_name": "California"}, + {"fips": "64224", "name": "Salinas", "state": "CA", "state_name": "California"}, + {"fips": "65000", "name": "San Bernardino", "state": "CA", "state_name": "California"}, + {"fips": "66000", "name": "San Diego", "state": "CA", "state_name": "California"}, + {"fips": "67000", "name": "San Francisco", "state": "CA", "state_name": "California"}, + {"fips": "68000", "name": "San Jose", "state": "CA", "state_name": "California"}, + {"fips": "68252", "name": "San Mateo", "state": "CA", "state_name": "California"}, + {"fips": "69000", "name": "Santa Ana", "state": "CA", "state_name": "California"}, + {"fips": "69084", "name": "Santa Clara", "state": "CA", "state_name": "California"}, + {"fips": "69088", "name": "Santa Clarita", "state": "CA", "state_name": "California"}, + {"fips": "69196", "name": "Santa Maria", "state": "CA", "state_name": "California"}, + {"fips": "70098", "name": "Santa Rosa", "state": "CA", "state_name": "California"}, + {"fips": "72016", "name": "Simi Valley", "state": "CA", "state_name": "California"}, + {"fips": "75000", "name": "Stockton", "state": "CA", "state_name": "California"}, + {"fips": "77000", "name": "Sunnyvale", "state": "CA", "state_name": "California"}, + {"fips": "78120", "name": "Temecula", "state": "CA", "state_name": "California"}, + {"fips": "78582", "name": "Thousand Oaks", "state": "CA", "state_name": "California"}, + {"fips": "80000", "name": "Torrance", "state": "CA", "state_name": "California"}, + {"fips": "81554", "name": "Vacaville", "state": "CA", "state_name": "California"}, + {"fips": "81666", "name": "Vallejo", "state": "CA", "state_name": "California"}, + {"fips": 
"65042", "name": "Ventura", "state": "CA", "state_name": "California"}, + {"fips": "82590", "name": "Victorville", "state": "CA", "state_name": "California"}, + {"fips": "82954", "name": "Visalia", "state": "CA", "state_name": "California"}, + {"fips": "84200", "name": "West Covina", "state": "CA", "state_name": "California"}, + {"fips": "03455", "name": "Arvada", "state": "CO", "state_name": "Colorado"}, + {"fips": "04000", "name": "Aurora", "state": "CO", "state_name": "Colorado"}, + {"fips": "07850", "name": "Boulder", "state": "CO", "state_name": "Colorado"}, + {"fips": "12815", "name": "Centennial", "state": "CO", "state_name": "Colorado"}, + {"fips": "16000", "name": "Colorado Springs", "state": "CO", "state_name": "Colorado"}, + {"fips": "20000", "name": "Denver", "state": "CO", "state_name": "Colorado"}, + {"fips": "27425", "name": "Fort Collins", "state": "CO", "state_name": "Colorado"}, + {"fips": "32155", "name": "Greeley", "state": "CO", "state_name": "Colorado"}, + {"fips": "43000", "name": "Lakewood", "state": "CO", "state_name": "Colorado"}, + {"fips": "62000", "name": "Pueblo", "state": "CO", "state_name": "Colorado"}, + {"fips": "77290", "name": "Thornton", "state": "CO", "state_name": "Colorado"}, + {"fips": "83835", "name": "Westminster", "state": "CO", "state_name": "Colorado"}, + {"fips": "08000", "name": "Bridgeport", "state": "CT", "state_name": "Connecticut"}, + {"fips": "37000", "name": "Hartford", "state": "CT", "state_name": "Connecticut"}, + {"fips": "52000", "name": "New Haven", "state": "CT", "state_name": "Connecticut"}, + {"fips": "73000", "name": "Stamford", "state": "CT", "state_name": "Connecticut"}, + {"fips": "80000", "name": "Waterbury", "state": "CT", "state_name": "Connecticut"}, + {"fips": "50000", "name": "Washington", "state": "DC", "state_name": "District of Columbia"}, + {"fips": "10275", "name": "Cape Coral", "state": "FL", "state_name": "Florida"}, + {"fips": "12875", "name": "Clearwater", "state": "FL", "state_name": 
"Florida"}, + {"fips": "14400", "name": "Coral Springs", "state": "FL", "state_name": "Florida"}, + {"fips": "16475", "name": "Davie", "state": "FL", "state_name": "Florida"}, + {"fips": "24000", "name": "Fort Lauderdale", "state": "FL", "state_name": "Florida"}, + {"fips": "25175", "name": "Gainesville", "state": "FL", "state_name": "Florida"}, + {"fips": "30000", "name": "Hialeah", "state": "FL", "state_name": "Florida"}, + {"fips": "32000", "name": "Hollywood", "state": "FL", "state_name": "Florida"}, + {"fips": "35000", "name": "Jacksonville", "state": "FL", "state_name": "Florida"}, + {"fips": "38250", "name": "Lakeland", "state": "FL", "state_name": "Florida"}, + {"fips": "45060", "name": "Miami Gardens", "state": "FL", "state_name": "Florida"}, + {"fips": "45000", "name": "Miami", "state": "FL", "state_name": "Florida"}, + {"fips": "45975", "name": "Miramar", "state": "FL", "state_name": "Florida"}, + {"fips": "53000", "name": "Orlando", "state": "FL", "state_name": "Florida"}, + {"fips": "54000", "name": "Palm Bay", "state": "FL", "state_name": "Florida"}, + {"fips": "54200", "name": "Palm Coast", "state": "FL", "state_name": "Florida"}, + {"fips": "55775", "name": "Pembroke Pines", "state": "FL", "state_name": "Florida"}, + {"fips": "58050", "name": "Pompano Beach", "state": "FL", "state_name": "Florida"}, + {"fips": "58715", "name": "Port St. Lucie", "state": "FL", "state_name": "Florida"}, + {"fips": "63000", "name": "St. 
Petersburg", "state": "FL", "state_name": "Florida"}, + {"fips": "70600", "name": "Tallahassee", "state": "FL", "state_name": "Florida"}, + {"fips": "71000", "name": "Tampa", "state": "FL", "state_name": "Florida"}, + {"fips": "76600", "name": "West Palm Beach", "state": "FL", "state_name": "Florida"}, + {"fips": "03440", "name": "Athens-Clarke County", "state": "GA", "state_name": "Georgia"}, + {"fips": "04000", "name": "Atlanta", "state": "GA", "state_name": "Georgia"}, + {"fips": "04204", "name": "Augusta-Richmond County", "state": "GA", "state_name": "Georgia"}, + {"fips": "19000", "name": "Columbus", "state": "GA", "state_name": "Georgia"}, + {"fips": "49008", "name": "Macon-Bibb County", "state": "GA", "state_name": "Georgia"}, + {"fips": "68516", "name": "Sandy Springs", "state": "GA", "state_name": "Georgia"}, + {"fips": "69000", "name": "Savannah", "state": "GA", "state_name": "Georgia"}, + {"fips": "72122", "name": "South Fulton", "state": "GA", "state_name": "Georgia"}, + {"fips": "71550", "name": "Urban Honolulu", "state": "HI", "state_name": "Hawaii"}, + {"fips": "12000", "name": "Cedar Rapids", "state": "IA", "state_name": "Iowa"}, + {"fips": "19000", "name": "Davenport", "state": "IA", "state_name": "Iowa"}, + {"fips": "21000", "name": "Des Moines", "state": "IA", "state_name": "Iowa"}, + {"fips": "08830", "name": "Boise City", "state": "ID", "state_name": "Idaho"}, + {"fips": "52120", "name": "Meridian", "state": "ID", "state_name": "Idaho"}, + {"fips": "56260", "name": "Nampa", "state": "ID", "state_name": "Idaho"}, + {"fips": "03012", "name": "Aurora", "state": "IL", "state_name": "Illinois"}, + {"fips": "14000", "name": "Chicago", "state": "IL", "state_name": "Illinois"}, + {"fips": "23074", "name": "Elgin", "state": "IL", "state_name": "Illinois"}, + {"fips": "38570", "name": "Joliet", "state": "IL", "state_name": "Illinois"}, + {"fips": "51622", "name": "Naperville", "state": "IL", "state_name": "Illinois"}, + {"fips": "59000", "name": 
"Peoria", "state": "IL", "state_name": "Illinois"}, + {"fips": "65000", "name": "Rockford", "state": "IL", "state_name": "Illinois"}, + {"fips": "72000", "name": "Springfield", "state": "IL", "state_name": "Illinois"}, + {"fips": "10342", "name": "Carmel", "state": "IN", "state_name": "Indiana"}, + {"fips": "22000", "name": "Evansville", "state": "IN", "state_name": "Indiana"}, + {"fips": "23278", "name": "Fishers", "state": "IN", "state_name": "Indiana"}, + {"fips": "25000", "name": "Fort Wayne", "state": "IN", "state_name": "Indiana"}, + {"fips": "36003", "name": "Indianapolis", "state": "IN", "state_name": "Indiana"}, + {"fips": "71000", "name": "South Bend", "state": "IN", "state_name": "Indiana"}, + {"fips": "36000", "name": "Kansas City", "state": "KS", "state_name": "Kansas"}, + {"fips": "52575", "name": "Olathe", "state": "KS", "state_name": "Kansas"}, + {"fips": "53775", "name": "Overland Park", "state": "KS", "state_name": "Kansas"}, + {"fips": "71000", "name": "Topeka", "state": "KS", "state_name": "Kansas"}, + {"fips": "79000", "name": "Wichita", "state": "KS", "state_name": "Kansas"}, + {"fips": "46027", "name": "Lexington-Fayette", "state": "KY", "state_name": "Kentucky"}, + {"fips": "48006", "name": "Louisville/Jefferson County", "state": "KY", "state_name": "Kentucky"}, + {"fips": "05000", "name": "Baton Rouge", "state": "LA", "state_name": "Louisiana"}, + {"fips": "40735", "name": "Lafayette", "state": "LA", "state_name": "Louisiana"}, + {"fips": "55000", "name": "New Orleans", "state": "LA", "state_name": "Louisiana"}, + {"fips": "70000", "name": "Shreveport", "state": "LA", "state_name": "Louisiana"}, + {"fips": "07000", "name": "Boston", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "09000", "name": "Brockton", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "11000", "name": "Cambridge", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "37000", "name": "Lowell", "state": "MA", "state_name": "Massachusetts"}, + 
{"fips": "37490", "name": "Lynn", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "45000", "name": "New Bedford", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "55745", "name": "Quincy", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "67000", "name": "Springfield", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "82000", "name": "Worcester", "state": "MA", "state_name": "Massachusetts"}, + {"fips": "04000", "name": "Baltimore", "state": "MD", "state_name": "Maryland"}, + {"fips": "03000", "name": "Ann Arbor", "state": "MI", "state_name": "Michigan"}, + {"fips": "21000", "name": "Dearborn", "state": "MI", "state_name": "Michigan"}, + {"fips": "22000", "name": "Detroit", "state": "MI", "state_name": "Michigan"}, + {"fips": "34000", "name": "Grand Rapids", "state": "MI", "state_name": "Michigan"}, + {"fips": "46000", "name": "Lansing", "state": "MI", "state_name": "Michigan"}, + {"fips": "76460", "name": "Sterling Heights", "state": "MI", "state_name": "Michigan"}, + {"fips": "84000", "name": "Warren", "state": "MI", "state_name": "Michigan"}, + {"fips": "43000", "name": "Minneapolis", "state": "MN", "state_name": "Minnesota"}, + {"fips": "54880", "name": "Rochester", "state": "MN", "state_name": "Minnesota"}, + {"fips": "58000", "name": "St. Paul", "state": "MN", "state_name": "Minnesota"}, + {"fips": "15670", "name": "Columbia", "state": "MO", "state_name": "Missouri"}, + {"fips": "35000", "name": "Independence", "state": "MO", "state_name": "Missouri"}, + {"fips": "38000", "name": "Kansas City", "state": "MO", "state_name": "Missouri"}, + {"fips": "41348", "name": "Lee's Summit", "state": "MO", "state_name": "Missouri"}, + {"fips": "70000", "name": "Springfield", "state": "MO", "state_name": "Missouri"}, + {"fips": "65000", "name": "St. 
Louis", "state": "MO", "state_name": "Missouri"}, + {"fips": "36000", "name": "Jackson", "state": "MS", "state_name": "Mississippi"}, + {"fips": "06550", "name": "Billings", "state": "MT", "state_name": "Montana"}, + {"fips": "10740", "name": "Cary", "state": "NC", "state_name": "North Carolina"}, + {"fips": "12000", "name": "Charlotte", "state": "NC", "state_name": "North Carolina"}, + {"fips": "14100", "name": "Concord", "state": "NC", "state_name": "North Carolina"}, + {"fips": "19000", "name": "Durham", "state": "NC", "state_name": "North Carolina"}, + {"fips": "22920", "name": "Fayetteville", "state": "NC", "state_name": "North Carolina"}, + {"fips": "28000", "name": "Greensboro", "state": "NC", "state_name": "North Carolina"}, + {"fips": "31400", "name": "High Point", "state": "NC", "state_name": "North Carolina"}, + {"fips": "55000", "name": "Raleigh", "state": "NC", "state_name": "North Carolina"}, + {"fips": "74440", "name": "Wilmington", "state": "NC", "state_name": "North Carolina"}, + {"fips": "75000", "name": "Winston-Salem", "state": "NC", "state_name": "North Carolina"}, + {"fips": "25700", "name": "Fargo", "state": "ND", "state_name": "North Dakota"}, + {"fips": "28000", "name": "Lincoln", "state": "NE", "state_name": "Nebraska"}, + {"fips": "37000", "name": "Omaha", "state": "NE", "state_name": "Nebraska"}, + {"fips": "45140", "name": "Manchester", "state": "NH", "state_name": "New Hampshire"}, + {"fips": "21000", "name": "Elizabeth", "state": "NJ", "state_name": "New Jersey"}, + {"fips": "36000", "name": "Jersey City", "state": "NJ", "state_name": "New Jersey"}, + {"fips": "51000", "name": "Newark", "state": "NJ", "state_name": "New Jersey"}, + {"fips": "57000", "name": "Paterson", "state": "NJ", "state_name": "New Jersey"}, + {"fips": "02000", "name": "Albuquerque", "state": "NM", "state_name": "New Mexico"}, + {"fips": "39380", "name": "Las Cruces", "state": "NM", "state_name": "New Mexico"}, + {"fips": "63460", "name": "Rio Rancho", "state": 
"NM", "state_name": "New Mexico"}, + {"fips": "31900", "name": "Henderson", "state": "NV", "state_name": "Nevada"}, + {"fips": "40000", "name": "Las Vegas", "state": "NV", "state_name": "Nevada"}, + {"fips": "51800", "name": "North Las Vegas", "state": "NV", "state_name": "Nevada"}, + {"fips": "60600", "name": "Reno", "state": "NV", "state_name": "Nevada"}, + {"fips": "68400", "name": "Sparks", "state": "NV", "state_name": "Nevada"}, + {"fips": "01000", "name": "Albany", "state": "NY", "state_name": "New York"}, + {"fips": "11000", "name": "Buffalo", "state": "NY", "state_name": "New York"}, + {"fips": "51000", "name": "New York City", "state": "NY", "state_name": "New York"}, + {"fips": "63000", "name": "Rochester", "state": "NY", "state_name": "New York"}, + {"fips": "73000", "name": "Syracuse", "state": "NY", "state_name": "New York"}, + {"fips": "84000", "name": "Yonkers", "state": "NY", "state_name": "New York"}, + {"fips": "01000", "name": "Akron", "state": "OH", "state_name": "Ohio"}, + {"fips": "15000", "name": "Cincinnati", "state": "OH", "state_name": "Ohio"}, + {"fips": "16000", "name": "Cleveland", "state": "OH", "state_name": "Ohio"}, + {"fips": "18000", "name": "Columbus", "state": "OH", "state_name": "Ohio"}, + {"fips": "21000", "name": "Dayton", "state": "OH", "state_name": "Ohio"}, + {"fips": "77000", "name": "Toledo", "state": "OH", "state_name": "Ohio"}, + {"fips": "09050", "name": "Broken Arrow", "state": "OK", "state_name": "Oklahoma"}, + {"fips": "52500", "name": "Norman", "state": "OK", "state_name": "Oklahoma"}, + {"fips": "55000", "name": "Oklahoma City", "state": "OK", "state_name": "Oklahoma"}, + {"fips": "75000", "name": "Tulsa", "state": "OK", "state_name": "Oklahoma"}, + {"fips": "05800", "name": "Bend", "state": "OR", "state_name": "Oregon"}, + {"fips": "23850", "name": "Eugene", "state": "OR", "state_name": "Oregon"}, + {"fips": "31250", "name": "Gresham", "state": "OR", "state_name": "Oregon"}, + {"fips": "34100", "name": 
"Hillsboro", "state": "OR", "state_name": "Oregon"}, + {"fips": "59000", "name": "Portland", "state": "OR", "state_name": "Oregon"}, + {"fips": "64900", "name": "Salem", "state": "OR", "state_name": "Oregon"}, + {"fips": "02000", "name": "Allentown", "state": "PA", "state_name": "Pennsylvania"}, + {"fips": "60000", "name": "Philadelphia", "state": "PA", "state_name": "Pennsylvania"}, + {"fips": "61000", "name": "Pittsburgh", "state": "PA", "state_name": "Pennsylvania"}, + {"fips": "59000", "name": "Providence", "state": "RI", "state_name": "Rhode Island"}, + {"fips": "13330", "name": "Charleston", "state": "SC", "state_name": "South Carolina"}, + {"fips": "16000", "name": "Columbia", "state": "SC", "state_name": "South Carolina"}, + {"fips": "50875", "name": "North Charleston", "state": "SC", "state_name": "South Carolina"}, + {"fips": "59020", "name": "Sioux Falls", "state": "SD", "state_name": "South Dakota"}, + {"fips": "14000", "name": "Chattanooga", "state": "TN", "state_name": "Tennessee"}, + {"fips": "15160", "name": "Clarksville", "state": "TN", "state_name": "Tennessee"}, + {"fips": "40000", "name": "Knoxville", "state": "TN", "state_name": "Tennessee"}, + {"fips": "48000", "name": "Memphis", "state": "TN", "state_name": "Tennessee"}, + {"fips": "51560", "name": "Murfreesboro", "state": "TN", "state_name": "Tennessee"}, + +# Extracted 332 places + {"fips": "52006", "name": "Nashville-Davidson", "state": "TN", "state_name": "Tennessee"}, + {"fips": "01000", "name": "Abilene", "state": "TX", "state_name": "Texas"}, + {"fips": "01924", "name": "Allen", "state": "TX", "state_name": "Texas"}, + {"fips": "03000", "name": "Amarillo", "state": "TX", "state_name": "Texas"}, + {"fips": "04000", "name": "Arlington", "state": "TX", "state_name": "Texas"}, + {"fips": "05000", "name": "Austin", "state": "TX", "state_name": "Texas"}, + {"fips": "07000", "name": "Beaumont", "state": "TX", "state_name": "Texas"}, + {"fips": "10768", "name": "Brownsville", "state": "TX", 
"state_name": "Texas"}, + {"fips": "13024", "name": "Carrollton", "state": "TX", "state_name": "Texas"}, + {"fips": "15976", "name": "College Station", "state": "TX", "state_name": "Texas"}, + {"fips": "16432", "name": "Conroe", "state": "TX", "state_name": "Texas"}, + {"fips": "17000", "name": "Corpus Christi", "state": "TX", "state_name": "Texas"}, + {"fips": "19000", "name": "Dallas", "state": "TX", "state_name": "Texas"}, + {"fips": "19972", "name": "Denton", "state": "TX", "state_name": "Texas"}, + {"fips": "22660", "name": "Edinburg", "state": "TX", "state_name": "Texas"}, + {"fips": "24000", "name": "El Paso", "state": "TX", "state_name": "Texas"}, + {"fips": "27000", "name": "Fort Worth", "state": "TX", "state_name": "Texas"}, + {"fips": "27684", "name": "Frisco", "state": "TX", "state_name": "Texas"}, + {"fips": "29000", "name": "Garland", "state": "TX", "state_name": "Texas"}, + {"fips": "30464", "name": "Grand Prairie", "state": "TX", "state_name": "Texas"}, + {"fips": "35000", "name": "Houston", "state": "TX", "state_name": "Texas"}, + {"fips": "37000", "name": "Irving", "state": "TX", "state_name": "Texas"}, + {"fips": "39148", "name": "Killeen", "state": "TX", "state_name": "Texas"}, + {"fips": "41464", "name": "Laredo", "state": "TX", "state_name": "Texas"}, + {"fips": "41980", "name": "League City", "state": "TX", "state_name": "Texas"}, + {"fips": "42508", "name": "Lewisville", "state": "TX", "state_name": "Texas"}, + {"fips": "45000", "name": "Lubbock", "state": "TX", "state_name": "Texas"}, + {"fips": "45384", "name": "McAllen", "state": "TX", "state_name": "Texas"}, + {"fips": "45744", "name": "McKinney", "state": "TX", "state_name": "Texas"}, + {"fips": "47892", "name": "Mesquite", "state": "TX", "state_name": "Texas"}, + {"fips": "48072", "name": "Midland", "state": "TX", "state_name": "Texas"}, + {"fips": "50820", "name": "New Braunfels", "state": "TX", "state_name": "Texas"}, + {"fips": "53388", "name": "Odessa", "state": "TX", "state_name": 
"Texas"}, + {"fips": "56000", "name": "Pasadena", "state": "TX", "state_name": "Texas"}, + {"fips": "56348", "name": "Pearland", "state": "TX", "state_name": "Texas"}, + {"fips": "58016", "name": "Plano", "state": "TX", "state_name": "Texas"}, + {"fips": "61796", "name": "Richardson", "state": "TX", "state_name": "Texas"}, + {"fips": "63500", "name": "Round Rock", "state": "TX", "state_name": "Texas"}, + {"fips": "65000", "name": "San Antonio", "state": "TX", "state_name": "Texas"}, + {"fips": "70808", "name": "Sugar Land", "state": "TX", "state_name": "Texas"}, + {"fips": "74144", "name": "Tyler", "state": "TX", "state_name": "Texas"}, + {"fips": "76000", "name": "Waco", "state": "TX", "state_name": "Texas"}, + {"fips": "79000", "name": "Wichita Falls", "state": "TX", "state_name": "Texas"}, + {"fips": "62470", "name": "Provo", "state": "UT", "state_name": "Utah"}, + {"fips": "67000", "name": "Salt Lake City", "state": "UT", "state_name": "Utah"}, + {"fips": "65330", "name": "St. George", "state": "UT", "state_name": "Utah"}, + {"fips": "82950", "name": "West Jordan", "state": "UT", "state_name": "Utah"}, + {"fips": "83470", "name": "West Valley City", "state": "UT", "state_name": "Utah"}, + {"fips": "01000", "name": "Alexandria", "state": "VA", "state_name": "Virginia"}, + {"fips": "16000", "name": "Chesapeake", "state": "VA", "state_name": "Virginia"}, + {"fips": "35000", "name": "Hampton", "state": "VA", "state_name": "Virginia"}, + {"fips": "56000", "name": "Newport News", "state": "VA", "state_name": "Virginia"}, + {"fips": "57000", "name": "Norfolk", "state": "VA", "state_name": "Virginia"}, + {"fips": "67000", "name": "Richmond", "state": "VA", "state_name": "Virginia"}, + {"fips": "76432", "name": "Suffolk", "state": "VA", "state_name": "Virginia"}, + {"fips": "82000", "name": "Virginia Beach", "state": "VA", "state_name": "Virginia"}, + {"fips": "05210", "name": "Bellevue", "state": "WA", "state_name": "Washington"}, + {"fips": "22640", "name": "Everett", 
"state": "WA", "state_name": "Washington"}, + {"fips": "35415", "name": "Kent", "state": "WA", "state_name": "Washington"}, + {"fips": "57745", "name": "Renton", "state": "WA", "state_name": "Washington"}, + {"fips": "63000", "name": "Seattle", "state": "WA", "state_name": "Washington"}, + {"fips": "67167", "name": "Spokane Valley", "state": "WA", "state_name": "Washington"}, + {"fips": "67000", "name": "Spokane", "state": "WA", "state_name": "Washington"}, + {"fips": "70000", "name": "Tacoma", "state": "WA", "state_name": "Washington"}, + {"fips": "74060", "name": "Vancouver", "state": "WA", "state_name": "Washington"}, + {"fips": "31000", "name": "Green Bay", "state": "WI", "state_name": "Wisconsin"}, + {"fips": "48000", "name": "Madison", "state": "WI", "state_name": "Wisconsin"}, + {"fips": "53000", "name": "Milwaukee", "state": "WI", "state_name": "Wisconsin"}, +] diff --git a/src/policyengine/countries/us/data/states.py b/src/policyengine/countries/us/data/states.py new file mode 100644 index 00000000..1309201b --- /dev/null +++ b/src/policyengine/countries/us/data/states.py @@ -0,0 +1,59 @@ +"""US state definitions. + +All 50 states plus District of Columbia. 
+""" + +# All US states and territories with their full names +US_STATES: dict[str, str] = { + "AL": "Alabama", + "AK": "Alaska", + "AZ": "Arizona", + "AR": "Arkansas", + "CA": "California", + "CO": "Colorado", + "CT": "Connecticut", + "DE": "Delaware", + "DC": "District of Columbia", + "FL": "Florida", + "GA": "Georgia", + "HI": "Hawaii", + "ID": "Idaho", + "IL": "Illinois", + "IN": "Indiana", + "IA": "Iowa", + "KS": "Kansas", + "KY": "Kentucky", + "LA": "Louisiana", + "ME": "Maine", + "MD": "Maryland", + "MA": "Massachusetts", + "MI": "Michigan", + "MN": "Minnesota", + "MS": "Mississippi", + "MO": "Missouri", + "MT": "Montana", + "NE": "Nebraska", + "NV": "Nevada", + "NH": "New Hampshire", + "NJ": "New Jersey", + "NM": "New Mexico", + "NY": "New York", + "NC": "North Carolina", + "ND": "North Dakota", + "OH": "Ohio", + "OK": "Oklahoma", + "OR": "Oregon", + "PA": "Pennsylvania", + "RI": "Rhode Island", + "SC": "South Carolina", + "SD": "South Dakota", + "TN": "Tennessee", + "TX": "Texas", + "UT": "Utah", + "VT": "Vermont", + "VA": "Virginia", + "WA": "Washington", + "WV": "West Virginia", + "WI": "Wisconsin", + "WY": "Wyoming", +} diff --git a/src/policyengine/countries/us/regions.py b/src/policyengine/countries/us/regions.py new file mode 100644 index 00000000..6320578e --- /dev/null +++ b/src/policyengine/countries/us/regions.py @@ -0,0 +1,106 @@ +"""US region registry builder. 
+ +This module builds the complete US region registry from the data definitions +in the data/ subdirectory: +- data/states.py: State definitions +- data/districts.py: Congressional district counts +- data/places.py: Census places over 100K population +""" + +from policyengine.core.region import Region, RegionRegistry + +from .data import AT_LARGE_STATES, DISTRICT_COUNTS, US_PLACES, US_STATES + +US_DATA_BUCKET = "gs://policyengine-us-data" + + +def _ordinal(n: int) -> str: + """Return ordinal suffix for a number (1st, 2nd, 3rd, etc.).""" + if 11 <= n % 100 <= 13: + return f"{n}th" + return f"{n}" + {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th") + + +def build_us_region_registry() -> RegionRegistry: + """Build the complete US region registry. + + Returns: + RegionRegistry containing: + - 1 national region + - 51 state regions (50 states + DC) + - 436 congressional district regions (435 + DC delegate) + - 333 place/city regions (Census places over 100K population) + """ + regions: list[Region] = [] + + # 1. National region (has dedicated dataset) + regions.append( + Region( + code="us", + label="United States", + region_type="national", + dataset_path=f"{US_DATA_BUCKET}/enhanced_cps_2024.h5", + ) + ) + + # 2. State regions (each has dedicated dataset) + for abbrev, name in US_STATES.items(): + regions.append( + Region( + code=f"state/{abbrev.lower()}", + label=name, + region_type="state", + parent_code="us", + dataset_path=f"{US_DATA_BUCKET}/states/{abbrev}.h5", + state_code=abbrev, + state_name=name, + ) + ) + + # 3. 
Congressional district regions (each has dedicated dataset) + for state_abbrev, count in DISTRICT_COUNTS.items(): + state_name = US_STATES[state_abbrev] + for i in range(1, count + 1): + district_code = f"{state_abbrev}-{i:02d}" + + # Create appropriate label + if state_abbrev in AT_LARGE_STATES: + label = f"{state_name}'s at-large congressional district" + else: + label = f"{state_name}'s {_ordinal(i)} congressional district" + + regions.append( + Region( + code=f"congressional_district/{district_code}", + label=label, + region_type="congressional_district", + parent_code=f"state/{state_abbrev.lower()}", + dataset_path=f"{US_DATA_BUCKET}/districts/{district_code}.h5", + state_code=state_abbrev, + state_name=state_name, + ) + ) + + # 4. Place/city regions (filter from state datasets) + for place in US_PLACES: + state_abbrev = place["state"] + fips = place["fips"] + regions.append( + Region( + code=f"place/{state_abbrev}-{fips}", + label=place["name"], + region_type="place", + parent_code=f"state/{state_abbrev.lower()}", + requires_filter=True, + filter_field="place_fips", + filter_value=fips, + state_code=state_abbrev, + state_name=place["state_name"], + ) + ) + + return RegionRegistry(country_id="us", regions=regions) + + +# Singleton instance for import +us_region_registry = build_us_region_registry() diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index fac5b91f..71cf78dc 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -123,6 +123,11 @@ def __init__(self, **kwargs: dict): from policyengine_core.enums import Enum from policyengine_uk.system import system + # Attach region registry + from policyengine.countries.uk.regions import uk_region_registry + + self.region_registry = uk_region_registry + self.id = f"{self.model.id}@{self.version}" for var_obj in system.variables.values(): diff --git 
a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py index 487e4d51..b5191a19 100644 --- a/src/policyengine/tax_benefit_models/us/model.py +++ b/src/policyengine/tax_benefit_models/us/model.py @@ -116,6 +116,11 @@ def __init__(self, **kwargs: dict): from policyengine_core.enums import Enum from policyengine_us.system import system + # Attach region registry + from policyengine.countries.us.regions import us_region_registry + + self.region_registry = us_region_registry + self.id = f"{self.model.id}@{self.version}" for var_obj in system.variables.values(): diff --git a/tests/fixtures/region_fixtures.py b/tests/fixtures/region_fixtures.py new file mode 100644 index 00000000..ca1adfe2 --- /dev/null +++ b/tests/fixtures/region_fixtures.py @@ -0,0 +1,127 @@ +"""Fixtures for Region and RegionRegistry tests.""" + +import pytest + +from policyengine.core.region import Region, RegionRegistry + + +def create_national_region( + country_code: str = "us", + label: str = "United States", + dataset_path: str = "gs://policyengine-us-data/enhanced_cps_2024.h5", +) -> Region: + """Create a national region.""" + return Region( + code=country_code, + label=label, + region_type="national", + dataset_path=dataset_path, + ) + + +def create_state_region( + state_code: str, + state_name: str, + parent_code: str = "us", + bucket: str = "gs://policyengine-us-data", +) -> Region: + """Create a state region with dedicated dataset.""" + return Region( + code=f"state/{state_code.lower()}", + label=state_name, + region_type="state", + parent_code=parent_code, + dataset_path=f"{bucket}/states/{state_code}.h5", + state_code=state_code, + state_name=state_name, + ) + + +def create_place_region( + state_code: str, + fips: str, + name: str, + state_name: str, +) -> Region: + """Create a place region that filters from parent state.""" + return Region( + code=f"place/{state_code}-{fips}", + label=name, + region_type="place", + 
parent_code=f"state/{state_code.lower()}", + requires_filter=True, + filter_field="place_fips", + filter_value=fips, + state_code=state_code, + state_name=state_name, + ) + + +def create_sample_us_registry() -> RegionRegistry: + """Create a minimal US-like registry for testing. + + Contains: + - 1 national region (US) + - 2 state regions (CA, NY) + - 1 place region (Los Angeles) + """ + return RegionRegistry( + country_id="us", + regions=[ + create_national_region(), + create_state_region("CA", "California"), + create_state_region("NY", "New York"), + create_place_region("CA", "44000", "Los Angeles city", "California"), + ], + ) + + +# Pre-built fixtures for common test scenarios + +NATIONAL_US = create_national_region() + +STATE_CALIFORNIA = create_state_region("CA", "California") + +STATE_NEW_YORK = create_state_region("NY", "New York") + +PLACE_LOS_ANGELES = create_place_region("CA", "44000", "Los Angeles city", "California") + +SIMPLE_REGION = Region( + code="state/ca", + label="California", + region_type="state", +) + +REGION_WITH_DATASET = Region( + code="state/ca", + label="California", + region_type="state", + parent_code="us", + dataset_path="gs://policyengine-us-data/states/CA.h5", + state_code="CA", + state_name="California", +) + +FILTER_REGION = Region( + code="place/NJ-57000", + label="Paterson", + region_type="place", + parent_code="state/nj", + requires_filter=True, + filter_field="place_fips", + filter_value="57000", + state_code="NJ", + state_name="New Jersey", +) + + +@pytest.fixture +def sample_registry() -> RegionRegistry: + """Pytest fixture for a sample US-like registry.""" + return create_sample_us_registry() + + +@pytest.fixture +def empty_registry() -> RegionRegistry: + """Pytest fixture for an empty registry.""" + return RegionRegistry(country_id="test", regions=[]) diff --git a/tests/test_models.py b/tests/test_models.py index 3132abdf..e5b4484e 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -9,6 +9,21 @@ class 
TestUKModel: """Tests for PolicyEngine UK model.""" + def test_has_region_registry(self): + """UK model should have a region registry attached.""" + assert uk_latest.region_registry is not None + assert uk_latest.region_registry.country_id == "uk" + + def test_can_get_region_by_code(self): + """UK model should be able to look up regions by code.""" + uk = uk_latest.get_region("uk") + assert uk is not None + assert uk.label == "United Kingdom" + + england = uk_latest.get_region("country/england") + assert england is not None + assert england.label == "England" + def test_has_hundreds_of_parameters(self): """UK model should have hundreds of parameters.""" assert len(uk_latest.parameters) >= 100 @@ -65,6 +80,21 @@ def test__given_bracket_label__then_follows_expected_format(self): class TestUSModel: """Tests for PolicyEngine US model.""" + def test_has_region_registry(self): + """US model should have a region registry attached.""" + assert us_latest.region_registry is not None + assert us_latest.region_registry.country_id == "us" + + def test_can_get_region_by_code(self): + """US model should be able to look up regions by code.""" + us = us_latest.get_region("us") + assert us is not None + assert us.label == "United States" + + ca = us_latest.get_region("state/ca") + assert ca is not None + assert ca.label == "California" + def test_has_hundreds_of_parameters(self): """US model should have hundreds of parameters.""" assert len(us_latest.parameters) >= 100 diff --git a/tests/test_region.py b/tests/test_region.py new file mode 100644 index 00000000..6669ec1c --- /dev/null +++ b/tests/test_region.py @@ -0,0 +1,246 @@ +"""Tests for Region and RegionRegistry classes.""" + +import pytest + +from policyengine.core.region import Region, RegionRegistry + +from tests.fixtures.region_fixtures import ( + FILTER_REGION, + REGION_WITH_DATASET, + SIMPLE_REGION, + create_sample_us_registry, + create_state_region, + sample_registry, +) + + +class TestRegion: + """Tests for the Region 
class.""" + + def test__given_required_fields__then_region_created(self): + """Given: Required fields (code, label, region_type) + When: Creating a Region + Then: Region is created with those values + """ + # Given + code = "state/ca" + label = "California" + region_type = "state" + + # When + region = Region(code=code, label=label, region_type=region_type) + + # Then + assert region.code == code + assert region.label == label + assert region.region_type == region_type + + def test__given_dataset_path__then_region_has_dedicated_dataset(self): + """Given: Region with dataset_path specified + When: Creating the Region + Then: Region has dataset_path and requires_filter is False + """ + # Given (using fixture) + region = REGION_WITH_DATASET + + # Then + assert region.dataset_path == "gs://policyengine-us-data/states/CA.h5" + assert region.parent_code == "us" + assert region.state_code == "CA" + assert not region.requires_filter + + def test__given_filter_configuration__then_region_requires_filter(self): + """Given: Region with requires_filter=True and filter fields + When: Creating the Region + Then: Region is configured for filtering from parent + """ + # Given (using fixture) + region = FILTER_REGION + + # Then + assert region.requires_filter is True + assert region.filter_field == "place_fips" + assert region.filter_value == "57000" + + def test__given_same_codes__then_regions_are_equal(self): + """Given: Two regions with the same code + When: Comparing them + Then: They are equal regardless of other fields + """ + # Given + region1 = Region(code="state/ca", label="California", region_type="state") + region2 = Region(code="state/ca", label="California (different)", region_type="state") + region3 = Region(code="state/ny", label="New York", region_type="state") + + # Then + assert region1 == region2 + assert region1 != region3 + + def test__given_region__then_can_use_as_dict_key_or_in_set(self): + """Given: Multiple regions + When: Using them in sets or as dict keys 
+ Then: Regions with same code are deduplicated + """ + # Given + region1 = Region(code="state/ca", label="California", region_type="state") + region2 = Region(code="state/ca", label="California (duplicate)", region_type="state") + region3 = Region(code="state/ny", label="New York", region_type="state") + + # When + region_set = {region1, region2, region3} + region_dict = {region1: "first", region3: "third"} + + # Then + assert len(region_set) == 2 # region1 and region2 are same + assert region_dict[region2] == "first" # region2 == region1 + + +class TestRegionRegistry: + """Tests for the RegionRegistry class.""" + + def test__given_registry_with_regions__then_length_is_correct(self, sample_registry): + """Given: Registry with 4 regions + When: Checking length + Then: Length is 4 + """ + # Then + assert len(sample_registry) == 4 + + def test__given_registry__then_can_iterate_over_regions(self, sample_registry): + """Given: Registry with regions + When: Iterating + Then: All region codes are accessible + """ + # When + codes = [r.code for r in sample_registry] + + # Then + assert "us" in codes + assert "state/ca" in codes + assert "place/CA-44000" in codes + + def test__given_existing_code__then_code_is_in_registry(self, sample_registry): + """Given: Registry with state/ca + When: Checking if code exists + Then: Returns True for existing, False for missing + """ + # Then + assert "state/ca" in sample_registry + assert "state/tx" not in sample_registry + + def test__given_valid_code__then_get_returns_region(self, sample_registry): + """Given: Registry with state/ca + When: Getting by code + Then: Returns Region for existing, None for missing + """ + # When + ca = sample_registry.get("state/ca") + missing = sample_registry.get("state/tx") + + # Then + assert ca is not None + assert ca.label == "California" + assert missing is None + + def test__given_type__then_get_by_type_returns_matching_regions(self, sample_registry): + """Given: Registry with 2 states and 1 place 
+ When: Getting by type + Then: Returns correct regions for each type + """ + # When + states = sample_registry.get_by_type("state") + places = sample_registry.get_by_type("place") + counties = sample_registry.get_by_type("county") + + # Then + assert len(states) == 2 + assert all(r.region_type == "state" for r in states) + assert len(places) == 1 + assert counties == [] + + def test__given_registry__then_get_national_returns_national_region(self, sample_registry): + """Given: Registry with national region + When: Getting national + Then: Returns the national region + """ + # When + national = sample_registry.get_national() + + # Then + assert national is not None + assert national.code == "us" + assert national.region_type == "national" + + def test__given_parent_code__then_get_children_returns_child_regions(self, sample_registry): + """Given: Registry with states under "us" + When: Getting children of "us" + Then: Returns state regions + """ + # When + us_children = sample_registry.get_children("us") + ca_children = sample_registry.get_children("state/ca") + + # Then + assert len(us_children) == 2 # CA and NY states + assert len(ca_children) == 1 # Los Angeles place + assert ca_children[0].code == "place/CA-44000" + + def test__given_registry__then_get_dataset_regions_returns_regions_with_datasets( + self, sample_registry + ): + """Given: Registry with 3 dataset regions (US, CA, NY) + When: Getting dataset regions + Then: Returns only regions with dataset_path and no filter + """ + # When + dataset_regions = sample_registry.get_dataset_regions() + + # Then + assert len(dataset_regions) == 3 # us, ca, ny + assert all(r.dataset_path is not None for r in dataset_regions) + assert all(not r.requires_filter for r in dataset_regions) + + def test__given_registry__then_get_filter_regions_returns_regions_requiring_filter( + self, sample_registry + ): + """Given: Registry with 1 filter region (Los Angeles) + When: Getting filter regions + Then: Returns only regions with 
requires_filter=True + """ + # When + filter_regions = sample_registry.get_filter_regions() + + # Then + assert len(filter_regions) == 1 + assert filter_regions[0].code == "place/CA-44000" + + def test__given_registry__then_can_add_region_dynamically(self, sample_registry): + """Given: Registry with 4 regions + When: Adding a new region + Then: Registry contains 5 regions and new region is indexed + """ + # Given + new_region = create_state_region("TX", "Texas") + + # When + sample_registry.add_region(new_region) + + # Then + assert len(sample_registry) == 5 + assert "state/tx" in sample_registry + assert sample_registry.get("state/tx").label == "Texas" + assert len(sample_registry.get_by_type("state")) == 3 + + def test__given_empty_registry__then_lookups_return_empty_results(self): + """Given: Empty registry + When: Performing lookups + Then: Returns empty results without errors + """ + # Given + registry = RegionRegistry(country_id="test", regions=[]) + + # Then + assert len(registry) == 0 + assert registry.get("anything") is None + assert registry.get_national() is None + assert registry.get_by_type("state") == [] diff --git a/tests/test_uk_regions.py b/tests/test_uk_regions.py new file mode 100644 index 00000000..b13026f0 --- /dev/null +++ b/tests/test_uk_regions.py @@ -0,0 +1,227 @@ +"""Tests for UK region definitions.""" + +from policyengine.countries.uk.regions import ( + UK_COUNTRIES, + UK_DATA_BUCKET, + build_uk_region_registry, + uk_region_registry, +) + + +class TestUKCountries: + """Tests for UK country definitions.""" + + def test__given_uk_countries__then_has_four_entries(self): + """Given: UK_COUNTRIES dictionary + When: Checking length + Then: Contains 4 countries + """ + # Then + assert len(UK_COUNTRIES) == 4 + + def test__given_uk_countries__then_all_countries_present(self): + """Given: UK_COUNTRIES dictionary + When: Checking for countries + Then: England, Scotland, Wales, NI are all present + """ + # Then + assert "england" in UK_COUNTRIES + 
assert "scotland" in UK_COUNTRIES + assert "wales" in UK_COUNTRIES + assert "northern_ireland" in UK_COUNTRIES + + def test__given_uk_countries__then_labels_capitalized(self): + """Given: UK_COUNTRIES dictionary + When: Checking labels + Then: Labels are properly capitalized + """ + # Then + assert UK_COUNTRIES["england"] == "England" + assert UK_COUNTRIES["scotland"] == "Scotland" + assert UK_COUNTRIES["wales"] == "Wales" + assert UK_COUNTRIES["northern_ireland"] == "Northern Ireland" + + +class TestUKRegionRegistry: + """Tests for the UK region registry.""" + + def test__given_uk_registry__then_country_id_is_uk(self): + """Given: UK region registry + When: Checking country_id + Then: Value is "uk" + """ + # Then + assert uk_region_registry.country_id == "uk" + + def test__given_uk_registry__then_has_national_region(self): + """Given: UK region registry + When: Getting national region + Then: Returns UK with correct dataset path + """ + # When + national = uk_region_registry.get_national() + + # Then + assert national is not None + assert national.code == "uk" + assert national.label == "United Kingdom" + assert national.region_type == "national" + assert national.dataset_path == f"{UK_DATA_BUCKET}/enhanced_frs_2023_24.h5" + assert not national.requires_filter + + def test__given_uk_registry__then_has_four_country_regions(self): + """Given: UK region registry + When: Getting country regions + Then: Contains 4 countries + """ + # When + countries = uk_region_registry.get_by_type("country") + + # Then + assert len(countries) == 4 + + def test__given_england_region__then_filters_from_national(self): + """Given: England country region + When: Checking its properties + Then: Filters from national with country field + """ + # When + england = uk_region_registry.get("country/england") + + # Then + assert england is not None + assert england.label == "England" + assert england.region_type == "country" + assert england.parent_code == "uk" + assert england.requires_filter + 
assert england.filter_field == "country" + assert england.filter_value == "ENGLAND" + assert england.dataset_path is None + + def test__given_scotland_region__then_filters_from_national(self): + """Given: Scotland country region + When: Checking its properties + Then: Filters from national with correct value + """ + # When + scotland = uk_region_registry.get("country/scotland") + + # Then + assert scotland is not None + assert scotland.label == "Scotland" + assert scotland.requires_filter + assert scotland.filter_value == "SCOTLAND" + + def test__given_wales_region__then_filters_from_national(self): + """Given: Wales country region + When: Checking its properties + Then: Filters from national with correct value + """ + # When + wales = uk_region_registry.get("country/wales") + + # Then + assert wales is not None + assert wales.label == "Wales" + assert wales.requires_filter + assert wales.filter_value == "WALES" + + def test__given_northern_ireland_region__then_filters_from_national(self): + """Given: Northern Ireland country region + When: Checking its properties + Then: Filters from national with correct value + """ + # When + ni = uk_region_registry.get("country/northern_ireland") + + # Then + assert ni is not None + assert ni.label == "Northern Ireland" + assert ni.requires_filter + assert ni.filter_value == "NORTHERN_IRELAND" + + def test__given_uk_national__then_children_are_countries(self): + """Given: UK national region + When: Getting its children + Then: All children are country regions + """ + # When + uk_children = uk_region_registry.get_children("uk") + + # Then + assert len(uk_children) == 4 + assert all(c.region_type == "country" for c in uk_children) + + def test__given_uk_registry__then_only_national_has_dataset(self): + """Given: UK region registry + When: Getting dataset regions + Then: Only national has dedicated dataset + """ + # When + dataset_regions = uk_region_registry.get_dataset_regions() + + # Then + assert len(dataset_regions) == 1 + 
assert dataset_regions[0].code == "uk" + + def test__given_uk_registry__then_filter_regions_are_countries(self): + """Given: UK region registry + When: Getting filter regions + Then: All 4 countries require filter + """ + # When + filter_regions = uk_region_registry.get_filter_regions() + + # Then + assert len(filter_regions) == 4 + assert all(r.region_type == "country" for r in filter_regions) + + def test__given_default_registry__then_has_5_regions(self): + """Given: Default UK registry + When: Counting regions + Then: Contains 1 national + 4 countries = 5 + """ + # Then + assert len(uk_region_registry) == 5 + + +class TestUKRegionRegistryBuilder: + """Tests for UK registry builder with optional regions.""" + + def test__given_builder_without_optional_regions__then_returns_5_regions(self): + """Given: build_uk_region_registry with optional regions disabled + When: Building registry + Then: Returns 5 base regions only + """ + # When + registry = build_uk_region_registry( + include_constituencies=False, + include_local_authorities=False, + ) + + # Then + assert len(registry) == 5 # national + 4 countries + + def test__given_builder__then_accepts_include_constituencies_flag(self): + """Given: build_uk_region_registry + When: Passing include_constituencies=False + Then: Returns registry without constituencies + """ + # When + registry = build_uk_region_registry(include_constituencies=False) + + # Then + assert registry is not None + assert len(registry.get_by_type("constituency")) == 0 + + def test__given_builder__then_accepts_include_local_authorities_flag(self): + """Given: build_uk_region_registry + When: Passing include_local_authorities=False + Then: Returns registry without local authorities + """ + # When + registry = build_uk_region_registry(include_local_authorities=False) + + # Then + assert registry is not None + assert len(registry.get_by_type("local_authority")) == 0 diff --git a/tests/test_us_regions.py b/tests/test_us_regions.py new file mode 100644 
index 00000000..5f7d39b7 --- /dev/null +++ b/tests/test_us_regions.py @@ -0,0 +1,252 @@ +"""Tests for US region definitions.""" + +from policyengine.countries.us.data import AT_LARGE_STATES, DISTRICT_COUNTS, US_PLACES, US_STATES +from policyengine.countries.us.regions import US_DATA_BUCKET, us_region_registry + + +class TestUSStates: + """Tests for US state definitions.""" + + def test__given_us_states_dict__then_has_51_entries(self): + """Given: US_STATES dictionary + When: Checking length + Then: Contains 50 states + DC = 51 entries + """ + # Then + assert len(US_STATES) == 51 + + def test__given_us_states__then_includes_dc(self): + """Given: US_STATES dictionary + When: Looking for DC + Then: DC is present with full name + """ + # Then + assert "DC" in US_STATES + assert US_STATES["DC"] == "District of Columbia" + + def test__given_us_states__then_includes_major_states(self): + """Given: US_STATES dictionary + When: Checking for major states + Then: CA, TX, NY, FL are present + """ + # Then + assert "CA" in US_STATES + assert "TX" in US_STATES + assert "NY" in US_STATES + assert "FL" in US_STATES + + +class TestUSDistrictCounts: + """Tests for congressional district counts.""" + + def test__given_district_counts__then_every_state_has_count(self): + """Given: DISTRICT_COUNTS dictionary + When: Checking against US_STATES + Then: Every state has a district count + """ + # When/Then + for state in US_STATES: + assert state in DISTRICT_COUNTS, f"Missing district count for {state}" + + def test__given_district_counts__then_total_is_436(self): + """Given: DISTRICT_COUNTS dictionary + When: Summing all values + Then: Total is 435 voting + 1 DC delegate = 436 + """ + # When + total = sum(DISTRICT_COUNTS.values()) + + # Then + assert total == 436 + + def test__given_district_counts__then_dc_has_one(self): + """Given: DISTRICT_COUNTS for DC + When: Checking value + Then: DC has 1 at-large delegate + """ + # Then + assert DISTRICT_COUNTS["DC"] == 1 + + def 
test__given_district_counts__then_large_states_have_many(self): + """Given: DISTRICT_COUNTS for large states + When: Checking CA and TX + Then: CA >= 50, TX >= 35 (based on 2020 census) + """ + # Then + assert DISTRICT_COUNTS["CA"] >= 50 # CA has 52 + assert DISTRICT_COUNTS["TX"] >= 35 # TX has 38 + + +class TestUSRegionRegistry: + """Tests for the US region registry.""" + + def test__given_us_registry__then_country_id_is_us(self): + """Given: US region registry + When: Checking country_id + Then: Value is "us" + """ + # Then + assert us_region_registry.country_id == "us" + + def test__given_us_registry__then_has_national_region(self): + """Given: US region registry + When: Getting national region + Then: Returns US with correct dataset path + """ + # When + national = us_region_registry.get_national() + + # Then + assert national is not None + assert national.code == "us" + assert national.label == "United States" + assert national.region_type == "national" + assert national.dataset_path == f"{US_DATA_BUCKET}/enhanced_cps_2024.h5" + + def test__given_us_registry__then_has_51_states(self): + """Given: US region registry + When: Getting state regions + Then: Contains 51 states (including DC) + """ + # When + states = us_region_registry.get_by_type("state") + + # Then + assert len(states) == 51 + + def test__given_california_region__then_has_correct_format(self): + """Given: California state region + When: Checking its properties + Then: Has correct code, label, dataset path, and metadata + """ + # When + ca = us_region_registry.get("state/ca") + + # Then + assert ca is not None + assert ca.label == "California" + assert ca.region_type == "state" + assert ca.parent_code == "us" + assert ca.dataset_path == f"{US_DATA_BUCKET}/states/CA.h5" + assert ca.state_code == "CA" + assert ca.state_name == "California" + assert not ca.requires_filter + + def test__given_us_registry__then_has_436_congressional_districts(self): + """Given: US region registry + When: Getting 
congressional district regions + Then: Contains 436 districts + """ + # When + districts = us_region_registry.get_by_type("congressional_district") + + # Then + assert len(districts) == 436 + + def test__given_ca_first_district__then_has_correct_format(self): + """Given: California's 1st congressional district + When: Checking its properties + Then: Has correct code, label, and dataset path + """ + # When + ca01 = us_region_registry.get("congressional_district/CA-01") + + # Then + assert ca01 is not None + assert "California" in ca01.label + assert "1st" in ca01.label.lower() or "1 " in ca01.label + assert ca01.region_type == "congressional_district" + assert ca01.parent_code == "state/ca" + assert ca01.dataset_path == f"{US_DATA_BUCKET}/districts/CA-01.h5" + assert ca01.state_code == "CA" + assert not ca01.requires_filter + + def test__given_dc_district__then_is_at_large(self): + """Given: DC's congressional district + When: Checking its properties + Then: Is labeled as at-large + """ + # When + dc_al = us_region_registry.get("congressional_district/DC-01") + + # Then + assert dc_al is not None + assert dc_al.label == "District of Columbia's at-large congressional district" + assert dc_al.parent_code == "state/dc" + + def test__given_us_registry__then_has_places(self): + """Given: US region registry + When: Getting place regions + Then: Contains 100+ large cities + """ + # When + places = us_region_registry.get_by_type("place") + + # Then + assert len(places) >= 100 + + def test__given_los_angeles_region__then_has_correct_format(self): + """Given: Los Angeles place region + When: Checking its properties + Then: Requires filter with place_fips field + """ + # When + la = us_region_registry.get("place/CA-44000") + + # Then + assert la is not None + assert "Los Angeles" in la.label + assert la.region_type == "place" + assert la.parent_code == "state/ca" + assert la.requires_filter + assert la.filter_field == "place_fips" + assert la.filter_value == "44000" + assert 
la.state_code == "CA" + assert la.dataset_path is None # No dedicated dataset + + def test__given_california__then_children_include_districts_and_places(self): + """Given: California state region + When: Getting its children + Then: Includes all 52 districts and 10+ places + """ + # When + ca_children = us_region_registry.get_children("state/ca") + district_children = [c for c in ca_children if c.region_type == "congressional_district"] + place_children = [c for c in ca_children if c.region_type == "place"] + + # Then + assert len(district_children) == DISTRICT_COUNTS["CA"] + assert len(place_children) >= 10 # CA has many large cities + + def test__given_us_registry__then_dataset_regions_is_488(self): + """Given: US region registry + When: Getting regions with datasets + Then: Returns 1 national + 51 states + 436 districts = 488 + """ + # When + dataset_regions = us_region_registry.get_dataset_regions() + + # Then + assert len(dataset_regions) == 488 + + def test__given_us_registry__then_filter_regions_are_all_places(self): + """Given: US region registry + When: Getting regions requiring filter + Then: All are place regions + """ + # When + filter_regions = us_region_registry.get_filter_regions() + + # Then + assert all(r.region_type == "place" for r in filter_regions) + + def test__given_us_registry__then_total_exceeds_588(self): + """Given: US region registry + When: Counting all regions + Then: Total is at least 488 (dataset) + 100 (places) + """ + # When + total = len(us_region_registry) + + # Then + assert total >= 488 + 100 diff --git a/uv.lock b/uv.lock index 55fe0b9c..8cf942d2 100644 --- a/uv.lock +++ b/uv.lock @@ -775,15 +775,15 @@ wheels = [ [[package]] name = "microdf-python" -version = "1.0.2" +version = "1.2.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, { name = "pandas" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/77/25/55c2b0495ae4c3142d61f1283d675494aac4c254e40ecf1ea4b337a051c7/microdf_python-1.0.2.tar.gz", hash = "sha256:5c845974d485598a7002c151f58ec7438e94c04954fc8fdea9238265e7bf02f5", size = 14826, upload-time = "2025-07-24T12:21:08.17Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/96/6f9f37f79f2c6440d91036a7bf8111dd4b983c577a7e96d45bf3ca4171f3/microdf_python-1.2.1.tar.gz", hash = "sha256:d4f58e4e0c21decd0c6d425b115db8acc72751c558f48d2a1c3a6619f168a94a", size = 19641, upload-time = "2026-01-25T13:40:57.147Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9c/1a/aac40a7e58de4133a9cc7630913a8b8e6c76326288b168cbb47f7714c4fd/microdf_python-1.0.2-py3-none-any.whl", hash = "sha256:f7883785e4557d1c8822dbf0d69d7eeab9399f8e67a9bdb716f74554c7580ae7", size = 15823, upload-time = "2025-07-24T12:21:07.356Z" }, + { url = "https://files.pythonhosted.org/packages/cd/2e/375ab71f8d91b691597247b186a4d7b156d2ed975dfb00450e560beae747/microdf_python-1.2.1-py3-none-any.whl", hash = "sha256:3c3d318a82cba7db0ef5a72e8a73a6072fe0bc7a9cb59b1eac01a26ee8c82e7c", size = 20879, upload-time = "2026-01-25T13:40:55.877Z" }, ] [[package]] @@ -1080,7 +1080,7 @@ wheels = [ [[package]] name = "policyengine" -version = "3.1.14" +version = "3.1.16" source = { editable = "." 
} dependencies = [ { name = "microdf-python" }, @@ -1124,12 +1124,12 @@ requires-dist = [ { name = "furo", marker = "extra == 'dev'" }, { name = "itables", marker = "extra == 'dev'" }, { name = "jupyter-book", marker = "extra == 'dev'" }, - { name = "microdf-python" }, + { name = "microdf-python", specifier = ">=1.2.1" }, { name = "pandas", specifier = ">=2.0.0" }, { name = "plotly", specifier = ">=5.0.0" }, - { name = "policyengine-core", marker = "extra == 'dev'", specifier = ">=3.10" }, - { name = "policyengine-core", marker = "extra == 'uk'", specifier = ">=3.10" }, - { name = "policyengine-core", marker = "extra == 'us'", specifier = ">=3.10" }, + { name = "policyengine-core", marker = "extra == 'dev'", specifier = ">=3.23.6" }, + { name = "policyengine-core", marker = "extra == 'uk'", specifier = ">=3.23.6" }, + { name = "policyengine-core", marker = "extra == 'us'", specifier = ">=3.23.6" }, { name = "policyengine-uk", marker = "extra == 'dev'", specifier = ">=2.51.0" }, { name = "policyengine-uk", marker = "extra == 'uk'", specifier = ">=2.51.0" }, { name = "policyengine-us", marker = "extra == 'dev'", specifier = ">=1.213.1" }, @@ -1146,7 +1146,7 @@ provides-extras = ["uk", "us", "dev"] [[package]] name = "policyengine-core" -version = "3.20.0" +version = "3.23.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dpath" }, @@ -1166,9 +1166,9 @@ dependencies = [ { name = "standard-imghdr" }, { name = "wheel" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d3/d7/cd4ae165221b3d5630a5c95e6df0a10be06d461b6545552a5f4a11c40907/policyengine_core-3.20.0.tar.gz", hash = "sha256:10c428467c8629861986f356f7f13ff8bf23ec907961779cf9f6add63f147fdf", size = 159655, upload-time = "2025-08-12T15:54:35.437Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5d/de/5bc5b02626703ea7d288c84c474ec51e823aa726d55ebabafe7c85e7285f/policyengine_core-3.23.6.tar.gz", hash = 
"sha256:81bb4057f5d6380f2d7f1af2fe4932bd3bd37fdfda7b841f7ee38b30aa5cc8e6", size = 163499, upload-time = "2026-01-25T14:04:43.233Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/01/47/9cd4a2cfd675c5267dc905b2f23313b37df734f94f65490ca083422db39f/policyengine_core-3.20.0-py3-none-any.whl", hash = "sha256:c802edf10059242f7c03b54f7e8c78703ae053defcfe4ec75c677ed0714c07a6", size = 220871, upload-time = "2025-08-12T15:54:33.799Z" }, + { url = "https://files.pythonhosted.org/packages/82/7a/b47b239fb0a85a36b36b47e7665db981800fcac3384aeec6dadf92a9e548/policyengine_core-3.23.6-py3-none-any.whl", hash = "sha256:f0834107335de6f2452d39e53db7a72a57088ed26d3703a4c4eaded55a4e7bce", size = 225309, upload-time = "2026-01-25T14:04:41.844Z" }, ] [[package]] From 0c7e9eab7d2ab0205b0f1ee5f519c0b76a5194e0 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 10 Feb 2026 17:00:40 +0100 Subject: [PATCH 2/8] style: Fix linting and formatting issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove unused imports - Sort imports correctly - Format code with ruff 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/policyengine/core/region.py | 19 +- src/policyengine/countries/uk/regions.py | 13 +- src/policyengine/countries/us/data/places.py | 2061 +++++++++++++++--- src/policyengine/utils/parameter_labels.py | 11 +- tests/fixtures/region_fixtures.py | 8 +- tests/test_pandas3_compatibility.py | 1 + tests/test_parameter_labels.py | 50 +- tests/test_region.py | 62 +- tests/test_uk_regions.py | 9 +- tests/test_us_regions.py | 28 +- 10 files changed, 1913 insertions(+), 349 deletions(-) diff --git a/src/policyengine/core/region.py b/src/policyengine/core/region.py index 3208b35e..36cbc71e 100644 --- a/src/policyengine/core/region.py +++ b/src/policyengine/core/region.py @@ -12,7 +12,9 @@ # Region type literals for US and UK USRegionType = Literal["national", "state", 
"congressional_district", "place"] -UKRegionType = Literal["national", "country", "constituency", "local_authority"] +UKRegionType = Literal[ + "national", "country", "constituency", "local_authority" +] RegionType = USRegionType | UKRegionType @@ -38,7 +40,9 @@ class Region(BaseModel): ..., description="Unique region code with type prefix (e.g., 'state/ca', 'place/NJ-57000')", ) - label: str = Field(..., description="Human-readable label (e.g., 'California')") + label: str = Field( + ..., description="Human-readable label (e.g., 'California')" + ) region_type: RegionType = Field( ..., description="Type of region (e.g., 'state', 'place')" ) @@ -74,7 +78,8 @@ class Region(BaseModel): default=None, description="Two-letter state code (e.g., 'CA', 'NJ')" ) state_name: str | None = Field( - default=None, description="Full state name (e.g., 'California', 'New Jersey')" + default=None, + description="Full state name (e.g., 'California', 'New Jersey')", ) def __hash__(self) -> int: @@ -95,7 +100,9 @@ class RegionRegistry(BaseModel): Indices are rebuilt automatically after initialization. 
""" - country_id: str = Field(..., description="Country identifier (e.g., 'us', 'uk')") + country_id: str = Field( + ..., description="Country identifier (e.g., 'us', 'uk')" + ) regions: list[Region] = Field(default_factory=list) # Private indexed lookups (excluded from serialization) @@ -177,7 +184,9 @@ def get_dataset_regions(self) -> list[Region]: List of regions with dataset_path set and requires_filter False """ return [ - r for r in self.regions if r.dataset_path is not None and not r.requires_filter + r + for r in self.regions + if r.dataset_path is not None and not r.requires_filter ] def get_filter_regions(self) -> list[Region]: diff --git a/src/policyengine/countries/uk/regions.py b/src/policyengine/countries/uk/regions.py index 5e551755..340a29e8 100644 --- a/src/policyengine/countries/uk/regions.py +++ b/src/policyengine/countries/uk/regions.py @@ -11,13 +11,12 @@ from H5 files stored in GCS. """ -from pathlib import Path from typing import TYPE_CHECKING from policyengine.core.region import Region, RegionRegistry if TYPE_CHECKING: - import pandas as pd + pass UK_DATA_BUCKET = "gs://policyengine-uk-data-private" @@ -53,7 +52,10 @@ def _load_constituencies_from_csv() -> list[dict]: import pandas as pd df = pd.read_csv(csv_path) - return [{"code": row["code"], "name": row["name"]} for _, row in df.iterrows()] + return [ + {"code": row["code"], "name": row["name"]} + for _, row in df.iterrows() + ] except Exception: # If download fails, return empty list return [] @@ -82,7 +84,10 @@ def _load_local_authorities_from_csv() -> list[dict]: import pandas as pd df = pd.read_csv(csv_path) - return [{"code": row["code"], "name": row["name"]} for _, row in df.iterrows()] + return [ + {"code": row["code"], "name": row["name"]} + for _, row in df.iterrows() + ] except Exception: # If download fails, return empty list return [] diff --git a/src/policyengine/countries/us/data/places.py b/src/policyengine/countries/us/data/places.py index f5367eca..a5fe632f 100644 --- 
a/src/policyengine/countries/us/data/places.py +++ b/src/policyengine/countries/us/data/places.py @@ -8,339 +8,1808 @@ # These filter from their parent state's dataset using place_fips # Total: 333 places US_PLACES: list[dict[str, str]] = [ - {"fips": "03000", "name": "Anchorage", "state": "AK", "state_name": "Alaska"}, - {"fips": "07000", "name": "Birmingham", "state": "AL", "state_name": "Alabama"}, - {"fips": "37000", "name": "Huntsville", "state": "AL", "state_name": "Alabama"}, - {"fips": "50000", "name": "Mobile", "state": "AL", "state_name": "Alabama"}, - {"fips": "51000", "name": "Montgomery", "state": "AL", "state_name": "Alabama"}, - {"fips": "77256", "name": "Tuscaloosa", "state": "AL", "state_name": "Alabama"}, - {"fips": "23290", "name": "Fayetteville", "state": "AR", "state_name": "Arkansas"}, - {"fips": "41000", "name": "Little Rock", "state": "AR", "state_name": "Arkansas"}, - {"fips": "07940", "name": "Buckeye", "state": "AZ", "state_name": "Arizona"}, - {"fips": "12000", "name": "Chandler", "state": "AZ", "state_name": "Arizona"}, - {"fips": "27400", "name": "Gilbert", "state": "AZ", "state_name": "Arizona"}, - {"fips": "27820", "name": "Glendale", "state": "AZ", "state_name": "Arizona"}, - {"fips": "28380", "name": "Goodyear", "state": "AZ", "state_name": "Arizona"}, + { + "fips": "03000", + "name": "Anchorage", + "state": "AK", + "state_name": "Alaska", + }, + { + "fips": "07000", + "name": "Birmingham", + "state": "AL", + "state_name": "Alabama", + }, + { + "fips": "37000", + "name": "Huntsville", + "state": "AL", + "state_name": "Alabama", + }, + { + "fips": "50000", + "name": "Mobile", + "state": "AL", + "state_name": "Alabama", + }, + { + "fips": "51000", + "name": "Montgomery", + "state": "AL", + "state_name": "Alabama", + }, + { + "fips": "77256", + "name": "Tuscaloosa", + "state": "AL", + "state_name": "Alabama", + }, + { + "fips": "23290", + "name": "Fayetteville", + "state": "AR", + "state_name": "Arkansas", + }, + { + "fips": "41000", 
+ "name": "Little Rock", + "state": "AR", + "state_name": "Arkansas", + }, + { + "fips": "07940", + "name": "Buckeye", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "12000", + "name": "Chandler", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "27400", + "name": "Gilbert", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "27820", + "name": "Glendale", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "28380", + "name": "Goodyear", + "state": "AZ", + "state_name": "Arizona", + }, {"fips": "46000", "name": "Mesa", "state": "AZ", "state_name": "Arizona"}, - {"fips": "54050", "name": "Peoria", "state": "AZ", "state_name": "Arizona"}, - {"fips": "55000", "name": "Phoenix", "state": "AZ", "state_name": "Arizona"}, - {"fips": "65000", "name": "Scottsdale", "state": "AZ", "state_name": "Arizona"}, - {"fips": "71510", "name": "Surprise", "state": "AZ", "state_name": "Arizona"}, + { + "fips": "54050", + "name": "Peoria", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "55000", + "name": "Phoenix", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "65000", + "name": "Scottsdale", + "state": "AZ", + "state_name": "Arizona", + }, + { + "fips": "71510", + "name": "Surprise", + "state": "AZ", + "state_name": "Arizona", + }, {"fips": "73000", "name": "Tempe", "state": "AZ", "state_name": "Arizona"}, - {"fips": "77000", "name": "Tucson", "state": "AZ", "state_name": "Arizona"}, + { + "fips": "77000", + "name": "Tucson", + "state": "AZ", + "state_name": "Arizona", + }, {"fips": "85540", "name": "Yuma", "state": "AZ", "state_name": "Arizona"}, - {"fips": "02000", "name": "Anaheim", "state": "CA", "state_name": "California"}, - {"fips": "02252", "name": "Antioch", "state": "CA", "state_name": "California"}, - {"fips": "03526", "name": "Bakersfield", "state": "CA", "state_name": "California"}, - {"fips": "06000", "name": "Berkeley", "state": "CA", "state_name": "California"}, - {"fips": "08954", 
"name": "Burbank", "state": "CA", "state_name": "California"}, - {"fips": "11194", "name": "Carlsbad", "state": "CA", "state_name": "California"}, - {"fips": "13014", "name": "Chico", "state": "CA", "state_name": "California"}, - {"fips": "13392", "name": "Chula Vista", "state": "CA", "state_name": "California"}, - {"fips": "14218", "name": "Clovis", "state": "CA", "state_name": "California"}, - {"fips": "16000", "name": "Concord", "state": "CA", "state_name": "California"}, - {"fips": "16350", "name": "Corona", "state": "CA", "state_name": "California"}, - {"fips": "16532", "name": "Costa Mesa", "state": "CA", "state_name": "California"}, - {"fips": "19766", "name": "Downey", "state": "CA", "state_name": "California"}, - {"fips": "21712", "name": "El Cajon", "state": "CA", "state_name": "California"}, - {"fips": "22230", "name": "El Monte", "state": "CA", "state_name": "California"}, - {"fips": "22020", "name": "Elk Grove", "state": "CA", "state_name": "California"}, - {"fips": "22804", "name": "Escondido", "state": "CA", "state_name": "California"}, - {"fips": "23182", "name": "Fairfield", "state": "CA", "state_name": "California"}, - {"fips": "24680", "name": "Fontana", "state": "CA", "state_name": "California"}, - {"fips": "26000", "name": "Fremont", "state": "CA", "state_name": "California"}, - {"fips": "27000", "name": "Fresno", "state": "CA", "state_name": "California"}, - {"fips": "28000", "name": "Fullerton", "state": "CA", "state_name": "California"}, - {"fips": "29000", "name": "Garden Grove", "state": "CA", "state_name": "California"}, - {"fips": "30000", "name": "Glendale", "state": "CA", "state_name": "California"}, - {"fips": "33000", "name": "Hayward", "state": "CA", "state_name": "California"}, - {"fips": "33434", "name": "Hesperia", "state": "CA", "state_name": "California"}, - {"fips": "36000", "name": "Huntington Beach", "state": "CA", "state_name": "California"}, - {"fips": "36546", "name": "Inglewood", "state": "CA", "state_name": 
"California"}, - {"fips": "36770", "name": "Irvine", "state": "CA", "state_name": "California"}, - {"fips": "37692", "name": "Jurupa Valley", "state": "CA", "state_name": "California"}, - {"fips": "40130", "name": "Lancaster", "state": "CA", "state_name": "California"}, - {"fips": "43000", "name": "Long Beach", "state": "CA", "state_name": "California"}, - {"fips": "44000", "name": "Los Angeles", "state": "CA", "state_name": "California"}, - {"fips": "46842", "name": "Menifee", "state": "CA", "state_name": "California"}, - {"fips": "48354", "name": "Modesto", "state": "CA", "state_name": "California"}, - {"fips": "49270", "name": "Moreno Valley", "state": "CA", "state_name": "California"}, - {"fips": "50076", "name": "Murrieta", "state": "CA", "state_name": "California"}, - {"fips": "53000", "name": "Oakland", "state": "CA", "state_name": "California"}, - {"fips": "53322", "name": "Oceanside", "state": "CA", "state_name": "California"}, - {"fips": "53896", "name": "Ontario", "state": "CA", "state_name": "California"}, - {"fips": "53980", "name": "Orange", "state": "CA", "state_name": "California"}, - {"fips": "54652", "name": "Oxnard", "state": "CA", "state_name": "California"}, - {"fips": "55156", "name": "Palmdale", "state": "CA", "state_name": "California"}, - {"fips": "56000", "name": "Pasadena", "state": "CA", "state_name": "California"}, - {"fips": "58072", "name": "Pomona", "state": "CA", "state_name": "California"}, - {"fips": "59451", "name": "Rancho Cucamonga", "state": "CA", "state_name": "California"}, - {"fips": "60466", "name": "Rialto", "state": "CA", "state_name": "California"}, - {"fips": "60620", "name": "Richmond", "state": "CA", "state_name": "California"}, - {"fips": "62000", "name": "Riverside", "state": "CA", "state_name": "California"}, - {"fips": "62938", "name": "Roseville", "state": "CA", "state_name": "California"}, - {"fips": "64000", "name": "Sacramento", "state": "CA", "state_name": "California"}, - {"fips": "64224", "name": 
"Salinas", "state": "CA", "state_name": "California"}, - {"fips": "65000", "name": "San Bernardino", "state": "CA", "state_name": "California"}, - {"fips": "66000", "name": "San Diego", "state": "CA", "state_name": "California"}, - {"fips": "67000", "name": "San Francisco", "state": "CA", "state_name": "California"}, - {"fips": "68000", "name": "San Jose", "state": "CA", "state_name": "California"}, - {"fips": "68252", "name": "San Mateo", "state": "CA", "state_name": "California"}, - {"fips": "69000", "name": "Santa Ana", "state": "CA", "state_name": "California"}, - {"fips": "69084", "name": "Santa Clara", "state": "CA", "state_name": "California"}, - {"fips": "69088", "name": "Santa Clarita", "state": "CA", "state_name": "California"}, - {"fips": "69196", "name": "Santa Maria", "state": "CA", "state_name": "California"}, - {"fips": "70098", "name": "Santa Rosa", "state": "CA", "state_name": "California"}, - {"fips": "72016", "name": "Simi Valley", "state": "CA", "state_name": "California"}, - {"fips": "75000", "name": "Stockton", "state": "CA", "state_name": "California"}, - {"fips": "77000", "name": "Sunnyvale", "state": "CA", "state_name": "California"}, - {"fips": "78120", "name": "Temecula", "state": "CA", "state_name": "California"}, - {"fips": "78582", "name": "Thousand Oaks", "state": "CA", "state_name": "California"}, - {"fips": "80000", "name": "Torrance", "state": "CA", "state_name": "California"}, - {"fips": "81554", "name": "Vacaville", "state": "CA", "state_name": "California"}, - {"fips": "81666", "name": "Vallejo", "state": "CA", "state_name": "California"}, - {"fips": "65042", "name": "Ventura", "state": "CA", "state_name": "California"}, - {"fips": "82590", "name": "Victorville", "state": "CA", "state_name": "California"}, - {"fips": "82954", "name": "Visalia", "state": "CA", "state_name": "California"}, - {"fips": "84200", "name": "West Covina", "state": "CA", "state_name": "California"}, - {"fips": "03455", "name": "Arvada", "state": "CO", 
"state_name": "Colorado"}, - {"fips": "04000", "name": "Aurora", "state": "CO", "state_name": "Colorado"}, - {"fips": "07850", "name": "Boulder", "state": "CO", "state_name": "Colorado"}, - {"fips": "12815", "name": "Centennial", "state": "CO", "state_name": "Colorado"}, - {"fips": "16000", "name": "Colorado Springs", "state": "CO", "state_name": "Colorado"}, - {"fips": "20000", "name": "Denver", "state": "CO", "state_name": "Colorado"}, - {"fips": "27425", "name": "Fort Collins", "state": "CO", "state_name": "Colorado"}, - {"fips": "32155", "name": "Greeley", "state": "CO", "state_name": "Colorado"}, - {"fips": "43000", "name": "Lakewood", "state": "CO", "state_name": "Colorado"}, - {"fips": "62000", "name": "Pueblo", "state": "CO", "state_name": "Colorado"}, - {"fips": "77290", "name": "Thornton", "state": "CO", "state_name": "Colorado"}, - {"fips": "83835", "name": "Westminster", "state": "CO", "state_name": "Colorado"}, - {"fips": "08000", "name": "Bridgeport", "state": "CT", "state_name": "Connecticut"}, - {"fips": "37000", "name": "Hartford", "state": "CT", "state_name": "Connecticut"}, - {"fips": "52000", "name": "New Haven", "state": "CT", "state_name": "Connecticut"}, - {"fips": "73000", "name": "Stamford", "state": "CT", "state_name": "Connecticut"}, - {"fips": "80000", "name": "Waterbury", "state": "CT", "state_name": "Connecticut"}, - {"fips": "50000", "name": "Washington", "state": "DC", "state_name": "District of Columbia"}, - {"fips": "10275", "name": "Cape Coral", "state": "FL", "state_name": "Florida"}, - {"fips": "12875", "name": "Clearwater", "state": "FL", "state_name": "Florida"}, - {"fips": "14400", "name": "Coral Springs", "state": "FL", "state_name": "Florida"}, + { + "fips": "02000", + "name": "Anaheim", + "state": "CA", + "state_name": "California", + }, + { + "fips": "02252", + "name": "Antioch", + "state": "CA", + "state_name": "California", + }, + { + "fips": "03526", + "name": "Bakersfield", + "state": "CA", + "state_name": 
"California", + }, + { + "fips": "06000", + "name": "Berkeley", + "state": "CA", + "state_name": "California", + }, + { + "fips": "08954", + "name": "Burbank", + "state": "CA", + "state_name": "California", + }, + { + "fips": "11194", + "name": "Carlsbad", + "state": "CA", + "state_name": "California", + }, + { + "fips": "13014", + "name": "Chico", + "state": "CA", + "state_name": "California", + }, + { + "fips": "13392", + "name": "Chula Vista", + "state": "CA", + "state_name": "California", + }, + { + "fips": "14218", + "name": "Clovis", + "state": "CA", + "state_name": "California", + }, + { + "fips": "16000", + "name": "Concord", + "state": "CA", + "state_name": "California", + }, + { + "fips": "16350", + "name": "Corona", + "state": "CA", + "state_name": "California", + }, + { + "fips": "16532", + "name": "Costa Mesa", + "state": "CA", + "state_name": "California", + }, + { + "fips": "19766", + "name": "Downey", + "state": "CA", + "state_name": "California", + }, + { + "fips": "21712", + "name": "El Cajon", + "state": "CA", + "state_name": "California", + }, + { + "fips": "22230", + "name": "El Monte", + "state": "CA", + "state_name": "California", + }, + { + "fips": "22020", + "name": "Elk Grove", + "state": "CA", + "state_name": "California", + }, + { + "fips": "22804", + "name": "Escondido", + "state": "CA", + "state_name": "California", + }, + { + "fips": "23182", + "name": "Fairfield", + "state": "CA", + "state_name": "California", + }, + { + "fips": "24680", + "name": "Fontana", + "state": "CA", + "state_name": "California", + }, + { + "fips": "26000", + "name": "Fremont", + "state": "CA", + "state_name": "California", + }, + { + "fips": "27000", + "name": "Fresno", + "state": "CA", + "state_name": "California", + }, + { + "fips": "28000", + "name": "Fullerton", + "state": "CA", + "state_name": "California", + }, + { + "fips": "29000", + "name": "Garden Grove", + "state": "CA", + "state_name": "California", + }, + { + "fips": "30000", + "name": 
"Glendale", + "state": "CA", + "state_name": "California", + }, + { + "fips": "33000", + "name": "Hayward", + "state": "CA", + "state_name": "California", + }, + { + "fips": "33434", + "name": "Hesperia", + "state": "CA", + "state_name": "California", + }, + { + "fips": "36000", + "name": "Huntington Beach", + "state": "CA", + "state_name": "California", + }, + { + "fips": "36546", + "name": "Inglewood", + "state": "CA", + "state_name": "California", + }, + { + "fips": "36770", + "name": "Irvine", + "state": "CA", + "state_name": "California", + }, + { + "fips": "37692", + "name": "Jurupa Valley", + "state": "CA", + "state_name": "California", + }, + { + "fips": "40130", + "name": "Lancaster", + "state": "CA", + "state_name": "California", + }, + { + "fips": "43000", + "name": "Long Beach", + "state": "CA", + "state_name": "California", + }, + { + "fips": "44000", + "name": "Los Angeles", + "state": "CA", + "state_name": "California", + }, + { + "fips": "46842", + "name": "Menifee", + "state": "CA", + "state_name": "California", + }, + { + "fips": "48354", + "name": "Modesto", + "state": "CA", + "state_name": "California", + }, + { + "fips": "49270", + "name": "Moreno Valley", + "state": "CA", + "state_name": "California", + }, + { + "fips": "50076", + "name": "Murrieta", + "state": "CA", + "state_name": "California", + }, + { + "fips": "53000", + "name": "Oakland", + "state": "CA", + "state_name": "California", + }, + { + "fips": "53322", + "name": "Oceanside", + "state": "CA", + "state_name": "California", + }, + { + "fips": "53896", + "name": "Ontario", + "state": "CA", + "state_name": "California", + }, + { + "fips": "53980", + "name": "Orange", + "state": "CA", + "state_name": "California", + }, + { + "fips": "54652", + "name": "Oxnard", + "state": "CA", + "state_name": "California", + }, + { + "fips": "55156", + "name": "Palmdale", + "state": "CA", + "state_name": "California", + }, + { + "fips": "56000", + "name": "Pasadena", + "state": "CA", + "state_name": 
"California", + }, + { + "fips": "58072", + "name": "Pomona", + "state": "CA", + "state_name": "California", + }, + { + "fips": "59451", + "name": "Rancho Cucamonga", + "state": "CA", + "state_name": "California", + }, + { + "fips": "60466", + "name": "Rialto", + "state": "CA", + "state_name": "California", + }, + { + "fips": "60620", + "name": "Richmond", + "state": "CA", + "state_name": "California", + }, + { + "fips": "62000", + "name": "Riverside", + "state": "CA", + "state_name": "California", + }, + { + "fips": "62938", + "name": "Roseville", + "state": "CA", + "state_name": "California", + }, + { + "fips": "64000", + "name": "Sacramento", + "state": "CA", + "state_name": "California", + }, + { + "fips": "64224", + "name": "Salinas", + "state": "CA", + "state_name": "California", + }, + { + "fips": "65000", + "name": "San Bernardino", + "state": "CA", + "state_name": "California", + }, + { + "fips": "66000", + "name": "San Diego", + "state": "CA", + "state_name": "California", + }, + { + "fips": "67000", + "name": "San Francisco", + "state": "CA", + "state_name": "California", + }, + { + "fips": "68000", + "name": "San Jose", + "state": "CA", + "state_name": "California", + }, + { + "fips": "68252", + "name": "San Mateo", + "state": "CA", + "state_name": "California", + }, + { + "fips": "69000", + "name": "Santa Ana", + "state": "CA", + "state_name": "California", + }, + { + "fips": "69084", + "name": "Santa Clara", + "state": "CA", + "state_name": "California", + }, + { + "fips": "69088", + "name": "Santa Clarita", + "state": "CA", + "state_name": "California", + }, + { + "fips": "69196", + "name": "Santa Maria", + "state": "CA", + "state_name": "California", + }, + { + "fips": "70098", + "name": "Santa Rosa", + "state": "CA", + "state_name": "California", + }, + { + "fips": "72016", + "name": "Simi Valley", + "state": "CA", + "state_name": "California", + }, + { + "fips": "75000", + "name": "Stockton", + "state": "CA", + "state_name": "California", + }, + { 
+ "fips": "77000", + "name": "Sunnyvale", + "state": "CA", + "state_name": "California", + }, + { + "fips": "78120", + "name": "Temecula", + "state": "CA", + "state_name": "California", + }, + { + "fips": "78582", + "name": "Thousand Oaks", + "state": "CA", + "state_name": "California", + }, + { + "fips": "80000", + "name": "Torrance", + "state": "CA", + "state_name": "California", + }, + { + "fips": "81554", + "name": "Vacaville", + "state": "CA", + "state_name": "California", + }, + { + "fips": "81666", + "name": "Vallejo", + "state": "CA", + "state_name": "California", + }, + { + "fips": "65042", + "name": "Ventura", + "state": "CA", + "state_name": "California", + }, + { + "fips": "82590", + "name": "Victorville", + "state": "CA", + "state_name": "California", + }, + { + "fips": "82954", + "name": "Visalia", + "state": "CA", + "state_name": "California", + }, + { + "fips": "84200", + "name": "West Covina", + "state": "CA", + "state_name": "California", + }, + { + "fips": "03455", + "name": "Arvada", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "04000", + "name": "Aurora", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "07850", + "name": "Boulder", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "12815", + "name": "Centennial", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "16000", + "name": "Colorado Springs", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "20000", + "name": "Denver", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "27425", + "name": "Fort Collins", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "32155", + "name": "Greeley", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "43000", + "name": "Lakewood", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "62000", + "name": "Pueblo", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "77290", + "name": "Thornton", + "state": "CO", + 
"state_name": "Colorado", + }, + { + "fips": "83835", + "name": "Westminster", + "state": "CO", + "state_name": "Colorado", + }, + { + "fips": "08000", + "name": "Bridgeport", + "state": "CT", + "state_name": "Connecticut", + }, + { + "fips": "37000", + "name": "Hartford", + "state": "CT", + "state_name": "Connecticut", + }, + { + "fips": "52000", + "name": "New Haven", + "state": "CT", + "state_name": "Connecticut", + }, + { + "fips": "73000", + "name": "Stamford", + "state": "CT", + "state_name": "Connecticut", + }, + { + "fips": "80000", + "name": "Waterbury", + "state": "CT", + "state_name": "Connecticut", + }, + { + "fips": "50000", + "name": "Washington", + "state": "DC", + "state_name": "District of Columbia", + }, + { + "fips": "10275", + "name": "Cape Coral", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "12875", + "name": "Clearwater", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "14400", + "name": "Coral Springs", + "state": "FL", + "state_name": "Florida", + }, {"fips": "16475", "name": "Davie", "state": "FL", "state_name": "Florida"}, - {"fips": "24000", "name": "Fort Lauderdale", "state": "FL", "state_name": "Florida"}, - {"fips": "25175", "name": "Gainesville", "state": "FL", "state_name": "Florida"}, - {"fips": "30000", "name": "Hialeah", "state": "FL", "state_name": "Florida"}, - {"fips": "32000", "name": "Hollywood", "state": "FL", "state_name": "Florida"}, - {"fips": "35000", "name": "Jacksonville", "state": "FL", "state_name": "Florida"}, - {"fips": "38250", "name": "Lakeland", "state": "FL", "state_name": "Florida"}, - {"fips": "45060", "name": "Miami Gardens", "state": "FL", "state_name": "Florida"}, + { + "fips": "24000", + "name": "Fort Lauderdale", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "25175", + "name": "Gainesville", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "30000", + "name": "Hialeah", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "32000", 
+ "name": "Hollywood", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "35000", + "name": "Jacksonville", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "38250", + "name": "Lakeland", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "45060", + "name": "Miami Gardens", + "state": "FL", + "state_name": "Florida", + }, {"fips": "45000", "name": "Miami", "state": "FL", "state_name": "Florida"}, - {"fips": "45975", "name": "Miramar", "state": "FL", "state_name": "Florida"}, - {"fips": "53000", "name": "Orlando", "state": "FL", "state_name": "Florida"}, - {"fips": "54000", "name": "Palm Bay", "state": "FL", "state_name": "Florida"}, - {"fips": "54200", "name": "Palm Coast", "state": "FL", "state_name": "Florida"}, - {"fips": "55775", "name": "Pembroke Pines", "state": "FL", "state_name": "Florida"}, - {"fips": "58050", "name": "Pompano Beach", "state": "FL", "state_name": "Florida"}, - {"fips": "58715", "name": "Port St. Lucie", "state": "FL", "state_name": "Florida"}, - {"fips": "63000", "name": "St. Petersburg", "state": "FL", "state_name": "Florida"}, - {"fips": "70600", "name": "Tallahassee", "state": "FL", "state_name": "Florida"}, + { + "fips": "45975", + "name": "Miramar", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "53000", + "name": "Orlando", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "54000", + "name": "Palm Bay", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "54200", + "name": "Palm Coast", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "55775", + "name": "Pembroke Pines", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "58050", + "name": "Pompano Beach", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "58715", + "name": "Port St. Lucie", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "63000", + "name": "St. 
Petersburg", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "70600", + "name": "Tallahassee", + "state": "FL", + "state_name": "Florida", + }, {"fips": "71000", "name": "Tampa", "state": "FL", "state_name": "Florida"}, - {"fips": "76600", "name": "West Palm Beach", "state": "FL", "state_name": "Florida"}, - {"fips": "03440", "name": "Athens-Clarke County", "state": "GA", "state_name": "Georgia"}, - {"fips": "04000", "name": "Atlanta", "state": "GA", "state_name": "Georgia"}, - {"fips": "04204", "name": "Augusta-Richmond County", "state": "GA", "state_name": "Georgia"}, - {"fips": "19000", "name": "Columbus", "state": "GA", "state_name": "Georgia"}, - {"fips": "49008", "name": "Macon-Bibb County", "state": "GA", "state_name": "Georgia"}, - {"fips": "68516", "name": "Sandy Springs", "state": "GA", "state_name": "Georgia"}, - {"fips": "69000", "name": "Savannah", "state": "GA", "state_name": "Georgia"}, - {"fips": "72122", "name": "South Fulton", "state": "GA", "state_name": "Georgia"}, - {"fips": "71550", "name": "Urban Honolulu", "state": "HI", "state_name": "Hawaii"}, - {"fips": "12000", "name": "Cedar Rapids", "state": "IA", "state_name": "Iowa"}, - {"fips": "19000", "name": "Davenport", "state": "IA", "state_name": "Iowa"}, - {"fips": "21000", "name": "Des Moines", "state": "IA", "state_name": "Iowa"}, - {"fips": "08830", "name": "Boise City", "state": "ID", "state_name": "Idaho"}, - {"fips": "52120", "name": "Meridian", "state": "ID", "state_name": "Idaho"}, + { + "fips": "76600", + "name": "West Palm Beach", + "state": "FL", + "state_name": "Florida", + }, + { + "fips": "03440", + "name": "Athens-Clarke County", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "04000", + "name": "Atlanta", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "04204", + "name": "Augusta-Richmond County", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "19000", + "name": "Columbus", + "state": "GA", + "state_name": "Georgia", 
+ }, + { + "fips": "49008", + "name": "Macon-Bibb County", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "68516", + "name": "Sandy Springs", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "69000", + "name": "Savannah", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "72122", + "name": "South Fulton", + "state": "GA", + "state_name": "Georgia", + }, + { + "fips": "71550", + "name": "Urban Honolulu", + "state": "HI", + "state_name": "Hawaii", + }, + { + "fips": "12000", + "name": "Cedar Rapids", + "state": "IA", + "state_name": "Iowa", + }, + { + "fips": "19000", + "name": "Davenport", + "state": "IA", + "state_name": "Iowa", + }, + { + "fips": "21000", + "name": "Des Moines", + "state": "IA", + "state_name": "Iowa", + }, + { + "fips": "08830", + "name": "Boise City", + "state": "ID", + "state_name": "Idaho", + }, + { + "fips": "52120", + "name": "Meridian", + "state": "ID", + "state_name": "Idaho", + }, {"fips": "56260", "name": "Nampa", "state": "ID", "state_name": "Idaho"}, - {"fips": "03012", "name": "Aurora", "state": "IL", "state_name": "Illinois"}, - {"fips": "14000", "name": "Chicago", "state": "IL", "state_name": "Illinois"}, - {"fips": "23074", "name": "Elgin", "state": "IL", "state_name": "Illinois"}, - {"fips": "38570", "name": "Joliet", "state": "IL", "state_name": "Illinois"}, - {"fips": "51622", "name": "Naperville", "state": "IL", "state_name": "Illinois"}, - {"fips": "59000", "name": "Peoria", "state": "IL", "state_name": "Illinois"}, - {"fips": "65000", "name": "Rockford", "state": "IL", "state_name": "Illinois"}, - {"fips": "72000", "name": "Springfield", "state": "IL", "state_name": "Illinois"}, - {"fips": "10342", "name": "Carmel", "state": "IN", "state_name": "Indiana"}, - {"fips": "22000", "name": "Evansville", "state": "IN", "state_name": "Indiana"}, - {"fips": "23278", "name": "Fishers", "state": "IN", "state_name": "Indiana"}, - {"fips": "25000", "name": "Fort Wayne", "state": "IN", 
"state_name": "Indiana"}, - {"fips": "36003", "name": "Indianapolis", "state": "IN", "state_name": "Indiana"}, - {"fips": "71000", "name": "South Bend", "state": "IN", "state_name": "Indiana"}, - {"fips": "36000", "name": "Kansas City", "state": "KS", "state_name": "Kansas"}, + { + "fips": "03012", + "name": "Aurora", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "14000", + "name": "Chicago", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "23074", + "name": "Elgin", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "38570", + "name": "Joliet", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "51622", + "name": "Naperville", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "59000", + "name": "Peoria", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "65000", + "name": "Rockford", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "72000", + "name": "Springfield", + "state": "IL", + "state_name": "Illinois", + }, + { + "fips": "10342", + "name": "Carmel", + "state": "IN", + "state_name": "Indiana", + }, + { + "fips": "22000", + "name": "Evansville", + "state": "IN", + "state_name": "Indiana", + }, + { + "fips": "23278", + "name": "Fishers", + "state": "IN", + "state_name": "Indiana", + }, + { + "fips": "25000", + "name": "Fort Wayne", + "state": "IN", + "state_name": "Indiana", + }, + { + "fips": "36003", + "name": "Indianapolis", + "state": "IN", + "state_name": "Indiana", + }, + { + "fips": "71000", + "name": "South Bend", + "state": "IN", + "state_name": "Indiana", + }, + { + "fips": "36000", + "name": "Kansas City", + "state": "KS", + "state_name": "Kansas", + }, {"fips": "52575", "name": "Olathe", "state": "KS", "state_name": "Kansas"}, - {"fips": "53775", "name": "Overland Park", "state": "KS", "state_name": "Kansas"}, + { + "fips": "53775", + "name": "Overland Park", + "state": "KS", + "state_name": "Kansas", + }, {"fips": "71000", "name": "Topeka", 
"state": "KS", "state_name": "Kansas"}, - {"fips": "79000", "name": "Wichita", "state": "KS", "state_name": "Kansas"}, - {"fips": "46027", "name": "Lexington-Fayette", "state": "KY", "state_name": "Kentucky"}, - {"fips": "48006", "name": "Louisville/Jefferson County", "state": "KY", "state_name": "Kentucky"}, - {"fips": "05000", "name": "Baton Rouge", "state": "LA", "state_name": "Louisiana"}, - {"fips": "40735", "name": "Lafayette", "state": "LA", "state_name": "Louisiana"}, - {"fips": "55000", "name": "New Orleans", "state": "LA", "state_name": "Louisiana"}, - {"fips": "70000", "name": "Shreveport", "state": "LA", "state_name": "Louisiana"}, - {"fips": "07000", "name": "Boston", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "09000", "name": "Brockton", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "11000", "name": "Cambridge", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "37000", "name": "Lowell", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "37490", "name": "Lynn", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "45000", "name": "New Bedford", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "55745", "name": "Quincy", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "67000", "name": "Springfield", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "82000", "name": "Worcester", "state": "MA", "state_name": "Massachusetts"}, - {"fips": "04000", "name": "Baltimore", "state": "MD", "state_name": "Maryland"}, - {"fips": "03000", "name": "Ann Arbor", "state": "MI", "state_name": "Michigan"}, - {"fips": "21000", "name": "Dearborn", "state": "MI", "state_name": "Michigan"}, - {"fips": "22000", "name": "Detroit", "state": "MI", "state_name": "Michigan"}, - {"fips": "34000", "name": "Grand Rapids", "state": "MI", "state_name": "Michigan"}, - {"fips": "46000", "name": "Lansing", "state": "MI", "state_name": "Michigan"}, - {"fips": "76460", "name": "Sterling Heights", "state": "MI", 
"state_name": "Michigan"}, - {"fips": "84000", "name": "Warren", "state": "MI", "state_name": "Michigan"}, - {"fips": "43000", "name": "Minneapolis", "state": "MN", "state_name": "Minnesota"}, - {"fips": "54880", "name": "Rochester", "state": "MN", "state_name": "Minnesota"}, - {"fips": "58000", "name": "St. Paul", "state": "MN", "state_name": "Minnesota"}, - {"fips": "15670", "name": "Columbia", "state": "MO", "state_name": "Missouri"}, - {"fips": "35000", "name": "Independence", "state": "MO", "state_name": "Missouri"}, - {"fips": "38000", "name": "Kansas City", "state": "MO", "state_name": "Missouri"}, - {"fips": "41348", "name": "Lee's Summit", "state": "MO", "state_name": "Missouri"}, - {"fips": "70000", "name": "Springfield", "state": "MO", "state_name": "Missouri"}, - {"fips": "65000", "name": "St. Louis", "state": "MO", "state_name": "Missouri"}, - {"fips": "36000", "name": "Jackson", "state": "MS", "state_name": "Mississippi"}, - {"fips": "06550", "name": "Billings", "state": "MT", "state_name": "Montana"}, - {"fips": "10740", "name": "Cary", "state": "NC", "state_name": "North Carolina"}, - {"fips": "12000", "name": "Charlotte", "state": "NC", "state_name": "North Carolina"}, - {"fips": "14100", "name": "Concord", "state": "NC", "state_name": "North Carolina"}, - {"fips": "19000", "name": "Durham", "state": "NC", "state_name": "North Carolina"}, - {"fips": "22920", "name": "Fayetteville", "state": "NC", "state_name": "North Carolina"}, - {"fips": "28000", "name": "Greensboro", "state": "NC", "state_name": "North Carolina"}, - {"fips": "31400", "name": "High Point", "state": "NC", "state_name": "North Carolina"}, - {"fips": "55000", "name": "Raleigh", "state": "NC", "state_name": "North Carolina"}, - {"fips": "74440", "name": "Wilmington", "state": "NC", "state_name": "North Carolina"}, - {"fips": "75000", "name": "Winston-Salem", "state": "NC", "state_name": "North Carolina"}, - {"fips": "25700", "name": "Fargo", "state": "ND", "state_name": "North 
Dakota"}, - {"fips": "28000", "name": "Lincoln", "state": "NE", "state_name": "Nebraska"}, - {"fips": "37000", "name": "Omaha", "state": "NE", "state_name": "Nebraska"}, - {"fips": "45140", "name": "Manchester", "state": "NH", "state_name": "New Hampshire"}, - {"fips": "21000", "name": "Elizabeth", "state": "NJ", "state_name": "New Jersey"}, - {"fips": "36000", "name": "Jersey City", "state": "NJ", "state_name": "New Jersey"}, - {"fips": "51000", "name": "Newark", "state": "NJ", "state_name": "New Jersey"}, - {"fips": "57000", "name": "Paterson", "state": "NJ", "state_name": "New Jersey"}, - {"fips": "02000", "name": "Albuquerque", "state": "NM", "state_name": "New Mexico"}, - {"fips": "39380", "name": "Las Cruces", "state": "NM", "state_name": "New Mexico"}, - {"fips": "63460", "name": "Rio Rancho", "state": "NM", "state_name": "New Mexico"}, - {"fips": "31900", "name": "Henderson", "state": "NV", "state_name": "Nevada"}, - {"fips": "40000", "name": "Las Vegas", "state": "NV", "state_name": "Nevada"}, - {"fips": "51800", "name": "North Las Vegas", "state": "NV", "state_name": "Nevada"}, + { + "fips": "79000", + "name": "Wichita", + "state": "KS", + "state_name": "Kansas", + }, + { + "fips": "46027", + "name": "Lexington-Fayette", + "state": "KY", + "state_name": "Kentucky", + }, + { + "fips": "48006", + "name": "Louisville/Jefferson County", + "state": "KY", + "state_name": "Kentucky", + }, + { + "fips": "05000", + "name": "Baton Rouge", + "state": "LA", + "state_name": "Louisiana", + }, + { + "fips": "40735", + "name": "Lafayette", + "state": "LA", + "state_name": "Louisiana", + }, + { + "fips": "55000", + "name": "New Orleans", + "state": "LA", + "state_name": "Louisiana", + }, + { + "fips": "70000", + "name": "Shreveport", + "state": "LA", + "state_name": "Louisiana", + }, + { + "fips": "07000", + "name": "Boston", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "09000", + "name": "Brockton", + "state": "MA", + "state_name": 
"Massachusetts", + }, + { + "fips": "11000", + "name": "Cambridge", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "37000", + "name": "Lowell", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "37490", + "name": "Lynn", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "45000", + "name": "New Bedford", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "55745", + "name": "Quincy", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "67000", + "name": "Springfield", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "82000", + "name": "Worcester", + "state": "MA", + "state_name": "Massachusetts", + }, + { + "fips": "04000", + "name": "Baltimore", + "state": "MD", + "state_name": "Maryland", + }, + { + "fips": "03000", + "name": "Ann Arbor", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "21000", + "name": "Dearborn", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "22000", + "name": "Detroit", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "34000", + "name": "Grand Rapids", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "46000", + "name": "Lansing", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "76460", + "name": "Sterling Heights", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "84000", + "name": "Warren", + "state": "MI", + "state_name": "Michigan", + }, + { + "fips": "43000", + "name": "Minneapolis", + "state": "MN", + "state_name": "Minnesota", + }, + { + "fips": "54880", + "name": "Rochester", + "state": "MN", + "state_name": "Minnesota", + }, + { + "fips": "58000", + "name": "St. 
Paul", + "state": "MN", + "state_name": "Minnesota", + }, + { + "fips": "15670", + "name": "Columbia", + "state": "MO", + "state_name": "Missouri", + }, + { + "fips": "35000", + "name": "Independence", + "state": "MO", + "state_name": "Missouri", + }, + { + "fips": "38000", + "name": "Kansas City", + "state": "MO", + "state_name": "Missouri", + }, + { + "fips": "41348", + "name": "Lee's Summit", + "state": "MO", + "state_name": "Missouri", + }, + { + "fips": "70000", + "name": "Springfield", + "state": "MO", + "state_name": "Missouri", + }, + { + "fips": "65000", + "name": "St. Louis", + "state": "MO", + "state_name": "Missouri", + }, + { + "fips": "36000", + "name": "Jackson", + "state": "MS", + "state_name": "Mississippi", + }, + { + "fips": "06550", + "name": "Billings", + "state": "MT", + "state_name": "Montana", + }, + { + "fips": "10740", + "name": "Cary", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "12000", + "name": "Charlotte", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "14100", + "name": "Concord", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "19000", + "name": "Durham", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "22920", + "name": "Fayetteville", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "28000", + "name": "Greensboro", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "31400", + "name": "High Point", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "55000", + "name": "Raleigh", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "74440", + "name": "Wilmington", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "75000", + "name": "Winston-Salem", + "state": "NC", + "state_name": "North Carolina", + }, + { + "fips": "25700", + "name": "Fargo", + "state": "ND", + "state_name": "North Dakota", + }, + { + "fips": "28000", + "name": "Lincoln", + 
"state": "NE", + "state_name": "Nebraska", + }, + { + "fips": "37000", + "name": "Omaha", + "state": "NE", + "state_name": "Nebraska", + }, + { + "fips": "45140", + "name": "Manchester", + "state": "NH", + "state_name": "New Hampshire", + }, + { + "fips": "21000", + "name": "Elizabeth", + "state": "NJ", + "state_name": "New Jersey", + }, + { + "fips": "36000", + "name": "Jersey City", + "state": "NJ", + "state_name": "New Jersey", + }, + { + "fips": "51000", + "name": "Newark", + "state": "NJ", + "state_name": "New Jersey", + }, + { + "fips": "57000", + "name": "Paterson", + "state": "NJ", + "state_name": "New Jersey", + }, + { + "fips": "02000", + "name": "Albuquerque", + "state": "NM", + "state_name": "New Mexico", + }, + { + "fips": "39380", + "name": "Las Cruces", + "state": "NM", + "state_name": "New Mexico", + }, + { + "fips": "63460", + "name": "Rio Rancho", + "state": "NM", + "state_name": "New Mexico", + }, + { + "fips": "31900", + "name": "Henderson", + "state": "NV", + "state_name": "Nevada", + }, + { + "fips": "40000", + "name": "Las Vegas", + "state": "NV", + "state_name": "Nevada", + }, + { + "fips": "51800", + "name": "North Las Vegas", + "state": "NV", + "state_name": "Nevada", + }, {"fips": "60600", "name": "Reno", "state": "NV", "state_name": "Nevada"}, {"fips": "68400", "name": "Sparks", "state": "NV", "state_name": "Nevada"}, - {"fips": "01000", "name": "Albany", "state": "NY", "state_name": "New York"}, - {"fips": "11000", "name": "Buffalo", "state": "NY", "state_name": "New York"}, - {"fips": "51000", "name": "New York City", "state": "NY", "state_name": "New York"}, - {"fips": "63000", "name": "Rochester", "state": "NY", "state_name": "New York"}, - {"fips": "73000", "name": "Syracuse", "state": "NY", "state_name": "New York"}, - {"fips": "84000", "name": "Yonkers", "state": "NY", "state_name": "New York"}, + { + "fips": "01000", + "name": "Albany", + "state": "NY", + "state_name": "New York", + }, + { + "fips": "11000", + "name": "Buffalo", 
+ "state": "NY", + "state_name": "New York", + }, + { + "fips": "51000", + "name": "New York City", + "state": "NY", + "state_name": "New York", + }, + { + "fips": "63000", + "name": "Rochester", + "state": "NY", + "state_name": "New York", + }, + { + "fips": "73000", + "name": "Syracuse", + "state": "NY", + "state_name": "New York", + }, + { + "fips": "84000", + "name": "Yonkers", + "state": "NY", + "state_name": "New York", + }, {"fips": "01000", "name": "Akron", "state": "OH", "state_name": "Ohio"}, - {"fips": "15000", "name": "Cincinnati", "state": "OH", "state_name": "Ohio"}, - {"fips": "16000", "name": "Cleveland", "state": "OH", "state_name": "Ohio"}, + { + "fips": "15000", + "name": "Cincinnati", + "state": "OH", + "state_name": "Ohio", + }, + { + "fips": "16000", + "name": "Cleveland", + "state": "OH", + "state_name": "Ohio", + }, {"fips": "18000", "name": "Columbus", "state": "OH", "state_name": "Ohio"}, {"fips": "21000", "name": "Dayton", "state": "OH", "state_name": "Ohio"}, {"fips": "77000", "name": "Toledo", "state": "OH", "state_name": "Ohio"}, - {"fips": "09050", "name": "Broken Arrow", "state": "OK", "state_name": "Oklahoma"}, - {"fips": "52500", "name": "Norman", "state": "OK", "state_name": "Oklahoma"}, - {"fips": "55000", "name": "Oklahoma City", "state": "OK", "state_name": "Oklahoma"}, - {"fips": "75000", "name": "Tulsa", "state": "OK", "state_name": "Oklahoma"}, + { + "fips": "09050", + "name": "Broken Arrow", + "state": "OK", + "state_name": "Oklahoma", + }, + { + "fips": "52500", + "name": "Norman", + "state": "OK", + "state_name": "Oklahoma", + }, + { + "fips": "55000", + "name": "Oklahoma City", + "state": "OK", + "state_name": "Oklahoma", + }, + { + "fips": "75000", + "name": "Tulsa", + "state": "OK", + "state_name": "Oklahoma", + }, {"fips": "05800", "name": "Bend", "state": "OR", "state_name": "Oregon"}, {"fips": "23850", "name": "Eugene", "state": "OR", "state_name": "Oregon"}, - {"fips": "31250", "name": "Gresham", "state": "OR", 
"state_name": "Oregon"}, - {"fips": "34100", "name": "Hillsboro", "state": "OR", "state_name": "Oregon"}, - {"fips": "59000", "name": "Portland", "state": "OR", "state_name": "Oregon"}, + { + "fips": "31250", + "name": "Gresham", + "state": "OR", + "state_name": "Oregon", + }, + { + "fips": "34100", + "name": "Hillsboro", + "state": "OR", + "state_name": "Oregon", + }, + { + "fips": "59000", + "name": "Portland", + "state": "OR", + "state_name": "Oregon", + }, {"fips": "64900", "name": "Salem", "state": "OR", "state_name": "Oregon"}, - {"fips": "02000", "name": "Allentown", "state": "PA", "state_name": "Pennsylvania"}, - {"fips": "60000", "name": "Philadelphia", "state": "PA", "state_name": "Pennsylvania"}, - {"fips": "61000", "name": "Pittsburgh", "state": "PA", "state_name": "Pennsylvania"}, - {"fips": "59000", "name": "Providence", "state": "RI", "state_name": "Rhode Island"}, - {"fips": "13330", "name": "Charleston", "state": "SC", "state_name": "South Carolina"}, - {"fips": "16000", "name": "Columbia", "state": "SC", "state_name": "South Carolina"}, - {"fips": "50875", "name": "North Charleston", "state": "SC", "state_name": "South Carolina"}, - {"fips": "59020", "name": "Sioux Falls", "state": "SD", "state_name": "South Dakota"}, - {"fips": "14000", "name": "Chattanooga", "state": "TN", "state_name": "Tennessee"}, - {"fips": "15160", "name": "Clarksville", "state": "TN", "state_name": "Tennessee"}, - {"fips": "40000", "name": "Knoxville", "state": "TN", "state_name": "Tennessee"}, - {"fips": "48000", "name": "Memphis", "state": "TN", "state_name": "Tennessee"}, - {"fips": "51560", "name": "Murfreesboro", "state": "TN", "state_name": "Tennessee"}, - -# Extracted 332 places - {"fips": "52006", "name": "Nashville-Davidson", "state": "TN", "state_name": "Tennessee"}, + { + "fips": "02000", + "name": "Allentown", + "state": "PA", + "state_name": "Pennsylvania", + }, + { + "fips": "60000", + "name": "Philadelphia", + "state": "PA", + "state_name": "Pennsylvania", + 
}, + { + "fips": "61000", + "name": "Pittsburgh", + "state": "PA", + "state_name": "Pennsylvania", + }, + { + "fips": "59000", + "name": "Providence", + "state": "RI", + "state_name": "Rhode Island", + }, + { + "fips": "13330", + "name": "Charleston", + "state": "SC", + "state_name": "South Carolina", + }, + { + "fips": "16000", + "name": "Columbia", + "state": "SC", + "state_name": "South Carolina", + }, + { + "fips": "50875", + "name": "North Charleston", + "state": "SC", + "state_name": "South Carolina", + }, + { + "fips": "59020", + "name": "Sioux Falls", + "state": "SD", + "state_name": "South Dakota", + }, + { + "fips": "14000", + "name": "Chattanooga", + "state": "TN", + "state_name": "Tennessee", + }, + { + "fips": "15160", + "name": "Clarksville", + "state": "TN", + "state_name": "Tennessee", + }, + { + "fips": "40000", + "name": "Knoxville", + "state": "TN", + "state_name": "Tennessee", + }, + { + "fips": "48000", + "name": "Memphis", + "state": "TN", + "state_name": "Tennessee", + }, + { + "fips": "51560", + "name": "Murfreesboro", + "state": "TN", + "state_name": "Tennessee", + }, + # Extracted 332 places + { + "fips": "52006", + "name": "Nashville-Davidson", + "state": "TN", + "state_name": "Tennessee", + }, {"fips": "01000", "name": "Abilene", "state": "TX", "state_name": "Texas"}, {"fips": "01924", "name": "Allen", "state": "TX", "state_name": "Texas"}, - {"fips": "03000", "name": "Amarillo", "state": "TX", "state_name": "Texas"}, - {"fips": "04000", "name": "Arlington", "state": "TX", "state_name": "Texas"}, + { + "fips": "03000", + "name": "Amarillo", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "04000", + "name": "Arlington", + "state": "TX", + "state_name": "Texas", + }, {"fips": "05000", "name": "Austin", "state": "TX", "state_name": "Texas"}, - {"fips": "07000", "name": "Beaumont", "state": "TX", "state_name": "Texas"}, - {"fips": "10768", "name": "Brownsville", "state": "TX", "state_name": "Texas"}, - {"fips": "13024", "name": 
"Carrollton", "state": "TX", "state_name": "Texas"}, - {"fips": "15976", "name": "College Station", "state": "TX", "state_name": "Texas"}, + { + "fips": "07000", + "name": "Beaumont", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "10768", + "name": "Brownsville", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "13024", + "name": "Carrollton", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "15976", + "name": "College Station", + "state": "TX", + "state_name": "Texas", + }, {"fips": "16432", "name": "Conroe", "state": "TX", "state_name": "Texas"}, - {"fips": "17000", "name": "Corpus Christi", "state": "TX", "state_name": "Texas"}, + { + "fips": "17000", + "name": "Corpus Christi", + "state": "TX", + "state_name": "Texas", + }, {"fips": "19000", "name": "Dallas", "state": "TX", "state_name": "Texas"}, {"fips": "19972", "name": "Denton", "state": "TX", "state_name": "Texas"}, - {"fips": "22660", "name": "Edinburg", "state": "TX", "state_name": "Texas"}, + { + "fips": "22660", + "name": "Edinburg", + "state": "TX", + "state_name": "Texas", + }, {"fips": "24000", "name": "El Paso", "state": "TX", "state_name": "Texas"}, - {"fips": "27000", "name": "Fort Worth", "state": "TX", "state_name": "Texas"}, + { + "fips": "27000", + "name": "Fort Worth", + "state": "TX", + "state_name": "Texas", + }, {"fips": "27684", "name": "Frisco", "state": "TX", "state_name": "Texas"}, {"fips": "29000", "name": "Garland", "state": "TX", "state_name": "Texas"}, - {"fips": "30464", "name": "Grand Prairie", "state": "TX", "state_name": "Texas"}, + { + "fips": "30464", + "name": "Grand Prairie", + "state": "TX", + "state_name": "Texas", + }, {"fips": "35000", "name": "Houston", "state": "TX", "state_name": "Texas"}, {"fips": "37000", "name": "Irving", "state": "TX", "state_name": "Texas"}, {"fips": "39148", "name": "Killeen", "state": "TX", "state_name": "Texas"}, {"fips": "41464", "name": "Laredo", "state": "TX", "state_name": "Texas"}, - {"fips": 
"41980", "name": "League City", "state": "TX", "state_name": "Texas"}, - {"fips": "42508", "name": "Lewisville", "state": "TX", "state_name": "Texas"}, + { + "fips": "41980", + "name": "League City", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "42508", + "name": "Lewisville", + "state": "TX", + "state_name": "Texas", + }, {"fips": "45000", "name": "Lubbock", "state": "TX", "state_name": "Texas"}, {"fips": "45384", "name": "McAllen", "state": "TX", "state_name": "Texas"}, - {"fips": "45744", "name": "McKinney", "state": "TX", "state_name": "Texas"}, - {"fips": "47892", "name": "Mesquite", "state": "TX", "state_name": "Texas"}, + { + "fips": "45744", + "name": "McKinney", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "47892", + "name": "Mesquite", + "state": "TX", + "state_name": "Texas", + }, {"fips": "48072", "name": "Midland", "state": "TX", "state_name": "Texas"}, - {"fips": "50820", "name": "New Braunfels", "state": "TX", "state_name": "Texas"}, + { + "fips": "50820", + "name": "New Braunfels", + "state": "TX", + "state_name": "Texas", + }, {"fips": "53388", "name": "Odessa", "state": "TX", "state_name": "Texas"}, - {"fips": "56000", "name": "Pasadena", "state": "TX", "state_name": "Texas"}, - {"fips": "56348", "name": "Pearland", "state": "TX", "state_name": "Texas"}, + { + "fips": "56000", + "name": "Pasadena", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "56348", + "name": "Pearland", + "state": "TX", + "state_name": "Texas", + }, {"fips": "58016", "name": "Plano", "state": "TX", "state_name": "Texas"}, - {"fips": "61796", "name": "Richardson", "state": "TX", "state_name": "Texas"}, - {"fips": "63500", "name": "Round Rock", "state": "TX", "state_name": "Texas"}, - {"fips": "65000", "name": "San Antonio", "state": "TX", "state_name": "Texas"}, - {"fips": "70808", "name": "Sugar Land", "state": "TX", "state_name": "Texas"}, + { + "fips": "61796", + "name": "Richardson", + "state": "TX", + "state_name": "Texas", + 
}, + { + "fips": "63500", + "name": "Round Rock", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "65000", + "name": "San Antonio", + "state": "TX", + "state_name": "Texas", + }, + { + "fips": "70808", + "name": "Sugar Land", + "state": "TX", + "state_name": "Texas", + }, {"fips": "74144", "name": "Tyler", "state": "TX", "state_name": "Texas"}, {"fips": "76000", "name": "Waco", "state": "TX", "state_name": "Texas"}, - {"fips": "79000", "name": "Wichita Falls", "state": "TX", "state_name": "Texas"}, + { + "fips": "79000", + "name": "Wichita Falls", + "state": "TX", + "state_name": "Texas", + }, {"fips": "62470", "name": "Provo", "state": "UT", "state_name": "Utah"}, - {"fips": "67000", "name": "Salt Lake City", "state": "UT", "state_name": "Utah"}, - {"fips": "65330", "name": "St. George", "state": "UT", "state_name": "Utah"}, - {"fips": "82950", "name": "West Jordan", "state": "UT", "state_name": "Utah"}, - {"fips": "83470", "name": "West Valley City", "state": "UT", "state_name": "Utah"}, - {"fips": "01000", "name": "Alexandria", "state": "VA", "state_name": "Virginia"}, - {"fips": "16000", "name": "Chesapeake", "state": "VA", "state_name": "Virginia"}, - {"fips": "35000", "name": "Hampton", "state": "VA", "state_name": "Virginia"}, - {"fips": "56000", "name": "Newport News", "state": "VA", "state_name": "Virginia"}, - {"fips": "57000", "name": "Norfolk", "state": "VA", "state_name": "Virginia"}, - {"fips": "67000", "name": "Richmond", "state": "VA", "state_name": "Virginia"}, - {"fips": "76432", "name": "Suffolk", "state": "VA", "state_name": "Virginia"}, - {"fips": "82000", "name": "Virginia Beach", "state": "VA", "state_name": "Virginia"}, - {"fips": "05210", "name": "Bellevue", "state": "WA", "state_name": "Washington"}, - {"fips": "22640", "name": "Everett", "state": "WA", "state_name": "Washington"}, - {"fips": "35415", "name": "Kent", "state": "WA", "state_name": "Washington"}, - {"fips": "57745", "name": "Renton", "state": "WA", "state_name": 
"Washington"}, - {"fips": "63000", "name": "Seattle", "state": "WA", "state_name": "Washington"}, - {"fips": "67167", "name": "Spokane Valley", "state": "WA", "state_name": "Washington"}, - {"fips": "67000", "name": "Spokane", "state": "WA", "state_name": "Washington"}, - {"fips": "70000", "name": "Tacoma", "state": "WA", "state_name": "Washington"}, - {"fips": "74060", "name": "Vancouver", "state": "WA", "state_name": "Washington"}, - {"fips": "31000", "name": "Green Bay", "state": "WI", "state_name": "Wisconsin"}, - {"fips": "48000", "name": "Madison", "state": "WI", "state_name": "Wisconsin"}, - {"fips": "53000", "name": "Milwaukee", "state": "WI", "state_name": "Wisconsin"}, + { + "fips": "67000", + "name": "Salt Lake City", + "state": "UT", + "state_name": "Utah", + }, + { + "fips": "65330", + "name": "St. George", + "state": "UT", + "state_name": "Utah", + }, + { + "fips": "82950", + "name": "West Jordan", + "state": "UT", + "state_name": "Utah", + }, + { + "fips": "83470", + "name": "West Valley City", + "state": "UT", + "state_name": "Utah", + }, + { + "fips": "01000", + "name": "Alexandria", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "16000", + "name": "Chesapeake", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "35000", + "name": "Hampton", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "56000", + "name": "Newport News", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "57000", + "name": "Norfolk", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "67000", + "name": "Richmond", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "76432", + "name": "Suffolk", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "82000", + "name": "Virginia Beach", + "state": "VA", + "state_name": "Virginia", + }, + { + "fips": "05210", + "name": "Bellevue", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "22640", + "name": "Everett", + "state": "WA", 
+ "state_name": "Washington", + }, + { + "fips": "35415", + "name": "Kent", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "57745", + "name": "Renton", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "63000", + "name": "Seattle", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "67167", + "name": "Spokane Valley", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "67000", + "name": "Spokane", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "70000", + "name": "Tacoma", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "74060", + "name": "Vancouver", + "state": "WA", + "state_name": "Washington", + }, + { + "fips": "31000", + "name": "Green Bay", + "state": "WI", + "state_name": "Wisconsin", + }, + { + "fips": "48000", + "name": "Madison", + "state": "WI", + "state_name": "Wisconsin", + }, + { + "fips": "53000", + "name": "Milwaukee", + "state": "WI", + "state_name": "Wisconsin", + }, ] diff --git a/src/policyengine/utils/parameter_labels.py b/src/policyengine/utils/parameter_labels.py index 2fd3e25c..6a574be8 100644 --- a/src/policyengine/utils/parameter_labels.py +++ b/src/policyengine/utils/parameter_labels.py @@ -81,9 +81,7 @@ def _generate_breakdown_label(param_node, system, breakdown_parent=None): breakdown_labels = breakdown_parent.metadata.get("breakdown_labels", []) # Collect dimension values from breakdown parent to param_node - dimension_values = _collect_dimension_values( - param_node, breakdown_parent - ) + dimension_values = _collect_dimension_values(param_node, breakdown_parent) if not dimension_values: return None @@ -146,7 +144,12 @@ def _format_dimension_value(value, var_name, dim_label, system): str: Formatted dimension value """ # First, try to get enum display value - if var_name and isinstance(var_name, str) and not var_name.startswith("range(") and not var_name.startswith("list("): + if ( + var_name + and isinstance(var_name, str) + and not 
var_name.startswith("range(") + and not var_name.startswith("list(") + ): var = system.variables.get(var_name) if var and hasattr(var, "possible_values") and var.possible_values: try: diff --git a/tests/fixtures/region_fixtures.py b/tests/fixtures/region_fixtures.py index ca1adfe2..d08fb6d3 100644 --- a/tests/fixtures/region_fixtures.py +++ b/tests/fixtures/region_fixtures.py @@ -71,7 +71,9 @@ def create_sample_us_registry() -> RegionRegistry: create_national_region(), create_state_region("CA", "California"), create_state_region("NY", "New York"), - create_place_region("CA", "44000", "Los Angeles city", "California"), + create_place_region( + "CA", "44000", "Los Angeles city", "California" + ), ], ) @@ -84,7 +86,9 @@ def create_sample_us_registry() -> RegionRegistry: STATE_NEW_YORK = create_state_region("NY", "New York") -PLACE_LOS_ANGELES = create_place_region("CA", "44000", "Los Angeles city", "California") +PLACE_LOS_ANGELES = create_place_region( + "CA", "44000", "Los Angeles city", "California" +) SIMPLE_REGION = Region( code="state/ca", diff --git a/tests/test_pandas3_compatibility.py b/tests/test_pandas3_compatibility.py index 93fecd4d..98481aa1 100644 --- a/tests/test_pandas3_compatibility.py +++ b/tests/test_pandas3_compatibility.py @@ -1,4 +1,5 @@ """Test pandas 3.0 compatibility with enum encoding.""" + import pandas as pd from policyengine_core.enums import Enum diff --git a/tests/test_parameter_labels.py b/tests/test_parameter_labels.py index f0584419..4e66fc0a 100644 --- a/tests/test_parameter_labels.py +++ b/tests/test_parameter_labels.py @@ -435,7 +435,9 @@ def test__given_single_level_breakdown_with_enum__then_generates_label_with_enum result = generate_label_for_parameter(param, system, scale_lookup) # Then: Label uses enum display value - assert result == "Tax exemption by filing status (Married filing jointly)" + assert ( + result == "Tax exemption by filing status (Married filing jointly)" + ) def 
test__given_single_level_breakdown_without_enum__then_generates_label_with_raw_key( self, @@ -642,7 +644,10 @@ def test__given_nested_breakdown_with_enum_and_range__then_generates_full_label( # Then # Without snap_region enum in system, uses breakdown_label for first dimension too - assert result == "SNAP max allotment (SNAP region CONTIGUOUS_US, Household size 1)" + assert ( + result + == "SNAP max allotment (SNAP region CONTIGUOUS_US, Household size 1)" + ) def test__given_breakdown_labels_for_range__then_includes_semantic_label( self, @@ -720,7 +725,10 @@ def test__given_three_level_nesting__then_generates_all_dimensions(self): result = generate_label_for_parameter(param, system, scale_lookup) # Then - assert result == "State sales tax (CA, Income bracket 3, Exemption count 5)" + assert ( + result + == "State sales tax (CA, Income bracket 3, Exemption count 5)" + ) def test__given_missing_breakdown_labels__then_uses_raw_values(self): # Given @@ -785,9 +793,13 @@ def test__given_enum_range_enum_nesting__then_formats_each_correctly(self): result = generate_label_for_parameter(param, system, scale_lookup) # Then: Enum values use display names, range uses breakdown_label - assert result == "Earned income credit (CA, Number of children 2, Single)" + assert ( + result == "Earned income credit (CA, Number of children 2, Single)" + ) - def test__given_range_enum_range_nesting__then_formats_each_correctly(self): + def test__given_range_enum_range_nesting__then_formats_each_correctly( + self, + ): # Given: range -> enum -> range nesting breakdown_parent = create_mock_parent_node( name="gov.childcare.subsidy", @@ -821,13 +833,18 @@ def test__given_range_enum_range_nesting__then_formats_each_correctly(self): == "Childcare subsidy (Age group 2, Head of household, Household size 5)" ) - def test__given_partial_breakdown_labels__then_uses_labels_where_available(self): + def test__given_partial_breakdown_labels__then_uses_labels_where_available( + self, + ): # Given: 
breakdown_labels list shorter than breakdown list breakdown_parent = create_mock_parent_node( name="gov.benefits.utility", label="Utility allowance", breakdown=["area_code", "range(1, 20)", "housing_type"], - breakdown_labels=["Area", "Household size"], # Missing label for housing_type + breakdown_labels=[ + "Area", + "Household size", + ], # Missing label for housing_type ) level1 = create_mock_parent_node( name="gov.benefits.utility.AREA_1", @@ -848,7 +865,10 @@ def test__given_partial_breakdown_labels__then_uses_labels_where_available(self) result = generate_label_for_parameter(param, system, scale_lookup) # Then: Uses breakdown_labels where available, raw value for missing label - assert result == "Utility allowance (Area AREA_1, Household size 3, RENTER)" + assert ( + result + == "Utility allowance (Area AREA_1, Household size 3, RENTER)" + ) def test__given_four_level_nesting_with_mixed_types__then_generates_all_dimensions( self, @@ -857,8 +877,18 @@ def test__given_four_level_nesting_with_mixed_types__then_generates_all_dimensio breakdown_parent = create_mock_parent_node( name="gov.irs.deductions.sales_tax", label="State sales tax deduction", - breakdown=["state_code", "filing_status", "range(1, 7)", "range(1, 20)"], - breakdown_labels=["State", "Filing status", "Exemption count", "Income bracket"], + breakdown=[ + "state_code", + "filing_status", + "range(1, 7)", + "range(1, 20)", + ], + breakdown_labels=[ + "State", + "Filing status", + "Exemption count", + "Income bracket", + ], ) level1 = create_mock_parent_node( name="gov.irs.deductions.sales_tax.NY", diff --git a/tests/test_region.py b/tests/test_region.py index 6669ec1c..bc7ee0f6 100644 --- a/tests/test_region.py +++ b/tests/test_region.py @@ -1,16 +1,10 @@ """Tests for Region and RegionRegistry classes.""" -import pytest - from policyengine.core.region import Region, RegionRegistry - from tests.fixtures.region_fixtures import ( FILTER_REGION, REGION_WITH_DATASET, - SIMPLE_REGION, - 
create_sample_us_registry, create_state_region, - sample_registry, ) @@ -68,9 +62,17 @@ def test__given_same_codes__then_regions_are_equal(self): Then: They are equal regardless of other fields """ # Given - region1 = Region(code="state/ca", label="California", region_type="state") - region2 = Region(code="state/ca", label="California (different)", region_type="state") - region3 = Region(code="state/ny", label="New York", region_type="state") + region1 = Region( + code="state/ca", label="California", region_type="state" + ) + region2 = Region( + code="state/ca", + label="California (different)", + region_type="state", + ) + region3 = Region( + code="state/ny", label="New York", region_type="state" + ) # Then assert region1 == region2 @@ -82,9 +84,17 @@ def test__given_region__then_can_use_as_dict_key_or_in_set(self): Then: Regions with same code are deduplicated """ # Given - region1 = Region(code="state/ca", label="California", region_type="state") - region2 = Region(code="state/ca", label="California (duplicate)", region_type="state") - region3 = Region(code="state/ny", label="New York", region_type="state") + region1 = Region( + code="state/ca", label="California", region_type="state" + ) + region2 = Region( + code="state/ca", + label="California (duplicate)", + region_type="state", + ) + region3 = Region( + code="state/ny", label="New York", region_type="state" + ) # When region_set = {region1, region2, region3} @@ -98,7 +108,9 @@ def test__given_region__then_can_use_as_dict_key_or_in_set(self): class TestRegionRegistry: """Tests for the RegionRegistry class.""" - def test__given_registry_with_regions__then_length_is_correct(self, sample_registry): + def test__given_registry_with_regions__then_length_is_correct( + self, sample_registry + ): """Given: Registry with 4 regions When: Checking length Then: Length is 4 @@ -106,7 +118,9 @@ def test__given_registry_with_regions__then_length_is_correct(self, sample_regis # Then assert len(sample_registry) == 4 - def 
test__given_registry__then_can_iterate_over_regions(self, sample_registry): + def test__given_registry__then_can_iterate_over_regions( + self, sample_registry + ): """Given: Registry with regions When: Iterating Then: All region codes are accessible @@ -119,7 +133,9 @@ def test__given_registry__then_can_iterate_over_regions(self, sample_registry): assert "state/ca" in codes assert "place/CA-44000" in codes - def test__given_existing_code__then_code_is_in_registry(self, sample_registry): + def test__given_existing_code__then_code_is_in_registry( + self, sample_registry + ): """Given: Registry with state/ca When: Checking if code exists Then: Returns True for existing, False for missing @@ -142,7 +158,9 @@ def test__given_valid_code__then_get_returns_region(self, sample_registry): assert ca.label == "California" assert missing is None - def test__given_type__then_get_by_type_returns_matching_regions(self, sample_registry): + def test__given_type__then_get_by_type_returns_matching_regions( + self, sample_registry + ): """Given: Registry with 2 states and 1 place When: Getting by type Then: Returns correct regions for each type @@ -158,7 +176,9 @@ def test__given_type__then_get_by_type_returns_matching_regions(self, sample_reg assert len(places) == 1 assert counties == [] - def test__given_registry__then_get_national_returns_national_region(self, sample_registry): + def test__given_registry__then_get_national_returns_national_region( + self, sample_registry + ): """Given: Registry with national region When: Getting national Then: Returns the national region @@ -171,7 +191,9 @@ def test__given_registry__then_get_national_returns_national_region(self, sample assert national.code == "us" assert national.region_type == "national" - def test__given_parent_code__then_get_children_returns_child_regions(self, sample_registry): + def test__given_parent_code__then_get_children_returns_child_regions( + self, sample_registry + ): """Given: Registry with states under "us" When: 
Getting children of "us" Then: Returns state regions @@ -214,7 +236,9 @@ def test__given_registry__then_get_filter_regions_returns_regions_requiring_filt assert len(filter_regions) == 1 assert filter_regions[0].code == "place/CA-44000" - def test__given_registry__then_can_add_region_dynamically(self, sample_registry): + def test__given_registry__then_can_add_region_dynamically( + self, sample_registry + ): """Given: Registry with 4 regions When: Adding a new region Then: Registry contains 5 regions and new region is indexed diff --git a/tests/test_uk_regions.py b/tests/test_uk_regions.py index b13026f0..cbaa5328 100644 --- a/tests/test_uk_regions.py +++ b/tests/test_uk_regions.py @@ -66,7 +66,10 @@ def test__given_uk_registry__then_has_national_region(self): assert national.code == "uk" assert national.label == "United Kingdom" assert national.region_type == "national" - assert national.dataset_path == f"{UK_DATA_BUCKET}/enhanced_frs_2023_24.h5" + assert ( + national.dataset_path + == f"{UK_DATA_BUCKET}/enhanced_frs_2023_24.h5" + ) assert not national.requires_filter def test__given_uk_registry__then_has_four_country_regions(self): @@ -188,7 +191,9 @@ def test__given_default_registry__then_has_5_regions(self): class TestUKRegionRegistryBuilder: """Tests for UK registry builder with optional regions.""" - def test__given_builder_without_optional_regions__then_returns_5_regions(self): + def test__given_builder_without_optional_regions__then_returns_5_regions( + self, + ): """Given: build_uk_region_registry with optional regions disabled When: Building registry Then: Returns 5 base regions only diff --git a/tests/test_us_regions.py b/tests/test_us_regions.py index 5f7d39b7..54149305 100644 --- a/tests/test_us_regions.py +++ b/tests/test_us_regions.py @@ -1,7 +1,10 @@ """Tests for US region definitions.""" -from policyengine.countries.us.data import AT_LARGE_STATES, DISTRICT_COUNTS, US_PLACES, US_STATES -from policyengine.countries.us.regions import US_DATA_BUCKET, 
us_region_registry +from policyengine.countries.us.data import DISTRICT_COUNTS, US_STATES +from policyengine.countries.us.regions import ( + US_DATA_BUCKET, + us_region_registry, +) class TestUSStates: @@ -46,7 +49,9 @@ def test__given_district_counts__then_every_state_has_count(self): """ # When/Then for state in US_STATES: - assert state in DISTRICT_COUNTS, f"Missing district count for {state}" + assert state in DISTRICT_COUNTS, ( + f"Missing district count for {state}" + ) def test__given_district_counts__then_total_is_436(self): """Given: DISTRICT_COUNTS dictionary @@ -101,7 +106,9 @@ def test__given_us_registry__then_has_national_region(self): assert national.code == "us" assert national.label == "United States" assert national.region_type == "national" - assert national.dataset_path == f"{US_DATA_BUCKET}/enhanced_cps_2024.h5" + assert ( + national.dataset_path == f"{US_DATA_BUCKET}/enhanced_cps_2024.h5" + ) def test__given_us_registry__then_has_51_states(self): """Given: US region registry @@ -171,7 +178,10 @@ def test__given_dc_district__then_is_at_large(self): # Then assert dc_al is not None - assert dc_al.label == "District of Columbia's at-large congressional district" + assert ( + dc_al.label + == "District of Columbia's at-large congressional district" + ) assert dc_al.parent_code == "state/dc" def test__given_us_registry__then_has_places(self): @@ -204,14 +214,18 @@ def test__given_los_angeles_region__then_has_correct_format(self): assert la.state_code == "CA" assert la.dataset_path is None # No dedicated dataset - def test__given_california__then_children_include_districts_and_places(self): + def test__given_california__then_children_include_districts_and_places( + self, + ): """Given: California state region When: Getting its children Then: Includes all 52 districts and 10+ places """ # When ca_children = us_region_registry.get_children("state/ca") - district_children = [c for c in ca_children if c.region_type == "congressional_district"] + 
district_children = [ + c for c in ca_children if c.region_type == "congressional_district" + ] place_children = [c for c in ca_children if c.region_type == "place"] # Then From cf799e502168978f1f47f5bd73f221f1778e407d Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 10 Feb 2026 18:19:35 +0100 Subject: [PATCH 3/8] fix: Add conftest.py for pytest fixture discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sample_registry fixture was not being discovered by pytest after linting removed unused imports. Moving fixture imports to conftest.py is the standard pytest pattern for shared fixtures. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- tests/conftest.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..40c6f69b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,7 @@ +"""Pytest configuration and shared fixtures.""" + +# Import fixtures from fixtures module so pytest can discover them +from tests.fixtures.region_fixtures import ( # noqa: F401 + empty_registry, + sample_registry, +) From 65ff6de0a38050672675f6c63ed5b41b571af3c6 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 10 Feb 2026 19:50:13 +0100 Subject: [PATCH 4/8] feat: Add regional dataset filtering support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add filter_field and filter_value parameters to Simulation class - Add _build_entity_relationships() to US and UK models for mapping persons to all containing entities - Add _filter_dataset_by_household_variable() to filter datasets while preserving entity integrity - Apply filtering in run() method when filter parameters are set This enables filtering datasets by household-level variables like place_fips (US) or country (UK) for regional analysis. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/policyengine/core/simulation.py | 10 + .../tax_benefit_models/uk/model.py | 144 +++++++++++++ .../tax_benefit_models/us/model.py | 189 ++++++++++++++++++ 3 files changed, 343 insertions(+) diff --git a/src/policyengine/core/simulation.py b/src/policyengine/core/simulation.py index 0bbc4ccc..d3208be8 100644 --- a/src/policyengine/core/simulation.py +++ b/src/policyengine/core/simulation.py @@ -21,6 +21,16 @@ class Simulation(BaseModel): dynamic: Dynamic | None = None dataset: Dataset = None + # Regional filtering parameters + filter_field: str | None = Field( + default=None, + description="Household-level variable to filter dataset by (e.g., 'place_fips', 'country')", + ) + filter_value: str | None = Field( + default=None, + description="Value to match when filtering (e.g., '44000', 'ENGLAND')", + ) + tax_benefit_model_version: TaxBenefitModelVersion = None output_dataset: Dataset | None = None diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index 71cf78dc..dedbdc29 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -182,6 +182,143 @@ def __init__(self, **kwargs: dict): ) self.add_parameter(parameter) + def _build_entity_relationships( + self, dataset: PolicyEngineUKDataset + ) -> pd.DataFrame: + """Build a DataFrame mapping each person to their containing entities. + + Creates an explicit relationship map between persons and all entity + types (benunit, household). This enables filtering at any entity + level while preserving the integrity of all related entities. + + Args: + dataset: The dataset to extract relationships from. + + Returns: + A DataFrame indexed by person with columns for each entity ID. 
+ """ + person_data = pd.DataFrame(dataset.data.person) + + # Determine column naming convention + benunit_id_col = ( + "person_benunit_id" + if "person_benunit_id" in person_data.columns + else "benunit_id" + ) + household_id_col = ( + "person_household_id" + if "person_household_id" in person_data.columns + else "household_id" + ) + + entity_rel = pd.DataFrame( + { + "person_id": person_data["person_id"].values, + "benunit_id": person_data[benunit_id_col].values, + "household_id": person_data[household_id_col].values, + } + ) + + return entity_rel + + def _filter_dataset_by_household_variable( + self, + dataset: PolicyEngineUKDataset, + variable_name: str, + variable_value: str, + ) -> PolicyEngineUKDataset: + """Filter a dataset to only include households where a variable matches. + + Uses the entity relationship approach: builds an explicit map of all + entity relationships, filters at the household level, and keeps all + persons in matching households to preserve entity integrity. + + Args: + dataset: The dataset to filter. + variable_name: The name of the household-level variable to filter on. + variable_value: The value to match. Handles both str and bytes encoding. + + Returns: + A new filtered dataset containing only matching households. + """ + # Build entity relationships + entity_rel = self._build_entity_relationships(dataset) + + # Get household-level variable values + household_data = pd.DataFrame(dataset.data.household) + + if variable_name not in household_data.columns: + raise ValueError( + f"Variable '{variable_name}' not found in household data. 
" + f"Available columns: {list(household_data.columns)}" + ) + + hh_values = household_data[variable_name].values + hh_ids = household_data["household_id"].values + + # Create mask for matching households, handling bytes encoding + if isinstance(variable_value, str): + hh_mask = (hh_values == variable_value) | ( + hh_values == variable_value.encode() + ) + else: + hh_mask = hh_values == variable_value + + matching_hh_ids = set(hh_ids[hh_mask]) + + if len(matching_hh_ids) == 0: + raise ValueError( + f"No households found matching {variable_name}={variable_value}" + ) + + # Filter entity_rel to persons in matching households + person_mask = entity_rel["household_id"].isin(matching_hh_ids) + filtered_entity_rel = entity_rel[person_mask] + + # Get the filtered entity IDs + filtered_person_ids = set(filtered_entity_rel["person_id"]) + filtered_household_ids = matching_hh_ids + filtered_benunit_ids = set(filtered_entity_rel["benunit_id"]) + + # Filter each entity DataFrame + person_df = pd.DataFrame(dataset.data.person) + household_df = pd.DataFrame(dataset.data.household) + benunit_df = pd.DataFrame(dataset.data.benunit) + + filtered_person = person_df[ + person_df["person_id"].isin(filtered_person_ids) + ] + filtered_household = household_df[ + household_df["household_id"].isin(filtered_household_ids) + ] + filtered_benunit = benunit_df[ + benunit_df["benunit_id"].isin(filtered_benunit_ids) + ] + + # Create filtered dataset + return PolicyEngineUKDataset( + id=dataset.id + f"_filtered_{variable_name}_{variable_value}", + name=dataset.name, + description=f"{dataset.description} (filtered: {variable_name}={variable_value})", + filepath=dataset.filepath, + year=dataset.year, + is_output_dataset=dataset.is_output_dataset, + data=UKYearData( + person=MicroDataFrame( + filtered_person.reset_index(drop=True), + weights="person_weight", + ), + benunit=MicroDataFrame( + filtered_benunit.reset_index(drop=True), + weights="benunit_weight", + ), + household=MicroDataFrame( + 
filtered_household.reset_index(drop=True), + weights="household_weight", + ), + ), + ) + def run(self, simulation: "Simulation") -> "Simulation": from policyengine_uk import Microsimulation from policyengine_uk.data import UKSingleYearDataset @@ -194,6 +331,13 @@ def run(self, simulation: "Simulation") -> "Simulation": dataset = simulation.dataset dataset.load() + + # Apply regional filtering if specified + if simulation.filter_field and simulation.filter_value: + dataset = self._filter_dataset_by_household_variable( + dataset, simulation.filter_field, simulation.filter_value + ) + input_data = UKSingleYearDataset( person=dataset.data.person, benunit=dataset.data.benunit, diff --git a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py index b5191a19..1860c5c0 100644 --- a/src/policyengine/tax_benefit_models/us/model.py +++ b/src/policyengine/tax_benefit_models/us/model.py @@ -175,6 +175,189 @@ def __init__(self, **kwargs: dict): ) self.add_parameter(parameter) + def _build_entity_relationships( + self, dataset: PolicyEngineUSDataset + ) -> pd.DataFrame: + """Build a DataFrame mapping each person to their containing entities. + + Creates an explicit relationship map between persons and all entity + types (household, tax_unit, spm_unit, family, marital_unit). This + enables filtering at any entity level while preserving the integrity + of all related entities. + + Args: + dataset: The dataset to extract relationships from. + + Returns: + A DataFrame indexed by person with columns for each entity ID. 
+ """ + person_data = pd.DataFrame(dataset.data.person) + + # Determine column naming convention + household_id_col = ( + "person_household_id" + if "person_household_id" in person_data.columns + else "household_id" + ) + tax_unit_id_col = ( + "person_tax_unit_id" + if "person_tax_unit_id" in person_data.columns + else "tax_unit_id" + ) + spm_unit_id_col = ( + "person_spm_unit_id" + if "person_spm_unit_id" in person_data.columns + else "spm_unit_id" + ) + family_id_col = ( + "person_family_id" + if "person_family_id" in person_data.columns + else "family_id" + ) + marital_unit_id_col = ( + "person_marital_unit_id" + if "person_marital_unit_id" in person_data.columns + else "marital_unit_id" + ) + + entity_rel = pd.DataFrame( + { + "person_id": person_data["person_id"].values, + "household_id": person_data[household_id_col].values, + "tax_unit_id": person_data[tax_unit_id_col].values, + "spm_unit_id": person_data[spm_unit_id_col].values, + "family_id": person_data[family_id_col].values, + "marital_unit_id": person_data[marital_unit_id_col].values, + } + ) + + return entity_rel + + def _filter_dataset_by_household_variable( + self, + dataset: PolicyEngineUSDataset, + variable_name: str, + variable_value: str, + ) -> PolicyEngineUSDataset: + """Filter a dataset to only include households where a variable matches. + + Uses the entity relationship approach: builds an explicit map of all + entity relationships, filters at the household level, and keeps all + persons in matching households to preserve entity integrity. + + Args: + dataset: The dataset to filter. + variable_name: The name of the household-level variable to filter on. + variable_value: The value to match. Handles both str and bytes encoding. + + Returns: + A new filtered dataset containing only matching households. 
+ """ + # Build entity relationships + entity_rel = self._build_entity_relationships(dataset) + + # Get household-level variable values + household_data = pd.DataFrame(dataset.data.household) + + if variable_name not in household_data.columns: + raise ValueError( + f"Variable '{variable_name}' not found in household data. " + f"Available columns: {list(household_data.columns)}" + ) + + hh_values = household_data[variable_name].values + hh_ids = household_data["household_id"].values + + # Create mask for matching households, handling bytes encoding + if isinstance(variable_value, str): + hh_mask = (hh_values == variable_value) | ( + hh_values == variable_value.encode() + ) + else: + hh_mask = hh_values == variable_value + + matching_hh_ids = set(hh_ids[hh_mask]) + + if len(matching_hh_ids) == 0: + raise ValueError( + f"No households found matching {variable_name}={variable_value}" + ) + + # Filter entity_rel to persons in matching households + person_mask = entity_rel["household_id"].isin(matching_hh_ids) + filtered_entity_rel = entity_rel[person_mask] + + # Get the filtered entity IDs + filtered_person_ids = set(filtered_entity_rel["person_id"]) + filtered_household_ids = matching_hh_ids + filtered_tax_unit_ids = set(filtered_entity_rel["tax_unit_id"]) + filtered_spm_unit_ids = set(filtered_entity_rel["spm_unit_id"]) + filtered_family_ids = set(filtered_entity_rel["family_id"]) + filtered_marital_unit_ids = set(filtered_entity_rel["marital_unit_id"]) + + # Filter each entity DataFrame + person_df = pd.DataFrame(dataset.data.person) + household_df = pd.DataFrame(dataset.data.household) + tax_unit_df = pd.DataFrame(dataset.data.tax_unit) + spm_unit_df = pd.DataFrame(dataset.data.spm_unit) + family_df = pd.DataFrame(dataset.data.family) + marital_unit_df = pd.DataFrame(dataset.data.marital_unit) + + filtered_person = person_df[ + person_df["person_id"].isin(filtered_person_ids) + ] + filtered_household = household_df[ + 
household_df["household_id"].isin(filtered_household_ids) + ] + filtered_tax_unit = tax_unit_df[ + tax_unit_df["tax_unit_id"].isin(filtered_tax_unit_ids) + ] + filtered_spm_unit = spm_unit_df[ + spm_unit_df["spm_unit_id"].isin(filtered_spm_unit_ids) + ] + filtered_family = family_df[ + family_df["family_id"].isin(filtered_family_ids) + ] + filtered_marital_unit = marital_unit_df[ + marital_unit_df["marital_unit_id"].isin(filtered_marital_unit_ids) + ] + + # Create filtered dataset + return PolicyEngineUSDataset( + id=dataset.id + f"_filtered_{variable_name}_{variable_value}", + name=dataset.name, + description=f"{dataset.description} (filtered: {variable_name}={variable_value})", + filepath=dataset.filepath, + year=dataset.year, + is_output_dataset=dataset.is_output_dataset, + data=USYearData( + person=MicroDataFrame( + filtered_person.reset_index(drop=True), + weights="person_weight", + ), + household=MicroDataFrame( + filtered_household.reset_index(drop=True), + weights="household_weight", + ), + tax_unit=MicroDataFrame( + filtered_tax_unit.reset_index(drop=True), + weights="tax_unit_weight", + ), + spm_unit=MicroDataFrame( + filtered_spm_unit.reset_index(drop=True), + weights="spm_unit_weight", + ), + family=MicroDataFrame( + filtered_family.reset_index(drop=True), + weights="family_weight", + ), + marital_unit=MicroDataFrame( + filtered_marital_unit.reset_index(drop=True), + weights="marital_unit_weight", + ), + ), + ) + def run(self, simulation: "Simulation") -> "Simulation": from policyengine_us import Microsimulation from policyengine_us.system import system @@ -188,6 +371,12 @@ def run(self, simulation: "Simulation") -> "Simulation": dataset = simulation.dataset dataset.load() + # Apply regional filtering if specified + if simulation.filter_field and simulation.filter_value: + dataset = self._filter_dataset_by_household_variable( + dataset, simulation.filter_field, simulation.filter_value + ) + # Build simulation from entity IDs using PolicyEngine Core 
pattern microsim = Microsimulation() self._build_simulation_from_dataset(microsim, dataset, system) From ab42f8a4baca4e189957d923fa4a1b9a8e60fce5 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 10 Feb 2026 20:11:28 +0100 Subject: [PATCH 5/8] test: Add unit tests for dataset filtering functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add filtering_fixtures.py with US and UK test datasets - Add 18 unit tests for _build_entity_relationships and _filter_dataset_by_household_variable methods - Tests follow given-when-then pattern - All tests pass 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- tests/conftest.py | 4 + tests/fixtures/filtering_fixtures.py | 165 ++++++++++ tests/test_filtering.py | 439 +++++++++++++++++++++++++++ 3 files changed, 608 insertions(+) create mode 100644 tests/fixtures/filtering_fixtures.py create mode 100644 tests/test_filtering.py diff --git a/tests/conftest.py b/tests/conftest.py index 40c6f69b..a54a3d79 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,10 @@ """Pytest configuration and shared fixtures.""" # Import fixtures from fixtures module so pytest can discover them +from tests.fixtures.filtering_fixtures import ( # noqa: F401 + uk_test_dataset, + us_test_dataset, +) from tests.fixtures.region_fixtures import ( # noqa: F401 empty_registry, sample_registry, diff --git a/tests/fixtures/filtering_fixtures.py b/tests/fixtures/filtering_fixtures.py new file mode 100644 index 00000000..074f6655 --- /dev/null +++ b/tests/fixtures/filtering_fixtures.py @@ -0,0 +1,165 @@ +"""Fixtures for testing dataset filtering functionality.""" + +import pandas as pd +import pytest +from microdf import MicroDataFrame + +from policyengine.tax_benefit_models.uk.datasets import ( + PolicyEngineUKDataset, + UKYearData, +) +from policyengine.tax_benefit_models.us.datasets import ( + PolicyEngineUSDataset, + USYearData, +) + + +def 
create_us_test_dataset() -> PolicyEngineUSDataset: + """Create a minimal US dataset for filtering tests. + + Creates a dataset with 6 persons across 3 households: + - Household 1 (place_fips="44000"): 2 persons + - Household 2 (place_fips="44000"): 2 persons + - Household 3 (place_fips="57000"): 2 persons + """ + # Person data - 6 persons across 3 households + person_data = pd.DataFrame( + { + "person_id": [1, 2, 3, 4, 5, 6], + "household_id": [1, 1, 2, 2, 3, 3], + "tax_unit_id": [1, 1, 2, 2, 3, 3], + "spm_unit_id": [1, 1, 2, 2, 3, 3], + "family_id": [1, 1, 2, 2, 3, 3], + "marital_unit_id": [1, 1, 2, 2, 3, 3], + "person_weight": [1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0], + "age": [35, 30, 45, 40, 25, 28], + } + ) + + # Household data - 3 households, 2 in place 44000, 1 in place 57000 + household_data = pd.DataFrame( + { + "household_id": [1, 2, 3], + "household_weight": [1000.0, 1000.0, 1000.0], + "place_fips": ["44000", "44000", "57000"], + "state_fips": [6, 6, 34], # CA, CA, NJ + } + ) + + # Tax unit data + tax_unit_data = pd.DataFrame( + { + "tax_unit_id": [1, 2, 3], + "tax_unit_weight": [1000.0, 1000.0, 1000.0], + } + ) + + # SPM unit data + spm_unit_data = pd.DataFrame( + { + "spm_unit_id": [1, 2, 3], + "spm_unit_weight": [1000.0, 1000.0, 1000.0], + } + ) + + # Family data + family_data = pd.DataFrame( + { + "family_id": [1, 2, 3], + "family_weight": [1000.0, 1000.0, 1000.0], + } + ) + + # Marital unit data + marital_unit_data = pd.DataFrame( + { + "marital_unit_id": [1, 2, 3], + "marital_unit_weight": [1000.0, 1000.0, 1000.0], + } + ) + + return PolicyEngineUSDataset( + id="test_us_dataset", + name="Test US Dataset", + description="Test dataset for filtering", + filepath="/tmp/test_us.h5", + year=2024, + is_output_dataset=False, + data=USYearData( + person=MicroDataFrame(person_data, weights="person_weight"), + household=MicroDataFrame( + household_data, weights="household_weight" + ), + tax_unit=MicroDataFrame(tax_unit_data, 
weights="tax_unit_weight"), + spm_unit=MicroDataFrame(spm_unit_data, weights="spm_unit_weight"), + family=MicroDataFrame(family_data, weights="family_weight"), + marital_unit=MicroDataFrame( + marital_unit_data, weights="marital_unit_weight" + ), + ), + ) + + +def create_uk_test_dataset() -> PolicyEngineUKDataset: + """Create a minimal UK dataset for filtering tests. + + Creates a dataset with 6 persons across 3 households: + - Household 1 (country="ENGLAND"): 2 persons + - Household 2 (country="ENGLAND"): 2 persons + - Household 3 (country="SCOTLAND"): 2 persons + """ + # Person data - 6 persons across 3 households + person_data = pd.DataFrame( + { + "person_id": [1, 2, 3, 4, 5, 6], + "benunit_id": [1, 1, 2, 2, 3, 3], + "household_id": [1, 1, 2, 2, 3, 3], + "person_weight": [1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0], + "age": [35, 30, 45, 40, 25, 28], + } + ) + + # Benunit data + benunit_data = pd.DataFrame( + { + "benunit_id": [1, 2, 3], + "benunit_weight": [1000.0, 1000.0, 1000.0], + } + ) + + # Household data - 3 households, 2 in England, 1 in Scotland + household_data = pd.DataFrame( + { + "household_id": [1, 2, 3], + "household_weight": [1000.0, 1000.0, 1000.0], + "country": ["ENGLAND", "ENGLAND", "SCOTLAND"], + } + ) + + return PolicyEngineUKDataset( + id="test_uk_dataset", + name="Test UK Dataset", + description="Test dataset for filtering", + filepath="/tmp/test_uk.h5", + year=2024, + is_output_dataset=False, + data=UKYearData( + person=MicroDataFrame(person_data, weights="person_weight"), + benunit=MicroDataFrame(benunit_data, weights="benunit_weight"), + household=MicroDataFrame( + household_data, weights="household_weight" + ), + ), + ) + + +@pytest.fixture +def us_test_dataset() -> PolicyEngineUSDataset: + """Pytest fixture for US test dataset.""" + return create_us_test_dataset() + + +@pytest.fixture +def uk_test_dataset() -> PolicyEngineUKDataset: + """Pytest fixture for UK test dataset.""" + return create_uk_test_dataset() diff --git 
a/tests/test_filtering.py b/tests/test_filtering.py new file mode 100644 index 00000000..54c5c9af --- /dev/null +++ b/tests/test_filtering.py @@ -0,0 +1,439 @@ +"""Tests for dataset filtering functionality. + +Tests the _build_entity_relationships and _filter_dataset_by_household_variable +methods in both US and UK models. +""" + +import pandas as pd +import pytest + +from policyengine.core.simulation import Simulation + + +class TestSimulationFilterParameters: + """Tests for Simulation filter_field and filter_value parameters.""" + + def test__given_no_filter_params__then_simulation_has_none_values(self): + """Given: Simulation created without filter parameters + When: Accessing filter_field and filter_value + Then: Both are None + """ + # When + simulation = Simulation() + + # Then + assert simulation.filter_field is None + assert simulation.filter_value is None + + def test__given_filter_params__then_simulation_stores_them(self): + """Given: Simulation created with filter parameters + When: Accessing filter_field and filter_value + Then: Values are stored correctly + """ + # When + simulation = Simulation( + filter_field="place_fips", + filter_value="44000", + ) + + # Then + assert simulation.filter_field == "place_fips" + assert simulation.filter_value == "44000" + + +class TestUSBuildEntityRelationships: + """Tests for US model _build_entity_relationships method.""" + + def test__given_us_dataset__then_entity_relationships_has_all_columns( + self, us_test_dataset + ): + """Given: US dataset with persons and entities + When: Building entity relationships + Then: DataFrame has all entity ID columns + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + entity_rel = model._build_entity_relationships(us_test_dataset) + + # Then + expected_columns = { + "person_id", + "household_id", + "tax_unit_id", + "spm_unit_id", + "family_id", + 
"marital_unit_id", + } + assert set(entity_rel.columns) == expected_columns + + def test__given_us_dataset__then_entity_relationships_has_correct_row_count( + self, us_test_dataset + ): + """Given: US dataset with 6 persons + When: Building entity relationships + Then: DataFrame has 6 rows (one per person) + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + entity_rel = model._build_entity_relationships(us_test_dataset) + + # Then + assert len(entity_rel) == 6 + + def test__given_us_dataset__then_entity_relationships_preserves_mappings( + self, us_test_dataset + ): + """Given: US dataset where persons 1,2 belong to household 1 + When: Building entity relationships + Then: Mappings are preserved correctly + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + entity_rel = model._build_entity_relationships(us_test_dataset) + + # Then + person_1_row = entity_rel[entity_rel["person_id"] == 1].iloc[0] + assert person_1_row["household_id"] == 1 + assert person_1_row["tax_unit_id"] == 1 + + +class TestUSFilterDatasetByHouseholdVariable: + """Tests for US model _filter_dataset_by_household_variable method.""" + + def test__given_filter_by_place_fips__then_returns_matching_households( + self, us_test_dataset + ): + """Given: US dataset with households in places 44000 and 57000 + When: Filtering by place_fips=44000 + Then: Returns only households in place 44000 + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + us_test_dataset, + variable_name="place_fips", + variable_value="44000", + ) + + # Then + household_df = pd.DataFrame(filtered.data.household) 
+ assert len(household_df) == 2 + assert all(household_df["place_fips"] == "44000") + + def test__given_filter_by_place_fips__then_preserves_related_persons( + self, us_test_dataset + ): + """Given: US dataset with 4 persons in place 44000 + When: Filtering by place_fips=44000 + Then: Returns all 4 persons in matching households + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + us_test_dataset, + variable_name="place_fips", + variable_value="44000", + ) + + # Then + person_df = pd.DataFrame(filtered.data.person) + assert len(person_df) == 4 + assert set(person_df["person_id"]) == {1, 2, 3, 4} + + def test__given_filter_by_place_fips__then_preserves_related_entities( + self, us_test_dataset + ): + """Given: US dataset with 2 tax units in place 44000 + When: Filtering by place_fips=44000 + Then: Returns all related entities (tax_unit, spm_unit, etc.) 
+ """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + us_test_dataset, + variable_name="place_fips", + variable_value="44000", + ) + + # Then + assert len(pd.DataFrame(filtered.data.tax_unit)) == 2 + assert len(pd.DataFrame(filtered.data.spm_unit)) == 2 + assert len(pd.DataFrame(filtered.data.family)) == 2 + assert len(pd.DataFrame(filtered.data.marital_unit)) == 2 + + def test__given_no_matching_households__then_raises_value_error( + self, us_test_dataset + ): + """Given: US dataset with no households matching filter + When: Filtering by place_fips=99999 + Then: Raises ValueError + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # Then + with pytest.raises(ValueError, match="No households found"): + model._filter_dataset_by_household_variable( + us_test_dataset, + variable_name="place_fips", + variable_value="99999", + ) + + def test__given_invalid_variable_name__then_raises_value_error( + self, us_test_dataset + ): + """Given: US dataset + When: Filtering by non-existent variable + Then: Raises ValueError with helpful message + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # Then + with pytest.raises(ValueError, match="not found in household data"): + model._filter_dataset_by_household_variable( + us_test_dataset, + variable_name="nonexistent_var", + variable_value="value", + ) + + def test__given_filtered_dataset__then_has_updated_metadata( + self, us_test_dataset + ): + """Given: US dataset + When: Filtering by place_fips + Then: Filtered dataset has updated id and description + """ + # Given + from policyengine.tax_benefit_models.us.model import ( + 
PolicyEngineUSLatest, + ) + + model = PolicyEngineUSLatest.__new__(PolicyEngineUSLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + us_test_dataset, + variable_name="place_fips", + variable_value="44000", + ) + + # Then + assert "filtered" in filtered.id + assert "place_fips=44000" in filtered.description + + +class TestUKBuildEntityRelationships: + """Tests for UK model _build_entity_relationships method.""" + + def test__given_uk_dataset__then_entity_relationships_has_all_columns( + self, uk_test_dataset + ): + """Given: UK dataset with persons and entities + When: Building entity relationships + Then: DataFrame has all entity ID columns + """ + # Given + from policyengine.tax_benefit_models.uk.model import ( + PolicyEngineUKLatest, + ) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # When + entity_rel = model._build_entity_relationships(uk_test_dataset) + + # Then + expected_columns = {"person_id", "benunit_id", "household_id"} + assert set(entity_rel.columns) == expected_columns + + def test__given_uk_dataset__then_entity_relationships_has_correct_row_count( + self, uk_test_dataset + ): + """Given: UK dataset with 6 persons + When: Building entity relationships + Then: DataFrame has 6 rows (one per person) + """ + # Given + from policyengine.tax_benefit_models.uk.model import ( + PolicyEngineUKLatest, + ) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # When + entity_rel = model._build_entity_relationships(uk_test_dataset) + + # Then + assert len(entity_rel) == 6 + + +class TestUKFilterDatasetByHouseholdVariable: + """Tests for UK model _filter_dataset_by_household_variable method.""" + + def test__given_filter_by_country__then_returns_matching_households( + self, uk_test_dataset + ): + """Given: UK dataset with households in England and Scotland + When: Filtering by country=ENGLAND + Then: Returns only households in England + """ + # Given + from policyengine.tax_benefit_models.uk.model import 
( + PolicyEngineUKLatest, + ) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + uk_test_dataset, + variable_name="country", + variable_value="ENGLAND", + ) + + # Then + household_df = pd.DataFrame(filtered.data.household) + assert len(household_df) == 2 + assert all(household_df["country"] == "ENGLAND") + + def test__given_filter_by_country__then_preserves_related_persons( + self, uk_test_dataset + ): + """Given: UK dataset with 4 persons in England + When: Filtering by country=ENGLAND + Then: Returns all 4 persons in matching households + """ + # Given + from policyengine.tax_benefit_models.uk.model import ( + PolicyEngineUKLatest, + ) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + uk_test_dataset, + variable_name="country", + variable_value="ENGLAND", + ) + + # Then + person_df = pd.DataFrame(filtered.data.person) + assert len(person_df) == 4 + assert set(person_df["person_id"]) == {1, 2, 3, 4} + + def test__given_filter_by_country__then_preserves_related_benunits( + self, uk_test_dataset + ): + """Given: UK dataset with 2 benunits in England + When: Filtering by country=ENGLAND + Then: Returns all related benunits + """ + # Given + from policyengine.tax_benefit_models.uk.model import ( + PolicyEngineUKLatest, + ) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + uk_test_dataset, + variable_name="country", + variable_value="ENGLAND", + ) + + # Then + assert len(pd.DataFrame(filtered.data.benunit)) == 2 + + def test__given_no_matching_households__then_raises_value_error( + self, uk_test_dataset + ): + """Given: UK dataset with no households matching filter + When: Filtering by country=WALES + Then: Raises ValueError + """ + # Given + from policyengine.tax_benefit_models.uk.model import ( + PolicyEngineUKLatest, + 
) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # Then + with pytest.raises(ValueError, match="No households found"): + model._filter_dataset_by_household_variable( + uk_test_dataset, + variable_name="country", + variable_value="WALES", + ) + + def test__given_filtered_dataset__then_has_updated_metadata( + self, uk_test_dataset + ): + """Given: UK dataset + When: Filtering by country + Then: Filtered dataset has updated id and description + """ + # Given + from policyengine.tax_benefit_models.uk.model import ( + PolicyEngineUKLatest, + ) + + model = PolicyEngineUKLatest.__new__(PolicyEngineUKLatest) + + # When + filtered = model._filter_dataset_by_household_variable( + uk_test_dataset, + variable_name="country", + variable_value="ENGLAND", + ) + + # Then + assert "filtered" in filtered.id + assert "country=ENGLAND" in filtered.description From 096ffeeaac20db964bdca04fb062520a19b6f61b Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Wed, 11 Feb 2026 21:26:35 +0100 Subject: [PATCH 6/8] fix: Apply US reforms at Microsimulation construction time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The US country package uses a shared singleton TaxBenefitSystem, which means p.update() after Microsimulation construction has no effect on calculations. This fix: - Adds reform_dict_from_parameter_values() utility to convert ParameterValue objects to the dict format accepted by Microsimulation - Updates US model.py to build reform dict and pass it at construction time instead of using simulation_modifier (p.update) after - Adds comprehensive unit tests for the utility function and US reform application The UK model continues to use p.update() since policyengine-uk was refactored to give each simulation its own TaxBenefitSystem instance. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../tax_benefit_models/us/model.py | 72 +++-- src/policyengine/utils/parametric_reforms.py | 40 +++ tests/conftest.py | 13 + tests/fixtures/parametric_reforms_fixtures.py | 136 +++++++++ tests/fixtures/us_reform_fixtures.py | 124 +++++++++ tests/test_parametric_reforms.py | 262 ++++++++++++++++++ tests/test_us_reform_application.py | 155 +++++++++++ 7 files changed, 775 insertions(+), 27 deletions(-) create mode 100644 tests/fixtures/parametric_reforms_fixtures.py create mode 100644 tests/fixtures/us_reform_fixtures.py create mode 100644 tests/test_parametric_reforms.py create mode 100644 tests/test_us_reform_application.py diff --git a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py index 1860c5c0..0db59259 100644 --- a/src/policyengine/tax_benefit_models/us/model.py +++ b/src/policyengine/tax_benefit_models/us/model.py @@ -363,7 +363,7 @@ def run(self, simulation: "Simulation") -> "Simulation": from policyengine_us.system import system from policyengine.utils.parametric_reforms import ( - simulation_modifier_from_parameter_values, + reform_dict_from_parameter_values, ) assert isinstance(simulation.dataset, PolicyEngineUSDataset) @@ -377,33 +377,51 @@ def run(self, simulation: "Simulation") -> "Simulation": dataset, simulation.filter_field, simulation.filter_value ) - # Build simulation from entity IDs using PolicyEngine Core pattern - microsim = Microsimulation() - self._build_simulation_from_dataset(microsim, dataset, system) + # Build reform dict from policy and dynamic parameter values + # US requires reforms to be passed at Microsimulation construction time + # (unlike UK which supports p.update() after construction) + reform_dict = None + + # Collect policy reforms + if simulation.policy: + if simulation.policy.simulation_modifier is not None: + # Custom simulation modifier - extract parameter values if 
available + # Fall back to parameter_values if no custom modifier logic needed + if simulation.policy.parameter_values: + reform_dict = reform_dict_from_parameter_values( + simulation.policy.parameter_values + ) + elif simulation.policy.parameter_values: + reform_dict = reform_dict_from_parameter_values( + simulation.policy.parameter_values + ) - # Apply policy reforms - if ( - simulation.policy - and simulation.policy.simulation_modifier is not None - ): - simulation.policy.simulation_modifier(microsim) - elif simulation.policy: - modifier = simulation_modifier_from_parameter_values( - simulation.policy.parameter_values - ) - modifier(microsim) - - # Apply dynamic reforms - if ( - simulation.dynamic - and simulation.dynamic.simulation_modifier is not None - ): - simulation.dynamic.simulation_modifier(microsim) - elif simulation.dynamic: - modifier = simulation_modifier_from_parameter_values( - simulation.dynamic.parameter_values - ) - modifier(microsim) + # Merge dynamic reforms into reform_dict + if simulation.dynamic: + dynamic_reform = None + if simulation.dynamic.simulation_modifier is not None: + if simulation.dynamic.parameter_values: + dynamic_reform = reform_dict_from_parameter_values( + simulation.dynamic.parameter_values + ) + elif simulation.dynamic.parameter_values: + dynamic_reform = reform_dict_from_parameter_values( + simulation.dynamic.parameter_values + ) + + if dynamic_reform: + if reform_dict is None: + reform_dict = dynamic_reform + else: + # Merge dynamic reforms into policy reforms + for param_name, period_values in dynamic_reform.items(): + if param_name not in reform_dict: + reform_dict[param_name] = {} + reform_dict[param_name].update(period_values) + + # Create Microsimulation with reform at construction time + microsim = Microsimulation(reform=reform_dict) + self._build_simulation_from_dataset(microsim, dataset, system) data = { "person": pd.DataFrame(), diff --git a/src/policyengine/utils/parametric_reforms.py 
b/src/policyengine/utils/parametric_reforms.py index 7d7a869a..7a9494a5 100644 --- a/src/policyengine/utils/parametric_reforms.py +++ b/src/policyengine/utils/parametric_reforms.py @@ -5,6 +5,46 @@ from policyengine.core import ParameterValue +def reform_dict_from_parameter_values( + parameter_values: list[ParameterValue], +) -> dict: + """ + Convert a list of ParameterValue objects to a reform dict format. + + This format is accepted by policyengine_us.Microsimulation(reform=...) and + policyengine_uk.Microsimulation(reform=...) at construction time. + + Args: + parameter_values: List of ParameterValue objects to convert. + + Returns: + A dict mapping parameter names to period-value dicts, e.g.: + { + "gov.irs.deductions.standard.amount.SINGLE": { + "2024-01-01": 29200 + } + } + """ + if not parameter_values: + return None + + reform_dict = {} + for pv in parameter_values: + param_name = pv.parameter.name + if param_name not in reform_dict: + reform_dict[param_name] = {} + + # Format the period string + period_str = pv.start_date.strftime("%Y-%m-%d") + if pv.end_date: + # Use period range format: "start.end" + period_str = f"{period_str}.{pv.end_date.strftime('%Y-%m-%d')}" + + reform_dict[param_name][period_str] = pv.value + + return reform_dict + + def simulation_modifier_from_parameter_values( parameter_values: list[ParameterValue], ) -> Callable: diff --git a/tests/conftest.py b/tests/conftest.py index a54a3d79..75e2b55c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,20 @@ uk_test_dataset, us_test_dataset, ) +from tests.fixtures.parametric_reforms_fixtures import ( # noqa: F401 + mock_param_joint, + mock_param_single, + multi_period_param_values, + multiple_different_params, + param_value_with_end_date, + single_param_value, +) from tests.fixtures.region_fixtures import ( # noqa: F401 empty_registry, sample_registry, ) +from tests.fixtures.us_reform_fixtures import ( # noqa: F401 + double_standard_deduction_policy, + high_income_single_filer, 
+ married_couple_with_kids, +) diff --git a/tests/fixtures/parametric_reforms_fixtures.py b/tests/fixtures/parametric_reforms_fixtures.py new file mode 100644 index 00000000..98bc7aa2 --- /dev/null +++ b/tests/fixtures/parametric_reforms_fixtures.py @@ -0,0 +1,136 @@ +"""Fixtures for parametric reforms tests.""" + +from datetime import date +from unittest.mock import MagicMock + +import pytest + +from policyengine.core import Parameter, ParameterValue + + +def create_mock_parameter( + name: str = "gov.test.param", + label: str = "Test Parameter", +) -> Parameter: + """Create a mock Parameter for testing.""" + param = MagicMock(spec=Parameter) + param.name = name + param.label = label + return param + + +def create_parameter_value( + parameter: Parameter, + value: float, + start_date: date, + end_date: date | None = None, +) -> ParameterValue: + """Create a ParameterValue for testing.""" + return ParameterValue( + parameter=parameter, + value=value, + start_date=start_date, + end_date=end_date, + ) + + +# Pre-built fixtures for common test scenarios + +MOCK_PARAM_SINGLE = create_mock_parameter( + name="gov.irs.deductions.standard.amount.SINGLE", + label="Standard Deduction (Single)", +) + +MOCK_PARAM_JOINT = create_mock_parameter( + name="gov.irs.deductions.standard.amount.JOINT", + label="Standard Deduction (Joint)", +) + +MOCK_PARAM_TAX_RATE = create_mock_parameter( + name="gov.irs.income_tax.rates.bracket_1.rate", + label="Tax Rate Bracket 1", +) + +# Single parameter value +SINGLE_PARAM_VALUE = create_parameter_value( + parameter=MOCK_PARAM_SINGLE, + value=29200, + start_date=date(2024, 1, 1), +) + +# Parameter value with end date +PARAM_VALUE_WITH_END_DATE = create_parameter_value( + parameter=MOCK_PARAM_SINGLE, + value=29200, + start_date=date(2024, 1, 1), + end_date=date(2024, 12, 31), +) + +# Multiple parameter values for the same parameter (different periods) +MULTI_PERIOD_PARAM_VALUES = [ + create_parameter_value( + parameter=MOCK_PARAM_SINGLE, + 
value=29200, + start_date=date(2024, 1, 1), + ), + create_parameter_value( + parameter=MOCK_PARAM_SINGLE, + value=30000, + start_date=date(2025, 1, 1), + ), +] + +# Multiple different parameters +MULTIPLE_DIFFERENT_PARAMS = [ + create_parameter_value( + parameter=MOCK_PARAM_SINGLE, + value=29200, + start_date=date(2024, 1, 1), + ), + create_parameter_value( + parameter=MOCK_PARAM_JOINT, + value=58400, + start_date=date(2024, 1, 1), + ), + create_parameter_value( + parameter=MOCK_PARAM_TAX_RATE, + value=0.10, + start_date=date(2024, 1, 1), + ), +] + + +@pytest.fixture +def mock_param_single(): + """Pytest fixture for a mock single filer parameter.""" + return MOCK_PARAM_SINGLE + + +@pytest.fixture +def mock_param_joint(): + """Pytest fixture for a mock joint filer parameter.""" + return MOCK_PARAM_JOINT + + +@pytest.fixture +def single_param_value(): + """Pytest fixture for a single parameter value.""" + return SINGLE_PARAM_VALUE + + +@pytest.fixture +def param_value_with_end_date(): + """Pytest fixture for a parameter value with end date.""" + return PARAM_VALUE_WITH_END_DATE + + +@pytest.fixture +def multi_period_param_values(): + """Pytest fixture for multiple values of the same parameter.""" + return MULTI_PERIOD_PARAM_VALUES + + +@pytest.fixture +def multiple_different_params(): + """Pytest fixture for multiple different parameters.""" + return MULTIPLE_DIFFERENT_PARAMS diff --git a/tests/fixtures/us_reform_fixtures.py b/tests/fixtures/us_reform_fixtures.py new file mode 100644 index 00000000..c52a7aba --- /dev/null +++ b/tests/fixtures/us_reform_fixtures.py @@ -0,0 +1,124 @@ +"""Fixtures for US reform application tests.""" + +from datetime import date + +import pytest + +from policyengine.core import ParameterValue, Policy +from policyengine.tax_benefit_models.us import USHouseholdInput, us_latest + + +def create_standard_deduction_policy( + single_value: float = 29200, + joint_value: float = 58400, + year: int = 2024, +) -> Policy: + """Create a policy that 
sets standard deduction values.""" + std_deduction_single = us_latest.get_parameter( + "gov.irs.deductions.standard.amount.SINGLE" + ) + std_deduction_joint = us_latest.get_parameter( + "gov.irs.deductions.standard.amount.JOINT" + ) + + return Policy( + name=f"Standard Deduction: ${single_value:,.0f} single, ${joint_value:,.0f} joint", + parameter_values=[ + ParameterValue( + parameter=std_deduction_single, + value=single_value, + start_date=date(year, 1, 1), + ), + ParameterValue( + parameter=std_deduction_joint, + value=joint_value, + start_date=date(year, 1, 1), + ), + ], + ) + + +# Pre-built policy fixtures + +DOUBLE_STANDARD_DEDUCTION_POLICY = create_standard_deduction_policy( + single_value=14600 * 2, # Double from $14,600 to $29,200 + joint_value=29200 * 2, # Double from $29,200 to $58,400 +) + +ZERO_STANDARD_DEDUCTION_POLICY = create_standard_deduction_policy( + single_value=0, + joint_value=0, +) + +LARGE_STANDARD_DEDUCTION_POLICY = create_standard_deduction_policy( + single_value=100000, + joint_value=200000, +) + + +# Pre-built household fixtures + +HIGH_INCOME_SINGLE_FILER = USHouseholdInput( + people=[ + { + "age": 35, + "employment_income": 100000, + "is_tax_unit_head": True, + } + ], + tax_unit={"filing_status": "SINGLE"}, + year=2024, +) + +MODERATE_INCOME_SINGLE_FILER = USHouseholdInput( + people=[ + { + "age": 30, + "employment_income": 50000, + "is_tax_unit_head": True, + } + ], + tax_unit={"filing_status": "SINGLE"}, + year=2024, +) + +MARRIED_COUPLE_WITH_KIDS = USHouseholdInput( + people=[ + {"age": 40, "employment_income": 100000, "is_tax_unit_head": True}, + {"age": 38, "employment_income": 50000, "is_tax_unit_spouse": True}, + {"age": 10}, + {"age": 8}, + ], + tax_unit={"filing_status": "JOINT"}, + year=2024, +) + +LOW_INCOME_FAMILY = USHouseholdInput( + people=[ + {"age": 28, "employment_income": 25000, "is_tax_unit_head": True}, + {"age": 5}, + ], + tax_unit={"filing_status": "HEAD_OF_HOUSEHOLD"}, + year=2024, +) + + +# Pytest fixtures + + 
+@pytest.fixture +def double_standard_deduction_policy(): + """Pytest fixture for doubled standard deduction policy.""" + return DOUBLE_STANDARD_DEDUCTION_POLICY + + +@pytest.fixture +def high_income_single_filer(): + """Pytest fixture for high income single filer household.""" + return HIGH_INCOME_SINGLE_FILER + + +@pytest.fixture +def married_couple_with_kids(): + """Pytest fixture for married couple with kids household.""" + return MARRIED_COUPLE_WITH_KIDS diff --git a/tests/test_parametric_reforms.py b/tests/test_parametric_reforms.py new file mode 100644 index 00000000..6e328aa3 --- /dev/null +++ b/tests/test_parametric_reforms.py @@ -0,0 +1,262 @@ +"""Tests for parametric reforms utility functions.""" + +from datetime import date + +from policyengine.utils.parametric_reforms import ( + reform_dict_from_parameter_values, + simulation_modifier_from_parameter_values, +) +from tests.fixtures.parametric_reforms_fixtures import ( + MOCK_PARAM_JOINT, + MOCK_PARAM_SINGLE, + MOCK_PARAM_TAX_RATE, + MULTI_PERIOD_PARAM_VALUES, + MULTIPLE_DIFFERENT_PARAMS, + PARAM_VALUE_WITH_END_DATE, + SINGLE_PARAM_VALUE, + create_mock_parameter, + create_parameter_value, +) + + +class TestReformDictFromParameterValues: + """Tests for the reform_dict_from_parameter_values function.""" + + def test__given_none_parameter_values__then_returns_none(self): + """Given: None as parameter_values + When: Calling reform_dict_from_parameter_values + Then: Returns None + """ + # Given + parameter_values = None + + # When + result = reform_dict_from_parameter_values(parameter_values) + + # Then + assert result is None + + def test__given_empty_list__then_returns_none(self): + """Given: Empty list of parameter values + When: Calling reform_dict_from_parameter_values + Then: Returns None + """ + # Given + parameter_values = [] + + # When + result = reform_dict_from_parameter_values(parameter_values) + + # Then + assert result is None + + def 
test__given_single_parameter_value__then_returns_dict_with_one_entry( + self, + ): + """Given: Single parameter value + When: Calling reform_dict_from_parameter_values + Then: Returns dict with parameter name and period-value mapping + """ + # Given + pv = SINGLE_PARAM_VALUE + + # When + result = reform_dict_from_parameter_values([pv]) + + # Then + assert result is not None + assert MOCK_PARAM_SINGLE.name in result + assert "2024-01-01" in result[MOCK_PARAM_SINGLE.name] + assert result[MOCK_PARAM_SINGLE.name]["2024-01-01"] == 29200 + + def test__given_parameter_value_with_end_date__then_uses_period_range_format( + self, + ): + """Given: Parameter value with start_date and end_date + When: Calling reform_dict_from_parameter_values + Then: Returns dict with period range format "start.end" + """ + # Given + pv = PARAM_VALUE_WITH_END_DATE + + # When + result = reform_dict_from_parameter_values([pv]) + + # Then + assert result is not None + param_name = MOCK_PARAM_SINGLE.name + assert param_name in result + # Should use "start.end" format + assert "2024-01-01.2024-12-31" in result[param_name] + assert result[param_name]["2024-01-01.2024-12-31"] == 29200 + + def test__given_multiple_periods_same_parameter__then_includes_all_periods( + self, + ): + """Given: Multiple parameter values for same parameter (different periods) + When: Calling reform_dict_from_parameter_values + Then: Returns dict with all periods for that parameter + """ + # Given + param_values = MULTI_PERIOD_PARAM_VALUES + + # When + result = reform_dict_from_parameter_values(param_values) + + # Then + assert result is not None + param_name = MOCK_PARAM_SINGLE.name + assert param_name in result + assert len(result[param_name]) == 2 + assert result[param_name]["2024-01-01"] == 29200 + assert result[param_name]["2025-01-01"] == 30000 + + def test__given_multiple_different_parameters__then_includes_all_parameters( + self, + ): + """Given: Multiple parameter values for different parameters + When: Calling 
reform_dict_from_parameter_values + Then: Returns dict with all parameters + """ + # Given + param_values = MULTIPLE_DIFFERENT_PARAMS + + # When + result = reform_dict_from_parameter_values(param_values) + + # Then + assert result is not None + assert len(result) == 3 + assert MOCK_PARAM_SINGLE.name in result + assert MOCK_PARAM_JOINT.name in result + assert MOCK_PARAM_TAX_RATE.name in result + assert result[MOCK_PARAM_SINGLE.name]["2024-01-01"] == 29200 + assert result[MOCK_PARAM_JOINT.name]["2024-01-01"] == 58400 + assert result[MOCK_PARAM_TAX_RATE.name]["2024-01-01"] == 0.10 + + def test__given_parameter_value__then_preserves_value_type(self): + """Given: Parameter values with different types (int, float) + When: Calling reform_dict_from_parameter_values + Then: Values preserve their original types + """ + # Given + mock_param = create_mock_parameter("gov.test.rate") + pv_float = create_parameter_value( + parameter=mock_param, + value=0.15, + start_date=date(2024, 1, 1), + ) + + # When + result = reform_dict_from_parameter_values([pv_float]) + + # Then + assert result["gov.test.rate"]["2024-01-01"] == 0.15 + assert isinstance(result["gov.test.rate"]["2024-01-01"], float) + + +class TestSimulationModifierFromParameterValues: + """Tests for the simulation_modifier_from_parameter_values function.""" + + def test__given_empty_list__then_returns_callable(self): + """Given: Empty list of parameter values + When: Calling simulation_modifier_from_parameter_values + Then: Returns a callable function + """ + # Given + parameter_values = [] + + # When + result = simulation_modifier_from_parameter_values(parameter_values) + + # Then + assert callable(result) + + def test__given_parameter_values__then_returns_modifier_function(self): + """Given: List of parameter values + When: Calling simulation_modifier_from_parameter_values + Then: Returns a callable modifier function + """ + # Given + param_values = [SINGLE_PARAM_VALUE] + + # When + result = 
simulation_modifier_from_parameter_values(param_values) + + # Then + assert callable(result) + + def test__given_modifier__then_calls_p_update_for_each_value(self): + """Given: Modifier function from parameter values + When: Calling the modifier with a mock simulation + Then: Calls p.update() for each parameter value + """ + # Given + from unittest.mock import MagicMock + + mock_simulation = MagicMock() + mock_param_node = MagicMock() + mock_simulation.tax_benefit_system.parameters.get_child.return_value = ( + mock_param_node + ) + + param_values = [SINGLE_PARAM_VALUE] + modifier = simulation_modifier_from_parameter_values(param_values) + + # When + modifier(mock_simulation) + + # Then + mock_simulation.tax_benefit_system.parameters.get_child.assert_called_once_with( + MOCK_PARAM_SINGLE.name + ) + mock_param_node.update.assert_called_once() + + def test__given_multiple_values__then_applies_all_updates(self): + """Given: Multiple parameter values + When: Calling the modifier with a mock simulation + Then: Applies updates for all parameter values + """ + # Given + from unittest.mock import MagicMock + + mock_simulation = MagicMock() + mock_param_node = MagicMock() + mock_simulation.tax_benefit_system.parameters.get_child.return_value = ( + mock_param_node + ) + + param_values = MULTIPLE_DIFFERENT_PARAMS + modifier = simulation_modifier_from_parameter_values(param_values) + + # When + modifier(mock_simulation) + + # Then + assert ( + mock_simulation.tax_benefit_system.parameters.get_child.call_count + == 3 + ) + assert mock_param_node.update.call_count == 3 + + def test__given_modifier__then_returns_simulation(self): + """Given: Modifier function + When: Calling with a simulation + Then: Returns the simulation object + """ + # Given + from unittest.mock import MagicMock + + mock_simulation = MagicMock() + mock_param_node = MagicMock() + mock_simulation.tax_benefit_system.parameters.get_child.return_value = ( + mock_param_node + ) + + modifier = 
simulation_modifier_from_parameter_values([SINGLE_PARAM_VALUE]) + + # When + result = modifier(mock_simulation) + + # Then + assert result is mock_simulation diff --git a/tests/test_us_reform_application.py b/tests/test_us_reform_application.py new file mode 100644 index 00000000..d33f615d --- /dev/null +++ b/tests/test_us_reform_application.py @@ -0,0 +1,155 @@ +"""Tests for US reform application via reform_dict at construction time. + +These tests verify that the US model correctly applies reforms by building +a reform dict and passing it to Microsimulation at construction time, +fixing the p.update() bug that exists in the US country package. +""" + +from datetime import date + +from policyengine.core import ParameterValue, Policy +from policyengine.tax_benefit_models.us import ( + USHouseholdInput, + us_latest, +) +from policyengine.tax_benefit_models.us import ( + calculate_household_impact as calculate_us_household_impact, +) +from tests.fixtures.us_reform_fixtures import ( + DOUBLE_STANDARD_DEDUCTION_POLICY, + HIGH_INCOME_SINGLE_FILER, + MARRIED_COUPLE_WITH_KIDS, + create_standard_deduction_policy, +) + + +class TestUSHouseholdReformApplication: + """Tests for US household reform application.""" + + def test__given_baseline_policy__then_returns_baseline_tax(self): + """Given: No policy (baseline) + When: Calculating household impact + Then: Returns baseline tax calculation + """ + # Given + household = HIGH_INCOME_SINGLE_FILER + + # When + result = calculate_us_household_impact(household, policy=None) + + # Then + assert result.tax_unit[0]["income_tax"] > 0 + + def test__given_doubled_standard_deduction__then_tax_is_lower(self): + """Given: Policy that doubles standard deduction + When: Calculating household impact + Then: Income tax is lower than baseline + """ + # Given + household = HIGH_INCOME_SINGLE_FILER + policy = DOUBLE_STANDARD_DEDUCTION_POLICY + + # When + baseline_result = calculate_us_household_impact(household, policy=None) + reform_result = 
calculate_us_household_impact(household, policy=policy) + + # Then + baseline_tax = baseline_result.tax_unit[0]["income_tax"] + reform_tax = reform_result.tax_unit[0]["income_tax"] + + assert reform_tax < baseline_tax, ( + f"Reform tax ({reform_tax}) should be less than baseline ({baseline_tax})" + ) + + def test__given_doubled_standard_deduction__then_tax_reduction_is_significant( + self, + ): + """Given: Policy that doubles standard deduction + When: Calculating household impact for high income household + Then: Tax reduction is at least $1000 (significant impact) + """ + # Given + household = HIGH_INCOME_SINGLE_FILER + policy = DOUBLE_STANDARD_DEDUCTION_POLICY + + # When + baseline_result = calculate_us_household_impact(household, policy=None) + reform_result = calculate_us_household_impact(household, policy=policy) + + # Then + baseline_tax = baseline_result.tax_unit[0]["income_tax"] + reform_tax = reform_result.tax_unit[0]["income_tax"] + tax_reduction = baseline_tax - reform_tax + + assert tax_reduction >= 1000, ( + f"Tax reduction ({tax_reduction}) should be at least $1000" + ) + + def test__given_married_couple__then_joint_deduction_affects_tax(self): + """Given: Married couple with doubled joint standard deduction + When: Calculating household impact + Then: Tax is lower than baseline + """ + # Given + household = MARRIED_COUPLE_WITH_KIDS + policy = DOUBLE_STANDARD_DEDUCTION_POLICY + + # When + baseline_result = calculate_us_household_impact(household, policy=None) + reform_result = calculate_us_household_impact(household, policy=policy) + + # Then + baseline_tax = baseline_result.tax_unit[0]["income_tax"] + reform_tax = reform_result.tax_unit[0]["income_tax"] + + assert reform_tax < baseline_tax, ( + f"Reform tax ({reform_tax}) should be less than baseline ({baseline_tax})" + ) + + def test__given_same_policy_twice__then_results_are_deterministic(self): + """Given: Same policy applied twice + When: Calculating household impact + Then: Results are 
identical (deterministic) + """ + # Given + household = HIGH_INCOME_SINGLE_FILER + policy = DOUBLE_STANDARD_DEDUCTION_POLICY + + # When + result1 = calculate_us_household_impact(household, policy=policy) + result2 = calculate_us_household_impact(household, policy=policy) + + # Then + assert result1.tax_unit[0]["income_tax"] == result2.tax_unit[0]["income_tax"] + + def test__given_custom_deduction_value__then_tax_reflects_value(self): + """Given: Custom standard deduction value + When: Calculating household impact + Then: Tax reflects the custom deduction + """ + # Given + household = HIGH_INCOME_SINGLE_FILER + + # Create policies with different deduction values + small_deduction_policy = create_standard_deduction_policy( + single_value=5000, joint_value=10000 + ) + large_deduction_policy = create_standard_deduction_policy( + single_value=50000, joint_value=100000 + ) + + # When + small_deduction_result = calculate_us_household_impact( + household, policy=small_deduction_policy + ) + large_deduction_result = calculate_us_household_impact( + household, policy=large_deduction_policy + ) + + # Then + small_tax = small_deduction_result.tax_unit[0]["income_tax"] + large_tax = large_deduction_result.tax_unit[0]["income_tax"] + + assert large_tax < small_tax, ( + f"Large deduction tax ({large_tax}) should be less than small deduction ({small_tax})" + ) From 2f52b0337a58ffc47a97235f26d903d4f2074e53 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Mon, 16 Feb 2026 20:40:21 +0100 Subject: [PATCH 7/8] fix: Fix ruff lint errors Remove unused imports in test_us_reform_application.py. Co-Authored-By: Claude Opus 4.6 --- tests/test_us_reform_application.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/test_us_reform_application.py b/tests/test_us_reform_application.py index d33f615d..42657499 100644 --- a/tests/test_us_reform_application.py +++ b/tests/test_us_reform_application.py @@ -5,13 +5,7 @@ fixing the p.update() bug that exists in the US country package. 
""" -from datetime import date -from policyengine.core import ParameterValue, Policy -from policyengine.tax_benefit_models.us import ( - USHouseholdInput, - us_latest, -) from policyengine.tax_benefit_models.us import ( calculate_household_impact as calculate_us_household_impact, ) From ee6b115267892849594545f4e3ed3d03e68891e6 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Mon, 16 Feb 2026 23:58:17 +0100 Subject: [PATCH 8/8] refactor: Extract shared entity utilities and decompose reform building Extract duplicated entity relationship and dataset filtering logic from US and UK model.py into shared utils/entity_utils.py. Decompose inline reform dict construction in US run() into single-purpose functions (build_reform_dict, merge_reform_dicts) in utils/parametric_reforms.py. Co-Authored-By: Claude Opus 4.6 --- .../tax_benefit_models/uk/model.py | 133 ++------ .../tax_benefit_models/us/model.py | 232 ++------------ src/policyengine/utils/entity_utils.py | 127 ++++++++ src/policyengine/utils/parametric_reforms.py | 56 ++++ tests/test_entity_utils.py | 295 ++++++++++++++++++ 5 files changed, 525 insertions(+), 318 deletions(-) create mode 100644 src/policyengine/utils/entity_utils.py create mode 100644 tests/test_entity_utils.py diff --git a/src/policyengine/tax_benefit_models/uk/model.py b/src/policyengine/tax_benefit_models/uk/model.py index dedbdc29..88ead217 100644 --- a/src/policyengine/tax_benefit_models/uk/model.py +++ b/src/policyengine/tax_benefit_models/uk/model.py @@ -13,6 +13,10 @@ TaxBenefitModelVersion, Variable, ) +from policyengine.utils.entity_utils import ( + build_entity_relationships, + filter_dataset_by_household_variable, +) from policyengine.utils.parameter_labels import ( build_scale_lookup, generate_label_for_parameter, @@ -23,6 +27,8 @@ if TYPE_CHECKING: from policyengine.core.simulation import Simulation +UK_GROUP_ENTITIES = ["benunit", "household"] + class PolicyEngineUK(TaxBenefitModel): id: str = "policyengine-uk" @@ -185,41 +191,9 @@ def 
__init__(self, **kwargs: dict): def _build_entity_relationships( self, dataset: PolicyEngineUKDataset ) -> pd.DataFrame: - """Build a DataFrame mapping each person to their containing entities. - - Creates an explicit relationship map between persons and all entity - types (benunit, household). This enables filtering at any entity - level while preserving the integrity of all related entities. - - Args: - dataset: The dataset to extract relationships from. - - Returns: - A DataFrame indexed by person with columns for each entity ID. - """ + """Build a DataFrame mapping each person to their containing entities.""" person_data = pd.DataFrame(dataset.data.person) - - # Determine column naming convention - benunit_id_col = ( - "person_benunit_id" - if "person_benunit_id" in person_data.columns - else "benunit_id" - ) - household_id_col = ( - "person_household_id" - if "person_household_id" in person_data.columns - else "household_id" - ) - - entity_rel = pd.DataFrame( - { - "person_id": person_data["person_id"].values, - "benunit_id": person_data[benunit_id_col].values, - "household_id": person_data[household_id_col].values, - } - ) - - return entity_rel + return build_entity_relationships(person_data, UK_GROUP_ENTITIES) def _filter_dataset_by_household_variable( self, @@ -227,75 +201,13 @@ def _filter_dataset_by_household_variable( variable_name: str, variable_value: str, ) -> PolicyEngineUKDataset: - """Filter a dataset to only include households where a variable matches. - - Uses the entity relationship approach: builds an explicit map of all - entity relationships, filters at the household level, and keeps all - persons in matching households to preserve entity integrity. - - Args: - dataset: The dataset to filter. - variable_name: The name of the household-level variable to filter on. - variable_value: The value to match. Handles both str and bytes encoding. - - Returns: - A new filtered dataset containing only matching households. 
- """ - # Build entity relationships - entity_rel = self._build_entity_relationships(dataset) - - # Get household-level variable values - household_data = pd.DataFrame(dataset.data.household) - - if variable_name not in household_data.columns: - raise ValueError( - f"Variable '{variable_name}' not found in household data. " - f"Available columns: {list(household_data.columns)}" - ) - - hh_values = household_data[variable_name].values - hh_ids = household_data["household_id"].values - - # Create mask for matching households, handling bytes encoding - if isinstance(variable_value, str): - hh_mask = (hh_values == variable_value) | ( - hh_values == variable_value.encode() - ) - else: - hh_mask = hh_values == variable_value - - matching_hh_ids = set(hh_ids[hh_mask]) - - if len(matching_hh_ids) == 0: - raise ValueError( - f"No households found matching {variable_name}={variable_value}" - ) - - # Filter entity_rel to persons in matching households - person_mask = entity_rel["household_id"].isin(matching_hh_ids) - filtered_entity_rel = entity_rel[person_mask] - - # Get the filtered entity IDs - filtered_person_ids = set(filtered_entity_rel["person_id"]) - filtered_household_ids = matching_hh_ids - filtered_benunit_ids = set(filtered_entity_rel["benunit_id"]) - - # Filter each entity DataFrame - person_df = pd.DataFrame(dataset.data.person) - household_df = pd.DataFrame(dataset.data.household) - benunit_df = pd.DataFrame(dataset.data.benunit) - - filtered_person = person_df[ - person_df["person_id"].isin(filtered_person_ids) - ] - filtered_household = household_df[ - household_df["household_id"].isin(filtered_household_ids) - ] - filtered_benunit = benunit_df[ - benunit_df["benunit_id"].isin(filtered_benunit_ids) - ] - - # Create filtered dataset + """Filter a dataset to only include households where a variable matches.""" + filtered = filter_dataset_by_household_variable( + entity_data=dataset.data.entity_data, + group_entities=UK_GROUP_ENTITIES, + 
variable_name=variable_name, + variable_value=variable_value, + ) return PolicyEngineUKDataset( id=dataset.id + f"_filtered_{variable_name}_{variable_value}", name=dataset.name, @@ -304,18 +216,9 @@ def _filter_dataset_by_household_variable( year=dataset.year, is_output_dataset=dataset.is_output_dataset, data=UKYearData( - person=MicroDataFrame( - filtered_person.reset_index(drop=True), - weights="person_weight", - ), - benunit=MicroDataFrame( - filtered_benunit.reset_index(drop=True), - weights="benunit_weight", - ), - household=MicroDataFrame( - filtered_household.reset_index(drop=True), - weights="household_weight", - ), + person=filtered["person"], + benunit=filtered["benunit"], + household=filtered["household"], ), ) diff --git a/src/policyengine/tax_benefit_models/us/model.py b/src/policyengine/tax_benefit_models/us/model.py index 0db59259..3c8a5aae 100644 --- a/src/policyengine/tax_benefit_models/us/model.py +++ b/src/policyengine/tax_benefit_models/us/model.py @@ -13,6 +13,10 @@ TaxBenefitModelVersion, Variable, ) +from policyengine.utils.entity_utils import ( + build_entity_relationships, + filter_dataset_by_household_variable, +) from policyengine.utils.parameter_labels import ( build_scale_lookup, generate_label_for_parameter, @@ -23,6 +27,8 @@ if TYPE_CHECKING: from policyengine.core.simulation import Simulation +US_GROUP_ENTITIES = ["household", "tax_unit", "spm_unit", "family", "marital_unit"] + class PolicyEngineUS(TaxBenefitModel): id: str = "policyengine-us" @@ -178,60 +184,9 @@ def __init__(self, **kwargs: dict): def _build_entity_relationships( self, dataset: PolicyEngineUSDataset ) -> pd.DataFrame: - """Build a DataFrame mapping each person to their containing entities. - - Creates an explicit relationship map between persons and all entity - types (household, tax_unit, spm_unit, family, marital_unit). This - enables filtering at any entity level while preserving the integrity - of all related entities. 
- - Args: - dataset: The dataset to extract relationships from. - - Returns: - A DataFrame indexed by person with columns for each entity ID. - """ + """Build a DataFrame mapping each person to their containing entities.""" person_data = pd.DataFrame(dataset.data.person) - - # Determine column naming convention - household_id_col = ( - "person_household_id" - if "person_household_id" in person_data.columns - else "household_id" - ) - tax_unit_id_col = ( - "person_tax_unit_id" - if "person_tax_unit_id" in person_data.columns - else "tax_unit_id" - ) - spm_unit_id_col = ( - "person_spm_unit_id" - if "person_spm_unit_id" in person_data.columns - else "spm_unit_id" - ) - family_id_col = ( - "person_family_id" - if "person_family_id" in person_data.columns - else "family_id" - ) - marital_unit_id_col = ( - "person_marital_unit_id" - if "person_marital_unit_id" in person_data.columns - else "marital_unit_id" - ) - - entity_rel = pd.DataFrame( - { - "person_id": person_data["person_id"].values, - "household_id": person_data[household_id_col].values, - "tax_unit_id": person_data[tax_unit_id_col].values, - "spm_unit_id": person_data[spm_unit_id_col].values, - "family_id": person_data[family_id_col].values, - "marital_unit_id": person_data[marital_unit_id_col].values, - } - ) - - return entity_rel + return build_entity_relationships(person_data, US_GROUP_ENTITIES) def _filter_dataset_by_household_variable( self, @@ -239,90 +194,13 @@ def _filter_dataset_by_household_variable( variable_name: str, variable_value: str, ) -> PolicyEngineUSDataset: - """Filter a dataset to only include households where a variable matches. - - Uses the entity relationship approach: builds an explicit map of all - entity relationships, filters at the household level, and keeps all - persons in matching households to preserve entity integrity. - - Args: - dataset: The dataset to filter. - variable_name: The name of the household-level variable to filter on. - variable_value: The value to match. 
Handles both str and bytes encoding. - - Returns: - A new filtered dataset containing only matching households. - """ - # Build entity relationships - entity_rel = self._build_entity_relationships(dataset) - - # Get household-level variable values - household_data = pd.DataFrame(dataset.data.household) - - if variable_name not in household_data.columns: - raise ValueError( - f"Variable '{variable_name}' not found in household data. " - f"Available columns: {list(household_data.columns)}" - ) - - hh_values = household_data[variable_name].values - hh_ids = household_data["household_id"].values - - # Create mask for matching households, handling bytes encoding - if isinstance(variable_value, str): - hh_mask = (hh_values == variable_value) | ( - hh_values == variable_value.encode() - ) - else: - hh_mask = hh_values == variable_value - - matching_hh_ids = set(hh_ids[hh_mask]) - - if len(matching_hh_ids) == 0: - raise ValueError( - f"No households found matching {variable_name}={variable_value}" - ) - - # Filter entity_rel to persons in matching households - person_mask = entity_rel["household_id"].isin(matching_hh_ids) - filtered_entity_rel = entity_rel[person_mask] - - # Get the filtered entity IDs - filtered_person_ids = set(filtered_entity_rel["person_id"]) - filtered_household_ids = matching_hh_ids - filtered_tax_unit_ids = set(filtered_entity_rel["tax_unit_id"]) - filtered_spm_unit_ids = set(filtered_entity_rel["spm_unit_id"]) - filtered_family_ids = set(filtered_entity_rel["family_id"]) - filtered_marital_unit_ids = set(filtered_entity_rel["marital_unit_id"]) - - # Filter each entity DataFrame - person_df = pd.DataFrame(dataset.data.person) - household_df = pd.DataFrame(dataset.data.household) - tax_unit_df = pd.DataFrame(dataset.data.tax_unit) - spm_unit_df = pd.DataFrame(dataset.data.spm_unit) - family_df = pd.DataFrame(dataset.data.family) - marital_unit_df = pd.DataFrame(dataset.data.marital_unit) - - filtered_person = person_df[ - 
person_df["person_id"].isin(filtered_person_ids) - ] - filtered_household = household_df[ - household_df["household_id"].isin(filtered_household_ids) - ] - filtered_tax_unit = tax_unit_df[ - tax_unit_df["tax_unit_id"].isin(filtered_tax_unit_ids) - ] - filtered_spm_unit = spm_unit_df[ - spm_unit_df["spm_unit_id"].isin(filtered_spm_unit_ids) - ] - filtered_family = family_df[ - family_df["family_id"].isin(filtered_family_ids) - ] - filtered_marital_unit = marital_unit_df[ - marital_unit_df["marital_unit_id"].isin(filtered_marital_unit_ids) - ] - - # Create filtered dataset + """Filter a dataset to only include households where a variable matches.""" + filtered = filter_dataset_by_household_variable( + entity_data=dataset.data.entity_data, + group_entities=US_GROUP_ENTITIES, + variable_name=variable_name, + variable_value=variable_value, + ) return PolicyEngineUSDataset( id=dataset.id + f"_filtered_{variable_name}_{variable_value}", name=dataset.name, @@ -331,30 +209,12 @@ def _filter_dataset_by_household_variable( year=dataset.year, is_output_dataset=dataset.is_output_dataset, data=USYearData( - person=MicroDataFrame( - filtered_person.reset_index(drop=True), - weights="person_weight", - ), - household=MicroDataFrame( - filtered_household.reset_index(drop=True), - weights="household_weight", - ), - tax_unit=MicroDataFrame( - filtered_tax_unit.reset_index(drop=True), - weights="tax_unit_weight", - ), - spm_unit=MicroDataFrame( - filtered_spm_unit.reset_index(drop=True), - weights="spm_unit_weight", - ), - family=MicroDataFrame( - filtered_family.reset_index(drop=True), - weights="family_weight", - ), - marital_unit=MicroDataFrame( - filtered_marital_unit.reset_index(drop=True), - weights="marital_unit_weight", - ), + person=filtered["person"], + marital_unit=filtered["marital_unit"], + family=filtered["family"], + spm_unit=filtered["spm_unit"], + tax_unit=filtered["tax_unit"], + household=filtered["household"], ), ) @@ -363,7 +223,8 @@ def run(self, simulation: 
"Simulation") -> "Simulation": from policyengine_us.system import system from policyengine.utils.parametric_reforms import ( - reform_dict_from_parameter_values, + build_reform_dict, + merge_reform_dicts, ) assert isinstance(simulation.dataset, PolicyEngineUSDataset) @@ -377,47 +238,12 @@ def run(self, simulation: "Simulation") -> "Simulation": dataset, simulation.filter_field, simulation.filter_value ) - # Build reform dict from policy and dynamic parameter values - # US requires reforms to be passed at Microsimulation construction time - # (unlike UK which supports p.update() after construction) - reform_dict = None - - # Collect policy reforms - if simulation.policy: - if simulation.policy.simulation_modifier is not None: - # Custom simulation modifier - extract parameter values if available - # Fall back to parameter_values if no custom modifier logic needed - if simulation.policy.parameter_values: - reform_dict = reform_dict_from_parameter_values( - simulation.policy.parameter_values - ) - elif simulation.policy.parameter_values: - reform_dict = reform_dict_from_parameter_values( - simulation.policy.parameter_values - ) - - # Merge dynamic reforms into reform_dict - if simulation.dynamic: - dynamic_reform = None - if simulation.dynamic.simulation_modifier is not None: - if simulation.dynamic.parameter_values: - dynamic_reform = reform_dict_from_parameter_values( - simulation.dynamic.parameter_values - ) - elif simulation.dynamic.parameter_values: - dynamic_reform = reform_dict_from_parameter_values( - simulation.dynamic.parameter_values - ) - - if dynamic_reform: - if reform_dict is None: - reform_dict = dynamic_reform - else: - # Merge dynamic reforms into policy reforms - for param_name, period_values in dynamic_reform.items(): - if param_name not in reform_dict: - reform_dict[param_name] = {} - reform_dict[param_name].update(period_values) + # Build reform dict from policy and dynamic parameter values. 
+ # US requires reforms at Microsimulation construction time + # (unlike UK which supports p.update() after construction). + policy_reform = build_reform_dict(simulation.policy) + dynamic_reform = build_reform_dict(simulation.dynamic) + reform_dict = merge_reform_dicts(policy_reform, dynamic_reform) # Create Microsimulation with reform at construction time microsim = Microsimulation(reform=reform_dict) diff --git a/src/policyengine/utils/entity_utils.py b/src/policyengine/utils/entity_utils.py new file mode 100644 index 00000000..fdbcc092 --- /dev/null +++ b/src/policyengine/utils/entity_utils.py @@ -0,0 +1,127 @@ +"""Shared utilities for entity relationship building and dataset filtering.""" + +import pandas as pd +from microdf import MicroDataFrame + + +def _resolve_id_column( + person_data: pd.DataFrame, entity_name: str +) -> str: + """Resolve the ID column name for a group entity in person data. + + Tries `person_{entity}_id` first (standard convention), falls back + to `{entity}_id` (custom datasets). + """ + prefixed = f"person_{entity_name}_id" + bare = f"{entity_name}_id" + if prefixed in person_data.columns: + return prefixed + return bare + + +def build_entity_relationships( + person_data: pd.DataFrame, + group_entities: list[str], +) -> pd.DataFrame: + """Build a DataFrame mapping each person to their containing entities. + + Creates an explicit relationship map between persons and all specified + group entity types. This enables filtering at any entity level while + preserving the integrity of all related entities. + + Args: + person_data: DataFrame of person-level data with ID columns. + group_entities: List of group entity names (e.g., ["household", "tax_unit"]). + + Returns: + A DataFrame with person_id and one {entity}_id column per group entity. 
+ """ + columns = {"person_id": person_data["person_id"].values} + for entity in group_entities: + id_col = _resolve_id_column(person_data, entity) + columns[f"{entity}_id"] = person_data[id_col].values + return pd.DataFrame(columns) + + +def filter_dataset_by_household_variable( + entity_data: dict[str, MicroDataFrame], + group_entities: list[str], + variable_name: str, + variable_value: str, +) -> dict[str, MicroDataFrame]: + """Filter dataset entities to only include households where a variable matches. + + Uses an entity relationship approach: builds an explicit map of all + entity relationships, filters at the household level, and keeps all + persons in matching households to preserve entity integrity. + + Args: + entity_data: Dict mapping entity names to their MicroDataFrames + (from YearData.entity_data). + group_entities: List of group entity names for this country. + variable_name: The household-level variable to filter on. + variable_value: The value to match. Handles both str and bytes encoding. + + Returns: + A dict mapping entity names to filtered MicroDataFrames. + + Raises: + ValueError: If variable_name is not found or no households match. + """ + person_data = pd.DataFrame(entity_data["person"]) + household_data = pd.DataFrame(entity_data["household"]) + + if variable_name not in household_data.columns: + raise ValueError( + f"Variable '{variable_name}' not found in household data. 
" + f"Available columns: {list(household_data.columns)}" + ) + + # Build entity relationships + entity_rel = build_entity_relationships(person_data, group_entities) + + # Find matching household IDs + hh_values = household_data[variable_name].values + hh_ids = household_data["household_id"].values + + if isinstance(variable_value, str): + hh_mask = (hh_values == variable_value) | ( + hh_values == variable_value.encode() + ) + else: + hh_mask = hh_values == variable_value + + matching_hh_ids = set(hh_ids[hh_mask]) + + if len(matching_hh_ids) == 0: + raise ValueError( + f"No households found matching {variable_name}={variable_value}" + ) + + # Filter persons to those in matching households + person_mask = entity_rel["household_id"].isin(matching_hh_ids) + filtered_rel = entity_rel[person_mask] + + # Collect filtered IDs for each entity + filtered_ids = {"person": set(filtered_rel["person_id"])} + for entity in group_entities: + filtered_ids[entity] = set(filtered_rel[f"{entity}_id"]) + + # Filter each entity DataFrame + result = {} + for entity_name, mdf in entity_data.items(): + df = pd.DataFrame(mdf) + id_col = f"{entity_name}_id" + if entity_name in filtered_ids and id_col in df.columns: + filtered_df = df[df[id_col].isin(filtered_ids[entity_name])] + else: + filtered_df = df + + weight_col = f"{entity_name}_weight" + weights = weight_col if weight_col in filtered_df.columns else None + result[entity_name] = MicroDataFrame( + filtered_df.reset_index(drop=True), + weights=weights, + ) + + return result diff --git a/src/policyengine/utils/parametric_reforms.py b/src/policyengine/utils/parametric_reforms.py index 7a9494a5..4176037a 100644 --- a/src/policyengine/utils/parametric_reforms.py +++ b/src/policyengine/utils/parametric_reforms.py @@ -1,9 +1,16 @@ +from __future__ import annotations + from collections.abc import Callable +from typing import TYPE_CHECKING from policyengine_core.periods import period from policyengine.core import ParameterValue +if 
TYPE_CHECKING: + from policyengine.core.dynamic import Dynamic + from policyengine.core.policy import Policy + def reform_dict_from_parameter_values( parameter_values: list[ParameterValue], @@ -77,3 +84,52 @@ def modifier(simulation): return simulation return modifier + + +def build_reform_dict(policy_or_dynamic: Policy | Dynamic | None) -> dict | None: + """Extract a reform dict from a Policy or Dynamic object. + + If the object has parameter_values, converts them to reform dict format. + Returns None if the object is None or has no parameter values. + + Args: + policy_or_dynamic: A Policy or Dynamic object, or None. + + Returns: + A reform dict suitable for Microsimulation(reform=...), or None. + """ + if policy_or_dynamic is None: + return None + if policy_or_dynamic.parameter_values: + return reform_dict_from_parameter_values( + policy_or_dynamic.parameter_values + ) + return None + + +def merge_reform_dicts( + base: dict | None, override: dict | None +) -> dict | None: + """Merge two reform dicts, with override values taking precedence. + + Either or both dicts can be None. When both have entries for the same + parameter, period-level values from override replace those in base. + + Args: + base: The base reform dict (e.g., from policy). + override: The override reform dict (e.g., from dynamic). + + Returns: + The merged reform dict, or None if both inputs are None. 
+ """ + if base is None: + return override + if override is None: + return base + + merged = {k: dict(v) for k, v in base.items()} + for param_name, period_values in override.items(): + if param_name not in merged: + merged[param_name] = {} + merged[param_name].update(period_values) + return merged diff --git a/tests/test_entity_utils.py b/tests/test_entity_utils.py new file mode 100644 index 00000000..20c7b3ce --- /dev/null +++ b/tests/test_entity_utils.py @@ -0,0 +1,295 @@ +"""Tests for shared entity utilities and reform dict helpers.""" + +import pandas as pd +import pytest +from microdf import MicroDataFrame + +from policyengine.utils.entity_utils import ( + build_entity_relationships, + filter_dataset_by_household_variable, +) +from policyengine.utils.parametric_reforms import ( + build_reform_dict, + merge_reform_dicts, +) + + +class TestBuildEntityRelationships: + """Tests for the shared build_entity_relationships function.""" + + def test__given_us_style_entities__then_returns_all_columns( + self, us_test_dataset + ): + """Given: Person data with 5 group entities (US style) + When: Building entity relationships + Then: DataFrame has person_id + all 5 entity ID columns + """ + person_data = pd.DataFrame(us_test_dataset.data.person) + group_entities = [ + "household", + "tax_unit", + "spm_unit", + "family", + "marital_unit", + ] + + result = build_entity_relationships(person_data, group_entities) + + expected_columns = { + "person_id", + "household_id", + "tax_unit_id", + "spm_unit_id", + "family_id", + "marital_unit_id", + } + assert set(result.columns) == expected_columns + + def test__given_uk_style_entities__then_returns_all_columns( + self, uk_test_dataset + ): + """Given: Person data with 2 group entities (UK style) + When: Building entity relationships + Then: DataFrame has person_id + 2 entity ID columns + """ + person_data = pd.DataFrame(uk_test_dataset.data.person) + group_entities = ["benunit", "household"] + + result = 
build_entity_relationships(person_data, group_entities)
+
+        expected_columns = {"person_id", "benunit_id", "household_id"}
+        assert set(result.columns) == expected_columns
+
+    def test__given_6_persons__then_returns_6_rows(self, us_test_dataset):
+        """Given: Dataset with 6 persons
+        When: Building entity relationships
+        Then: Result has 6 rows
+        """
+        person_data = pd.DataFrame(us_test_dataset.data.person)
+
+        result = build_entity_relationships(
+            person_data, ["household", "tax_unit"]
+        )
+
+        assert len(result) == 6
+
+    def test__given_prefixed_columns__then_resolves_correctly(self):
+        """Given: Person data with person_household_id naming convention
+        When: Building entity relationships
+        Then: Correctly maps to household_id in result
+        """
+        person_data = pd.DataFrame(
+            {
+                "person_id": [1, 2],
+                "person_household_id": [10, 20],
+            }
+        )
+
+        result = build_entity_relationships(person_data, ["household"])
+
+        assert list(result["household_id"]) == [10, 20]
+
+    def test__given_bare_columns__then_resolves_correctly(self):
+        """Given: Person data with household_id naming convention (no prefix)
+        When: Building entity relationships
+        Then: Correctly maps to household_id in result
+        """
+        person_data = pd.DataFrame(
+            {
+                "person_id": [1, 2],
+                "household_id": [10, 20],
+            }
+        )
+
+        result = build_entity_relationships(person_data, ["household"])
+
+        assert list(result["household_id"]) == [10, 20]
+
+
+class TestFilterDatasetByHouseholdVariable:
+    """Tests for the shared filter_dataset_by_household_variable function."""
+
+    def test__given_matching_value__then_returns_filtered_entities(self):
+        """Given: Dataset with 2 places
+        When: Filtering by place="A"
+        Then: Returns only matching households and related persons
+        """
+        entity_data = {
+            "person": MicroDataFrame(
+                pd.DataFrame(
+                    {
+                        "person_id": [1, 2, 3],
+                        "household_id": [1, 1, 2],
+                        "person_weight": [1.0, 1.0, 1.0],
+                    }
+                ),
+                weights="person_weight",
+            ),
+            "household": MicroDataFrame(
+                pd.DataFrame(
+ { + "household_id": [1, 2], + "household_weight": [1.0, 1.0], + "place": ["A", "B"], + } + ), + weights="household_weight", + ), + } + + result = filter_dataset_by_household_variable( + entity_data=entity_data, + group_entities=["household"], + variable_name="place", + variable_value="A", + ) + + assert len(pd.DataFrame(result["person"])) == 2 + assert len(pd.DataFrame(result["household"])) == 1 + + def test__given_no_match__then_raises_value_error(self): + """Given: Dataset with no matching households + When: Filtering + Then: Raises ValueError + """ + entity_data = { + "person": MicroDataFrame( + pd.DataFrame( + { + "person_id": [1], + "household_id": [1], + "person_weight": [1.0], + } + ), + weights="person_weight", + ), + "household": MicroDataFrame( + pd.DataFrame( + { + "household_id": [1], + "household_weight": [1.0], + "place": ["A"], + } + ), + weights="household_weight", + ), + } + + with pytest.raises(ValueError, match="No households found"): + filter_dataset_by_household_variable( + entity_data=entity_data, + group_entities=["household"], + variable_name="place", + variable_value="Z", + ) + + def test__given_missing_variable__then_raises_value_error(self): + """Given: Dataset without the filter variable + When: Filtering + Then: Raises ValueError + """ + entity_data = { + "person": MicroDataFrame( + pd.DataFrame( + { + "person_id": [1], + "household_id": [1], + "person_weight": [1.0], + } + ), + weights="person_weight", + ), + "household": MicroDataFrame( + pd.DataFrame( + { + "household_id": [1], + "household_weight": [1.0], + } + ), + weights="household_weight", + ), + } + + with pytest.raises(ValueError, match="not found in household data"): + filter_dataset_by_household_variable( + entity_data=entity_data, + group_entities=["household"], + variable_name="nonexistent", + variable_value="x", + ) + + +class TestBuildReformDict: + """Tests for build_reform_dict helper.""" + + def test__given_none__then_returns_none(self): + assert 
build_reform_dict(None) is None + + def test__given_no_parameter_values__then_returns_none(self): + from unittest.mock import MagicMock + + obj = MagicMock() + obj.parameter_values = [] + assert build_reform_dict(obj) is None + + def test__given_parameter_values__then_returns_reform_dict(self): + from datetime import datetime + from unittest.mock import MagicMock + + param = MagicMock() + param.name = "gov.test.param" + + pv = MagicMock() + pv.parameter = param + pv.value = 1000 + pv.start_date = datetime(2024, 1, 1) + pv.end_date = None + + obj = MagicMock() + obj.parameter_values = [pv] + + result = build_reform_dict(obj) + + assert result == {"gov.test.param": {"2024-01-01": 1000}} + + +class TestMergeReformDicts: + """Tests for merge_reform_dicts helper.""" + + def test__given_both_none__then_returns_none(self): + assert merge_reform_dicts(None, None) is None + + def test__given_base_none__then_returns_override(self): + override = {"param": {"2024-01-01": 100}} + assert merge_reform_dicts(None, override) is override + + def test__given_override_none__then_returns_base(self): + base = {"param": {"2024-01-01": 100}} + assert merge_reform_dicts(base, None) is base + + def test__given_both_dicts__then_merges_correctly(self): + base = {"param_a": {"2024-01-01": 100}} + override = {"param_b": {"2024-01-01": 200}} + + result = merge_reform_dicts(base, override) + + assert result == { + "param_a": {"2024-01-01": 100}, + "param_b": {"2024-01-01": 200}, + } + + def test__given_overlapping_params__then_override_wins(self): + base = {"param": {"2024-01-01": 100}} + override = {"param": {"2024-01-01": 999}} + + result = merge_reform_dicts(base, override) + + assert result == {"param": {"2024-01-01": 999}} + + def test__given_merge__then_does_not_mutate_base(self): + base = {"param": {"2024-01-01": 100}} + override = {"param": {"2024-01-01": 999}} + + merge_reform_dicts(base, override) + + assert base == {"param": {"2024-01-01": 100}}