From 00d818e168e4be00fd671494ae0b8e7092d27fbe Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Tue, 2 Jun 2026 10:59:19 -0400 Subject: [PATCH 1/3] Always enable the ACS donor (rent, real_estate_taxes imputation) The ACS donor provider supplies the rent and real_estate_taxes source imputation that eCPS also draws from ACS. It was gated by include_acs, so the no-ACS / "eCPS-shaped" build variants dropped it and exported rent=$0 (the nation/census/rent target read $0 against a ~$735B baseline). Decouple the ACS donor from include_acs / include_donor_surveys so it is always enabled. ACS as a population spine ("multispine") remains a separate, not-yet-enabled control. Update the CLI help text and the two provider-assembly tests that encoded the old can-disable behavior. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../pipelines/pe_us_data_rebuild.py | 19 ++++++++------ .../pe_us_data_rebuild_checkpoint.py | 5 ++-- tests/pipelines/test_pe_us_data_rebuild.py | 25 ++++++++++++------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/microplex_us/pipelines/pe_us_data_rebuild.py b/src/microplex_us/pipelines/pe_us_data_rebuild.py index 0d1f137..36064fa 100644 --- a/src/microplex_us/pipelines/pe_us_data_rebuild.py +++ b/src/microplex_us/pipelines/pe_us_data_rebuild.py @@ -155,19 +155,22 @@ def default_policyengine_us_data_rebuild_source_providers( social_security_split_strategy=SOCIAL_SECURITY_SPLIT_STRATEGY_PE_QRF, ), ] - resolved_include_acs = include_donor_surveys if include_acs is None else include_acs resolved_include_sipp = ( include_donor_surveys if include_sipp is None else include_sipp ) resolved_include_scf = include_donor_surveys if include_scf is None else include_scf - if resolved_include_acs: - providers.append( - ACSSourceProvider( - year=int(acs_year), - policyengine_us_data_repo=policyengine_us_data_repo, - policyengine_us_data_python=policyengine_us_data_python, - ) + # The ACS donor is always enabled.
It supplies the rent and real_estate_taxes + # source imputation that eCPS also draws from ACS, so omitting it leaves those + # variables at zero. ACS as a population spine ("multispine") is a separate, + # independently controlled feature that is not enabled here; ``include_acs`` is + # retained for backward compatibility and no longer disables the donor. + providers.append( + ACSSourceProvider( + year=int(acs_year), + policyengine_us_data_repo=policyengine_us_data_repo, + policyengine_us_data_python=policyengine_us_data_python, ) + ) if resolved_include_sipp: providers.extend( [ diff --git a/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py b/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py index bcb7e11..f117ae3 100644 --- a/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py +++ b/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py @@ -2294,8 +2294,9 @@ def main(argv: list[str] | None = None) -> None: action=argparse.BooleanOptionalAction, default=None, help=( - "Include the ACS donor provider. Defaults to --include-donor-surveys; " - "use --no-include-acs for an eCPS-shaped run that keeps SIPP/SCF." + "Deprecated/no-op for the ACS donor: the ACS donor (rent, " + "real_estate_taxes) is always enabled, matching eCPS. Retained for " + "backward compatibility. ACS as a population spine is a separate control." 
), ) parser.add_argument( diff --git a/tests/pipelines/test_pe_us_data_rebuild.py b/tests/pipelines/test_pe_us_data_rebuild.py index 85ba167..41e3e1f 100644 --- a/tests/pipelines/test_pe_us_data_rebuild.py +++ b/tests/pipelines/test_pe_us_data_rebuild.py @@ -145,17 +145,21 @@ def test_default_policyengine_us_data_rebuild_source_providers_use_pe_style_bund assert isinstance(providers[5], SCFSourceProvider) -def test_default_policyengine_us_data_rebuild_source_providers_can_disable_donor_surveys() -> ( +def test_default_policyengine_us_data_rebuild_source_providers_keeps_acs_when_donor_surveys_disabled() -> ( None ): + # include_donor_surveys=False disables the SIPP/SCF donors, but the ACS donor is + # always enabled (it supplies the rent / real_estate_taxes imputation), so it + # remains alongside the CPS spine and PUF. providers = default_policyengine_us_data_rebuild_source_providers( include_donor_surveys=False, cps_download=False, ) - assert len(providers) == 2 + assert len(providers) == 3 assert isinstance(providers[0], CPSASECSourceProvider) assert isinstance(providers[1], PUFSourceProvider) + assert isinstance(providers[2], ACSSourceProvider) def test_default_policyengine_us_data_rebuild_source_providers_can_include_donor_surveys() -> ( @@ -177,24 +181,27 @@ def test_default_policyengine_us_data_rebuild_source_providers_can_include_donor assert isinstance(providers[5], SCFSourceProvider) -def test_default_policyengine_us_data_rebuild_source_providers_can_disable_only_acs() -> ( +def test_default_policyengine_us_data_rebuild_source_providers_always_includes_acs_donor() -> ( None ): + # The ACS donor is mandatory: it supplies the rent / real_estate_taxes source + # imputation (as eCPS does), so it is present even when include_acs=False. That + # flag no longer disables the donor. 
providers = default_policyengine_us_data_rebuild_source_providers( include_donor_surveys=True, include_acs=False, cps_download=False, ) - assert len(providers) == 5 + assert len(providers) == 6 assert isinstance(providers[0], CPSASECSourceProvider) assert isinstance(providers[1], PUFSourceProvider) - assert isinstance(providers[2], SIPPSourceProvider) - assert providers[2].block == "tips" + assert isinstance(providers[2], ACSSourceProvider) assert isinstance(providers[3], SIPPSourceProvider) - assert providers[3].block == "assets" - assert isinstance(providers[4], SCFSourceProvider) - assert not any(isinstance(provider, ACSSourceProvider) for provider in providers) + assert providers[3].block == "tips" + assert isinstance(providers[4], SIPPSourceProvider) + assert providers[4].block == "assets" + assert isinstance(providers[5], SCFSourceProvider) def test_build_policyengine_us_data_rebuild_pipeline_returns_configured_pipeline() -> ( From 3fd497b0430e7cf664e817e3c73c2d6b7d18fdac Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Tue, 2 Jun 2026 12:56:04 -0400 Subject: [PATCH 2/3] Populate spm_unit_pre_subsidy_childcare_expenses from CPS ASEC MP exported spm_unit_pre_subsidy_childcare_expenses as zero, so the national childcare target (spm_unit_capped_work_childcare_expenses) read $0 against a ~$348B baseline. eCPS reads the input directly from the CPS ASEC SPM field SPM_CHILDCAREXPNS. Wire SPM_CHILDCAREXPNS -> spm_unit_pre_subsidy_childcare_expenses through the same path as spm_unit_energy_subsidy: the PERSON_VARIABLES rename, the nonnegative and zero-default value-column lists, and the spm_unit source-column aggregation. PolicyEngine computes spm_unit_capped_work_childcare_expenses from this input, so the computed variable is not exported (the frozen export contract deliberately excludes it as formula-owned). Mirror the existing energy-subsidy mapping and entity-aggregation tests. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/microplex_us/data_sources/cps.py | 5 +++++ src/microplex_us/pipelines/us.py | 1 + tests/pipelines/test_us.py | 5 +++++ tests/test_cps_source_provider.py | 2 ++ 4 files changed, 13 insertions(+) diff --git a/src/microplex_us/data_sources/cps.py b/src/microplex_us/data_sources/cps.py index 273e487..d43c1f8 100644 --- a/src/microplex_us/data_sources/cps.py +++ b/src/microplex_us/data_sources/cps.py @@ -117,6 +117,9 @@ "WICYN": "_receives_wic", "SPM_CAPHOUSESUB": "_spm_capped_housing_subsidy", "SPM_ENGVAL": "spm_unit_energy_subsidy", + # Capped work childcare expenses are a PolicyEngine-computed variable + # (derived from this pre-subsidy input), so only the input is exported. + "SPM_CHILDCAREXPNS": "spm_unit_pre_subsidy_childcare_expenses", # Person relationship-to-householder code (eCPS cps.py:190-195, :1219). # Codes 43/44/46/47 mark an unmarried partner of the household head. "PERRP": "_person_relationship_to_householder", @@ -200,6 +203,7 @@ "social_security_survivors", "social_security_dependents", "spm_unit_energy_subsidy", + "spm_unit_pre_subsidy_childcare_expenses", ) PERSON_ZERO_DEFAULT_VALUE_COLUMNS = ( @@ -216,6 +220,7 @@ "social_security_survivors", "social_security_dependents", "spm_unit_energy_subsidy", + "spm_unit_pre_subsidy_childcare_expenses", ) PERSON_CACHE_REQUIRED_COLUMNS = ( diff --git a/src/microplex_us/pipelines/us.py b/src/microplex_us/pipelines/us.py index 3c718a2..11131af 100644 --- a/src/microplex_us/pipelines/us.py +++ b/src/microplex_us/pipelines/us.py @@ -9891,6 +9891,7 @@ def _attach_spm_unit_source_columns( "takes_up_snap_if_eligible": "max", "takes_up_tanf_if_eligible": "max", "spm_unit_energy_subsidy": "first", + "spm_unit_pre_subsidy_childcare_expenses": "first", } aggregations = { column: aggregation diff --git a/tests/pipelines/test_us.py b/tests/pipelines/test_us.py index ef6411f..08b4d0e 100644 --- a/tests/pipelines/test_us.py +++ b/tests/pipelines/test_us.py @@ -1014,6 
+1014,7 @@ def test_build_policyengine_entity_tables_preserves_spm_source_inputs( "takes_up_housing_assistance_if_eligible": [False, True, False], "takes_up_snap_if_eligible": [False, True, False], "spm_unit_energy_subsidy": [90.0, 90.0, 0.0], + "spm_unit_pre_subsidy_childcare_expenses": [1500.0, 1500.0, 0.0], } ) @@ -1028,6 +1029,10 @@ def test_build_policyengine_entity_tables_preserves_spm_source_inputs( ] assert spm_units["takes_up_snap_if_eligible"].tolist() == [True, False] assert spm_units["spm_unit_energy_subsidy"].tolist() == [90.0, 0.0] + assert spm_units["spm_unit_pre_subsidy_childcare_expenses"].tolist() == [ + 1500.0, + 0.0, + ] def test_build_policyengine_entity_tables_adds_deterministic_snap_takeup( self, diff --git a/tests/test_cps_source_provider.py b/tests/test_cps_source_provider.py index d8d6205..986651c 100644 --- a/tests/test_cps_source_provider.py +++ b/tests/test_cps_source_provider.py @@ -447,6 +447,7 @@ def test_load_cps_asec_derives_policyengine_value_inputs(tmp_path): "WICYN": [1, 2], "SPM_CAPHOUSESUB": [700, 0], "SPM_ENGVAL": [90, -1], + "SPM_CHILDCAREXPNS": [1500, -1], "PHIP_VAL": [900, -1], "POTC_VAL": [120, -1], "PMED_VAL": [450, -1], @@ -474,6 +475,7 @@ def test_load_cps_asec_derives_policyengine_value_inputs(tmp_path): assert persons["receives_housing_assistance"].tolist() == [True, False] assert persons["takes_up_housing_assistance_if_eligible"].tolist() == [True, False] assert persons["spm_unit_energy_subsidy"].tolist() == [90, 0] + assert persons["spm_unit_pre_subsidy_childcare_expenses"].tolist() == [1500, 0] assert persons["health_insurance_premiums_without_medicare_part_b"].tolist() == [900, 0] assert persons["over_the_counter_health_expenses"].tolist() == [120, 0] assert persons["other_medical_expenses"].tolist() == [450, 0] From bd730f60afdfa308dddefd20e78cc27177c13c46 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Tue, 2 Jun 2026 14:04:23 -0400 Subject: [PATCH 3/3] Fix cycle review findings: drop include_acs param; guard 
childcare in export map - Drop the now-dead include_acs parameter entirely. After the ACS donor became mandatory it was a silent no-op, so remove it from the provider function, the checkpoint build function, and the --include-acs CLI flag, and delete the redundant test that exercised include_acs=False. ACS-as-spine (multispine) remains the separate, not-yet-enabled control. - Add spm_unit_pre_subsidy_childcare_expenses to test_build_policyengine_us_export_variable_maps_includes_contract_inputs so a fast unit test guards that childcare reaches the inferred export map, not only tables.spm_units. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../pipelines/pe_us_data_rebuild.py | 4 +--- .../pe_us_data_rebuild_checkpoint.py | 13 ----------- tests/pipelines/test_pe_us_data_rebuild.py | 23 ------------------- tests/policyengine/test_us.py | 2 ++ 4 files changed, 3 insertions(+), 39 deletions(-) diff --git a/src/microplex_us/pipelines/pe_us_data_rebuild.py b/src/microplex_us/pipelines/pe_us_data_rebuild.py index 36064fa..468ce8d 100644 --- a/src/microplex_us/pipelines/pe_us_data_rebuild.py +++ b/src/microplex_us/pipelines/pe_us_data_rebuild.py @@ -105,7 +105,6 @@ def default_policyengine_us_data_rebuild_source_providers( puf_demographics_path: str | Path | None = None, puf_expand_persons: bool = True, include_donor_surveys: bool = True, - include_acs: bool | None = None, include_sipp: bool | None = None, include_scf: bool | None = None, acs_year: int = 2022, @@ -162,8 +161,7 @@ def default_policyengine_us_data_rebuild_source_providers( # The ACS donor is always enabled. It supplies the rent and real_estate_taxes # source imputation that eCPS also draws from ACS, so omitting it leaves those # variables at zero. ACS as a population spine ("multispine") is a separate, - # independently controlled feature that is not enabled here; ``include_acs`` is - # retained for backward compatibility and no longer disables the donor. 
+ # independently controlled feature that is not enabled here. providers.append( ACSSourceProvider( year=int(acs_year), diff --git a/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py b/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py index f117ae3..56295ce 100644 --- a/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py +++ b/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py @@ -1979,7 +1979,6 @@ def run_policyengine_us_data_rebuild_checkpoint( puf_demographics_path: str | Path | None = None, puf_expand_persons: bool = True, include_donor_surveys: bool = True, - include_acs: bool | None = None, include_sipp: bool | None = None, include_scf: bool | None = None, acs_year: int = 2022, @@ -2066,7 +2065,6 @@ def run_policyengine_us_data_rebuild_checkpoint( puf_demographics_path=puf_demographics_path, puf_expand_persons=puf_expand_persons, include_donor_surveys=include_donor_surveys, - include_acs=include_acs, include_sipp=include_sipp, include_scf=include_scf, acs_year=acs_year, @@ -2289,16 +2287,6 @@ def main(argv: list[str] | None = None) -> None: action=argparse.BooleanOptionalAction, default=True, ) - parser.add_argument( - "--include-acs", - action=argparse.BooleanOptionalAction, - default=None, - help=( - "Deprecated/no-op for the ACS donor: the ACS donor (rent, " - "real_estate_taxes) is always enabled, matching eCPS. Retained for " - "backward compatibility. ACS as a population spine is a separate control." 
- ), - ) parser.add_argument( "--include-sipp", action=argparse.BooleanOptionalAction, @@ -2489,7 +2477,6 @@ def main(argv: list[str] | None = None) -> None: puf_demographics_path=args.puf_demographics_path, puf_expand_persons=not args.no_puf_expand_persons, include_donor_surveys=args.include_donor_surveys, - include_acs=args.include_acs, include_sipp=args.include_sipp, include_scf=args.include_scf, acs_year=args.acs_year, diff --git a/tests/pipelines/test_pe_us_data_rebuild.py b/tests/pipelines/test_pe_us_data_rebuild.py index 41e3e1f..335be0c 100644 --- a/tests/pipelines/test_pe_us_data_rebuild.py +++ b/tests/pipelines/test_pe_us_data_rebuild.py @@ -181,29 +181,6 @@ def test_default_policyengine_us_data_rebuild_source_providers_can_include_donor assert isinstance(providers[5], SCFSourceProvider) -def test_default_policyengine_us_data_rebuild_source_providers_always_includes_acs_donor() -> ( - None -): - # The ACS donor is mandatory: it supplies the rent / real_estate_taxes source - # imputation (as eCPS does), so it is present even when include_acs=False. That - # flag no longer disables the donor. 
- providers = default_policyengine_us_data_rebuild_source_providers( - include_donor_surveys=True, - include_acs=False, - cps_download=False, - ) - - assert len(providers) == 6 - assert isinstance(providers[0], CPSASECSourceProvider) - assert isinstance(providers[1], PUFSourceProvider) - assert isinstance(providers[2], ACSSourceProvider) - assert isinstance(providers[3], SIPPSourceProvider) - assert providers[3].block == "tips" - assert isinstance(providers[4], SIPPSourceProvider) - assert providers[4].block == "assets" - assert isinstance(providers[5], SCFSourceProvider) - - def test_build_policyengine_us_data_rebuild_pipeline_returns_configured_pipeline() -> ( None ): diff --git a/tests/policyengine/test_us.py b/tests/policyengine/test_us.py index 002d772..46a76b5 100644 --- a/tests/policyengine/test_us.py +++ b/tests/policyengine/test_us.py @@ -2189,6 +2189,7 @@ def __init__(self, entity): ) spm_unit_contract_inputs = ( "receives_housing_assistance", + "spm_unit_pre_subsidy_childcare_expenses", "spm_unit_tenure_type", ) legacy_spm_unit_contract_inputs = ( @@ -2246,6 +2247,7 @@ class FakeSystem: "spm_unit_id": [1000], "household_id": [10], "receives_housing_assistance": [True], + "spm_unit_pre_subsidy_childcare_expenses": [1500.0], "spm_unit_tenure_type": ["RENTER"], **{name: [1.0] for name in legacy_spm_unit_contract_inputs}, }