diff --git a/src/microplex_us/data_sources/cps.py b/src/microplex_us/data_sources/cps.py index 273e487..d43c1f8 100644 --- a/src/microplex_us/data_sources/cps.py +++ b/src/microplex_us/data_sources/cps.py @@ -117,6 +117,9 @@ "WICYN": "_receives_wic", "SPM_CAPHOUSESUB": "_spm_capped_housing_subsidy", "SPM_ENGVAL": "spm_unit_energy_subsidy", + # Capped work childcare expenses are a PolicyEngine-computed variable + # (derived from this pre-subsidy input), so only the input is exported. + "SPM_CHILDCAREXPNS": "spm_unit_pre_subsidy_childcare_expenses", # Person relationship-to-householder code (eCPS cps.py:190-195, :1219). # Codes 43/44/46/47 mark an unmarried partner of the household head. "PERRP": "_person_relationship_to_householder", @@ -200,6 +203,7 @@ "social_security_survivors", "social_security_dependents", "spm_unit_energy_subsidy", + "spm_unit_pre_subsidy_childcare_expenses", ) PERSON_ZERO_DEFAULT_VALUE_COLUMNS = ( @@ -216,6 +220,7 @@ "social_security_survivors", "social_security_dependents", "spm_unit_energy_subsidy", + "spm_unit_pre_subsidy_childcare_expenses", ) PERSON_CACHE_REQUIRED_COLUMNS = ( diff --git a/src/microplex_us/pipelines/pe_us_data_rebuild.py b/src/microplex_us/pipelines/pe_us_data_rebuild.py index 0d1f137..468ce8d 100644 --- a/src/microplex_us/pipelines/pe_us_data_rebuild.py +++ b/src/microplex_us/pipelines/pe_us_data_rebuild.py @@ -105,7 +105,6 @@ def default_policyengine_us_data_rebuild_source_providers( puf_demographics_path: str | Path | None = None, puf_expand_persons: bool = True, include_donor_surveys: bool = True, - include_acs: bool | None = None, include_sipp: bool | None = None, include_scf: bool | None = None, acs_year: int = 2022, @@ -155,19 +154,21 @@ def default_policyengine_us_data_rebuild_source_providers( social_security_split_strategy=SOCIAL_SECURITY_SPLIT_STRATEGY_PE_QRF, ), ] - resolved_include_acs = include_donor_surveys if include_acs is None else include_acs resolved_include_sipp = ( include_donor_surveys if include_sipp is None else include_sipp ) resolved_include_scf = include_donor_surveys if include_scf is None else include_scf - if resolved_include_acs: - providers.append( - ACSSourceProvider( - year=int(acs_year), - policyengine_us_data_repo=policyengine_us_data_repo, - policyengine_us_data_python=policyengine_us_data_python, - ) + # The ACS donor is always enabled. It supplies the rent and real_estate_taxes + # source imputation that eCPS also draws from ACS, so omitting it leaves those + # variables at zero. ACS as a population spine ("multispine") is a separate, + # independently controlled feature that is not enabled here. + providers.append( + ACSSourceProvider( + year=int(acs_year), + policyengine_us_data_repo=policyengine_us_data_repo, + policyengine_us_data_python=policyengine_us_data_python, ) + ) if resolved_include_sipp: providers.extend( [ diff --git a/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py b/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py index bcb7e11..56295ce 100644 --- a/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py +++ b/src/microplex_us/pipelines/pe_us_data_rebuild_checkpoint.py @@ -1979,7 +1979,6 @@ def run_policyengine_us_data_rebuild_checkpoint( puf_demographics_path: str | Path | None = None, puf_expand_persons: bool = True, include_donor_surveys: bool = True, - include_acs: bool | None = None, include_sipp: bool | None = None, include_scf: bool | None = None, acs_year: int = 2022, @@ -2066,7 +2065,6 @@ def run_policyengine_us_data_rebuild_checkpoint( puf_demographics_path=puf_demographics_path, puf_expand_persons=puf_expand_persons, include_donor_surveys=include_donor_surveys, - include_acs=include_acs, include_sipp=include_sipp, include_scf=include_scf, acs_year=acs_year, @@ -2289,15 +2287,6 @@ def main(argv: list[str] | None = None) -> None: action=argparse.BooleanOptionalAction, default=True, ) - parser.add_argument( - "--include-acs", - action=argparse.BooleanOptionalAction, - default=None, - help=( - "Include the ACS donor provider. Defaults to --include-donor-surveys; " - "use --no-include-acs for an eCPS-shaped run that keeps SIPP/SCF." - ), - ) parser.add_argument( "--include-sipp", action=argparse.BooleanOptionalAction, @@ -2488,7 +2477,6 @@ def main(argv: list[str] | None = None) -> None: puf_demographics_path=args.puf_demographics_path, puf_expand_persons=not args.no_puf_expand_persons, include_donor_surveys=args.include_donor_surveys, - include_acs=args.include_acs, include_sipp=args.include_sipp, include_scf=args.include_scf, acs_year=args.acs_year, diff --git a/src/microplex_us/pipelines/us.py b/src/microplex_us/pipelines/us.py index 3c718a2..11131af 100644 --- a/src/microplex_us/pipelines/us.py +++ b/src/microplex_us/pipelines/us.py @@ -9891,6 +9891,7 @@ def _attach_spm_unit_source_columns( "takes_up_snap_if_eligible": "max", "takes_up_tanf_if_eligible": "max", "spm_unit_energy_subsidy": "first", + "spm_unit_pre_subsidy_childcare_expenses": "first", } aggregations = { column: aggregation diff --git a/tests/pipelines/test_pe_us_data_rebuild.py b/tests/pipelines/test_pe_us_data_rebuild.py index 85ba167..335be0c 100644 --- a/tests/pipelines/test_pe_us_data_rebuild.py +++ b/tests/pipelines/test_pe_us_data_rebuild.py @@ -145,17 +145,21 @@ def test_default_policyengine_us_data_rebuild_source_providers_use_pe_style_bund assert isinstance(providers[5], SCFSourceProvider) -def test_default_policyengine_us_data_rebuild_source_providers_can_disable_donor_surveys() -> ( +def test_default_policyengine_us_data_rebuild_source_providers_keeps_acs_when_donor_surveys_disabled() -> ( None ): + # include_donor_surveys=False disables the SIPP/SCF donors, but the ACS donor is + # always enabled (it supplies the rent / real_estate_taxes imputation), so it + # remains alongside the CPS spine and PUF. providers = default_policyengine_us_data_rebuild_source_providers( include_donor_surveys=False, cps_download=False, ) - assert len(providers) == 2 + assert len(providers) == 3 assert isinstance(providers[0], CPSASECSourceProvider) assert isinstance(providers[1], PUFSourceProvider) + assert isinstance(providers[2], ACSSourceProvider) def test_default_policyengine_us_data_rebuild_source_providers_can_include_donor_surveys() -> ( @@ -177,26 +181,6 @@ def test_default_policyengine_us_data_rebuild_source_providers_can_include_donor assert isinstance(providers[5], SCFSourceProvider) -def test_default_policyengine_us_data_rebuild_source_providers_can_disable_only_acs() -> ( - None -): - providers = default_policyengine_us_data_rebuild_source_providers( - include_donor_surveys=True, - include_acs=False, - cps_download=False, - ) - - assert len(providers) == 5 - assert isinstance(providers[0], CPSASECSourceProvider) - assert isinstance(providers[1], PUFSourceProvider) - assert isinstance(providers[2], SIPPSourceProvider) - assert providers[2].block == "tips" - assert isinstance(providers[3], SIPPSourceProvider) - assert providers[3].block == "assets" - assert isinstance(providers[4], SCFSourceProvider) - assert not any(isinstance(provider, ACSSourceProvider) for provider in providers) - - def test_build_policyengine_us_data_rebuild_pipeline_returns_configured_pipeline() -> ( None ): diff --git a/tests/pipelines/test_us.py b/tests/pipelines/test_us.py index ef6411f..08b4d0e 100644 --- a/tests/pipelines/test_us.py +++ b/tests/pipelines/test_us.py @@ -1014,6 +1014,7 @@ def test_build_policyengine_entity_tables_preserves_spm_source_inputs( "takes_up_housing_assistance_if_eligible": [False, True, False], "takes_up_snap_if_eligible": [False, True, False], "spm_unit_energy_subsidy": [90.0, 90.0, 0.0], + "spm_unit_pre_subsidy_childcare_expenses": [1500.0, 1500.0, 0.0], } ) @@ -1028,6 +1029,10 @@ def test_build_policyengine_entity_tables_preserves_spm_source_inputs( ] assert spm_units["takes_up_snap_if_eligible"].tolist() == [True, False] assert spm_units["spm_unit_energy_subsidy"].tolist() == [90.0, 0.0] + assert spm_units["spm_unit_pre_subsidy_childcare_expenses"].tolist() == [ + 1500.0, + 0.0, + ] def test_build_policyengine_entity_tables_adds_deterministic_snap_takeup( self, diff --git a/tests/policyengine/test_us.py b/tests/policyengine/test_us.py index 002d772..46a76b5 100644 --- a/tests/policyengine/test_us.py +++ b/tests/policyengine/test_us.py @@ -2189,6 +2189,7 @@ def __init__(self, entity): ) spm_unit_contract_inputs = ( "receives_housing_assistance", + "spm_unit_pre_subsidy_childcare_expenses", "spm_unit_tenure_type", ) legacy_spm_unit_contract_inputs = ( @@ -2246,6 +2247,7 @@ class FakeSystem: "spm_unit_id": [1000], "household_id": [10], "receives_housing_assistance": [True], + "spm_unit_pre_subsidy_childcare_expenses": [1500.0], "spm_unit_tenure_type": ["RENTER"], **{name: [1.0] for name in legacy_spm_unit_contract_inputs}, } diff --git a/tests/test_cps_source_provider.py b/tests/test_cps_source_provider.py index d8d6205..986651c 100644 --- a/tests/test_cps_source_provider.py +++ b/tests/test_cps_source_provider.py @@ -447,6 +447,7 @@ def test_load_cps_asec_derives_policyengine_value_inputs(tmp_path): "WICYN": [1, 2], "SPM_CAPHOUSESUB": [700, 0], "SPM_ENGVAL": [90, -1], + "SPM_CHILDCAREXPNS": [1500, -1], "PHIP_VAL": [900, -1], "POTC_VAL": [120, -1], "PMED_VAL": [450, -1], @@ -474,6 +475,7 @@ def test_load_cps_asec_derives_policyengine_value_inputs(tmp_path): assert persons["receives_housing_assistance"].tolist() == [True, False] assert persons["takes_up_housing_assistance_if_eligible"].tolist() == [True, False] assert persons["spm_unit_energy_subsidy"].tolist() == [90, 0] + assert persons["spm_unit_pre_subsidy_childcare_expenses"].tolist() == [1500, 0] assert persons["health_insurance_premiums_without_medicare_part_b"].tolist() == [900, 0] assert persons["over_the_counter_health_expenses"].tolist() == [120, 0] assert persons["other_medical_expenses"].tolist() == [450, 0]