diff --git a/src/microplex_us/data_sources/cps.py b/src/microplex_us/data_sources/cps.py index 54bde64..c115618 100644 --- a/src/microplex_us/data_sources/cps.py +++ b/src/microplex_us/data_sources/cps.py @@ -100,6 +100,13 @@ "MCAID": "has_medicaid", "NOW_GRP": "has_esi", "NOW_MRK": "has_marketplace_health_coverage", + # Employer-sponsored insurance policyholder + premium inputs (eCPS + # cps.py:197-275). NOW_OWNGRP flags own-name current group (ESI) coverage; + # NOW_HIPAID is who pays the premium; NOW_GRPFTYP is family vs self-only + # plan. These seed the ESI policyholder recode and the premium imputation. + "NOW_OWNGRP": "_now_owngrp", + "NOW_HIPAID": "_now_hipaid", + "NOW_GRPFTYP": "_now_grpftyp", "PHIP_VAL": "health_insurance_premiums_without_medicare_part_b", "POTC_VAL": "over_the_counter_health_expenses", "PMED_VAL": "other_medical_expenses", @@ -107,6 +114,9 @@ "WICYN": "_receives_wic", "SPM_CAPHOUSESUB": "_spm_capped_housing_subsidy", "SPM_ENGVAL": "spm_unit_energy_subsidy", + # Person relationship-to-householder code (eCPS cps.py:190-195, :1219). + # Codes 43/44/46/47 mark an unmarried partner of the household head. + "PERRP": "_person_relationship_to_householder", # Identifiers "PH_SEQ": "household_id", "GESTFIPS": "state_fips", @@ -261,6 +271,33 @@ ROTH_SHARE_OF_DC_CONTRIBUTIONS = 0.15 TRADITIONAL_SHARE_OF_IRA_CONTRIBUTIONS = 0.392 +# Census CPS ASEC 2024 technical documentation, PERRP (relationship to +# household reference person). Codes 43/44/46/47 mark an unmarried partner of +# the household head. Mirrors policyengine-us-data cps.py:190-195, :1219. +# https://www2.census.gov/programs-surveys/cps/techdocs/cpsmar24.pdf +PERRP_UNMARRIED_PARTNER_OF_HOUSEHOLD_HEAD_CODES = (43, 44, 46, 47) + +# Employer-sponsored insurance recode/imputation codes and plan-type priors, +# mirrored verbatim from policyengine-us-data cps.py:204-274. +ESI_HAS_CURRENT_OWN_COVERAGE = 1 # NOW_OWNGRP: holds ESI in own name. 
+ESI_EMPLOYER_PAYS_ALL = 1 # NOW_HIPAID +ESI_EMPLOYER_PAYS_SOME = 2 # NOW_HIPAID +ESI_FAMILY_PLAN = 1 # NOW_GRPFTYP +ESI_SELF_ONLY_PLAN = 2 # NOW_GRPFTYP +# AHRQ MEPS-IC Table IV.A.1 (private sector, 2024) plan-type averages. eCPS +# hardcodes these same constants to seed CPS policyholder premium records; +# national calibration later aligns the aggregate to the BEA full-economy +# employer premium total. These are constants in eCPS, not external data. +ESI_PLAN_PRIORS_2024 = { + "family": { + "total_premium": 21_207.52589669509, + "employee_contribution": 6_490.205059544782, + }, + "self_only": { + "total_premium": 8_389.275834815255, + "employee_contribution": 1_909.5781466113417, + }, +} PE_CPS_UNDOCUMENTED_TARGET = 13e6 PE_CPS_UNDOCUMENTED_WORKERS_TARGET = 8.3e6 PE_CPS_UNDOCUMENTED_STUDENTS_TARGET = 0.21 * 1.9e6 @@ -1461,6 +1498,89 @@ def _process_persons(df: pl.DataFrame, year: int) -> pl.DataFrame: ) if "_spm_capped_housing_subsidy" in result.columns: result = result.drop("_spm_capped_housing_subsidy") + # Unmarried partner of the household head (G8). Mirrors eCPS cps.py:1219 + # `perrp.isin(PERRP_UNMARRIED_PARTNER_OF_HOUSEHOLD_HEAD_CODES)`. + if ( + "_person_relationship_to_householder" in result.columns + and "is_unmarried_partner_of_household_head" not in result.columns + ): + result = result.with_columns( + pl.col("_person_relationship_to_householder") + .is_in(PERRP_UNMARRIED_PARTNER_OF_HOUSEHOLD_HEAD_CODES) + .alias("is_unmarried_partner_of_household_head") + ).drop("_person_relationship_to_householder") + elif "_person_relationship_to_householder" in result.columns: + result = result.drop("_person_relationship_to_householder") + # Employer-sponsored insurance policyholder + premium (G6). Mirrors eCPS + # cps.py:1576-1581: the policyholder flag is `NOW_OWNGRP == 1`, and the + # premium is `impute_employer_sponsored_insurance_premiums(person)` + # (eCPS cps.py:229-273), reproduced here on the renamed CPS columns. 
+ _esi_source_columns = {"_now_owngrp", "_now_hipaid", "_now_grpftyp"} + if _esi_source_columns.issubset(set(result.columns)): + own_esi = pl.col("_now_owngrp") == ESI_HAS_CURRENT_OWN_COVERAGE + premium_status = pl.col("_now_hipaid") + plan_type = pl.col("_now_grpftyp") + if "reported_owns_employer_sponsored_health_insurance_at_interview" not in ( + result.columns + ): + result = result.with_columns( + own_esi.alias( + "reported_owns_employer_sponsored_health_insurance_at_interview" + ) + ) + if "employer_sponsored_insurance_premiums" not in result.columns: + # Employee-paid premium (PHIP_VAL), clipped at zero like eCPS. + employee_paid = ( + pl.when( + pl.col("health_insurance_premiums_without_medicare_part_b") > 0 + ) + .then(pl.col("health_insurance_premiums_without_medicare_part_b")) + .otherwise(0.0) + if "health_insurance_premiums_without_medicare_part_b" + in result.columns + else pl.lit(0.0) + ) + total_premium = ( + pl.when(plan_type == ESI_SELF_ONLY_PLAN) + .then(ESI_PLAN_PRIORS_2024["self_only"]["total_premium"]) + .otherwise(ESI_PLAN_PRIORS_2024["family"]["total_premium"]) + ) + average_employee_contribution = ( + pl.when(plan_type == ESI_SELF_ONLY_PLAN) + .then(ESI_PLAN_PRIORS_2024["self_only"]["employee_contribution"]) + .otherwise(ESI_PLAN_PRIORS_2024["family"]["employee_contribution"]) + ) + employee_share = ( + pl.when(employee_paid > 0) + .then(employee_paid) + .otherwise(average_employee_contribution) + ) + employer_paid_when_some = (total_premium - employee_share).clip( + lower_bound=0.0 + ) + employer_paid = ( + pl.when(premium_status == ESI_EMPLOYER_PAYS_ALL) + .then(total_premium) + .when(premium_status == ESI_EMPLOYER_PAYS_SOME) + .then(employer_paid_when_some) + .otherwise(0.0) + ) + valid_owner_with_plan = own_esi & plan_type.is_in( + [ESI_FAMILY_PLAN, ESI_SELF_ONLY_PLAN] + ) + result = result.with_columns( + pl.when(valid_owner_with_plan) + .then(employer_paid) + .otherwise(0.0) + .alias("employer_sponsored_insurance_premiums") + ) + 
result = result.drop( + [c for c in _esi_source_columns if c in result.columns] + ) + else: + result = result.drop( + [c for c in _esi_source_columns if c in result.columns] + ) for value_column in PERSON_ZERO_DEFAULT_VALUE_COLUMNS: if value_column not in result.columns: result = result.with_columns(pl.lit(0.0).alias(value_column)) diff --git a/src/microplex_us/policyengine/us.py b/src/microplex_us/policyengine/us.py index 3183f4e..5fa1657 100644 --- a/src/microplex_us/policyengine/us.py +++ b/src/microplex_us/policyengine/us.py @@ -352,6 +352,20 @@ class PolicyEngineUSVariableMaterializationResult: "farm_rent_income", "has_esi", "has_marketplace_health_coverage", + # CPS-derived employer-sponsored insurance leaves. Mirrors the eCPS ESI + # imputation (policyengine-us-data, unmerged branch max/esi-premiums-cbo): + # the policyholder flag is NOW_OWNGRP == 1 and the premium comes from + # impute_employer_sponsored_insurance_premiums(). The premium leaf is a + # storable pe-us INPUT (no formula in pinned pe-us 1.715.2). The policyholder + # flag is not a released pe-us variable; it is routed through the legacy- + # contract entity map below so it still exports for eCPS column parity. + "employer_sponsored_insurance_premiums", + "reported_owns_employer_sponsored_health_insurance_at_interview", + # Unmarried partner of the household head: ASEC PERRP recode (codes + # 43/44/46/47), mirroring the eCPS perrp.isin(...) recode (policyengine-us- + # data, unmerged branch claude/document-census-tax-id-replacement). Storable + # pe-us INPUT (no formula in pinned pe-us 1.715.2). + "is_unmarried_partner_of_household_head", "health_savings_account_ald", "is_separated", "is_surviving_spouse", @@ -513,6 +527,16 @@ class PolicyEngineUSVariableMaterializationResult: "ssi_reported": 0.0, "ssn_card_type": "CITIZEN", "sstb_self_employment_income_before_lsr": 0, + # SSTB QBI-qualification flag (G9). 
eCPS never recodes this flag, so its + # export carries the pe-us default (default_value=True). MP exports False + # instead: because MP carries no SSTB self-employment income + # (business_is_sstb=False and sstb_self_employment_income_before_lsr=0 for + # every record), the section 199A SSTB component is zero under either value, + # so the choice is tax-inert and passes the name-only column-parity gate. + # False is chosen for internal consistency with MP's business_is_sstb=False; + # exact value-parity with the eCPS baseline would instead require True. + # Storable pe-us INPUT (no formula in pinned pe-us 1.715.2). + "sstb_self_employment_income_would_be_qualified": False, "sstb_unadjusted_basis_qualified_property": 0.0, "sstb_w2_wages_from_qualified_business": 0.0, "strike_benefits": 0, @@ -602,6 +626,10 @@ class PolicyEngineUSVariableMaterializationResult: "reported_has_chip_health_coverage_at_interview": "person", "reported_has_direct_purchase_health_coverage_at_interview": "person", "reported_has_employer_sponsored_health_coverage_at_interview": "person", + # Real CPS recode (NOW_OWNGRP == 1) carried on the person frame. Not yet a + # released pe-us input variable, so the entity is pinned here (like its + # reported_has_* siblings) to keep it on the eCPS-parity export surface. + "reported_owns_employer_sponsored_health_insurance_at_interview": "person", "reported_has_indian_health_service_coverage_at_interview": "person", "reported_has_marketplace_health_coverage_at_interview": "person", "reported_has_means_tested_health_coverage_at_interview": "person", diff --git a/tests/data_sources/test_cps_employer_insurance_and_partner.py b/tests/data_sources/test_cps_employer_insurance_and_partner.py new file mode 100644 index 0000000..cdbbee6 --- /dev/null +++ b/tests/data_sources/test_cps_employer_insurance_and_partner.py @@ -0,0 +1,314 @@ +"""Tests for the CPS-derived recodes closing the G6/G8/G9 eCPS export gaps. 
+ +The Enhanced CPS exports four person-level leaves that Microplex did not: + +- ``is_unmarried_partner_of_household_head`` -- a recode of the ASEC + relationship-to-householder code ``PERRP``: codes 43/44/46/47 mark an + unmarried partner of the household head. Mirrors the eCPS + ``perrp.isin(PERRP_UNMARRIED_PARTNER_OF_HOUSEHOLD_HEAD_CODES)`` recode + (policyengine-us-data, unmerged branch + ``claude/document-census-tax-id-replacement``). + +- ``reported_owns_employer_sponsored_health_insurance_at_interview`` -- the ESI + policyholder flag ``NOW_OWNGRP == 1``. Mirrors the eCPS ESI imputation + (policyengine-us-data, unmerged branch ``max/esi-premiums-cbo``). + +- ``employer_sponsored_insurance_premiums`` -- annual employer-paid ESI premium + imputed from ``NOW_OWNGRP``/``NOW_HIPAID``/``NOW_GRPFTYP``/``PHIP_VAL`` plus + the MEPS-IC 2024 plan-type priors. Reproduces the eCPS + ``impute_employer_sponsored_insurance_premiums`` function (same branch); the + expected values below are that reference function's own outputs. + +- ``sstb_self_employment_income_would_be_qualified`` -- the SSTB QBI + qualification flag. eCPS never recodes this flag, so its export carries the + pe-us default (``default_value=True``). Microplex exports ``False`` instead; + because Microplex carries no SSTB self-employment income + (``business_is_sstb=False`` and ``sstb_self_employment_income_before_lsr=0``), + the section 199A SSTB component is zero under either value, so the choice is + tax-inert and passes the name-only column-parity gate. It is therefore + exported as a constant default rather than a CPS recode. + +The recode tests exercise the real ``_process_persons`` (no stubbing). The +ESI-premium expectations are cross-checked against the eCPS reference +implementation in ``tests/.../test_employer_sponsored_insurance_premiums.py``. 
+""" + +import math + +import polars as pl + +from microplex_us.data_sources.cps import ( + ESI_PLAN_PRIORS_2024, + PERRP_UNMARRIED_PARTNER_OF_HOUSEHOLD_HEAD_CODES, + PERSON_VARIABLES, + _process_persons, +) + +# Raw ASEC fields that seed the new leaves, mapped via PERSON_VARIABLES to the +# underscore-prefixed staging columns consumed inside _process_persons. +_RAW_STAGING_COLUMNS = { + "PERRP": "_person_relationship_to_householder", + "NOW_OWNGRP": "_now_owngrp", + "NOW_HIPAID": "_now_hipaid", + "NOW_GRPFTYP": "_now_grpftyp", +} + +# Leaves produced by the new recodes (G6/G8). G9 is a constant default, not a +# recode, so it is asserted via the export config, not here. +_RECODE_LEAVES = ( + "is_unmarried_partner_of_household_head", + "reported_owns_employer_sponsored_health_insurance_at_interview", + "employer_sponsored_insurance_premiums", +) + + +def _raw_person_frame(rows: list[dict]) -> pl.DataFrame: + """Raw CPS-style person frame carrying the new recode source fields. + + Census column names are used because ``_process_persons`` selects/renames + via ``PERSON_VARIABLES``. 
+ """ + n = len(rows) + return pl.DataFrame( + { + "PH_SEQ": [1] * n, + "A_LINENO": list(range(1, n + 1)), + "A_FNLWGT": [100.0] * n, + "A_AGE": [row.get("age", 40) for row in rows], + "PERRP": [row.get("perrp", 40) for row in rows], + "NOW_OWNGRP": [row.get("owngrp", 0) for row in rows], + "NOW_HIPAID": [row.get("hipaid", 0) for row in rows], + "NOW_GRPFTYP": [row.get("grpftyp", 0) for row in rows], + "PHIP_VAL": [row.get("phip", 0.0) for row in rows], + } + ) + + +# --------------------------------------------------------------------------- +# G8: is_unmarried_partner_of_household_head (PERRP recode) +# --------------------------------------------------------------------------- + + +def test_person_variables_maps_the_new_raw_fields(): + for census, staging in _RAW_STAGING_COLUMNS.items(): + assert PERSON_VARIABLES.get(census) == staging + + +def test_unmarried_partner_codes_match_ecps(): + # eCPS PERRP_UNMARRIED_PARTNER_OF_HOUSEHOLD_HEAD_CODES = {43, 44, 46, 47}. + assert set(PERRP_UNMARRIED_PARTNER_OF_HOUSEHOLD_HEAD_CODES) == {43, 44, 46, 47} + + +def test_unmarried_partner_flag_recode(): + rows = [ + {"perrp": 43}, # opposite-sex partner with relatives -> True + {"perrp": 44}, # opposite-sex partner without relatives -> True + {"perrp": 46}, # same-sex partner with relatives -> True + {"perrp": 47}, # same-sex partner without relatives -> True + {"perrp": 40}, # reference person -> False + {"perrp": 1}, # spouse -> False + {"perrp": 45}, # adjacent code, deliberately excluded -> False + ] + result = _process_persons(_raw_person_frame(rows), 2023) + assert "is_unmarried_partner_of_household_head" in result.columns + assert result["is_unmarried_partner_of_household_head"].to_list() == [ + True, + True, + True, + True, + False, + False, + False, + ] + # The raw staging column must not leak into the processed frame. 
+ assert "_person_relationship_to_householder" not in result.columns + + +def test_unmarried_partner_flag_is_boolean_dtype(): + result = _process_persons(_raw_person_frame([{"perrp": 43}, {"perrp": 40}]), 2023) + assert result.schema["is_unmarried_partner_of_household_head"] == pl.Boolean + + +# --------------------------------------------------------------------------- +# G6a: reported_owns_employer_sponsored_health_insurance_at_interview +# --------------------------------------------------------------------------- + + +def test_esi_policyholder_flag_recode(): + rows = [ + {"owngrp": 1}, # holds ESI in own name -> True + {"owngrp": 0}, # does not -> False + {"owngrp": 2}, # any non-1 code -> False + ] + result = _process_persons(_raw_person_frame(rows), 2023) + col = "reported_owns_employer_sponsored_health_insurance_at_interview" + assert col in result.columns + assert result[col].to_list() == [True, False, False] + assert result.schema[col] == pl.Boolean + + +# --------------------------------------------------------------------------- +# G6b: employer_sponsored_insurance_premiums (MEPS-prior imputation) +# --------------------------------------------------------------------------- + + +def test_esi_premium_matches_ecps_reference_fixture(): + """Reproduce the eCPS unit-test fixture exactly. 
+ + Fixture and expectations are lifted from policyengine-us-data + ``tests/unit/test_employer_sponsored_insurance_premiums.py``:: + + NOW_OWNGRP = [1, 1, 1, 0, 1] + NOW_HIPAID = [1, 2, 2, 1, 2] + NOW_GRPFTYP = [2, 2, 1, 2, 1] + PHIP_VAL = [0, 1_200, 0, 0, 50_000] + """ + rows = [ + {"owngrp": 1, "hipaid": 1, "grpftyp": 2, "phip": 0}, + {"owngrp": 1, "hipaid": 2, "grpftyp": 2, "phip": 1_200}, + {"owngrp": 1, "hipaid": 2, "grpftyp": 1, "phip": 0}, + {"owngrp": 0, "hipaid": 1, "grpftyp": 2, "phip": 0}, + {"owngrp": 1, "hipaid": 2, "grpftyp": 1, "phip": 50_000}, # family plan + ] + result = _process_persons(_raw_person_frame(rows), 2023) + premiums = result["employer_sponsored_insurance_premiums"].to_list() + + self_only_total = ESI_PLAN_PRIORS_2024["self_only"]["total_premium"] + family_total = ESI_PLAN_PRIORS_2024["family"]["total_premium"] + family_employee = ESI_PLAN_PRIORS_2024["family"]["employee_contribution"] + expected = [ + self_only_total, # employer pays all, self-only plan + self_only_total - 1_200, # employer pays some, self-only, $1.2k employee + family_total - family_employee, # employer pays some, family, avg employee + 0.0, # not an own-name ESI holder + 0.0, # employer pays some but employee paid exceeds total -> clip at 0 + ] + assert len(premiums) == len(expected) + for got, want in zip(premiums, expected): + assert math.isclose(got, want, rel_tol=1e-9, abs_tol=1e-6), (got, want) + + +def test_esi_premium_is_zero_for_non_owners(): + # Even with a paid-premium status and a real plan type, a non-owner + # (NOW_OWNGRP != 1) must get a zero employer premium. + rows = [{"owngrp": 0, "hipaid": 1, "grpftyp": 1, "phip": 0}] + result = _process_persons(_raw_person_frame(rows), 2023) + assert result["employer_sponsored_insurance_premiums"].to_list() == [0.0] + + +def test_esi_premium_zero_when_no_employer_contribution(): + # NOW_HIPAID code other than 1/2 (e.g. 3 = employee pays all) -> no + # employer-paid premium even for a valid owner with a plan. 
+ rows = [{"owngrp": 1, "hipaid": 3, "grpftyp": 1, "phip": 5_000}] + result = _process_persons(_raw_person_frame(rows), 2023) + assert result["employer_sponsored_insurance_premiums"].to_list() == [0.0] + + +def test_esi_staging_columns_are_dropped(): + result = _process_persons(_raw_person_frame([{"owngrp": 1}]), 2023) + for staging in ("_now_owngrp", "_now_hipaid", "_now_grpftyp"): + assert staging not in result.columns + + +def test_esi_premium_priors_match_ecps_meps_constants(): + # MEPS-IC Table IV.A.1 (private sector, 2024) constants, copied from eCPS. + assert math.isclose( + ESI_PLAN_PRIORS_2024["family"]["total_premium"], 21_207.52589669509 + ) + assert math.isclose( + ESI_PLAN_PRIORS_2024["family"]["employee_contribution"], 6_490.205059544782 + ) + assert math.isclose( + ESI_PLAN_PRIORS_2024["self_only"]["total_premium"], 8_389.275834815255 + ) + assert math.isclose( + ESI_PLAN_PRIORS_2024["self_only"]["employee_contribution"], + 1_909.5781466113417, + ) + + +# --------------------------------------------------------------------------- +# Export-config wiring (all four leaves) +# --------------------------------------------------------------------------- + + +def test_recode_leaves_in_export_allowlist_and_not_aliased(): + from microplex_us.policyengine.us import ( + POLICYENGINE_US_EXPORT_COLUMN_ALIASES, + SAFE_POLICYENGINE_US_EXPORT_VARIABLES, + ) + + for leaf in _RECODE_LEAVES: + assert leaf in SAFE_POLICYENGINE_US_EXPORT_VARIABLES, leaf + assert POLICYENGINE_US_EXPORT_COLUMN_ALIASES.get(leaf) is None, leaf + + +def test_esi_policyholder_has_legacy_person_entity(): + # The policyholder flag is not (yet) a released pe-us input variable, so the + # export entity is pinned in the legacy-contract map (person), like its + # reported_has_* siblings, to keep it on the eCPS-parity surface. 
+ from microplex_us.policyengine.us import ( + POLICYENGINE_US_LEGACY_CONTRACT_VARIABLE_ENTITIES, + ) + + assert ( + POLICYENGINE_US_LEGACY_CONTRACT_VARIABLE_ENTITIES.get( + "reported_owns_employer_sponsored_health_insurance_at_interview" + ) + == "person" + ) + + +def test_sstb_qbi_flag_exported_as_constant_false_default(): + # G9: eCPS never recodes this flag, so its export carries the pe-us default + # (default_value=True). MP deliberately exports a constant False instead; with + # no SSTB self-employment income in MP the value is tax-inert either way. + from microplex_us.policyengine.us import POLICYENGINE_US_EXPORT_DEFAULTS + + assert ( + POLICYENGINE_US_EXPORT_DEFAULTS["sstb_self_employment_income_would_be_qualified"] + is False + ) + # It must be internally consistent with MP's existing SSTB treatment. + assert POLICYENGINE_US_EXPORT_DEFAULTS["business_is_sstb"] is False + + +def test_all_four_columns_are_required_by_the_ecps_contract(): + # Every column this change adds is a REQUIRED (not forbidden) column in the + # frozen eCPS export-parity contract. 
+ import json + from importlib import resources + + contract = json.loads( + resources.files("microplex_us.pipelines") + .joinpath("ecps_export_contract.json") + .read_text(encoding="utf-8") + ) + required = set(contract["required"]) + forbidden = set(contract["forbidden"]) + for col in ( + "is_unmarried_partner_of_household_head", + "reported_owns_employer_sponsored_health_insurance_at_interview", + "employer_sponsored_insurance_premiums", + "sstb_self_employment_income_would_be_qualified", + ): + assert col in required, col + assert col not in forbidden, col + + +if __name__ == "__main__": + import traceback + + funcs = [v for k, v in sorted(globals().items()) if k.startswith("test_")] + passed = failed = 0 + for fn in funcs: + try: + fn() + print(f"PASS {fn.__name__}") + passed += 1 + except Exception: # noqa: BLE001 + print(f"FAIL {fn.__name__}") + traceback.print_exc() + failed += 1 + print(f"SUMMARY passed={passed} failed={failed}") + raise SystemExit(1 if failed else 0)