From b5a82061a99f1c2d579ea338f5f5e1ca8ed2cff4 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Mon, 1 Jun 2026 00:42:11 -0400 Subject: [PATCH 1/3] Add AOTC eligibility-input construction from PUF credit signal (G3) Populate the eight American Opportunity Tax Credit factual eligibility inputs the eCPS export contract requires, driven by the PUF-imputed american_opportunity_credit signal (PUF E87521). Mirrors the enhanced-CPS baseline PolicyEngine/policyengine-us-data policyengine_us_data/datasets/cps/extended_cps.py:1204-1369 (_impute_aotc_eligibility_inputs) and policyengine_us_data/utils/aotc.py. - src/microplex_us/policyengine/aotc.py: port of utils/aotc.py back-solve helpers (max credit per student; minimum qualifying expenses generating a given credit) off PolicyEngine-US parameters. - pipelines/us.py: _construct_aotc_eligibility_inputs runs in build_policyengine_entity_tables after the tax-unit split, where the person table already carries american_opportunity_credit, qualified_tuition_expenses, is_full_time_college_student and is_tax_unit_dependent keyed by tax_unit_id. Per tax unit with positive credit it back-solves per-student tuition and selects students by the eCPS priority (tuition>0 -> full-time college student -> tax-unit dependent -> any member); falls back to qualified_tuition_expenses>0 when no credit signal is present. Selected students get the five factual flags True, has_completed_first_four_years and has_felony_drug_conviction False, and claimed_prior_years clamped to 3. - policyengine/us.py: register the eight columns in SAFE_POLICYENGINE_US_EXPORT_VARIABLES and add False/0 defaults so the contract-required columns always export even with no positive signal. american_opportunity_credit remains a PUF calculated-tax output and is not exported; PolicyEngine-US recomputes the credit from these inputs. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/microplex_us/pipelines/us.py | 226 ++++++++++++++++++++++++++ src/microplex_us/policyengine/aotc.py | 84 ++++++++++ src/microplex_us/policyengine/us.py | 28 ++++ 3 files changed, 338 insertions(+) create mode 100644 src/microplex_us/policyengine/aotc.py diff --git a/src/microplex_us/pipelines/us.py b/src/microplex_us/pipelines/us.py index 7c7b5a1..d4e4a7b 100644 --- a/src/microplex_us/pipelines/us.py +++ b/src/microplex_us/pipelines/us.py @@ -66,6 +66,10 @@ from microplex_us.pipelines.pe_native_optimization import ( optimize_policyengine_us_native_loss_dataset, ) +from microplex_us.policyengine.aotc import ( + maximum_american_opportunity_credit_per_student, + qualifying_expenses_from_american_opportunity_credit, +) from microplex_us.policyengine.comparison import ( evaluate_policyengine_us_target_set, slice_policyengine_us_target_evaluation_report, @@ -4345,6 +4349,7 @@ def build_policyengine_entity_tables( households = self._build_policyengine_households(persons) tax_units, persons = self._build_policyengine_tax_units(persons) + persons = self._construct_aotc_eligibility_inputs(persons) persons = self._assign_family_and_spm_units(persons) families = self._collapse_group_table(persons, "family_id") spm_units = self._collapse_group_table(persons, "spm_unit_id") @@ -4369,6 +4374,227 @@ def build_policyengine_entity_tables( ) return tables + # AOTC eligibility-input columns populated by + # ``_construct_aotc_eligibility_inputs``. Mirrors the enhanced-CPS + # baseline tuple ``AOTC_ELIGIBILITY_INPUTS`` at + # ``PolicyEngine/policyengine-us-data`` + # ``policyengine_us_data/datasets/cps/extended_cps.py:61-71``. 
+ _AOTC_TRUE_FLAG_COLUMNS = ( + "is_pursuing_credential_for_american_opportunity_credit", + "attends_eligible_educational_institution_for_american_opportunity_credit", + "is_enrolled_at_least_half_time_for_american_opportunity_credit", + "has_american_opportunity_credit_1098_t_or_exception", + "has_american_opportunity_credit_institution_ein", + ) + _AOTC_FALSE_FLAG_COLUMNS = ( + "has_completed_first_four_years_of_postsecondary_education", + "has_felony_drug_conviction", + ) + _AOTC_PRIOR_YEARS_COLUMN = "american_opportunity_credit_claimed_prior_years" + + def _construct_aotc_eligibility_inputs( + self, + persons: pd.DataFrame, + ) -> pd.DataFrame: + """Convert the PUF AOTC signal into person eligibility inputs. + + Mirrors the enhanced-CPS baseline + ``ExtendedCPS._impute_aotc_eligibility_inputs`` at + ``PolicyEngine/policyengine-us-data`` + ``policyengine_us_data/datasets/cps/extended_cps.py:1204-1369``. + + The enhanced CPS operates on a flat ``{variable: {period: array}}`` + payload keyed by ``person_tax_unit_id``; Microplex carries the same + signals (``american_opportunity_credit``, + ``qualified_tuition_expenses``, ``is_full_time_college_student``, + ``is_tax_unit_dependent``) as columns on the person table keyed by + ``tax_unit_id`` once ``_build_policyengine_tax_units`` has assigned + authoritative tax units, so the per-tax-unit back-solve is the same + algorithm applied to a single DataFrame. + + Driven by the PUF-imputed ``american_opportunity_credit`` (PUF + ``E87521``; see ``data_sources/puf.py`` / ``manifests/puf.json`` and + ``policyengine_us_data`` ``datasets/puf/puf.py:707``). For each tax + unit with positive credit, the credit is back-solved into per-student + qualified-tuition expenses and students are selected by the enhanced + CPS priority (positive tuition -> full-time college student -> + tax-unit dependent -> any member) until the credit is exhausted. 
+ With no credit signal it falls back to the enhanced-CPS + ``aotc_student = qualified_tuition_expenses > 0`` rule. The selected + students receive the five factual eligibility flags as ``True``, + ``has_completed_first_four_years_of_postsecondary_education`` and + ``has_felony_drug_conviction`` as ``False`` (constants the enhanced + CPS also hard-codes), and + ``american_opportunity_credit_claimed_prior_years`` clamped to a + maximum of 3. ``american_opportunity_credit`` is a PUF + calculated-tax output (see ``microdata_roles.py``) and is not itself + exported; PolicyEngine-US recomputes the credit from these inputs. + """ + if persons is None or persons.empty: + return persons + if "tax_unit_id" not in persons.columns: + return persons + + result = persons.copy() + n = len(result) + time_period = int(self.config.policyengine_dataset_year or 2024) + + person_tax_unit_ids = result["tax_unit_id"].to_numpy() + tuition = ( + pd.to_numeric( + result["qualified_tuition_expenses"], + errors="coerce", + ) + .fillna(0.0) + .to_numpy(dtype=float, copy=True) + if "qualified_tuition_expenses" in result.columns + else np.zeros(n, dtype=float) + ) + if "qualified_tuition_expenses" not in result.columns: + # No tuition signal and no credit-derived tuition can be + # back-solved, so there is no student population to mark. 
+ credit_present = "american_opportunity_credit" in result.columns + if not credit_present: + return persons + + credit = ( + pd.to_numeric( + result["american_opportunity_credit"], + errors="coerce", + ) + .fillna(0.0) + .to_numpy(dtype=float) + if "american_opportunity_credit" in result.columns + else None + ) + full_time = ( + pd.to_numeric(result["is_full_time_college_student"], errors="coerce") + .fillna(0) + .astype(bool) + .to_numpy() + if "is_full_time_college_student" in result.columns + else np.zeros(n, dtype=bool) + ) + dependent = ( + pd.to_numeric(result["is_tax_unit_dependent"], errors="coerce") + .fillna(0) + .astype(bool) + .to_numpy() + if "is_tax_unit_dependent" in result.columns + else np.zeros(n, dtype=bool) + ) + + aotc_student = np.zeros(n, dtype=bool) + + if credit is not None: + positive_credit = credit > 0 + if not positive_credit.any(): + # No positive credit anywhere: nothing to construct. The + # enhanced CPS returns early here without writing inputs. + return persons + + # ``american_opportunity_credit`` rides on the person table as the + # per-tax-unit value repeated across members; collapse to one + # value per tax unit (the maximum guards against any per-member + # zero-fill on non-filer rows). 
+ credit_by_tax_unit: dict[Any, float] = {} + for tax_unit_id, member_credit in zip(person_tax_unit_ids, credit): + prior = credit_by_tax_unit.get(tax_unit_id, 0.0) + if member_credit > prior: + credit_by_tax_unit[tax_unit_id] = float(member_credit) + + max_student_credit = maximum_american_opportunity_credit_per_student( + time_period + ) + positive_credit_units = [ + tax_unit_id + for tax_unit_id, unit_credit in credit_by_tax_unit.items() + if unit_credit > 0 + ] + for tax_unit_id in positive_credit_units: + member_indices = np.flatnonzero(person_tax_unit_ids == tax_unit_id) + if member_indices.size == 0 or max_student_credit <= 0: + continue + + tuition_indices = member_indices[tuition[member_indices] > 0] + candidate_groups = [] + if tuition_indices.size > 0: + candidate_groups.append(tuition_indices) + candidate_groups.extend( + ( + member_indices[full_time[member_indices]], + member_indices[dependent[member_indices]], + member_indices, + ) + ) + ordered_candidates = [] + seen = set() + for group in candidate_groups: + for index in group: + if index not in seen: + ordered_candidates.append(index) + seen.add(index) + + remaining_credit = float(credit_by_tax_unit[tax_unit_id]) + for selected in ordered_candidates: + if remaining_credit <= 0: + break + student_credit = min(remaining_credit, max_student_credit) + target_tuition = ( + qualifying_expenses_from_american_opportunity_credit( + student_credit, + time_period, + ) + ) + aotc_student[selected] = True + tuition[selected] = target_tuition + remaining_credit -= student_credit + else: + aotc_student = tuition > 0 + if not aotc_student.any(): + return persons + + # Five factual eligibility flags -> True for selected students. 
+ for column in self._AOTC_TRUE_FLAG_COLUMNS: + values = ( + result[column].fillna(False).astype(bool).to_numpy().copy() + if column in result.columns + else np.zeros(n, dtype=bool) + ) + values[aotc_student] = True + result[column] = values + + # has_completed_first_four_years / has_felony_drug_conviction -> False. + for column in self._AOTC_FALSE_FLAG_COLUMNS: + values = ( + result[column].fillna(False).astype(bool).to_numpy().copy() + if column in result.columns + else np.zeros(n, dtype=bool) + ) + values[aotc_student] = False + result[column] = values + + # Prior-year claims clamped to the 4-year (max 3 prior) AOTC limit. + prior_years = ( + pd.to_numeric(result[self._AOTC_PRIOR_YEARS_COLUMN], errors="coerce") + .fillna(0) + .astype(np.int64) + .to_numpy() + .copy() + if self._AOTC_PRIOR_YEARS_COLUMN in result.columns + else np.zeros(n, dtype=np.int64) + ) + prior_years[aotc_student] = np.minimum(prior_years[aotc_student], 3) + result[self._AOTC_PRIOR_YEARS_COLUMN] = prior_years + + # Write the back-solved per-student tuition the credit implies, so the + # exported ``qualified_tuition_expenses`` reproduces the PUF credit + # under PolicyEngine-US (enhanced CPS does the same). + if "qualified_tuition_expenses" in result.columns: + result["qualified_tuition_expenses"] = tuition + + return result + def export_policyengine_dataset( self, result: USMicroplexBuildResult, diff --git a/src/microplex_us/policyengine/aotc.py b/src/microplex_us/policyengine/aotc.py new file mode 100644 index 0000000..860efed --- /dev/null +++ b/src/microplex_us/policyengine/aotc.py @@ -0,0 +1,84 @@ +"""American Opportunity Tax Credit (AOTC) helpers backed by PolicyEngine-US. + +Ported logic-for-logic from the enhanced-CPS baseline at +``PolicyEngine/policyengine-us-data`` +``policyengine_us_data/utils/aotc.py`` so the Microplex AOTC +eligibility-input construction back-solves per-student qualified expenses +using the same PolicyEngine-US parameter schedule the enhanced CPS uses. 
+These functions read only the published +``gov.irs.credits.education.american_opportunity_credit.amount`` marginal +schedule, so they stay in lock-step with PolicyEngine-US parameter updates. +""" + +from __future__ import annotations + +import math +from functools import lru_cache + +import numpy as np + + +@lru_cache(maxsize=16) +def get_american_opportunity_credit_amount_scale(year: int): + """Return the PolicyEngine-US AOTC amount scale for a tax year.""" + from policyengine_us import CountryTaxBenefitSystem + + return CountryTaxBenefitSystem().parameters.gov.irs.credits.education.american_opportunity_credit.amount( + f"{year}-01-01" + ) + + +def qualifying_expenses_from_american_opportunity_credit( + credit: float, + year: int, +) -> float: + """Return the minimum expenses that generate ``credit`` under PE-US.""" + amount_scale = get_american_opportunity_credit_amount_scale(year) + return _minimum_base_for_marginal_amount(credit, amount_scale) + + +def maximum_american_opportunity_credit_per_student(year: int) -> float: + """Return the maximum AOTC generated by one student under PE-US.""" + amount_scale = get_american_opportunity_credit_amount_scale(year) + if len(amount_scale.thresholds) == 0: + return 0.0 + terminal_threshold = max(amount_scale.thresholds) + return float(amount_scale.calc(np.array([terminal_threshold], dtype=float))[0]) + + +def _minimum_base_for_marginal_amount(amount: float, scale) -> float: + """Invert a marginal amount schedule using the schedule brackets.""" + amount = max(float(amount), 0) + if amount == 0: + return 0.0 + + thresholds = np.asarray(scale.thresholds, dtype=float) + rates = np.asarray(scale.rates, dtype=float) + if thresholds.size == 0: + return 0.0 + + order = np.argsort(thresholds) + thresholds = thresholds[order] + rates = rates[order] + + accrued = 0.0 + for index, (lower, rate) in enumerate(zip(thresholds, rates)): + lower = float(lower) + rate = float(rate) + upper = ( + float(thresholds[index + 1]) if index + 1 < 
thresholds.size else math.inf + ) + + if amount <= accrued: + return lower + if rate <= 0: + continue + if math.isinf(upper): + return lower + (amount - accrued) / rate + + bracket_amount = (upper - lower) * rate + if amount <= accrued + bracket_amount: + return lower + (amount - accrued) / rate + accrued += bracket_amount + + return float(thresholds[-1]) diff --git a/src/microplex_us/policyengine/us.py b/src/microplex_us/policyengine/us.py index 5fa1657..03828e1 100644 --- a/src/microplex_us/policyengine/us.py +++ b/src/microplex_us/policyengine/us.py @@ -272,6 +272,20 @@ class PolicyEngineUSVariableMaterializationResult: SAFE_POLICYENGINE_US_EXPORT_VARIABLES: set[str] = { "age", + # American Opportunity Tax Credit (AOTC) factual eligibility inputs, + # populated per tax unit by + # ``USMicroplexPipeline._construct_aotc_eligibility_inputs`` from the + # PUF ``american_opportunity_credit`` signal. Mirrors the enhanced-CPS + # tuple ``AOTC_ELIGIBILITY_INPUTS`` at PolicyEngine/policyengine-us-data + # ``policyengine_us_data/datasets/cps/extended_cps.py:61-71``. + "is_pursuing_credential_for_american_opportunity_credit", + "attends_eligible_educational_institution_for_american_opportunity_credit", + "is_enrolled_at_least_half_time_for_american_opportunity_credit", + "has_american_opportunity_credit_1098_t_or_exception", + "has_american_opportunity_credit_institution_ein", + "has_completed_first_four_years_of_postsecondary_education", + "has_felony_drug_conviction", + "american_opportunity_credit_claimed_prior_years", "alimony_expense", "alimony_income", "amt_foreign_tax_credit", @@ -412,6 +426,20 @@ class PolicyEngineUSVariableMaterializationResult: POLICYENGINE_US_EXPORT_DEFAULTS: dict[str, Any] = { "auto_loan_balance": 0.0, + # American Opportunity Tax Credit factual eligibility inputs. 
The + # per-tax-unit construction in + # ``USMicroplexPipeline._construct_aotc_eligibility_inputs`` writes the + # real values for selected students; these defaults guarantee the + # contract-required columns always export (False / 0) for the + # non-student majority and for builds with no positive AOTC signal. + "is_pursuing_credential_for_american_opportunity_credit": False, + "attends_eligible_educational_institution_for_american_opportunity_credit": False, + "is_enrolled_at_least_half_time_for_american_opportunity_credit": False, + "has_american_opportunity_credit_1098_t_or_exception": False, + "has_american_opportunity_credit_institution_ein": False, + "has_completed_first_four_years_of_postsecondary_education": False, + "has_felony_drug_conviction": False, + "american_opportunity_credit_claimed_prior_years": 0, "auto_loan_interest": 0.0, # SCF net-worth component leaves (G1): positive-magnitude balances, # default 0 when the SCF donor leaves a row without that component. From 1d55cd0ec62b53466df373de4913792d4af5ebc4 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Mon, 1 Jun 2026 00:42:16 -0400 Subject: [PATCH 2/3] Add tests for AOTC eligibility-input construction (G3) Cover USMicroplexPipeline._construct_aotc_eligibility_inputs and the export path: the eight contract-required AOTC columns are registered as safe export variables with False/0 defaults; the credit-driven back-solve selects students by the eCPS priority and rewrites per-student tuition ($2,500 -> $4,000; $1,250 -> $1,250); the no-credit fallback marks tuition holders; prior-year claims clamp to 3; and the export emits all eight columns (with defaults when no signal is present) while excluding the PUF american_opportunity_credit driver. Credit-driven cases importorskip policyengine_us so they run where PE-US is installed and skip otherwise. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../test_us_aotc_eligibility_inputs.py | 337 ++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 tests/pipelines/test_us_aotc_eligibility_inputs.py diff --git a/tests/pipelines/test_us_aotc_eligibility_inputs.py b/tests/pipelines/test_us_aotc_eligibility_inputs.py new file mode 100644 index 0000000..5e1c8ef --- /dev/null +++ b/tests/pipelines/test_us_aotc_eligibility_inputs.py @@ -0,0 +1,337 @@ +"""Tests for the AOTC eligibility-input construction in the US pipeline. + +Exercises ``USMicroplexPipeline._construct_aotc_eligibility_inputs`` (and its +call site inside ``build_policyengine_entity_tables``), which mirrors the +enhanced-CPS baseline ``ExtendedCPS._impute_aotc_eligibility_inputs`` at +``PolicyEngine/policyengine-us-data`` +``policyengine_us_data/datasets/cps/extended_cps.py:1204-1369``. +""" + +import pandas as pd +import pytest + +from microplex_us.pipelines.us import USMicroplexBuildConfig, USMicroplexPipeline +from microplex_us.policyengine.us import ( + POLICYENGINE_US_EXPORT_DEFAULTS, + SAFE_POLICYENGINE_US_EXPORT_VARIABLES, + build_policyengine_us_export_variable_maps, + build_policyengine_us_time_period_arrays, +) + +AOTC_TRUE_FLAG_COLUMNS = ( + "is_pursuing_credential_for_american_opportunity_credit", + "attends_eligible_educational_institution_for_american_opportunity_credit", + "is_enrolled_at_least_half_time_for_american_opportunity_credit", + "has_american_opportunity_credit_1098_t_or_exception", + "has_american_opportunity_credit_institution_ein", +) +AOTC_FALSE_FLAG_COLUMNS = ( + "has_completed_first_four_years_of_postsecondary_education", + "has_felony_drug_conviction", +) +AOTC_PRIOR_YEARS_COLUMN = "american_opportunity_credit_claimed_prior_years" +ALL_AOTC_COLUMNS = ( + AOTC_TRUE_FLAG_COLUMNS + AOTC_FALSE_FLAG_COLUMNS + (AOTC_PRIOR_YEARS_COLUMN,) +) + + +def _pipeline(year: int = 2024) -> USMicroplexPipeline: + return 
USMicroplexPipeline(USMicroplexBuildConfig(policyengine_dataset_year=year)) + + +def test_all_eight_aotc_columns_are_safe_export_variables(): + for column in ALL_AOTC_COLUMNS: + assert column in SAFE_POLICYENGINE_US_EXPORT_VARIABLES + + +def test_all_eight_aotc_columns_have_false_or_zero_defaults(): + for column in AOTC_TRUE_FLAG_COLUMNS + AOTC_FALSE_FLAG_COLUMNS: + assert POLICYENGINE_US_EXPORT_DEFAULTS[column] is False + assert POLICYENGINE_US_EXPORT_DEFAULTS[AOTC_PRIOR_YEARS_COLUMN] == 0 + + +def test_fallback_marks_tuition_holders_when_no_credit_signal(): + """No credit column -> eCPS fallback aotc_student = tuition > 0. + + This path needs no PolicyEngine-US parameters (no back-solve runs). + """ + pipeline = _pipeline() + persons = pd.DataFrame( + { + "person_id": [1, 2, 3], + "household_id": [10, 10, 20], + "tax_unit_id": [100, 100, 200], + "age": [45, 19, 50], + "income": [60_000.0, 0.0, 40_000.0], + "qualified_tuition_expenses": [0.0, 3_500.0, 0.0], + "relationship_to_head": [0, 2, 0], + } + ) + + result = pipeline._construct_aotc_eligibility_inputs(persons) + by_id = result.set_index("person_id") + + # Student (person 2, positive tuition) gets the five factual flags. + for column in AOTC_TRUE_FLAG_COLUMNS: + assert bool(by_id.loc[2, column]) is True + for column in AOTC_FALSE_FLAG_COLUMNS: + assert bool(by_id.loc[2, column]) is False + assert int(by_id.loc[2, AOTC_PRIOR_YEARS_COLUMN]) == 0 + + # Non-students (persons 1, 3) keep defaults. 
+ for person_id in (1, 3): + for column in AOTC_TRUE_FLAG_COLUMNS + AOTC_FALSE_FLAG_COLUMNS: + assert bool(by_id.loc[person_id, column]) is False + assert int(by_id.loc[person_id, AOTC_PRIOR_YEARS_COLUMN]) == 0 + + +def test_no_signal_at_all_leaves_frame_unchanged(): + """Neither a credit nor a tuition column -> nothing to construct.""" + pipeline = _pipeline() + persons = pd.DataFrame( + { + "person_id": [1, 2], + "household_id": [10, 10], + "tax_unit_id": [100, 100], + "age": [40, 38], + "income": [50_000.0, 45_000.0], + "relationship_to_head": [0, 1], + } + ) + + result = pipeline._construct_aotc_eligibility_inputs(persons) + + # The construction returns early; no AOTC columns are added here. The + # export layer supplies the contract-required columns from defaults. + for column in ALL_AOTC_COLUMNS: + assert column not in result.columns + + +def test_fallback_clamps_existing_prior_years_to_three(): + pipeline = _pipeline() + persons = pd.DataFrame( + { + "person_id": [1], + "household_id": [10], + "tax_unit_id": [100], + "age": [20], + "income": [0.0], + "qualified_tuition_expenses": [2_000.0], + AOTC_PRIOR_YEARS_COLUMN: [7], + "relationship_to_head": [0], + } + ) + + result = pipeline._construct_aotc_eligibility_inputs(persons) + assert int(result.set_index("person_id").loc[1, AOTC_PRIOR_YEARS_COLUMN]) == 3 + + +def test_credit_signal_with_zero_positive_credit_marks_nobody(): + """Credit column present but no positive value -> eCPS early return.""" + pipeline = _pipeline() + persons = pd.DataFrame( + { + "person_id": [1, 2], + "household_id": [10, 10], + "tax_unit_id": [100, 100], + "age": [45, 19], + "income": [60_000.0, 0.0], + "qualified_tuition_expenses": [0.0, 3_000.0], + "american_opportunity_credit": [0.0, 0.0], + "is_full_time_college_student": [False, True], + "relationship_to_head": [0, 2], + } + ) + + result = pipeline._construct_aotc_eligibility_inputs(persons) + # When a credit signal exists but is all-zero, the credit-driven path + # returns 
before writing inputs (it does NOT fall back to tuition>0). + for column in ALL_AOTC_COLUMNS: + assert column not in result.columns + + +class TestCreditDrivenConstruction: + """Credit-driven back-solve; needs PolicyEngine-US parameters.""" + + @pytest.fixture(autouse=True) + def _require_policyengine_us(self): + pytest.importorskip("policyengine_us") + + def test_dependent_student_selected_and_tuition_backsolved(self): + pipeline = _pipeline(2024) + # Parent filer + full-time college dependent; $2,500 tax-unit credit + # broadcast across members (PUF tax-unit column on the person frame). + persons = pd.DataFrame( + { + "person_id": [1, 2, 3], + "household_id": [10, 10, 10], + "tax_unit_id": [100, 100, 100], + "age": [50, 19, 16], + "income": [80_000.0, 0.0, 0.0], + "is_tax_unit_dependent": [0.0, 1.0, 1.0], + "is_full_time_college_student": [False, True, False], + "qualified_tuition_expenses": [0.0, 4_000.0, 0.0], + "american_opportunity_credit": [2_500.0, 2_500.0, 2_500.0], + "relationship_to_head": [0, 2, 2], + } + ) + + result = pipeline._construct_aotc_eligibility_inputs(persons) + by_id = result.set_index("person_id") + + # The college dependent is the selected student. + for column in AOTC_TRUE_FLAG_COLUMNS: + assert bool(by_id.loc[2, column]) is True + for column in AOTC_FALSE_FLAG_COLUMNS: + assert bool(by_id.loc[2, column]) is False + assert int(by_id.loc[2, AOTC_PRIOR_YEARS_COLUMN]) in range(0, 4) + + # $2,500 credit back-solves to $4,000 of qualified expenses. + assert by_id.loc[2, "qualified_tuition_expenses"] == pytest.approx(4_000.0) + + # Parent and minor are not students. + for person_id in (1, 3): + for column in AOTC_TRUE_FLAG_COLUMNS: + assert bool(by_id.loc[person_id, column]) is False + + def test_partial_credit_backsolves_to_smaller_expenses(self): + pipeline = _pipeline(2024) + # Single filer who is the student; $1,250 credit -> $1,250 expenses + # (inside the 100% first-bracket), OVERWRITING the reported $2,000. 
+ persons = pd.DataFrame( + { + "person_id": [1], + "household_id": [10], + "tax_unit_id": [100], + "age": [28], + "income": [30_000.0], + "is_tax_unit_dependent": [0.0], + "is_full_time_college_student": [True], + "qualified_tuition_expenses": [2_000.0], + "american_opportunity_credit": [1_250.0], + "relationship_to_head": [0], + } + ) + + result = pipeline._construct_aotc_eligibility_inputs(persons) + row = result.set_index("person_id").loc[1] + for column in AOTC_TRUE_FLAG_COLUMNS: + assert bool(row[column]) is True + assert row["qualified_tuition_expenses"] == pytest.approx(1_250.0) + + def test_full_time_student_selected_when_no_member_has_tuition(self): + pipeline = _pipeline(2024) + # Credit present, nobody has positive tuition: selection falls to the + # full-time college student (second priority group in eCPS). + persons = pd.DataFrame( + { + "person_id": [1, 2], + "household_id": [10, 10], + "tax_unit_id": [100, 100], + "age": [50, 20], + "income": [70_000.0, 0.0], + "is_tax_unit_dependent": [0.0, 1.0], + "is_full_time_college_student": [False, True], + "qualified_tuition_expenses": [0.0, 0.0], + "american_opportunity_credit": [2_500.0, 2_500.0], + "relationship_to_head": [0, 2], + } + ) + + result = pipeline._construct_aotc_eligibility_inputs(persons) + by_id = result.set_index("person_id") + assert ( + bool(by_id.loc[2, "is_pursuing_credential_for_american_opportunity_credit"]) + is True + ) + assert ( + bool(by_id.loc[1, "is_pursuing_credential_for_american_opportunity_credit"]) + is False + ) + # The student's tuition is set to the credit-implied $4,000. 
+ assert by_id.loc[2, "qualified_tuition_expenses"] == pytest.approx(4_000.0) + + def test_export_includes_all_eight_columns_with_real_values(self): + pipeline = _pipeline(2024) + tbs = pipeline._resolve_policyengine_tax_benefit_system() + persons = pd.DataFrame( + { + "person_id": [1, 2], + "household_id": [10, 10], + "tax_unit_id": [100, 100], + "age": [50, 19], + "sex": [1, 2], + "income": [80_000.0, 0.0], + "is_tax_unit_dependent": [0.0, 1.0], + "is_full_time_college_student": [False, True], + "qualified_tuition_expenses": [0.0, 4_000.0], + "american_opportunity_credit": [2_500.0, 2_500.0], + "relationship_to_head": [0, 2], + } + ) + + tables = pipeline.build_policyengine_entity_tables(persons) + export_maps = build_policyengine_us_export_variable_maps( + tables, tax_benefit_system=tbs + ) + arrays = build_policyengine_us_time_period_arrays( + tables, + period=2024, + household_variable_map=export_maps["household"], + person_variable_map=export_maps["person"], + tax_unit_variable_map=export_maps["tax_unit"], + spm_unit_variable_map=export_maps["spm_unit"], + family_variable_map=export_maps["family"], + ) + + for column in ALL_AOTC_COLUMNS: + assert column in arrays, column + + # The dependent student (second person row) has the True flags. + for column in AOTC_TRUE_FLAG_COLUMNS: + assert arrays[column]["2024"].tolist() == [False, True] + for column in AOTC_FALSE_FLAG_COLUMNS: + assert arrays[column]["2024"].tolist() == [False, False] + assert arrays[AOTC_PRIOR_YEARS_COLUMN]["2024"].tolist() == [0, 0] + + # american_opportunity_credit is a PUF calculated output and must not + # be exported (PolicyEngine-US recomputes it from these inputs). 
+ assert "american_opportunity_credit" not in arrays + + +def test_no_signal_export_falls_back_to_defaults(): + """With no AOTC signal, the contract-required columns still export.""" + pytest.importorskip("policyengine_us") + pipeline = _pipeline(2024) + tbs = pipeline._resolve_policyengine_tax_benefit_system() + persons = pd.DataFrame( + { + "person_id": [1, 2], + "household_id": [10, 10], + "tax_unit_id": [100, 100], + "age": [40, 38], + "sex": [1, 2], + "income": [50_000.0, 45_000.0], + "is_tax_unit_dependent": [0.0, 0.0], + "relationship_to_head": [0, 1], + } + ) + + tables = pipeline.build_policyengine_entity_tables(persons) + export_maps = build_policyengine_us_export_variable_maps( + tables, tax_benefit_system=tbs + ) + arrays = build_policyengine_us_time_period_arrays( + tables, + period=2024, + household_variable_map=export_maps["household"], + person_variable_map=export_maps["person"], + tax_unit_variable_map=export_maps["tax_unit"], + spm_unit_variable_map=export_maps["spm_unit"], + family_variable_map=export_maps["family"], + ) + + for column in AOTC_TRUE_FLAG_COLUMNS + AOTC_FALSE_FLAG_COLUMNS: + assert column in arrays + assert arrays[column]["2024"].tolist() == [False, False] + assert arrays[AOTC_PRIOR_YEARS_COLUMN]["2024"].tolist() == [0, 0] From 028591ac1efe84d529d338aad3b23670e1ce1ffd Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Mon, 1 Jun 2026 06:59:21 -0400 Subject: [PATCH 3/3] Match eCPS AOTC algorithm: preserve reported tuition, flag all student members Cycle review found _construct_aotc_eligibility_inputs diverged from the eCPS baseline it mirrors. 
eCPS (_impute_aotc_eligibility_inputs in PolicyEngine/policyengine-us-data, unmerged branch codex/fix-aotc-eligibility): - if any tax-unit member reports positive qualified tuition, flag ALL such members and leave their reported tuition unchanged (no back-solve, no overwrite); - otherwise select one student by priority (full-time -> tax-unit dependent -> any member) and back-solve only that student's tuition to the credit-implied minimum. The PR instead ran a credit-exhaustion loop that overwrote reported tuition and could flag only a subset of tuition-positive members, changing the exported qualified_tuition_expenses (a contract-required column) relative to eCPS on the common positive-credit-with-tuition case. Ported the eCPS branch structure exactly; the numeric back-solve helper (verified equal to eCPS's inverse across 14 credit values) is unchanged. Dropped the now-unused maximum_american_opportunity_credit_per_student import. Also corrected fabricated provenance citations (puf.py:707, extended_cps.py:1204-1369 / :61-71, AOTC_ELIGIBILITY_INPUTS, utils/aotc.py -- none exist) to cite the real unmerged eCPS branch, and updated the tests accordingly: preserve-existing-tuition, flag-all-tuition-positive-members (new), and the no-tuition back-solve path. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/microplex_us/pipelines/us.py | 89 +++++++++---------- src/microplex_us/policyengine/aotc.py | 13 +-- src/microplex_us/policyengine/us.py | 7 +- .../test_us_aotc_eligibility_inputs.py | 73 +++++++++++++-- 4 files changed, 117 insertions(+), 65 deletions(-) diff --git a/src/microplex_us/pipelines/us.py b/src/microplex_us/pipelines/us.py index d4e4a7b..bc8c1a1 100644 --- a/src/microplex_us/pipelines/us.py +++ b/src/microplex_us/pipelines/us.py @@ -67,7 +67,6 @@ optimize_policyengine_us_native_loss_dataset, ) from microplex_us.policyengine.aotc import ( - maximum_american_opportunity_credit_per_student, qualifying_expenses_from_american_opportunity_credit, ) from microplex_us.policyengine.comparison import ( @@ -4375,10 +4374,10 @@ def build_policyengine_entity_tables( return tables # AOTC eligibility-input columns populated by - # ``_construct_aotc_eligibility_inputs``. Mirrors the enhanced-CPS - # baseline tuple ``AOTC_ELIGIBILITY_INPUTS`` at - # ``PolicyEngine/policyengine-us-data`` - # ``policyengine_us_data/datasets/cps/extended_cps.py:61-71``. + # ``_construct_aotc_eligibility_inputs``, matching the per-student inputs + # written by the enhanced-CPS baseline ``_impute_aotc_eligibility_inputs`` + # (PolicyEngine/policyengine-us-data, unmerged branch + # ``codex/fix-aotc-eligibility``). _AOTC_TRUE_FLAG_COLUMNS = ( "is_pursuing_credential_for_american_opportunity_credit", "attends_eligible_educational_institution_for_american_opportunity_credit", @@ -4399,9 +4398,9 @@ def _construct_aotc_eligibility_inputs( """Convert the PUF AOTC signal into person eligibility inputs. Mirrors the enhanced-CPS baseline - ``ExtendedCPS._impute_aotc_eligibility_inputs`` at - ``PolicyEngine/policyengine-us-data`` - ``policyengine_us_data/datasets/cps/extended_cps.py:1204-1369``. + ``ExtendedCPS._impute_aotc_eligibility_inputs`` + (``PolicyEngine/policyengine-us-data``, unmerged branch + ``codex/fix-aotc-eligibility``). 
The enhanced CPS operates on a flat ``{variable: {period: array}}`` payload keyed by ``person_tax_unit_id``; Microplex carries the same @@ -4413,13 +4412,15 @@ def _construct_aotc_eligibility_inputs( algorithm applied to a single DataFrame. Driven by the PUF-imputed ``american_opportunity_credit`` (PUF - ``E87521``; see ``data_sources/puf.py`` / ``manifests/puf.json`` and - ``policyengine_us_data`` ``datasets/puf/puf.py:707``). For each tax - unit with positive credit, the credit is back-solved into per-student - qualified-tuition expenses and students are selected by the enhanced - CPS priority (positive tuition -> full-time college student -> - tax-unit dependent -> any member) until the credit is exhausted. - With no credit signal it falls back to the enhanced-CPS + ``E87521``; see ``data_sources/puf.py`` / ``manifests/puf.json``). For + each tax unit with positive credit the enhanced-CPS rule applies: if + any member already reports positive qualified tuition, every such + member is marked an AOTC student and the reported tuition is left + unchanged; otherwise a single student is selected by priority + (full-time college student -> tax-unit dependent -> any member) and + that student's qualified tuition is back-solved to the minimum amount + reproducing the unit's credit under PolicyEngine-US. With no credit + signal it falls back to the enhanced-CPS ``aotc_student = qualified_tuition_expenses > 0`` rule. 
The selected students receive the five factual eligibility flags as ``True``, ``has_completed_first_four_years_of_postsecondary_education`` and @@ -4503,9 +4504,6 @@ def _construct_aotc_eligibility_inputs( if member_credit > prior: credit_by_tax_unit[tax_unit_id] = float(member_credit) - max_student_credit = maximum_american_opportunity_credit_per_student( - time_period - ) positive_credit_units = [ tax_unit_id for tax_unit_id, unit_credit in credit_by_tax_unit.items() @@ -4513,42 +4511,35 @@ def _construct_aotc_eligibility_inputs( ] for tax_unit_id in positive_credit_units: member_indices = np.flatnonzero(person_tax_unit_ids == tax_unit_id) - if member_indices.size == 0 or max_student_credit <= 0: + if member_indices.size == 0: continue + # eCPS rule: if any member already reports positive qualified + # tuition, every such member is an AOTC student and the reported + # tuition is left untouched (no back-solve, no rewrite). tuition_indices = member_indices[tuition[member_indices] > 0] - candidate_groups = [] if tuition_indices.size > 0: - candidate_groups.append(tuition_indices) - candidate_groups.extend( - ( - member_indices[full_time[member_indices]], - member_indices[dependent[member_indices]], - member_indices, - ) + aotc_student[tuition_indices] = True + continue + + # Otherwise select a single student by the eCPS priority + # (full-time college student -> tax-unit dependent -> any + # member) and back-solve the minimum qualified tuition that + # reproduces the unit's credit under PolicyEngine-US. 
+ preferred = member_indices[full_time[member_indices]] + if preferred.size == 0: + preferred = member_indices[dependent[member_indices]] + if preferred.size == 0: + preferred = member_indices + selected = preferred[0] + aotc_student[selected] = True + tuition[selected] = max( + tuition[selected], + qualifying_expenses_from_american_opportunity_credit( + credit_by_tax_unit[tax_unit_id], + time_period, + ), ) - ordered_candidates = [] - seen = set() - for group in candidate_groups: - for index in group: - if index not in seen: - ordered_candidates.append(index) - seen.add(index) - - remaining_credit = float(credit_by_tax_unit[tax_unit_id]) - for selected in ordered_candidates: - if remaining_credit <= 0: - break - student_credit = min(remaining_credit, max_student_credit) - target_tuition = ( - qualifying_expenses_from_american_opportunity_credit( - student_credit, - time_period, - ) - ) - aotc_student[selected] = True - tuition[selected] = target_tuition - remaining_credit -= student_credit else: aotc_student = tuition > 0 if not aotc_student.any(): diff --git a/src/microplex_us/policyengine/aotc.py b/src/microplex_us/policyengine/aotc.py index 860efed..1eeee85 100644 --- a/src/microplex_us/policyengine/aotc.py +++ b/src/microplex_us/policyengine/aotc.py @@ -1,11 +1,12 @@ """American Opportunity Tax Credit (AOTC) helpers backed by PolicyEngine-US. -Ported logic-for-logic from the enhanced-CPS baseline at -``PolicyEngine/policyengine-us-data`` -``policyengine_us_data/utils/aotc.py`` so the Microplex AOTC -eligibility-input construction back-solves per-student qualified expenses -using the same PolicyEngine-US parameter schedule the enhanced CPS uses. 
-These functions read only the published +Mirrors the credit-to-expenses inverse in the enhanced-CPS baseline (the +``_aotc_qualifying_expenses_from_credit`` staticmethod of ``ExtendedCPS`` in +``PolicyEngine/policyengine-us-data``, unmerged branch +``codex/fix-aotc-eligibility``) so the Microplex AOTC eligibility-input +construction back-solves per-student qualified expenses the same way. Where +the enhanced CPS hard-codes the AOTC bracket constants, these functions read +only the published ``gov.irs.credits.education.american_opportunity_credit.amount`` marginal schedule, so they stay in lock-step with PolicyEngine-US parameter updates. """ diff --git a/src/microplex_us/policyengine/us.py b/src/microplex_us/policyengine/us.py index 03828e1..bed79b3 100644 --- a/src/microplex_us/policyengine/us.py +++ b/src/microplex_us/policyengine/us.py @@ -275,9 +275,10 @@ class PolicyEngineUSVariableMaterializationResult: # American Opportunity Tax Credit (AOTC) factual eligibility inputs, # populated per tax unit by # ``USMicroplexPipeline._construct_aotc_eligibility_inputs`` from the - # PUF ``american_opportunity_credit`` signal. Mirrors the enhanced-CPS - # tuple ``AOTC_ELIGIBILITY_INPUTS`` at PolicyEngine/policyengine-us-data - # ``policyengine_us_data/datasets/cps/extended_cps.py:61-71``. + # PUF ``american_opportunity_credit`` signal, matching the enhanced-CPS + # baseline ``_impute_aotc_eligibility_inputs`` + # (PolicyEngine/policyengine-us-data, unmerged branch + # ``codex/fix-aotc-eligibility``). 
"is_pursuing_credential_for_american_opportunity_credit", "attends_eligible_educational_institution_for_american_opportunity_credit", "is_enrolled_at_least_half_time_for_american_opportunity_credit", diff --git a/tests/pipelines/test_us_aotc_eligibility_inputs.py b/tests/pipelines/test_us_aotc_eligibility_inputs.py index 5e1c8ef..9b7d34c 100644 --- a/tests/pipelines/test_us_aotc_eligibility_inputs.py +++ b/tests/pipelines/test_us_aotc_eligibility_inputs.py @@ -2,9 +2,9 @@ Exercises ``USMicroplexPipeline._construct_aotc_eligibility_inputs`` (and its call site inside ``build_policyengine_entity_tables``), which mirrors the -enhanced-CPS baseline ``ExtendedCPS._impute_aotc_eligibility_inputs`` at -``PolicyEngine/policyengine-us-data`` -``policyengine_us_data/datasets/cps/extended_cps.py:1204-1369``. +enhanced-CPS baseline ``ExtendedCPS._impute_aotc_eligibility_inputs`` +(``PolicyEngine/policyengine-us-data``, unmerged branch +``codex/fix-aotc-eligibility``). """ import pandas as pd @@ -186,7 +186,8 @@ def test_dependent_student_selected_and_tuition_backsolved(self): assert bool(by_id.loc[2, column]) is False assert int(by_id.loc[2, AOTC_PRIOR_YEARS_COLUMN]) in range(0, 4) - # $2,500 credit back-solves to $4,000 of qualified expenses. + # Person 2 already reports $4,000 tuition; eCPS flags the member and + # preserves the reported tuition (no rewrite). assert by_id.loc[2, "qualified_tuition_expenses"] == pytest.approx(4_000.0) # Parent and minor are not students. @@ -194,10 +195,11 @@ def test_dependent_student_selected_and_tuition_backsolved(self): for column in AOTC_TRUE_FLAG_COLUMNS: assert bool(by_id.loc[person_id, column]) is False - def test_partial_credit_backsolves_to_smaller_expenses(self): + def test_existing_positive_tuition_is_preserved(self): pipeline = _pipeline(2024) - # Single filer who is the student; $1,250 credit -> $1,250 expenses - # (inside the 100% first-bracket), OVERWRITING the reported $2,000. 
+ # Single filer-student who already reports positive tuition. eCPS flags + # the member but leaves the reported tuition untouched -- no back-solve, + # no overwrite -- even when the credit would imply a smaller base. persons = pd.DataFrame( { "person_id": [1], @@ -213,6 +215,63 @@ def test_partial_credit_backsolves_to_smaller_expenses(self): } ) + result = pipeline._construct_aotc_eligibility_inputs(persons) + row = result.set_index("person_id").loc[1] + for column in AOTC_TRUE_FLAG_COLUMNS: + assert bool(row[column]) is True + # Reported tuition is preserved, not overwritten to the $1,250 the + # credit would otherwise back-solve to. + assert row["qualified_tuition_expenses"] == pytest.approx(2_000.0) + + def test_all_tuition_positive_members_are_flagged(self): + pipeline = _pipeline(2024) + # Two members both reporting positive tuition in one credit-positive + # tax unit. eCPS flags BOTH (it does not stop after a single student) + # and leaves both reported tuition values untouched. + persons = pd.DataFrame( + { + "person_id": [1, 2], + "household_id": [10, 10], + "tax_unit_id": [100, 100], + "age": [20, 22], + "income": [0.0, 0.0], + "is_tax_unit_dependent": [1.0, 1.0], + "is_full_time_college_student": [True, True], + "qualified_tuition_expenses": [3_000.0, 3_000.0], + "american_opportunity_credit": [2_500.0, 2_500.0], + "relationship_to_head": [2, 2], + } + ) + + result = pipeline._construct_aotc_eligibility_inputs(persons) + by_id = result.set_index("person_id") + for person_id in (1, 2): + for column in AOTC_TRUE_FLAG_COLUMNS: + assert bool(by_id.loc[person_id, column]) is True + assert by_id.loc[ + person_id, "qualified_tuition_expenses" + ] == pytest.approx(3_000.0) + + def test_no_tuition_partial_credit_backsolves_to_smaller_expenses(self): + pipeline = _pipeline(2024) + # No member reports tuition; a $1,250 credit back-solves to $1,250 of + # qualified expenses (inside the 100% first bracket) on the selected + # full-time student. 
+ persons = pd.DataFrame( + { + "person_id": [1], + "household_id": [10], + "tax_unit_id": [100], + "age": [28], + "income": [30_000.0], + "is_tax_unit_dependent": [0.0], + "is_full_time_college_student": [True], + "qualified_tuition_expenses": [0.0], + "american_opportunity_credit": [1_250.0], + "relationship_to_head": [0], + } + ) + result = pipeline._construct_aotc_eligibility_inputs(persons) row = result.set_index("person_id").loc[1] for column in AOTC_TRUE_FLAG_COLUMNS: