diff --git a/src/microplex_us/pipelines/us.py b/src/microplex_us/pipelines/us.py index 7c7b5a1..bc8c1a1 100644 --- a/src/microplex_us/pipelines/us.py +++ b/src/microplex_us/pipelines/us.py @@ -66,6 +66,9 @@ from microplex_us.pipelines.pe_native_optimization import ( optimize_policyengine_us_native_loss_dataset, ) +from microplex_us.policyengine.aotc import ( + qualifying_expenses_from_american_opportunity_credit, +) from microplex_us.policyengine.comparison import ( evaluate_policyengine_us_target_set, slice_policyengine_us_target_evaluation_report, @@ -4345,6 +4348,7 @@ def build_policyengine_entity_tables( households = self._build_policyengine_households(persons) tax_units, persons = self._build_policyengine_tax_units(persons) + persons = self._construct_aotc_eligibility_inputs(persons) persons = self._assign_family_and_spm_units(persons) families = self._collapse_group_table(persons, "family_id") spm_units = self._collapse_group_table(persons, "spm_unit_id") @@ -4369,6 +4373,219 @@ def build_policyengine_entity_tables( ) return tables + # AOTC eligibility-input columns populated by + # ``_construct_aotc_eligibility_inputs``, matching the per-student inputs + # written by the enhanced-CPS baseline ``_impute_aotc_eligibility_inputs`` + # (PolicyEngine/policyengine-us-data, unmerged branch + # ``codex/fix-aotc-eligibility``). 
# Members of the pipeline class that owns ``build_policyengine_entity_tables``
# (the class header lies outside this excerpt).

# Person-level inputs set to ``True`` for every constructed AOTC student.
_AOTC_TRUE_FLAG_COLUMNS = (
    "is_pursuing_credential_for_american_opportunity_credit",
    "attends_eligible_educational_institution_for_american_opportunity_credit",
    "is_enrolled_at_least_half_time_for_american_opportunity_credit",
    "has_american_opportunity_credit_1098_t_or_exception",
    "has_american_opportunity_credit_institution_ein",
)
# Person-level inputs forced to ``False`` for every constructed AOTC student.
_AOTC_FALSE_FLAG_COLUMNS = (
    "has_completed_first_four_years_of_postsecondary_education",
    "has_felony_drug_conviction",
)
# Prior-year claim count; capped at 3 for constructed AOTC students.
_AOTC_PRIOR_YEARS_COLUMN = "american_opportunity_credit_claimed_prior_years"


def _construct_aotc_eligibility_inputs(
    self,
    persons: pd.DataFrame,
) -> pd.DataFrame:
    """Convert the PUF AOTC signal into person eligibility inputs.

    Mirrors the enhanced-CPS baseline
    ``ExtendedCPS._impute_aotc_eligibility_inputs``
    (``PolicyEngine/policyengine-us-data``, unmerged branch
    ``codex/fix-aotc-eligibility``).

    Driven by the PUF-imputed ``american_opportunity_credit`` (PUF
    ``E87521``; see ``data_sources/puf.py`` / ``manifests/puf.json``), which
    rides on the person table as the per-tax-unit value repeated across
    members. For each tax unit with positive credit:

    * if any member already reports positive ``qualified_tuition_expenses``,
      every such member is marked an AOTC student and the reported tuition
      is left unchanged;
    * otherwise a single student is selected by priority (full-time college
      student -> tax-unit dependent -> first member) and that student's
      qualified tuition is back-solved to the minimum amount reproducing the
      unit's credit under PolicyEngine-US.

    When the frame has no credit column at all, the fallback rule
    ``aotc_student = qualified_tuition_expenses > 0`` applies. When the
    credit column exists but holds no positive value, nothing is
    constructed.

    Selected students receive the five ``_AOTC_TRUE_FLAG_COLUMNS`` as
    ``True``, the two ``_AOTC_FALSE_FLAG_COLUMNS`` as ``False`` and
    ``american_opportunity_credit_claimed_prior_years`` capped at 3.
    ``american_opportunity_credit`` itself is not modified here.

    Args:
        persons: Person table keyed by ``tax_unit_id``. Optional input
            columns: ``american_opportunity_credit``,
            ``qualified_tuition_expenses``, ``is_full_time_college_student``,
            ``is_tax_unit_dependent`` and any pre-existing AOTC input
            columns.

    Returns:
        ``persons`` itself (same object, untouched) when there is nothing to
        construct; otherwise a copy carrying the eight AOTC input columns
        and the (possibly back-solved) ``qualified_tuition_expenses``.
    """
    if persons is None or persons.empty or "tax_unit_id" not in persons.columns:
        return persons

    tuition_column = "qualified_tuition_expenses"
    credit_column = "american_opportunity_credit"
    has_tuition_column = tuition_column in persons.columns
    has_credit_column = credit_column in persons.columns
    if not (has_tuition_column or has_credit_column):
        # Neither a tuition nor a credit signal: no student population.
        return persons

    n = len(persons)

    def amounts(column: str) -> np.ndarray:
        """Return ``column`` as a writable float array (absent/invalid -> 0)."""
        if column not in persons.columns:
            return np.zeros(n, dtype=float)
        return (
            pd.to_numeric(persons[column], errors="coerce")
            .fillna(0.0)
            .to_numpy(dtype=float, copy=True)
        )

    def indicator(column: str) -> np.ndarray:
        """Return ``column`` as a boolean array (absent/invalid -> False)."""
        if column not in persons.columns:
            return np.zeros(n, dtype=bool)
        return (
            pd.to_numeric(persons[column], errors="coerce")
            .fillna(0)
            .astype(bool)
            .to_numpy()
        )

    tuition = amounts(tuition_column)
    aotc_student = np.zeros(n, dtype=bool)
    tuition_back_solved = False

    if has_credit_column:
        credit = amounts(credit_column)
        if not (credit > 0).any():
            # No positive credit anywhere: nothing to construct. The
            # credit-driven path does NOT fall back to ``tuition > 0``.
            return persons

        time_period = int(self.config.policyengine_dataset_year or 2024)
        full_time = indicator("is_full_time_college_student")
        dependent = indicator("is_tax_unit_dependent")

        # Collapse the broadcast credit to one value per tax unit. Taking
        # the maximum (floored at zero) guards against per-member zero-fill
        # on non-filer rows. Rows with a missing tax unit id get code -1
        # and are ignored.
        unit_codes, unit_ids = pd.factorize(persons["tax_unit_id"].to_numpy())
        in_unit = unit_codes >= 0
        unit_credit = np.zeros(len(unit_ids), dtype=float)
        np.maximum.at(unit_credit, unit_codes[in_unit], credit[in_unit])

        # Group the members of credit-positive units in a single pass
        # instead of rescanning every person once per unit. The stable sort
        # keeps the original row order inside each unit, so "first member"
        # means the first row of the unit.
        person_unit_credit = np.zeros(n, dtype=float)
        person_unit_credit[in_unit] = unit_credit[unit_codes[in_unit]]
        members = np.flatnonzero(person_unit_credit > 0)
        members = members[np.argsort(unit_codes[members], kind="stable")]
        unit_starts = np.flatnonzero(np.diff(unit_codes[members])) + 1

        for member_indices in np.split(members, unit_starts):
            if member_indices.size == 0:
                continue

            # Members already reporting positive tuition are all students
            # and keep their reported tuition (no back-solve, no rewrite).
            reported = member_indices[tuition[member_indices] > 0]
            if reported.size > 0:
                aotc_student[reported] = True
                continue

            # Otherwise pick one student by priority and back-solve the
            # minimum tuition that reproduces the unit's credit.
            preferred = member_indices[full_time[member_indices]]
            if preferred.size == 0:
                preferred = member_indices[dependent[member_indices]]
            if preferred.size == 0:
                preferred = member_indices
            selected = preferred[0]
            aotc_student[selected] = True
            tuition[selected] = max(
                tuition[selected],
                qualifying_expenses_from_american_opportunity_credit(
                    float(unit_credit[unit_codes[selected]]),
                    time_period,
                ),
            )
            tuition_back_solved = True
    else:
        aotc_student = tuition > 0

    if not aotc_student.any():
        return persons

    result = persons.copy()

    def existing_flags(column: str) -> np.ndarray:
        """Return the current boolean values of ``column`` (absent -> False)."""
        if column in result.columns:
            return result[column].fillna(False).astype(bool).to_numpy().copy()
        return np.zeros(n, dtype=bool)

    # Factual eligibility flags: True / False for the selected students,
    # existing values preserved for everybody else.
    for columns, value in (
        (self._AOTC_TRUE_FLAG_COLUMNS, True),
        (self._AOTC_FALSE_FLAG_COLUMNS, False),
    ):
        for column in columns:
            values = existing_flags(column)
            values[aotc_student] = value
            result[column] = values

    # Prior-year claims capped at the 4-year (max 3 prior) AOTC limit.
    if self._AOTC_PRIOR_YEARS_COLUMN in result.columns:
        prior_years = (
            pd.to_numeric(result[self._AOTC_PRIOR_YEARS_COLUMN], errors="coerce")
            .fillna(0)
            .astype(np.int64)
            .to_numpy()
            .copy()
        )
    else:
        prior_years = np.zeros(n, dtype=np.int64)
    prior_years[aotc_student] = np.minimum(prior_years[aotc_student], 3)
    result[self._AOTC_PRIOR_YEARS_COLUMN] = prior_years

    # Write the tuition back so the exported ``qualified_tuition_expenses``
    # reproduces the PUF credit under PolicyEngine-US. The column is also
    # created when it was absent but a value had to be back-solved;
    # otherwise the flagged student would carry no expenses at all.
    if has_tuition_column or tuition_back_solved:
        result[tuition_column] = tuition

    return result
"""American Opportunity Tax Credit (AOTC) helpers backed by PolicyEngine-US.

Mirrors the credit-to-expenses inverse in the enhanced-CPS baseline (the
``_aotc_qualifying_expenses_from_credit`` staticmethod of ``ExtendedCPS`` in
``PolicyEngine/policyengine-us-data``, unmerged branch
``codex/fix-aotc-eligibility``) so the Microplex AOTC eligibility-input
construction back-solves per-student qualified expenses the same way. Where
the enhanced CPS hard-codes the AOTC bracket constants, these functions read
only the published
``gov.irs.credits.education.american_opportunity_credit.amount`` marginal
schedule, so they stay in lock-step with PolicyEngine-US parameter updates.
"""

import math
from functools import lru_cache

import numpy as np


@lru_cache(maxsize=16)
def get_american_opportunity_credit_amount_scale(year: int):
    """Return the PolicyEngine-US AOTC amount scale for a tax year.

    The scale is resolved at ``{year}-01-01`` and memoised per ``year``.
    ``policyengine_us`` is imported lazily so this module can be imported
    without it.
    """
    from policyengine_us import CountryTaxBenefitSystem

    parameters = CountryTaxBenefitSystem().parameters
    amount = parameters.gov.irs.credits.education.american_opportunity_credit.amount
    return amount(f"{year}-01-01")


def qualifying_expenses_from_american_opportunity_credit(
    credit: float,
    year: int,
) -> float:
    """Return the minimum expenses that generate ``credit`` under PE-US.

    Args:
        credit: Credit amount to reproduce; non-positive or NaN yields 0.0.
        year: Tax year whose AOTC amount schedule is inverted.
    """
    scale = get_american_opportunity_credit_amount_scale(year)
    return _minimum_base_for_marginal_amount(credit, scale)


def maximum_american_opportunity_credit_per_student(year: int) -> float:
    """Return the maximum AOTC generated by one student under PE-US.

    Evaluates the schedule at its highest threshold; an empty schedule
    yields 0.0.
    """
    scale = get_american_opportunity_credit_amount_scale(year)
    if len(scale.thresholds) == 0:
        return 0.0
    top_threshold = max(scale.thresholds)
    return float(scale.calc(np.array([top_threshold], dtype=float))[0])


def _minimum_base_for_marginal_amount(amount: float, scale) -> float:
    """Invert a marginal amount schedule using the schedule brackets.

    Args:
        amount: Target amount produced by the schedule.
        scale: Object exposing parallel ``thresholds`` and ``rates``
            sequences (bracket lower bounds and marginal rates); they need
            not be pre-sorted.

    Returns:
        The smallest base whose cumulative marginal amount equals
        ``amount``. Returns 0.0 for a non-positive or NaN ``amount`` and for
        an empty schedule, and the highest threshold when ``amount`` exceeds
        what the positive-rate brackets can produce.

    Raises:
        ValueError: If ``thresholds`` and ``rates`` differ in length.
    """
    amount = float(amount)
    # NaN compares false against everything and would otherwise fall
    # through to the "amount exceeds the schedule" result below.
    if math.isnan(amount) or amount <= 0:
        return 0.0

    thresholds = np.asarray(scale.thresholds, dtype=float)
    rates = np.asarray(scale.rates, dtype=float)
    if thresholds.size != rates.size:
        raise ValueError(
            "scale.thresholds and scale.rates must have the same length "
            f"(got {thresholds.size} and {rates.size})"
        )
    if thresholds.size == 0:
        return 0.0

    order = np.argsort(thresholds)
    lowers = thresholds[order]
    # Each bracket ends where the next begins; the last one is open-ended.
    uppers = np.append(lowers[1:], math.inf)
    rates = rates[order]

    accrued = 0.0
    for lower, upper, rate in zip(lowers.tolist(), uppers.tolist(), rates.tolist()):
        if rate <= 0:
            # A bracket that adds nothing cannot contain the target.
            continue
        bracket_amount = (upper - lower) * rate
        if amount <= accrued + bracket_amount:
            return lower + (amount - accrued) / rate
        accrued += bracket_amount

    return float(lowers[-1])
+ "is_pursuing_credential_for_american_opportunity_credit": False, + "attends_eligible_educational_institution_for_american_opportunity_credit": False, + "is_enrolled_at_least_half_time_for_american_opportunity_credit": False, + "has_american_opportunity_credit_1098_t_or_exception": False, + "has_american_opportunity_credit_institution_ein": False, + "has_completed_first_four_years_of_postsecondary_education": False, + "has_felony_drug_conviction": False, + "american_opportunity_credit_claimed_prior_years": 0, "auto_loan_interest": 0.0, # SCF net-worth component leaves (G1): positive-magnitude balances, # default 0 when the SCF donor leaves a row without that component. diff --git a/tests/pipelines/test_us_aotc_eligibility_inputs.py b/tests/pipelines/test_us_aotc_eligibility_inputs.py new file mode 100644 index 0000000..9b7d34c --- /dev/null +++ b/tests/pipelines/test_us_aotc_eligibility_inputs.py @@ -0,0 +1,396 @@ +"""Tests for the AOTC eligibility-input construction in the US pipeline. + +Exercises ``USMicroplexPipeline._construct_aotc_eligibility_inputs`` (and its +call site inside ``build_policyengine_entity_tables``), which mirrors the +enhanced-CPS baseline ``ExtendedCPS._impute_aotc_eligibility_inputs`` +(``PolicyEngine/policyengine-us-data``, unmerged branch +``codex/fix-aotc-eligibility``). 
"""Tests for the AOTC eligibility-input construction in the US pipeline.

Exercises ``USMicroplexPipeline._construct_aotc_eligibility_inputs`` (and its
call site inside ``build_policyengine_entity_tables``), which mirrors the
enhanced-CPS baseline ``ExtendedCPS._impute_aotc_eligibility_inputs``
(``PolicyEngine/policyengine-us-data``, unmerged branch
``codex/fix-aotc-eligibility``).
"""

import pandas as pd
import pytest

from microplex_us.pipelines.us import USMicroplexBuildConfig, USMicroplexPipeline
from microplex_us.policyengine.us import (
    POLICYENGINE_US_EXPORT_DEFAULTS,
    SAFE_POLICYENGINE_US_EXPORT_VARIABLES,
    build_policyengine_us_export_variable_maps,
    build_policyengine_us_time_period_arrays,
)

AOTC_TRUE_FLAG_COLUMNS = (
    "is_pursuing_credential_for_american_opportunity_credit",
    "attends_eligible_educational_institution_for_american_opportunity_credit",
    "is_enrolled_at_least_half_time_for_american_opportunity_credit",
    "has_american_opportunity_credit_1098_t_or_exception",
    "has_american_opportunity_credit_institution_ein",
)
AOTC_FALSE_FLAG_COLUMNS = (
    "has_completed_first_four_years_of_postsecondary_education",
    "has_felony_drug_conviction",
)
AOTC_PRIOR_YEARS_COLUMN = "american_opportunity_credit_claimed_prior_years"
ALL_AOTC_COLUMNS = (
    AOTC_TRUE_FLAG_COLUMNS + AOTC_FALSE_FLAG_COLUMNS + (AOTC_PRIOR_YEARS_COLUMN,)
)

# Entities whose ``<entity>_variable_map`` is passed to the array builder.
_EXPORT_ENTITIES = ("household", "person", "tax_unit", "spm_unit", "family")


def _pipeline(year: int = 2024) -> USMicroplexPipeline:
    """Build a pipeline configured for ``year``."""
    return USMicroplexPipeline(USMicroplexBuildConfig(policyengine_dataset_year=year))


def _construct_by_person(persons: pd.DataFrame, year: int = 2024) -> pd.DataFrame:
    """Run the construction and index the result by ``person_id``."""
    result = _pipeline(year)._construct_aotc_eligibility_inputs(persons)
    return result.set_index("person_id")


def _export_arrays(persons: pd.DataFrame, year: int = 2024) -> dict:
    """Run entity-table construction and export to time-period arrays."""
    pipeline = _pipeline(year)
    tbs = pipeline._resolve_policyengine_tax_benefit_system()
    tables = pipeline.build_policyengine_entity_tables(persons)
    export_maps = build_policyengine_us_export_variable_maps(
        tables, tax_benefit_system=tbs
    )
    return build_policyengine_us_time_period_arrays(
        tables,
        period=year,
        **{
            f"{entity}_variable_map": export_maps[entity]
            for entity in _EXPORT_ENTITIES
        },
    )


def _assert_flags(by_id: pd.DataFrame, person_id, columns, expected: bool) -> None:
    """Assert every column in ``columns`` equals ``expected`` for one person."""
    for column in columns:
        assert bool(by_id.loc[person_id, column]) is expected, column


def _assert_no_aotc_columns(frame: pd.DataFrame) -> None:
    """Assert the construction added none of the eight AOTC columns."""
    for column in ALL_AOTC_COLUMNS:
        assert column not in frame.columns, column


def test_all_eight_aotc_columns_are_safe_export_variables():
    for column in ALL_AOTC_COLUMNS:
        assert column in SAFE_POLICYENGINE_US_EXPORT_VARIABLES


def test_all_eight_aotc_columns_have_false_or_zero_defaults():
    for column in AOTC_TRUE_FLAG_COLUMNS + AOTC_FALSE_FLAG_COLUMNS:
        assert POLICYENGINE_US_EXPORT_DEFAULTS[column] is False
    assert POLICYENGINE_US_EXPORT_DEFAULTS[AOTC_PRIOR_YEARS_COLUMN] == 0


def test_fallback_marks_tuition_holders_when_no_credit_signal():
    """No credit column -> eCPS fallback aotc_student = tuition > 0.

    This path needs no PolicyEngine-US parameters (no back-solve runs).
    """
    persons = pd.DataFrame(
        {
            "person_id": [1, 2, 3],
            "household_id": [10, 10, 20],
            "tax_unit_id": [100, 100, 200],
            "age": [45, 19, 50],
            "income": [60_000.0, 0.0, 40_000.0],
            "qualified_tuition_expenses": [0.0, 3_500.0, 0.0],
            "relationship_to_head": [0, 2, 0],
        }
    )

    by_id = _construct_by_person(persons)

    # Student (person 2, positive tuition) gets the five factual flags.
    _assert_flags(by_id, 2, AOTC_TRUE_FLAG_COLUMNS, True)
    _assert_flags(by_id, 2, AOTC_FALSE_FLAG_COLUMNS, False)
    assert int(by_id.loc[2, AOTC_PRIOR_YEARS_COLUMN]) == 0

    # Non-students (persons 1, 3) keep defaults.
    for person_id in (1, 3):
        _assert_flags(
            by_id, person_id, AOTC_TRUE_FLAG_COLUMNS + AOTC_FALSE_FLAG_COLUMNS, False
        )
        assert int(by_id.loc[person_id, AOTC_PRIOR_YEARS_COLUMN]) == 0


def test_no_signal_at_all_leaves_frame_unchanged():
    """Neither a credit nor a tuition column -> nothing to construct."""
    persons = pd.DataFrame(
        {
            "person_id": [1, 2],
            "household_id": [10, 10],
            "tax_unit_id": [100, 100],
            "age": [40, 38],
            "income": [50_000.0, 45_000.0],
            "relationship_to_head": [0, 1],
        }
    )

    result = _pipeline()._construct_aotc_eligibility_inputs(persons)

    # The construction returns early; no AOTC columns are added here. The
    # export layer supplies the contract-required columns from defaults.
    _assert_no_aotc_columns(result)


def test_fallback_clamps_existing_prior_years_to_three():
    persons = pd.DataFrame(
        {
            "person_id": [1],
            "household_id": [10],
            "tax_unit_id": [100],
            "age": [20],
            "income": [0.0],
            "qualified_tuition_expenses": [2_000.0],
            AOTC_PRIOR_YEARS_COLUMN: [7],
            "relationship_to_head": [0],
        }
    )

    by_id = _construct_by_person(persons)
    assert int(by_id.loc[1, AOTC_PRIOR_YEARS_COLUMN]) == 3


def test_credit_signal_with_zero_positive_credit_marks_nobody():
    """Credit column present but no positive value -> eCPS early return."""
    persons = pd.DataFrame(
        {
            "person_id": [1, 2],
            "household_id": [10, 10],
            "tax_unit_id": [100, 100],
            "age": [45, 19],
            "income": [60_000.0, 0.0],
            "qualified_tuition_expenses": [0.0, 3_000.0],
            "american_opportunity_credit": [0.0, 0.0],
            "is_full_time_college_student": [False, True],
            "relationship_to_head": [0, 2],
        }
    )

    result = _pipeline()._construct_aotc_eligibility_inputs(persons)

    # When a credit signal exists but is all-zero, the credit-driven path
    # returns before writing inputs (it does NOT fall back to tuition>0).
    _assert_no_aotc_columns(result)


class TestCreditDrivenConstruction:
    """Credit-driven construction; skipped without ``policyengine_us``."""

    @pytest.fixture(autouse=True)
    def _require_policyengine_us(self):
        pytest.importorskip("policyengine_us")

    def test_dependent_student_selected_and_tuition_backsolved(self):
        """A dependent already reporting tuition is flagged, tuition kept."""
        # Parent filer + full-time college dependent; $2,500 tax-unit credit
        # broadcast across members (PUF tax-unit column on the person frame).
        persons = pd.DataFrame(
            {
                "person_id": [1, 2, 3],
                "household_id": [10, 10, 10],
                "tax_unit_id": [100, 100, 100],
                "age": [50, 19, 16],
                "income": [80_000.0, 0.0, 0.0],
                "is_tax_unit_dependent": [0.0, 1.0, 1.0],
                "is_full_time_college_student": [False, True, False],
                "qualified_tuition_expenses": [0.0, 4_000.0, 0.0],
                "american_opportunity_credit": [2_500.0, 2_500.0, 2_500.0],
                "relationship_to_head": [0, 2, 2],
            }
        )

        by_id = _construct_by_person(persons, 2024)

        # The college dependent is the selected student.
        _assert_flags(by_id, 2, AOTC_TRUE_FLAG_COLUMNS, True)
        _assert_flags(by_id, 2, AOTC_FALSE_FLAG_COLUMNS, False)
        # No prior-years input column was supplied, so the count is zero.
        assert int(by_id.loc[2, AOTC_PRIOR_YEARS_COLUMN]) == 0

        # Person 2 already reports $4,000 tuition; eCPS flags the member and
        # preserves the reported tuition (no rewrite).
        assert by_id.loc[2, "qualified_tuition_expenses"] == pytest.approx(4_000.0)

        # Parent and minor are not students.
        for person_id in (1, 3):
            _assert_flags(by_id, person_id, AOTC_TRUE_FLAG_COLUMNS, False)

    def test_existing_positive_tuition_is_preserved(self):
        """Reported tuition is never overwritten by a smaller back-solve."""
        # Single filer-student who already reports positive tuition. eCPS
        # flags the member but leaves the reported tuition untouched -- no
        # back-solve, no overwrite -- even when the credit would imply a
        # smaller base.
        persons = pd.DataFrame(
            {
                "person_id": [1],
                "household_id": [10],
                "tax_unit_id": [100],
                "age": [28],
                "income": [30_000.0],
                "is_tax_unit_dependent": [0.0],
                "is_full_time_college_student": [True],
                "qualified_tuition_expenses": [2_000.0],
                "american_opportunity_credit": [1_250.0],
                "relationship_to_head": [0],
            }
        )

        by_id = _construct_by_person(persons, 2024)

        _assert_flags(by_id, 1, AOTC_TRUE_FLAG_COLUMNS, True)
        # Reported tuition is preserved, not overwritten to the $1,250 the
        # credit would otherwise back-solve to.
        assert by_id.loc[1, "qualified_tuition_expenses"] == pytest.approx(2_000.0)

    def test_all_tuition_positive_members_are_flagged(self):
        """Every tuition-reporting member of a credit unit is a student."""
        # Two members both reporting positive tuition in one credit-positive
        # tax unit. eCPS flags BOTH (it does not stop after a single student)
        # and leaves both reported tuition values untouched.
        persons = pd.DataFrame(
            {
                "person_id": [1, 2],
                "household_id": [10, 10],
                "tax_unit_id": [100, 100],
                "age": [20, 22],
                "income": [0.0, 0.0],
                "is_tax_unit_dependent": [1.0, 1.0],
                "is_full_time_college_student": [True, True],
                "qualified_tuition_expenses": [3_000.0, 3_000.0],
                "american_opportunity_credit": [2_500.0, 2_500.0],
                "relationship_to_head": [2, 2],
            }
        )

        by_id = _construct_by_person(persons, 2024)

        for person_id in (1, 2):
            _assert_flags(by_id, person_id, AOTC_TRUE_FLAG_COLUMNS, True)
            assert by_id.loc[
                person_id, "qualified_tuition_expenses"
            ] == pytest.approx(3_000.0)

    def test_no_tuition_partial_credit_backsolves_to_smaller_expenses(self):
        """A partial credit back-solves inside the first bracket."""
        # No member reports tuition; a $1,250 credit back-solves to $1,250 of
        # qualified expenses (inside the 100% first bracket) on the selected
        # full-time student.
        persons = pd.DataFrame(
            {
                "person_id": [1],
                "household_id": [10],
                "tax_unit_id": [100],
                "age": [28],
                "income": [30_000.0],
                "is_tax_unit_dependent": [0.0],
                "is_full_time_college_student": [True],
                "qualified_tuition_expenses": [0.0],
                "american_opportunity_credit": [1_250.0],
                "relationship_to_head": [0],
            }
        )

        by_id = _construct_by_person(persons, 2024)

        _assert_flags(by_id, 1, AOTC_TRUE_FLAG_COLUMNS, True)
        assert by_id.loc[1, "qualified_tuition_expenses"] == pytest.approx(1_250.0)

    def test_full_time_student_selected_when_no_member_has_tuition(self):
        """Without reported tuition the full-time student is selected."""
        # Credit present, nobody has positive tuition: selection falls to the
        # full-time college student (second priority group in eCPS).
        persons = pd.DataFrame(
            {
                "person_id": [1, 2],
                "household_id": [10, 10],
                "tax_unit_id": [100, 100],
                "age": [50, 20],
                "income": [70_000.0, 0.0],
                "is_tax_unit_dependent": [0.0, 1.0],
                "is_full_time_college_student": [False, True],
                "qualified_tuition_expenses": [0.0, 0.0],
                "american_opportunity_credit": [2_500.0, 2_500.0],
                "relationship_to_head": [0, 2],
            }
        )

        by_id = _construct_by_person(persons, 2024)

        pursuing = "is_pursuing_credential_for_american_opportunity_credit"
        assert bool(by_id.loc[2, pursuing]) is True
        assert bool(by_id.loc[1, pursuing]) is False
        # The student's tuition is set to the credit-implied $4,000.
        assert by_id.loc[2, "qualified_tuition_expenses"] == pytest.approx(4_000.0)

    def test_export_includes_all_eight_columns_with_real_values(self):
        """The full export carries all eight inputs with constructed values."""
        persons = pd.DataFrame(
            {
                "person_id": [1, 2],
                "household_id": [10, 10],
                "tax_unit_id": [100, 100],
                "age": [50, 19],
                "sex": [1, 2],
                "income": [80_000.0, 0.0],
                "is_tax_unit_dependent": [0.0, 1.0],
                "is_full_time_college_student": [False, True],
                "qualified_tuition_expenses": [0.0, 4_000.0],
                "american_opportunity_credit": [2_500.0, 2_500.0],
                "relationship_to_head": [0, 2],
            }
        )

        arrays = _export_arrays(persons, 2024)

        for column in ALL_AOTC_COLUMNS:
            assert column in arrays, column

        # The dependent student (second person row) has the True flags.
        for column in AOTC_TRUE_FLAG_COLUMNS:
            assert arrays[column]["2024"].tolist() == [False, True]
        for column in AOTC_FALSE_FLAG_COLUMNS:
            assert arrays[column]["2024"].tolist() == [False, False]
        assert arrays[AOTC_PRIOR_YEARS_COLUMN]["2024"].tolist() == [0, 0]

        # american_opportunity_credit is a PUF calculated output and must not
        # be exported (PolicyEngine-US recomputes it from these inputs).
        assert "american_opportunity_credit" not in arrays


def test_no_signal_export_falls_back_to_defaults():
    """With no AOTC signal, the contract-required columns still export."""
    pytest.importorskip("policyengine_us")
    persons = pd.DataFrame(
        {
            "person_id": [1, 2],
            "household_id": [10, 10],
            "tax_unit_id": [100, 100],
            "age": [40, 38],
            "sex": [1, 2],
            "income": [50_000.0, 45_000.0],
            "is_tax_unit_dependent": [0.0, 0.0],
            "relationship_to_head": [0, 1],
        }
    )

    arrays = _export_arrays(persons, 2024)

    for column in AOTC_TRUE_FLAG_COLUMNS + AOTC_FALSE_FLAG_COLUMNS:
        assert column in arrays
        assert arrays[column]["2024"].tolist() == [False, False]
    assert arrays[AOTC_PRIOR_YEARS_COLUMN]["2024"].tolist() == [0, 0]