Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 217 additions & 0 deletions src/microplex_us/pipelines/us.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@
from microplex_us.pipelines.pe_native_optimization import (
optimize_policyengine_us_native_loss_dataset,
)
from microplex_us.policyengine.aotc import (
qualifying_expenses_from_american_opportunity_credit,
)
from microplex_us.policyengine.comparison import (
evaluate_policyengine_us_target_set,
slice_policyengine_us_target_evaluation_report,
Expand Down Expand Up @@ -4345,6 +4348,7 @@ def build_policyengine_entity_tables(

households = self._build_policyengine_households(persons)
tax_units, persons = self._build_policyengine_tax_units(persons)
persons = self._construct_aotc_eligibility_inputs(persons)
persons = self._assign_family_and_spm_units(persons)
families = self._collapse_group_table(persons, "family_id")
spm_units = self._collapse_group_table(persons, "spm_unit_id")
Expand All @@ -4369,6 +4373,219 @@ def build_policyengine_entity_tables(
)
return tables

# AOTC eligibility-input columns populated by
# ``_construct_aotc_eligibility_inputs``, matching the per-student inputs
# written by the enhanced-CPS baseline ``_impute_aotc_eligibility_inputs``
# (PolicyEngine/policyengine-us-data, unmerged branch
# ``codex/fix-aotc-eligibility``).
#
# Boolean columns that ``_construct_aotc_eligibility_inputs`` sets to ``True``
# on every row it selects as an AOTC student; other rows keep their existing
# value (``False`` when the column did not exist yet).
_AOTC_TRUE_FLAG_COLUMNS = (
"is_pursuing_credential_for_american_opportunity_credit",
"attends_eligible_educational_institution_for_american_opportunity_credit",
"is_enrolled_at_least_half_time_for_american_opportunity_credit",
"has_american_opportunity_credit_1098_t_or_exception",
"has_american_opportunity_credit_institution_ein",
)
# Boolean columns that ``_construct_aotc_eligibility_inputs`` forces to
# ``False`` on every selected AOTC student.
_AOTC_FALSE_FLAG_COLUMNS = (
"has_completed_first_four_years_of_postsecondary_education",
"has_felony_drug_conviction",
)
# Integer column that ``_construct_aotc_eligibility_inputs`` caps at 3 for
# selected AOTC students (missing values are written as 0).
_AOTC_PRIOR_YEARS_COLUMN = "american_opportunity_credit_claimed_prior_years"

def _construct_aotc_eligibility_inputs(
    self,
    persons: pd.DataFrame,
) -> pd.DataFrame:
    """Convert the PUF AOTC signal into person eligibility inputs.

    Mirrors the enhanced-CPS baseline
    ``ExtendedCPS._impute_aotc_eligibility_inputs``
    (``PolicyEngine/policyengine-us-data``, unmerged branch
    ``codex/fix-aotc-eligibility``).

    The enhanced CPS operates on a flat ``{variable: {period: array}}``
    payload keyed by ``person_tax_unit_id``; Microplex carries the same
    signals (``american_opportunity_credit``,
    ``qualified_tuition_expenses``, ``is_full_time_college_student``,
    ``is_tax_unit_dependent``) as columns on the person table keyed by
    ``tax_unit_id`` once ``_build_policyengine_tax_units`` has assigned
    authoritative tax units, so the per-tax-unit back-solve is the same
    algorithm applied to a single DataFrame.

    Driven by the PUF-imputed ``american_opportunity_credit`` (PUF
    ``E87521``; see ``data_sources/puf.py`` / ``manifests/puf.json``). For
    each tax unit with positive credit the enhanced-CPS rule applies: if
    any member already reports positive qualified tuition, every such
    member is marked an AOTC student and the reported tuition is left
    unchanged; otherwise a single student is selected by priority
    (full-time college student -> tax-unit dependent -> any member) and
    that student's qualified tuition is back-solved to the minimum amount
    reproducing the unit's credit under PolicyEngine-US. With no credit
    column it falls back to the enhanced-CPS
    ``aotc_student = qualified_tuition_expenses > 0`` rule. The selected
    students receive the five factual eligibility flags as ``True``,
    ``has_completed_first_four_years_of_postsecondary_education`` and
    ``has_felony_drug_conviction`` as ``False``, and
    ``american_opportunity_credit_claimed_prior_years`` clamped to a
    maximum of 3. ``american_opportunity_credit`` is a PUF
    calculated-tax output (see ``microdata_roles.py``) and is not itself
    exported; PolicyEngine-US recomputes the credit from these inputs.

    Column handling:

    * ``persons`` is returned unchanged (same object) when it is ``None``
      or empty, when it has no ``tax_unit_id`` column, when it has neither
      a credit nor a tuition column, when the credit column holds no
      positive value, or when no student ends up selected.
    * When tuition had to be back-solved and the table has no
      ``qualified_tuition_expenses`` column, the column is created so the
      flagged students actually carry the expenses implied by the credit.
    * Rows whose ``tax_unit_id`` is missing are never grouped into a unit.

    Args:
        persons: Person-level table; one row per person.

    Returns:
        A copy of ``persons`` with the AOTC eligibility columns (and, where
        applicable, ``qualified_tuition_expenses``) written, or ``persons``
        itself in the unchanged cases listed above.
    """
    if persons is None or persons.empty:
        return persons
    if "tax_unit_id" not in persons.columns:
        return persons

    time_period = int(self.config.policyengine_dataset_year or 2024)
    has_tuition_column = "qualified_tuition_expenses" in persons.columns
    has_credit_column = "american_opportunity_credit" in persons.columns
    if not (has_tuition_column or has_credit_column):
        # Neither a tuition signal nor a credit to back-solve from: there is
        # no student population to mark.
        return persons

    result = persons.copy()
    n = len(result)

    def numeric_or_zeros(column):
        # Float view of a column; non-numeric / missing entries become 0.
        if column not in result.columns:
            return np.zeros(n, dtype=float)
        return (
            pd.to_numeric(result[column], errors="coerce")
            .fillna(0.0)
            .to_numpy(dtype=float, copy=True)
        )

    def indicator_or_false(column):
        # Boolean view of a numeric-coded indicator column.
        if column not in result.columns:
            return np.zeros(n, dtype=bool)
        return (
            pd.to_numeric(result[column], errors="coerce")
            .fillna(0)
            .astype(bool)
            .to_numpy()
        )

    def existing_flag(column):
        # Writable copy of an existing flag column, or all-False.
        if column not in result.columns:
            return np.zeros(n, dtype=bool)
        return result[column].fillna(False).astype(bool).to_numpy().copy()

    tuition = numeric_or_zeros("qualified_tuition_expenses")
    aotc_student = np.zeros(n, dtype=bool)
    tuition_back_solved = False

    if has_credit_column:
        credit = numeric_or_zeros("american_opportunity_credit")
        positive_credit = credit > 0
        if not positive_credit.any():
            # No positive credit anywhere: nothing to construct. The
            # enhanced CPS returns early here without writing inputs.
            return persons

        full_time = indicator_or_false("is_full_time_college_student")
        dependent = indicator_or_false("is_tax_unit_dependent")
        person_tax_unit_ids = result["tax_unit_id"].to_numpy()

        # Group the rows of positive-credit tax units in one pass instead of
        # rescanning the whole person table once per unit.
        positive_unit_ids = pd.unique(person_tax_unit_ids[positive_credit])
        candidate_rows = np.flatnonzero(
            pd.Series(person_tax_unit_ids).isin(positive_unit_ids).to_numpy()
        )
        members_by_unit = pd.Series(candidate_rows).groupby(
            person_tax_unit_ids[candidate_rows],
            sort=False,
        )
        for _, member_rows in members_by_unit:
            member_indices = member_rows.to_numpy()

            # eCPS rule: if any member already reports positive qualified
            # tuition, every such member is an AOTC student and the reported
            # tuition is left untouched (no back-solve, no rewrite).
            reported = member_indices[tuition[member_indices] > 0]
            if reported.size > 0:
                aotc_student[reported] = True
                continue

            # Otherwise select a single student by the eCPS priority
            # (full-time college student -> tax-unit dependent -> any
            # member), taking the first matching row of the unit.
            preferred = member_indices[full_time[member_indices]]
            if preferred.size == 0:
                preferred = member_indices[dependent[member_indices]]
            if preferred.size == 0:
                preferred = member_indices
            selected = preferred[0]

            # The credit rides on the person table repeated across members;
            # the maximum guards against zero-filled member rows.
            unit_credit = float(credit[member_indices].max())
            aotc_student[selected] = True
            tuition[selected] = max(
                tuition[selected],
                qualifying_expenses_from_american_opportunity_credit(
                    unit_credit,
                    time_period,
                ),
            )
            tuition_back_solved = True
    else:
        aotc_student = tuition > 0

    if not aotc_student.any():
        return persons

    # Five factual eligibility flags -> True for selected students.
    for column in self._AOTC_TRUE_FLAG_COLUMNS:
        values = existing_flag(column)
        values[aotc_student] = True
        result[column] = values

    # has_completed_first_four_years / has_felony_drug_conviction -> False.
    for column in self._AOTC_FALSE_FLAG_COLUMNS:
        values = existing_flag(column)
        values[aotc_student] = False
        result[column] = values

    # Prior-year claims clamped to the 4-year (max 3 prior) AOTC limit.
    prior_column = self._AOTC_PRIOR_YEARS_COLUMN
    if prior_column in result.columns:
        prior_years = (
            pd.to_numeric(result[prior_column], errors="coerce")
            .fillna(0)
            .astype(np.int64)
            .to_numpy()
            .copy()
        )
    else:
        prior_years = np.zeros(n, dtype=np.int64)
    prior_years[aotc_student] = np.minimum(prior_years[aotc_student], 3)
    result[prior_column] = prior_years

    # Write the per-student tuition so the exported
    # ``qualified_tuition_expenses`` reproduces the PUF credit under
    # PolicyEngine-US. The column is also created when it was absent but a
    # back-solve happened; otherwise the flagged students would be exported
    # with eligibility flags and no expenses.
    if has_tuition_column or tuition_back_solved:
        result["qualified_tuition_expenses"] = tuition

    return result

def export_policyengine_dataset(
self,
result: USMicroplexBuildResult,
Expand Down
85 changes: 85 additions & 0 deletions src/microplex_us/policyengine/aotc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""American Opportunity Tax Credit (AOTC) helpers backed by PolicyEngine-US.

Mirrors the credit-to-expenses inverse in the enhanced-CPS baseline (the
``_aotc_qualifying_expenses_from_credit`` staticmethod of ``ExtendedCPS`` in
``PolicyEngine/policyengine-us-data``, unmerged branch
``codex/fix-aotc-eligibility``) so the Microplex AOTC eligibility-input
construction back-solves per-student qualified expenses the same way. Where
the enhanced CPS hard-codes the AOTC bracket constants, these functions read
only the published
``gov.irs.credits.education.american_opportunity_credit.amount`` marginal
schedule, so they stay in lock-step with PolicyEngine-US parameter updates.
"""

from __future__ import annotations

import math
from functools import lru_cache

import numpy as np


@lru_cache(maxsize=16)
def get_american_opportunity_credit_amount_scale(year: int):
    """Look up the PolicyEngine-US AOTC ``amount`` parameter for ``year``.

    The parameter is evaluated at 1 January of ``year``. ``policyengine_us``
    is imported at call time, and results are memoised per ``year`` (up to
    16 distinct years).
    """
    from policyengine_us import CountryTaxBenefitSystem

    instant = f"{year}-01-01"
    aotc_parameters = (
        CountryTaxBenefitSystem()
        .parameters.gov.irs.credits.education.american_opportunity_credit
    )
    return aotc_parameters.amount(instant)


def qualifying_expenses_from_american_opportunity_credit(
    credit: float,
    year: int,
) -> float:
    """Back-solve the smallest expense base yielding ``credit`` under PE-US.

    Fetches the AOTC amount scale for ``year`` and inverts it for ``credit``.
    """
    return _minimum_base_for_marginal_amount(
        credit,
        get_american_opportunity_credit_amount_scale(year),
    )


def maximum_american_opportunity_credit_per_student(year: int) -> float:
    """Evaluate the PE-US AOTC amount scale at its highest threshold.

    Returns ``0.0`` when the scale for ``year`` has no thresholds.
    """
    scale = get_american_opportunity_credit_amount_scale(year)
    if not len(scale.thresholds):
        return 0.0
    top_of_schedule = np.array([max(scale.thresholds)], dtype=float)
    credit_at_top = scale.calc(top_of_schedule)
    return float(credit_at_top[0])


def _minimum_base_for_marginal_amount(amount: float, scale) -> float:
    """Invert a marginal amount schedule using the schedule brackets.

    Walks the brackets in ascending threshold order, accumulating the amount
    each bracket can generate, and returns the smallest base at which the
    cumulative amount reaches ``amount``.

    Args:
        amount: Target amount. Non-positive and NaN values mean "no amount".
        scale: Object exposing parallel ``thresholds`` and ``rates``
            sequences (bracket lower bounds and their marginal rates); they
            need not be pre-sorted.

    Returns:
        ``0.0`` when ``amount`` is non-positive or NaN, or when the schedule
        has no brackets; otherwise the minimum base producing ``amount``. If
        the schedule can never produce ``amount`` the highest threshold is
        returned.
    """
    amount = float(amount)
    # NaN compares False against everything, so it must be rejected here;
    # otherwise it would fall through every bracket and come back as the
    # terminal threshold, i.e. the maximum base.
    if math.isnan(amount) or amount <= 0:
        return 0.0

    thresholds = np.asarray(scale.thresholds, dtype=float)
    rates = np.asarray(scale.rates, dtype=float)
    if thresholds.size == 0:
        return 0.0

    order = np.argsort(thresholds)
    thresholds = thresholds[order]
    rates = rates[order]
    # Each bracket ends where the next begins; the last one is open-ended.
    uppers = np.append(thresholds[1:], math.inf)

    accrued = 0.0
    for lower, upper, rate in zip(
        thresholds.tolist(), uppers.tolist(), rates.tolist()
    ):
        if rate <= 0:
            # A bracket with no positive rate contributes nothing.
            continue
        remaining = amount - accrued
        bracket_amount = (upper - lower) * rate
        if remaining <= bracket_amount:
            return lower + remaining / rate
        accrued += bracket_amount

    # The target exceeds everything the schedule can generate.
    return float(thresholds[-1])
29 changes: 29 additions & 0 deletions src/microplex_us/policyengine/us.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,21 @@ class PolicyEngineUSVariableMaterializationResult:

SAFE_POLICYENGINE_US_EXPORT_VARIABLES: set[str] = {
"age",
# American Opportunity Tax Credit (AOTC) factual eligibility inputs,
# populated per tax unit by
# ``USMicroplexPipeline._construct_aotc_eligibility_inputs`` from the
# PUF ``american_opportunity_credit`` signal, matching the enhanced-CPS
# baseline ``_impute_aotc_eligibility_inputs``
# (PolicyEngine/policyengine-us-data, unmerged branch
# ``codex/fix-aotc-eligibility``).
"is_pursuing_credential_for_american_opportunity_credit",
"attends_eligible_educational_institution_for_american_opportunity_credit",
"is_enrolled_at_least_half_time_for_american_opportunity_credit",
"has_american_opportunity_credit_1098_t_or_exception",
"has_american_opportunity_credit_institution_ein",
"has_completed_first_four_years_of_postsecondary_education",
"has_felony_drug_conviction",
"american_opportunity_credit_claimed_prior_years",
"alimony_expense",
"alimony_income",
"amt_foreign_tax_credit",
Expand Down Expand Up @@ -412,6 +427,20 @@ class PolicyEngineUSVariableMaterializationResult:

POLICYENGINE_US_EXPORT_DEFAULTS: dict[str, Any] = {
"auto_loan_balance": 0.0,
# American Opportunity Tax Credit factual eligibility inputs. The
# per-tax-unit construction in
# ``USMicroplexPipeline._construct_aotc_eligibility_inputs`` writes the
# real values for selected students; these defaults guarantee the
# contract-required columns always export (False / 0) for the
# non-student majority and for builds with no positive AOTC signal.
"is_pursuing_credential_for_american_opportunity_credit": False,
"attends_eligible_educational_institution_for_american_opportunity_credit": False,
"is_enrolled_at_least_half_time_for_american_opportunity_credit": False,
"has_american_opportunity_credit_1098_t_or_exception": False,
"has_american_opportunity_credit_institution_ein": False,
"has_completed_first_four_years_of_postsecondary_education": False,
"has_felony_drug_conviction": False,
"american_opportunity_credit_claimed_prior_years": 0,
"auto_loan_interest": 0.0,
# SCF net-worth component leaves (G1): positive-magnitude balances,
# default 0 when the SCF donor leaves a row without that component.
Expand Down
Loading
Loading