PolicyEngine · MaxGhenis · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026
diff --git a/src/microplex_us/pipelines/us.py b/src/microplex_us/pipelines/us.py
@@ -66,6 +66,9 @@
 from microplex_us.pipelines.pe_native_optimization import (
     optimize_policyengine_us_native_loss_dataset,
 )
+from microplex_us.policyengine.aotc import (
+    qualifying_expenses_from_american_opportunity_credit,
+)
 from microplex_us.policyengine.comparison import (
     evaluate_policyengine_us_target_set,
     slice_policyengine_us_target_evaluation_report,
@@ -4345,6 +4348,7 @@ def build_policyengine_entity_tables(
 
         households = self._build_policyengine_households(persons)
         tax_units, persons = self._build_policyengine_tax_units(persons)
+        persons = self._construct_aotc_eligibility_inputs(persons)
         persons = self._assign_family_and_spm_units(persons)
         families = self._collapse_group_table(persons, "family_id")
         spm_units = self._collapse_group_table(persons, "spm_unit_id")
@@ -4369,6 +4373,219 @@ def build_policyengine_entity_tables(
         )
         return tables
 
+    # Person columns written by ``_construct_aotc_eligibility_inputs``,
+    # mirroring enhanced-CPS ``_impute_aotc_eligibility_inputs``
+    # (policyengine-us-data, unmerged ``codex/fix-aotc-eligibility``). For each
+    # selected student: TRUE flags are set, FALSE flags are cleared, and the
+    # prior-years count is capped at 3.
+    _AOTC_TRUE_FLAG_COLUMNS = (
+        "is_pursuing_credential_for_american_opportunity_credit",
+        "attends_eligible_educational_institution_for_american_opportunity_credit",
+        "is_enrolled_at_least_half_time_for_american_opportunity_credit",
+        "has_american_opportunity_credit_1098_t_or_exception",
+        "has_american_opportunity_credit_institution_ein",
+    )
+    _AOTC_FALSE_FLAG_COLUMNS = (
+        "has_completed_first_four_years_of_postsecondary_education",
+        "has_felony_drug_conviction",
+    )
+    _AOTC_PRIOR_YEARS_COLUMN = "american_opportunity_credit_claimed_prior_years"
+
+    def _construct_aotc_eligibility_inputs(
+        self,
+        persons: pd.DataFrame,
+    ) -> pd.DataFrame:
+        """Convert the PUF AOTC signal into person eligibility inputs.
+
+        Mirrors the enhanced-CPS baseline
+        ``ExtendedCPS._impute_aotc_eligibility_inputs``
+        (``PolicyEngine/policyengine-us-data``, unmerged branch
+        ``codex/fix-aotc-eligibility``).
+
+        The enhanced CPS operates on a flat ``{variable: {period: array}}``
+        payload keyed by ``person_tax_unit_id``; Microplex carries the same
+        signals (``american_opportunity_credit``,
+        ``qualified_tuition_expenses``, ``is_full_time_college_student``,
+        ``is_tax_unit_dependent``) as columns on the person table keyed by
+        ``tax_unit_id`` once ``_build_policyengine_tax_units`` has assigned
+        authoritative tax units, so the per-tax-unit back-solve is the same
+        algorithm applied to a single DataFrame.
+
+        Driven by the PUF-imputed ``american_opportunity_credit`` (PUF
+        ``E87521``; see ``data_sources/puf.py`` / ``manifests/puf.json``). For
+        each tax unit with positive credit the enhanced-CPS rule applies: if
+        any member already reports positive qualified tuition, every such
+        member is marked an AOTC student and the reported tuition is left
+        unchanged; otherwise a single student is selected by priority
+        (full-time college student -> tax-unit dependent -> any member) and
+        that student's qualified tuition is back-solved to the minimum amount
+        reproducing the unit's credit under PolicyEngine-US. With no credit
+        signal it falls back to the enhanced-CPS
+        ``aotc_student = qualified_tuition_expenses > 0`` rule. The selected
+        students receive the five factual eligibility flags as ``True``,
+        ``has_completed_first_four_years_of_postsecondary_education`` and
+        ``has_felony_drug_conviction`` as ``False`` (constants the enhanced
+        CPS also hard-codes), and
+        ``american_opportunity_credit_claimed_prior_years`` clamped to a
+        maximum of 3. ``american_opportunity_credit`` is a PUF
+        calculated-tax output (see ``microdata_roles.py``) and is not itself
+        exported; PolicyEngine-US recomputes the credit from these inputs.
+
+        The resulting tuition is always written to
+        ``qualified_tuition_expenses``, creating the column when the input
+        carried only the credit signal. Returns ``persons`` itself, untouched,
+        when there is nothing to construct; otherwise a modified copy.
+        """
+        if persons is None or persons.empty:
+            return persons
+        if "tax_unit_id" not in persons.columns:
+            return persons
+        has_tuition_column = "qualified_tuition_expenses" in persons.columns
+        has_credit_column = "american_opportunity_credit" in persons.columns
+        if not (has_tuition_column or has_credit_column):
+            # Neither a tuition nor a credit signal: no students to mark.
+            return persons
+
+        result = persons.copy()
+        n = len(result)
+        time_period = int(self.config.policyengine_dataset_year or 2024)
+
+        person_tax_unit_ids = result["tax_unit_id"].to_numpy()
+        tuition = (
+            pd.to_numeric(
+                result["qualified_tuition_expenses"],
+                errors="coerce",
+            )
+            .fillna(0.0)
+            .to_numpy(dtype=float, copy=True)
+            if has_tuition_column
+            else np.zeros(n, dtype=float)
+        )
+        credit = (
+            pd.to_numeric(
+                result["american_opportunity_credit"],
+                errors="coerce",
+            )
+            .fillna(0.0)
+            .to_numpy(dtype=float)
+            if has_credit_column
+            else None
+        )
+        full_time = (
+            pd.to_numeric(result["is_full_time_college_student"], errors="coerce")
+            .fillna(0)
+            .astype(bool)
+            .to_numpy()
+            if "is_full_time_college_student" in result.columns
+            else np.zeros(n, dtype=bool)
+        )
+        dependent = (
+            pd.to_numeric(result["is_tax_unit_dependent"], errors="coerce")
+            .fillna(0)
+            .astype(bool)
+            .to_numpy()
+            if "is_tax_unit_dependent" in result.columns
+            else np.zeros(n, dtype=bool)
+        )
+
+        aotc_student = np.zeros(n, dtype=bool)
+
+        if credit is not None:
+            if not (credit > 0).any():
+                # No positive credit anywhere: nothing to construct. The
+                # enhanced CPS returns early here without writing inputs.
+                return persons
+
+            # ``american_opportunity_credit`` rides on the person table as the
+            # per-tax-unit value repeated across members; collapse to one
+            # value per tax unit (the maximum guards against any per-member
+            # zero-fill on non-filer rows). Member positions are gathered in
+            # the same pass so resolving a unit never rescans the whole table.
+            credit_by_tax_unit: dict[Any, float] = {}
+            members_by_tax_unit: dict[Any, list[int]] = {}
+            for position, (tax_unit_id, member_credit) in enumerate(
+                zip(person_tax_unit_ids, credit)
+            ):
+                members_by_tax_unit.setdefault(tax_unit_id, []).append(position)
+                if member_credit > credit_by_tax_unit.get(tax_unit_id, 0.0):
+                    credit_by_tax_unit[tax_unit_id] = float(member_credit)
+
+            # Only units with positive credit ever enter ``credit_by_tax_unit``.
+            for tax_unit_id, unit_credit in credit_by_tax_unit.items():
+                if pd.isna(tax_unit_id):
+                    # A missing id identifies no tax unit; leave those rows alone.
+                    continue
+                member_indices = np.asarray(members_by_tax_unit[tax_unit_id])
+
+                # eCPS rule: if any member already reports positive qualified
+                # tuition, every such member is an AOTC student and the reported
+                # tuition is left untouched (no back-solve, no rewrite).
+                tuition_indices = member_indices[tuition[member_indices] > 0]
+                if tuition_indices.size > 0:
+                    aotc_student[tuition_indices] = True
+                    continue
+
+                # Otherwise select a single student by the eCPS priority
+                # (full-time college student -> tax-unit dependent -> any
+                # member) and back-solve the minimum qualified tuition that
+                # reproduces the unit's credit under PolicyEngine-US.
+                preferred = member_indices[full_time[member_indices]]
+                if preferred.size == 0:
+                    preferred = member_indices[dependent[member_indices]]
+                if preferred.size == 0:
+                    preferred = member_indices
+                selected = preferred[0]
+                aotc_student[selected] = True
+                tuition[selected] = max(
+                    tuition[selected],
+                    qualifying_expenses_from_american_opportunity_credit(
+                        unit_credit,
+                        time_period,
+                    ),
+                )
+        else:
+            aotc_student = tuition > 0
+            if not aotc_student.any():
+                return persons
+
+        # Five factual eligibility flags -> True for selected students;
+        # has_completed_first_four_years / has_felony_drug_conviction -> False.
+        for columns, flag_value in (
+            (self._AOTC_TRUE_FLAG_COLUMNS, True),
+            (self._AOTC_FALSE_FLAG_COLUMNS, False),
+        ):
+            for column in columns:
+                values = (
+                    result[column].fillna(False).astype(bool).to_numpy().copy()
+                    if column in result.columns
+                    else np.zeros(n, dtype=bool)
+                )
+                values[aotc_student] = flag_value
+                result[column] = values
+
+        # Prior-year claims clamped to the 4-year (max 3 prior) AOTC limit.
+        prior_years = (
+            pd.to_numeric(result[self._AOTC_PRIOR_YEARS_COLUMN], errors="coerce")
+            .fillna(0)
+            .astype(np.int64)
+            .to_numpy()
+            .copy()
+            if self._AOTC_PRIOR_YEARS_COLUMN in result.columns
+            else np.zeros(n, dtype=np.int64)
+        )
+        prior_years[aotc_student] = np.minimum(prior_years[aotc_student], 3)
+        result[self._AOTC_PRIOR_YEARS_COLUMN] = prior_years
+
+        # Write the back-solved per-student tuition the credit implies, so the
+        # exported ``qualified_tuition_expenses`` reproduces the PUF credit
+        # under PolicyEngine-US (enhanced CPS does the same). The column is
+        # created if the input lacked it: in that case the tuition comes purely
+        # from the credit back-solve, and dropping it would leave the flagged
+        # students with no expenses to regenerate the credit from.
+        result["qualified_tuition_expenses"] = tuition
+
+        return result
+
     def export_policyengine_dataset(
         self,
         result: USMicroplexBuildResult,

diff --git a/src/microplex_us/policyengine/aotc.py b/src/microplex_us/policyengine/aotc.py
@@ -0,0 +1,85 @@
+"""American Opportunity Tax Credit (AOTC) helpers backed by PolicyEngine-US.
+
+Mirrors the credit-to-expenses inverse in the enhanced-CPS baseline (the
+``_aotc_qualifying_expenses_from_credit`` staticmethod of ``ExtendedCPS`` in
+``PolicyEngine/policyengine-us-data``, unmerged branch
+``codex/fix-aotc-eligibility``) so the Microplex AOTC eligibility-input
+construction back-solves per-student qualified expenses the same way. Where
+the enhanced CPS hard-codes the AOTC bracket constants, these functions read
+only the published
+``gov.irs.credits.education.american_opportunity_credit.amount`` marginal
+schedule, so they stay in lock-step with PolicyEngine-US parameter updates.
+"""
+
+from __future__ import annotations
+
+import math
+from functools import lru_cache
+
+import numpy as np
+
+
+@lru_cache(maxsize=16)
+def get_american_opportunity_credit_amount_scale(year: int):
+    """Return the PolicyEngine-US AOTC amount scale as of January 1 of ``year``."""
+    from policyengine_us import CountryTaxBenefitSystem
+
+    education = CountryTaxBenefitSystem().parameters.gov.irs.credits.education
+    instant = f"{year}-01-01"
+    return education.american_opportunity_credit.amount(instant)
+
+
+def qualifying_expenses_from_american_opportunity_credit(
+    credit: float,
+    year: int,
+) -> float:
+    """Back-solve the minimum expenses that generate ``credit`` under PE-US."""
+    scale_for_year = get_american_opportunity_credit_amount_scale(year)
+    return _minimum_base_for_marginal_amount(credit, scale_for_year)
+
+
+def maximum_american_opportunity_credit_per_student(year: int) -> float:
+    """Return the largest AOTC a single student can generate under PE-US."""
+    schedule = get_american_opportunity_credit_amount_scale(year)
+    if len(schedule.thresholds) == 0:
+        return 0.0
+    top_threshold = np.array([max(schedule.thresholds)], dtype=float)
+    return float(schedule.calc(top_threshold)[0])
+
+
+def _minimum_base_for_marginal_amount(amount: float, scale) -> float:
+    """Invert a marginal amount schedule using the schedule brackets.
+
+    Returns the smallest base whose accumulated marginal amount reaches
+    ``amount``: ``0.0`` for a non-positive or NaN ``amount`` or an empty
+    schedule, and the top threshold when ``amount`` is out of reach.
+    """
+    amount = float(amount)
+    if not amount > 0:  # also true for NaN, which must not hit the cap below
+        return 0.0
+
+    thresholds = np.asarray(scale.thresholds, dtype=float)
+    rates = np.asarray(scale.rates, dtype=float)
+    if thresholds.size == 0:
+        return 0.0
+
+    order = np.argsort(thresholds)
+    thresholds = thresholds[order]
+    rates = rates[order]
+
+    accrued = 0.0
+    for index, (lower, rate) in enumerate(zip(thresholds, rates)):
+        lower = float(lower)
+        rate = float(rate)
+        if rate <= 0:
+            continue  # a flat bracket cannot raise the amount
+        upper = (
+            float(thresholds[index + 1]) if index + 1 < thresholds.size else math.inf
+        )
+        # Full contribution of this bracket (infinite when it is open-ended).
+        bracket_amount = (upper - lower) * rate
+        if amount <= accrued + bracket_amount:
+            return lower + (amount - accrued) / rate
+        accrued += bracket_amount
+
+    return float(thresholds[-1])
diff --git a/src/microplex_us/policyengine/us.py b/src/microplex_us/policyengine/us.py
@@ -272,6 +272,21 @@ class PolicyEngineUSVariableMaterializationResult:
 
 SAFE_POLICYENGINE_US_EXPORT_VARIABLES: set[str] = {
     "age",
+    # American Opportunity Tax Credit (AOTC) factual eligibility inputs,
+    # populated per tax unit by
+    # ``USMicroplexPipeline._construct_aotc_eligibility_inputs`` from the
+    # PUF ``american_opportunity_credit`` signal, matching the enhanced-CPS
+    # baseline ``_impute_aotc_eligibility_inputs``
+    # (PolicyEngine/policyengine-us-data, unmerged branch
+    # ``codex/fix-aotc-eligibility``).
+    "is_pursuing_credential_for_american_opportunity_credit",
+    "attends_eligible_educational_institution_for_american_opportunity_credit",
+    "is_enrolled_at_least_half_time_for_american_opportunity_credit",
+    "has_american_opportunity_credit_1098_t_or_exception",
+    "has_american_opportunity_credit_institution_ein",
+    "has_completed_first_four_years_of_postsecondary_education",
+    "has_felony_drug_conviction",
+    "american_opportunity_credit_claimed_prior_years",
     "alimony_expense",
     "alimony_income",
     "amt_foreign_tax_credit",
@@ -412,6 +427,20 @@ class PolicyEngineUSVariableMaterializationResult:
 
 POLICYENGINE_US_EXPORT_DEFAULTS: dict[str, Any] = {
     "auto_loan_balance": 0.0,
+    # American Opportunity Tax Credit factual eligibility inputs. The
+    # per-tax-unit construction in
+    # ``USMicroplexPipeline._construct_aotc_eligibility_inputs`` writes the
+    # real values for selected students; these defaults guarantee the
+    # contract-required columns always export (False / 0) for the
+    # non-student majority and for builds with no positive AOTC signal.
+    "is_pursuing_credential_for_american_opportunity_credit": False,
+    "attends_eligible_educational_institution_for_american_opportunity_credit": False,
+    "is_enrolled_at_least_half_time_for_american_opportunity_credit": False,
+    "has_american_opportunity_credit_1098_t_or_exception": False,
+    "has_american_opportunity_credit_institution_ein": False,
+    "has_completed_first_four_years_of_postsecondary_education": False,
+    "has_felony_drug_conviction": False,
+    "american_opportunity_credit_claimed_prior_years": 0,
     "auto_loan_interest": 0.0,
     # SCF net-worth component leaves (G1): positive-magnitude balances,
     # default 0 when the SCF donor leaves a row without that component.