diff --git a/CLAUDE.md b/CLAUDE.md index e44660d7..d193e6a7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -161,6 +161,14 @@ postgresql+pg8000://{user}:{password}@{host}:{port}/{database} - **Legacy Migration**: Transfer scripts convert from UTM (SRID 26913) to WGS84 - **GeoAlchemy2**: Used for SQLAlchemy ↔ PostGIS integration +### Refine UI list filters + +Ocotillo UI passes DataGrid filters as repeated query parameters named `filter`, each containing JSON `{ "field", "operator", "value" }`. Association-backed columns (`contacts` on wells, `things` on contacts) are **virtual**: they map to EXISTS subqueries in `services/query_helper.py`, not to `ILIKE` on an ORM proxy. + +Sorting the wells list by **Monitoring status** or **Well status** uses SQL subqueries on `StatusHistory`, not Python `@property` accessors, because `ORDER BY` must see database expressions. + +Read **`docs/refine-json-filters-and-virtual-fields.md`** before changing filter behavior or adding virtual fields. + ### Error Handling All custom exceptions should use `PydanticStyleException` for consistent API error responses: diff --git a/README.md b/README.md index d9a42a32..90ca4bc9 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,8 @@ supports research, field operations, and public data delivery for the Bureau of - 🌐 RESTful API for managing sample location data - 🗺️ Native GeoJSON support via PostGIS -- 🔎 Filtering by location, date, type, and more +- 🔎 Filtering by location, date, type, and more + Refine-powered list pages send JSON filters via repeated `filter=` query params. Virtual fields (`contacts` on things, `things` on contacts) implement association search in SQL. Background and rationale: [docs/refine-json-filters-and-virtual-fields.md](docs/refine-json-filters-and-virtual-fields.md). - 📦 PostgreSQL + PostGIS database backend - 🔐 Optional authentication and role-based access - 🧾 Interactive API documentation via OpenAPI and ReDoc diff --git a/api/contact.py b/api/contact.py index f5d46c03..7e294ab4 100644 --- a/api/contact.py +++ b/api/contact.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== +from typing import Annotated + from fastapi import APIRouter, Query from sqlalchemy import select from starlette import status @@ -469,15 +471,23 @@ def get_contacts( user: amp_viewer_dependency, sort: str = None, order: str = None, - filter_: str = Query(alias="filter", default=None), + filter_params: Annotated[list[str] | None, Query(alias="filter")] = None, thing_id: int | None = None, ) -> CustomPage[ContactResponse]: - """ - Retrieve all contacts from the database. - :param session: - :return: - """ - return get_db_contacts(session, thing_id, sort, order, filter_) + """Paginated contacts. + + **Filtering.** ``filter_params`` collects every ``filter=`` query parameter. + Refine sends one JSON object per active DataGrid column (AND semantics). + Virtual field ``things`` filters by linked monitoring site ``Thing.name``. + See docs/refine-json-filters-and-virtual-fields.md and ``get_db_contacts``. + """ + return get_db_contacts( + session, + thing_id, + sort, + order, + filters=filter_params, + ) @router.get("/{contact_id}", summary="Get contact by ID") diff --git a/docs/refine-json-filters-and-virtual-fields.md b/docs/refine-json-filters-and-virtual-fields.md new file mode 100644 index 00000000..e00a7c20 --- /dev/null +++ b/docs/refine-json-filters-and-virtual-fields.md @@ -0,0 +1,122 @@ +# Refine JSON filters and virtual association fields + +This document explains why list endpoints accept repeated `filter` query parameters and why some filter `field` names bypass normal SQL columns. It complements the shorter comments at the top of [`services/query_helper.py`](../services/query_helper.py). + +## Who this is for + +Anyone changing list APIs that back the Ocotillo UI (Refine + DataGrid), adding new filterable columns that are backed by associations, or debugging **400 Invalid JSON**, **422 missing keys**, or **wrong row counts** after filtering. + +## What problem we are solving + +### 1. UI sends many logical filters as many HTTP parameters + +The Ocotillo UI uses Refine’s `getList`, which builds query strings like: + +```http +GET /contact?page=1&size=50&filter={"field":"things","operator":"contains","value":"DE"} +``` + +Each active DataGrid filter becomes **one** `filter=...` entry. FastAPI exposes that as **`list[str]`** when declared as `Query(alias="filter")`. Every JSON object is applied in sequence (see below). Older endpoints that accepted **only one** string `filter` silently dropped extra filters when the UI added a second column filter, which looked like bugs (totals vs rows mismatch). + +So we standardize list routes on **`filter_params: list[str] | None`**, merged inside `order_sort_filter` instead of splitting behavior between “single filter string” vs “many”. + +### 2. Some DataGrid columns are not database columns + +The Contacts list shows **Associated Sites** by reading related `Thing` rows and joining names in the UI layer. On the backend, **`Contact.things`** is an **association proxy**, not a `String` column you can **`ILIKE`** in SQL. + +If `_apply_json_filter_clause` tries `getattr(Contact, "things")` and treats it like a column, filtering fails at runtime (**400**) because: + +- Proxies or relationships do not behave like **`Column`** objects for `ilike`. +- Even if they did, “contains” semantically means “match text across **zero to many** related sites”, which requires a join or subquery, not one scalar cell. + +Therefore we declare **virtual filter fields**: string names agreed with the UI (`field` in JSON) that map to dedicated Python helpers. Those helpers express the intended semantics in SQL. + +### 3. Inverse symmetry: wells vs contacts + +Associations are stored in **`ThingContactAssociation`** (`thing_id`, `contact_id`). + +| List resource | Virtual `field` | Meaning | Implementation sketch | +|---------------|------------------|---------|------------------------| +| Thing (wells) | `contacts` | “Does **any** linked contact’s **name** match?” | EXISTS over `ThingContactAssociation` joining `Contact`, predicate on **`Contact.name`** | +| Contact | `things` | “Does **any** linked monitoring site (**thing**) **name** match?” | EXISTS over **`ThingContactAssociation`** joining **`Thing`**, predicate on **`Thing.name`** | + +We keep naming aligned with ORM accessors (`Thing.contacts`-style summaries in API responses use **contacts**, and **`Contact`** side uses **`things`** for parity with the association proxy). + +### 4. Why `EXISTS (SELECT 1 …)` instead of joining in the outer query? + +You could inner join `ThingContactAssociation` + `Thing` onto `Contact` and add `ILIKE` on **`Thing.name`**. That duplicates parent rows when one contact ties to multiple sites. Pagination counts and `LIMIT`/`OFFSET` then drift from what the UI expects (**one row per contact**). + +`EXISTS` preserves one row per parent entity while still answering “**any** related row matches”. For negation (**ncontains**, **null**, **ne** where appropriate), we negate the **`EXISTS`** rather than multiplying rows. + +### 5. Which text we search + +For these virtual association filters we intentionally filter on **`Contact.name`** (things → contacts direction) and **`Thing.name`** (contacts → things direction). + +We do **not** search **`organization`**, **`role`**, **`contact_type`**, coordinates, alternative IDs in the association table, or concatenated strings that the UI displays. If product needs those, extend the helpers with explicit, documented predicates so behavior stays predictable for API clients. + +### 6. Operators and DataGrid wording + +Operators come from Refine/MUI conventions: + +- **contains**, **startswith**, **endswith**, **eq**, **ne**, **ncontains**: text predicates on **`name`** inside the **`EXISTS`** branch. +- **null** / **nnull**: empty vs non-empty association (no **`ILIKE`** value semantics). **`value`** may still appear in JSON (for example **`true`**); handler logic follows **operator**. + +## Sorting non-column list fields (virtual `sort` parameters) + +The DataGrid passes **`sort`** and **`order`** the same way as filters. **`order_sort_filter`** must never call **`getattr(Thing, sort).asc()`** for: + +- Python **`@property`** attributes (no mapped column). +- **`AssociationProxy`** collections (**`contacts`**, **`aquifers`** in the API payload). + +Those paths previously raised **500**. Virtual sorts are implemented in **`_apply_thing_virtual_sort`** and **`_apply_contact_virtual_sort`** in **`services/query_helper.py`**. + +### Thing (`GET /thing/...` lists) + +| `sort` value | SQL idea (tied with `Thing.id`) | +|--------------|----------------------------------| +| `monitoring_status`, `well_status`, `datalogger_suitability_status` | Same “latest open” **`StatusHistory.status_value`** subquery as filters; **`lower(...)`**, **`nulls_last`** | +| `site_name` | **`ThingIdLink.alternate_id`** where **`alternate_organization = 'NMBGMR'`**, smallest link **`id`** (matches **`Thing.site_name`**) | +| `contacts` | **`min(lower(Contact.name))`** over **`ThingContactAssociation`** (first name alphabetically among linked contacts) | +| `aquifers` | **`min(lower(AquiferSystem.name))`** over **`ThingAquiferAssociation`** | +| `open_status` | Latest open **“Open Status”** row; rank **Open** before **Closed**, then unknown strings, then no row | +| `measuring_point_height` | Latest **`MeasuringPointHistory`** row with non-null height (**`start_date` desc**, limit 1) | + +### Contact (`GET /contact`) + +| `sort` value | SQL idea | +|--------------|----------| +| `things` | **`min(lower(Thing.name))`** over linked sites (**`ThingContactAssociation`**) | + +**Semantic note:** For multi-valued columns, “sort” uses a **scalar summary** (minimum name, latest status text, etc.). That order can differ from how the UI joins labels with commas. Align copy in column **`description`** text with this behavior. + +## How filters combine + +Inside `order_sort_filter`, each decoded JSON dict is applied in order via `_apply_json_filter_clause`. Combined filters are **`AND`** together. Changing this would require coordinated UI and API changes. + +## Wire format reminder + +Each filter **must** include **`field`**, **`operator`**, and **`value`** keys (Refine convention). Omitting keys yields **422** from `_apply_json_filter_clause`. + +## Where code lives + +| Piece | Location | +|-------|-----------| +| Merge **`filter_`** + **`filters`**, sorting, pagination hook | **`order_sort_filter`** in **`services/query_helper.py`** | +| Dispatch virtual fields | **`_apply_json_filter_clause`** in **`services/query_helper.py`** | +| **`Thing` + contacts** | **`_apply_thing_contacts_filter`** | +| **`Contact` + things** | **`_apply_contact_things_filter`** | +| Contact list accepts repeated **`filter`** | **`GET`** **`/contact`** in **`api/contact.py`**, **`get_db_contacts`** in **`services/contact_helper.py`** | +| Wells list pattern (reference) | **`GET`** **`/thing/water-well`** in **`api/thing.py`**, **`get_db_things`** in **`services/thing_helper.py`** | +| Sort by derived / virtual columns | **`_apply_thing_virtual_sort`**, **`_apply_contact_virtual_sort`**, **`THING_VIRTUAL_SORT_FIELDS`** in **`services/query_helper.py`** | + +## Tests worth reading + +- **`tests/test_contact_filters.py`**: **`things`** filters, **`things`** sort, multiple **`filter`** params on **`GET /contact`**. +- **`tests/test_thing.py`** (contacts on wells): **`contacts`** **`contains`**, **`ncontains`**, **`nnull`**, and **`sort`** on **`monitoring_status`**, **`site_name`**, **`contacts`**, **`aquifers`**, etc. + +## When you change this + +1. Keep UI **`field`** names and API virtual branches in sync (**`things`** vs typo **`associated_sites`** breaks filtering). +2. Prefer **`EXISTS`** for “any related row matches” filters to avoid duplicate parents. +3. Extend operators only with tests that lock semantics (especially **negation**). +4. If you add virtual fields for other routes, document them here and in the **`query_helper`** header block. diff --git a/services/contact_helper.py b/services/contact_helper.py index 05b66200..2b0cc490 100644 --- a/services/contact_helper.py +++ b/services/contact_helper.py @@ -29,8 +29,19 @@ def get_db_contacts( thing_id: int | None = None, sort: str | None = None, order: str | None = None, - filter_: str | None = None, + *, + filters: list[str] | None = None, ): + """Paginated contacts with eager loads and Refine-compatible ``filters``. + + Pass ``filters`` from ``GET /contact`` (repeated ``filter`` query keys). The + ``things`` virtual field searches linked site names via + ``_apply_contact_things_filter``. Background: + docs/refine-json-filters-and-virtual-fields.md. + + ``thing_id`` restricts to contacts tied to one site (join on the + association table). + """ sql = session.query(Contact).options( # eagerly load related tables to avoid N+1 problems joinedload(Contact.emails), @@ -46,12 +57,15 @@ def get_db_contacts( sql = sql.join(ThingContactAssociation) sql = sql.where(ThingContactAssociation.thing_id == thing_id) - sql = order_sort_filter(sql, Contact, sort, order, filter_) + sql = order_sort_filter(sql, Contact, sort, order, filters=filters) return paginate(sql) def add_contact( - session: Session, data: CreateContact | dict, user: dict, commit: bool = True + session: Session, + data: CreateContact | dict, + user: dict, + commit: bool = True, ) -> Contact: """ Add a new contact to the database. diff --git a/services/query_helper.py b/services/query_helper.py index b197045b..538e93a0 100644 --- a/services/query_helper.py +++ b/services/query_helper.py @@ -18,7 +18,21 @@ from fastapi import HTTPException from fastapi_pagination.ext.sqlalchemy import paginate -from sqlalchemy import Column, Float, Integer, Select, String, Text, func, not_, select +from sqlalchemy import ( + Column, + Float, + Integer, + Select, + String, + Text, + and_, + case, + exists, + func, + not_, + nulls_last, + select, +) from sqlalchemy.orm import DeclarativeBase, Session from sqlalchemy.sql.elements import OperatorExpression from starlette.status import HTTP_404_NOT_FOUND @@ -27,6 +41,65 @@ from services.env import to_bool from services.regex import QUERY_REGEX +# ----------------------------------------------------------------------------- +# REFINE LIST FILTERS (JSON OVER QUERY STRING) +# +# Ocotillo UI uses Refine. ``getList`` sends each active DataGrid filter as one +# HTTP query parameter named ``filter``, repeated when multiple columns are +# filtered: +# +# GET /contact?filter={"field":"things","operator":"contains","value":"DE"} +# +# FastAPI should declare that parameter as ``list[str]`` (alias ``filter``), +# never a single ``str``. A single-string binding drops extra filters when users +# combine column filters (wrong totals vs rows). +# +# Each JSON object must contain keys ``field``, ``operator``, and ``value``. +# ``order_sort_filter`` merges legacy ``filter_`` with ``filters``, JSON-decodes +# each string, then ANDs predicates by calling ``_apply_json_filter_clause`` +# repeatedly. +# +# WHY VIRTUAL ``field`` NAMES (NOT RAW ORM NAMES FOR FILTERING) +# +# Some UI columns summarize **many related rows**, for example Associated Sites +# on contacts. SQLAlchemy exposes that as ``Contact.things``, an association +# proxy, **not** a ``String`` column. The default filter path does +# ``getattr(table, field)`` and applies ``ILIKE`` to a column. Proxies are not +# columns, and even if they were, "contains" must mean "match **any** linked +# site name", which needs a subquery or join. So we reserve virtual ``field`` +# strings that match what the UI sends and implement them explicitly below. +# +# ASSOCIATION PAIR (INVERSE OF EACH OTHER) +# +# ``Thing`` virtual field ``contacts``: filter wells by **any** linked +# ``Contact.name`` via ``ThingContactAssociation``. Used from the wells list. +# +# ``Contact`` virtual field ``things``: filter contacts by **any** linked +# ``Thing.name`` via the same association table. Used from the contacts list. +# +# Both use ``EXISTS (SELECT 1 FROM … WHERE …)`` so we never duplicate parent +# rows when a contact links to many sites or a site has many contacts. Joining +# associations in the outer FROM would multiply rows and break pagination. +# +# Text predicates apply only to **name** on the far side of the association +# (not organization, role, or other columns) unless we deliberately extend the +# helpers and document that contract. +# +# Thing-only virtual fields ``monitoring_status`` / ``well_status``: latest +# open ``StatusHistory`` row for that type. Operators: contains, ncontains, +# startswith, endswith, eq, ne. +# +# All other ``field`` values must resolve to a real mapped SQL column on the +# primary table for this query (Thing or Contact). +# +# ``sort`` + ``order``: use mapped columns, or Thing keys in +# ``THING_VIRTUAL_SORT_FIELDS`` / Contact ``things``, implemented in +# ``_apply_thing_virtual_sort`` and ``_apply_contact_virtual_sort``. +# Python ``@property`` and association proxies are not valid ``ORDER BY`` targets. +# +# Long-form narrative: docs/refine-json-filters-and-virtual-fields.md +# ----------------------------------------------------------------------------- + def make_where(col: Column, op: str, v: str) -> OperatorExpression: @@ -117,22 +190,555 @@ def _python_type(column: Any): return None +def _apply_thing_derived_status_filter( + sql: Select[Any], + thing_table: type, + status_type_literal: str, + operator: str, + value: Any, +) -> Select[Any]: + """Filter Thing rows using the latest open StatusHistory row. + + Mirrors monitoring_status / well_status: open row (end_date None) with + newest start_date wins. + """ + from db.status_history import StatusHistory + + sh = StatusHistory + tt = thing_table.__tablename__ + + max_start = ( + select(func.max(sh.start_date)) + .select_from(sh) + .where( + sh.target_table == tt, + sh.target_id == thing_table.id, + sh.status_type == status_type_literal, + sh.end_date.is_(None), + ) + .correlate(thing_table) + .scalar_subquery() + ) + + base_clause = and_( + sh.target_table == tt, + sh.target_id == thing_table.id, + sh.status_type == status_type_literal, + sh.end_date.is_(None), + sh.start_date == max_start, + ) + + if operator == "ncontains": + return sql.where( + ~exists( + select(1).where( + base_clause, + sh.status_value.ilike(f"%{value}%"), + ) + ) + ) + + if operator == "contains": + pred = sh.status_value.ilike(f"%{value}%") + elif operator == "startswith": + pred = sh.status_value.ilike(f"{value}%") + elif operator == "endswith": + pred = sh.status_value.ilike(f"%{value}") + elif operator == "eq": + pred = sh.status_value == str(value) + elif operator == "ne": + pred = sh.status_value != str(value) + else: + raise HTTPException( + status_code=400, + detail=( + f"Operator {operator!r} is not supported for derived " + "status filters (contains, ncontains, eq, ne, startswith, " + "endswith)" + ), + ) + + return sql.where(exists(select(1).where(base_clause, pred))) + + +def _thing_latest_open_status_sort_scalar( + thing_table: type, + status_type_literal: str, +): + """Scalar subquery: ``status_value`` for the current open status row. + + Aligns with ``Thing.monitoring_status`` / ``Thing.well_status`` properties and + with ``_apply_thing_derived_status_filter``: among rows with + ``end_date IS NULL`` for this ``status_type``, pick the row with the + maximum ``start_date``, then read ``status_value``. + """ + from db.status_history import StatusHistory + + sh = StatusHistory + tt = thing_table.__tablename__ + + max_start = ( + select(func.max(sh.start_date)) + .select_from(sh) + .where( + sh.target_table == tt, + sh.target_id == thing_table.id, + sh.status_type == status_type_literal, + sh.end_date.is_(None), + ) + .correlate(thing_table) + .scalar_subquery() + ) + + return ( + select(sh.status_value) + .select_from(sh) + .where( + sh.target_table == tt, + sh.target_id == thing_table.id, + sh.status_type == status_type_literal, + sh.end_date.is_(None), + sh.start_date == max_start, + ) + .correlate(thing_table) + .scalar_subquery() + ) + + +def _thing_site_name_sort_scalar(thing_table: type): + """NMBGMR ``ThingIdLink.alternate_id`` with lowest link ``id`` (matches ``Thing.site_name``).""" + from db.thing import ThingIdLink + + til = ThingIdLink + return ( + select(til.alternate_id) + .select_from(til) + .where( + til.thing_id == thing_table.id, + til.alternate_organization == "NMBGMR", + ) + .order_by(til.id.asc()) + .limit(1) + .correlate(thing_table) + .scalar_subquery() + ) + + +def _thing_contacts_min_name_sort_scalar(thing_table: type): + """Minimum ``lower(Contact.name)`` across associations (stable proxy for display order).""" + from db.contact import Contact, ThingContactAssociation + + tca = ThingContactAssociation + c = Contact + return ( + select(func.min(func.lower(c.name))) + .select_from(tca) + .join(c, tca.contact_id == c.id) + .where( + tca.thing_id == thing_table.id, + c.name.isnot(None), + ) + .correlate(thing_table) + .scalar_subquery() + ) + + +def _thing_aquifers_min_name_sort_scalar(thing_table: type): + """Minimum ``lower(AquiferSystem.name)`` across linked aquifers.""" + from db.aquifer_system import AquiferSystem + from db.thing_aquifer_association import ThingAquiferAssociation + + taa = ThingAquiferAssociation + aq = AquiferSystem + return ( + select(func.min(func.lower(aq.name))) + .select_from(taa) + .join(aq, taa.aquifer_system_id == aq.id) + .where(taa.thing_id == thing_table.id) + .correlate(thing_table) + .scalar_subquery() + ) + + +def _thing_measuring_point_height_sort_scalar(thing_table: type): + """Height from the latest ``MeasuringPointHistory`` row with non-null height.""" + from db.measuring_point_history import MeasuringPointHistory + + mph = MeasuringPointHistory + return ( + select(mph.measuring_point_height) + .select_from(mph) + .where( + mph.thing_id == thing_table.id, + mph.measuring_point_height.isnot(None), + ) + .order_by(mph.start_date.desc()) + .limit(1) + .correlate(thing_table) + .scalar_subquery() + ) + + +def _thing_open_status_order_expression(thing_table: type): + """Rank ``Open`` before ``Closed``, unknown values last, no row second-to-last group.""" + sv = _thing_latest_open_status_sort_scalar(thing_table, "Open Status") + return case( + (sv.is_(None), 2), + (sv == "Open", 0), + (sv == "Closed", 1), + else_=3, + ) + + +def _contact_things_min_name_sort_scalar(contact_table: type): + """Minimum ``lower(Thing.name)`` across a contact's associated sites.""" + from db.contact import ThingContactAssociation + from db.thing import Thing + + tca = ThingContactAssociation + t = Thing + return ( + select(func.min(func.lower(t.name))) + .select_from(tca) + .join(t, tca.thing_id == t.id) + .where( + tca.contact_id == contact_table.id, + t.name.isnot(None), + ) + .correlate(contact_table) + .scalar_subquery() + ) + + +THING_VIRTUAL_SORT_FIELDS = frozenset( + { + "monitoring_status", + "well_status", + "datalogger_suitability_status", + "site_name", + "contacts", + "aquifers", + "open_status", + "measuring_point_height", + } +) + + +def _apply_thing_virtual_sort( + sql: Select[Any], + thing_table: type, + sort: str, + order: str, +) -> Select[Any] | None: + """Apply SQL ``ORDER BY`` for Thing columns that are not mapped attributes.""" + if sort not in THING_VIRTUAL_SORT_FIELDS: + return None + + ord_ = order.lower() + if ord_ not in ("asc", "desc"): + raise ValueError("Invalid order parameter. Use 'asc' or 'desc'.") + + def str_order(expr): + if ord_ == "asc": + return sql.order_by( + nulls_last(expr.asc()), + thing_table.id.asc(), + ) + return sql.order_by( + nulls_last(expr.desc()), + thing_table.id.desc(), + ) + + def num_order(expr): + if ord_ == "asc": + return sql.order_by( + nulls_last(expr.asc()), + thing_table.id.asc(), + ) + return sql.order_by( + nulls_last(expr.desc()), + thing_table.id.desc(), + ) + + if sort == "monitoring_status": + expr = func.lower( + _thing_latest_open_status_sort_scalar(thing_table, "Monitoring Status") + ) + return str_order(expr) + + if sort == "well_status": + expr = func.lower( + _thing_latest_open_status_sort_scalar(thing_table, "Well Status") + ) + return str_order(expr) + + if sort == "datalogger_suitability_status": + expr = func.lower( + _thing_latest_open_status_sort_scalar( + thing_table, "Datalogger Suitability Status" + ) + ) + return str_order(expr) + + if sort == "site_name": + return str_order(func.lower(_thing_site_name_sort_scalar(thing_table))) + + if sort == "contacts": + return str_order(_thing_contacts_min_name_sort_scalar(thing_table)) + + if sort == "aquifers": + return str_order(_thing_aquifers_min_name_sort_scalar(thing_table)) + + if sort == "open_status": + return num_order(_thing_open_status_order_expression(thing_table)) + + if sort == "measuring_point_height": + return num_order(_thing_measuring_point_height_sort_scalar(thing_table)) + + raise NotImplementedError( + f"Thing virtual sort {sort!r} is listed in THING_VIRTUAL_SORT_FIELDS " + "but not implemented in _apply_thing_virtual_sort" + ) + + +def _apply_contact_virtual_sort( + sql: Select[Any], + contact_table: type, + sort: str, + order: str, +) -> Select[Any] | None: + """Apply SQL ``ORDER BY`` for Contact columns that are not mapped columns.""" + if sort != "things": + return None + + ord_ = order.lower() + if ord_ not in ("asc", "desc"): + raise ValueError("Invalid order parameter. Use 'asc' or 'desc'.") + + expr = func.lower(_contact_things_min_name_sort_scalar(contact_table)) + if ord_ == "asc": + return sql.order_by( + nulls_last(expr.asc()), + contact_table.id.asc(), + ) + return sql.order_by( + nulls_last(expr.desc()), + contact_table.id.desc(), + ) + + +def _apply_thing_contacts_filter( + sql: Select[Any], + thing_table: type, + operator: str, + value: Any, +) -> Select[Any]: + """Filter ``Thing`` rows using linked contacts (many-to-many). + + **Why this exists.** The wells list exposes a ``contacts`` column backed by + ``ThingContactAssociation``. Refine sends ``field=contacts``. That name is not + a plain ``Thing`` column, so the default ILIKE path cannot apply. + + **Semantics.** Return wells where **any** linked contact satisfies the text + operator on ``Contact.name`` (OR across association rows). We do **not** + scan organization or role here; extend this function intentionally if those + become product requirements. + + **SQL shape.** ``EXISTS`` avoids duplicating ``Thing`` rows when a well has + multiple contacts (pagination stays one row per well). + + Pairing: ``_apply_contact_things_filter`` is the inverse direction for the + contact list ``things`` column. See module comment and + docs/refine-json-filters-and-virtual-fields.md. + """ + from db.contact import Contact, ThingContactAssociation + + tca = ThingContactAssociation + c = Contact + + def _linked_contact_select(predicate): + return ( + select(1) + .select_from(tca) + .join(c, tca.contact_id == c.id) + .where( + tca.thing_id == thing_table.id, + c.name.isnot(None), + predicate, + ) + ) + + any_linked_contact = ( + select(1) + .select_from(tca) + .join(c, tca.contact_id == c.id) + .where(tca.thing_id == thing_table.id) + ) + + if operator == "nnull": + return sql.where(exists(any_linked_contact)) + + if operator == "null": + return sql.where(~exists(any_linked_contact)) + + if operator == "ncontains": + ncl = _linked_contact_select(c.name.ilike(f"%{value}%")) + return sql.where(~exists(ncl)) + + if operator == "ne": + neq = _linked_contact_select(c.name == str(value)) + return sql.where(~exists(neq)) + + if operator == "contains": + pred = c.name.ilike(f"%{value}%") + elif operator == "startswith": + pred = c.name.ilike(f"{value}%") + elif operator == "endswith": + pred = c.name.ilike(f"%{value}") + elif operator == "eq": + pred = c.name == str(value) + else: + raise HTTPException( + status_code=400, + detail=( + f"Operator {operator!r} is not supported for contacts " + "filters (contains, ncontains, eq, ne, startswith, endswith, " + "null, nnull)" + ), + ) + + return sql.where(exists(_linked_contact_select(pred))) + + +def _apply_contact_things_filter( + sql: Select[Any], + contact_table: type, + operator: str, + value: Any, +) -> Select[Any]: + """Filter ``Contact`` rows using linked monitoring sites (many-to-many). + + **Why this exists.** The UI Associated Sites column summarizes related + ``Thing`` rows. Refine sends ``field`` equal to ``things``. That relation is an + association proxy on ``Contact``, not a searchable string column. Filters for + Associated Sites must not go through the generic column ILIKE path (they would + raise or behave incorrectly). + + **Semantics.** Keep contacts where **any** linked ``Thing.name`` matches + the predicate (OR across ``ThingContactAssociation`` rows). Only ``name`` is + searched for text operators, matching how the UI builds the display string + from site names. + + **SQL shape.** ``EXISTS`` prevents one contact appearing multiple times when + they own many sites. + + Inverse of ``_apply_thing_contacts_filter``. Narrative documentation: + docs/refine-json-filters-and-virtual-fields.md. + """ + from db.contact import ThingContactAssociation + from db.thing import Thing + + tca = ThingContactAssociation + t = Thing + + def _linked_thing_select(predicate): + return ( + select(1) + .select_from(tca) + .join(t, tca.thing_id == t.id) + .where( + tca.contact_id == contact_table.id, + t.name.isnot(None), + predicate, + ) + ) + + any_linked_thing = ( + select(1) + .select_from(tca) + .join(t, tca.thing_id == t.id) + .where(tca.contact_id == contact_table.id) + ) + + if operator == "nnull": + return sql.where(exists(any_linked_thing)) + + if operator == "null": + return sql.where(~exists(any_linked_thing)) + + if operator == "ncontains": + nlk = _linked_thing_select(t.name.ilike(f"%{value}%")) + return sql.where(~exists(nlk)) + + if operator == "ne": + neq = _linked_thing_select(t.name == str(value)) + return sql.where(~exists(neq)) + + if operator == "contains": + pred = t.name.ilike(f"%{value}%") + elif operator == "startswith": + pred = t.name.ilike(f"{value}%") + elif operator == "endswith": + pred = t.name.ilike(f"%{value}") + elif operator == "eq": + pred = t.name == str(value) + else: + raise HTTPException( + status_code=400, + detail=( + f"Operator {operator!r} is not supported for things " + "filters (contains, ncontains, eq, ne, startswith, endswith, " + "null, nnull)" + ), + ) + + return sql.where(exists(_linked_thing_select(pred))) + + def _apply_json_filter_clause( sql: Select[Any], table: DeclarativeBase, f: dict ) -> Select[Any]: - """Apply one Refine logical filter dict (field / operator / value) to a SELECT.""" + """Apply one Refine logical filter dict to an SQLAlchemy SELECT. + + Dispatch order matters. Virtual association branches (Contact ``things``, + Thing ``contacts``, Thing derived statuses) **must** run before the generic + ``getattr(table, field)`` path; otherwise proxies or unsupported types hit + the column branch and produce 400 responses. + + Each call applies a **single** predicate. ``order_sort_filter`` chains + multiple JSON filters with AND semantics. + """ required_keys = {"field", "value", "operator"} missing = required_keys - f.keys() if missing: + keys = ", ".join(sorted(missing)) raise HTTPException( status_code=422, - detail=f"Missing required filter keys: {', '.join(sorted(missing))}", + detail=f"Missing required filter keys: {keys}", ) field = f["field"] value = f["value"] operator = f["operator"] + if getattr(table, "__name__", None) == "Thing" and field in ( + "monitoring_status", + "well_status", + ): + status_type_map = { + "monitoring_status": "Monitoring Status", + "well_status": "Well Status", + } + return _apply_thing_derived_status_filter( + sql, table, status_type_map[field], operator, value + ) + + if getattr(table, "__name__", None) == "Contact" and field == "things": + return _apply_contact_things_filter(sql, table, operator, value) + + if getattr(table, "__name__", None) == "Thing" and field == "contacts": + return _apply_thing_contacts_filter(sql, table, operator, value) + try: column = getattr(table, field) except AttributeError as exc: @@ -235,6 +841,21 @@ def order_sort_filter( *, filters: list[str] | None = None, ) -> Select[Any]: + """Apply optional sort and zero or more Refine JSON filters to ``sql``. + + **Repeatable ``filter`` parameters.** Pass ``filters`` as the list of raw + JSON strings from FastAPI ``Query(alias='filter')``. Each entry decodes to + one predicate; all predicates are combined with AND. The legacy ``filter_`` + argument supports older callers that still pass a single JSON string. + + **Why both ``filter_`` and ``filters``.** Backward compatibility while we + migrate every list route to list-shaped query params. UI clients should use + repeated ``filter`` keys only. + + Virtual association fields are implemented inside + ``_apply_json_filter_clause``. See + docs/refine-json-filters-and-virtual-fields.md for background. + """ if order: if not sort: raise ValueError( @@ -242,17 +863,26 @@ def order_sort_filter( f"The sort parameter should be a column name in the table {table}." ) - attr = getattr(table, sort) - # test if column is a string col - if isinstance(attr.type, String): - attr = func.lower(attr) + virtual_sorted = None + if getattr(table, "__name__", None) == "Thing": + virtual_sorted = _apply_thing_virtual_sort(sql, table, sort, order) + elif getattr(table, "__name__", None) == "Contact": + virtual_sorted = _apply_contact_virtual_sort(sql, table, sort, order) - if order.lower() == "asc": - sql = sql.order_by(attr.asc()) - elif order.lower() == "desc": - sql = sql.order_by(attr.desc()) + if virtual_sorted is not None: + sql = virtual_sorted else: - raise ValueError("Invalid order parameter. Use 'asc' or 'desc'.") + attr = getattr(table, sort) + # test if column is a string col + if isinstance(attr.type, String): + attr = func.lower(attr) + + if order.lower() == "asc": + sql = sql.order_by(attr.asc()) + elif order.lower() == "desc": + sql = sql.order_by(attr.desc()) + else: + raise ValueError("Invalid order parameter. Use 'asc' or 'desc'.") filter_jsons: list[str] = [] if filters: diff --git a/tests/test_contact_filters.py b/tests/test_contact_filters.py new file mode 100644 index 00000000..8eba3315 --- /dev/null +++ b/tests/test_contact_filters.py @@ -0,0 +1,120 @@ +# =============================================================================== +# Copyright 2025 ross +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =============================================================================== +import json + +import pytest +from core.dependencies import ( + admin_function, + amp_admin_function, + amp_editor_function, + amp_viewer_function, + editor_function, + viewer_function, +) +from main import app +from tests import client, override_authentication + + +@pytest.fixture(scope="module", autouse=True) +def override_authentication_dependency_fixture(): + app.dependency_overrides[admin_function] = override_authentication( + default={"name": "foobar", "sub": "1234567890"} + ) + app.dependency_overrides[editor_function] = override_authentication( + default={"name": "foobar", "sub": "1234567890"} + ) + app.dependency_overrides[viewer_function] = override_authentication() + app.dependency_overrides[amp_admin_function] = override_authentication( + default={"name": "foobar", "sub": "1234567890"} + ) + app.dependency_overrides[amp_editor_function] = override_authentication( + default={"name": "foobar", "sub": "1234567890"} + ) + app.dependency_overrides[amp_viewer_function] = override_authentication() + + yield + + app.dependency_overrides = {} + + +def test_get_contacts_filter_things_contains(water_well_thing, contact): + fl = json.dumps( + {"field": "things", "operator": "contains", "value": "Well"}, + ) + response = client.get("/contact", params=[("filter", fl)]) + assert response.status_code == 200 + data = response.json() + ids = [item["id"] for item in data["items"]] + assert contact.id in ids + + +def test_get_contacts_filter_things_contains_no_match( + water_well_thing, + contact, +): + fl = json.dumps( + { + "field": "things", + "operator": "contains", + "value": "ZyxyzNoMatch999", + } + ) + response = client.get("/contact", params=[("filter", fl)]) + assert response.status_code == 200 + data = response.json() + ids = [item["id"] for item in data["items"]] + assert contact.id not in ids + + +def test_get_contacts_filter_things_nnull(water_well_thing, contact): + fl = json.dumps( + {"field": "things", "operator": "nnull", "value": True}, + ) + response = client.get("/contact", params=[("filter", fl)]) + assert response.status_code == 200 + data = response.json() + ids = [item["id"] for item in data["items"]] + assert contact.id in ids + + +def test_get_contacts_multiple_filters_and(water_well_thing, contact): + fl_a = json.dumps( + {"field": "things", "operator": "contains", "value": "Well"}, + ) + fl_b = json.dumps( + {"field": "name", "operator": "contains", "value": "Test"}, + ) + response = client.get( + "/contact", + params=[("filter", fl_a), ("filter", fl_b)], + ) + assert response.status_code == 200 + data = response.json() + ids = [item["id"] for item in data["items"]] + assert contact.id in ids + + +def test_get_contacts_sort_things_asc(water_well_thing, contact): + response = client.get( + "/contact", + params={ + "sort": "things", + "order": "asc", + }, + ) + assert response.status_code == 200 + data = response.json() + assert contact.id in [item["id"] for item in data["items"]] diff --git a/tests/test_thing.py b/tests/test_thing.py index e36792ee..d3444a7c 100644 --- a/tests/test_thing.py +++ b/tests/test_thing.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== +import json + import pytest from core.dependencies import ( admin_function, @@ -933,6 +935,136 @@ def test_get_water_wells_includes_contact_summary( ] +def test_get_water_wells_filter_contacts_contains(water_well_thing, contact): + fl = json.dumps({"field": "contacts", "operator": "contains", "value": "Test"}) + response = client.get("/thing/water-well", params=[("filter", fl)]) + assert response.status_code == 200 + data = response.json() + ids = [item["id"] for item in data["items"]] + assert water_well_thing.id in ids + + +def test_get_water_wells_filter_contacts_contains_no_match(water_well_thing, contact): + fl = json.dumps( + { + "field": "contacts", + "operator": "contains", + "value": "ZyxyzNoMatch999", + } + ) + response = client.get("/thing/water-well", params=[("filter", fl)]) + assert response.status_code == 200 + data = response.json() + ids = [item["id"] for item in data["items"]] + assert water_well_thing.id not in ids + + +def test_get_water_wells_filter_contacts_ncontains(water_well_thing, contact): + fl = json.dumps( + { + "field": "contacts", + "operator": "ncontains", + "value": "ZyxyzNoMatch999", + } + ) + response = client.get("/thing/water-well", params=[("filter", fl)]) + assert response.status_code == 200 + data = response.json() + ids = [item["id"] for item in data["items"]] + assert water_well_thing.id in ids + + +def test_get_water_wells_filter_contacts_nnull(water_well_thing, contact): + fl = json.dumps( + {"field": "contacts", "operator": "nnull", "value": True}, + ) + response = client.get("/thing/water-well", params=[("filter", fl)]) + assert response.status_code == 200 + data = response.json() + ids = [item["id"] for item in data["items"]] + assert water_well_thing.id in ids + + +def test_get_water_wells_sort_monitoring_status_desc(water_well_thing): + """Derived status columns are Python properties; sort uses StatusHistory SQL.""" + response = client.get( + "/thing/water-well", + params={ + "sort": "monitoring_status", + "order": "desc", + "page": 1, + "size": 50, + }, + ) + assert response.status_code == 200 + + +def test_get_water_wells_sort_well_status_asc(water_well_thing): + response = client.get( + "/thing/water-well", + params={ + "sort": "well_status", + "order": "asc", + }, + ) + assert response.status_code == 200 + + +def test_get_water_wells_sort_site_name_asc(water_well_thing): + response = client.get( + "/thing/water-well", + params={ + "sort": "site_name", + "order": "asc", + }, + ) + assert response.status_code == 200 + + +def test_get_water_wells_sort_contacts_asc(water_well_thing, contact): + response = client.get( + "/thing/water-well", + params={ + "sort": "contacts", + "order": "asc", + }, + ) + assert response.status_code == 200 + + +def test_get_water_wells_sort_open_status_asc(water_well_thing): + response = client.get( + "/thing/water-well", + params={ + "sort": "open_status", + "order": "asc", + }, + ) + assert response.status_code == 200 + + +def test_get_water_wells_sort_measuring_point_height_asc(water_well_thing): + response = client.get( + "/thing/water-well", + params={ + "sort": "measuring_point_height", + "order": "desc", + }, + ) + assert response.status_code == 200 + + +def test_get_water_wells_sort_aquifers_asc(water_well_thing): + response = client.get( + "/thing/water-well", + params={ + "sort": "aquifers", + "order": "asc", + }, + ) + assert response.status_code == 200 + + def test_get_water_well_by_id_404_not_found(water_well_thing): bad_id = 99999 response = client.get(f"/thing/water-well/{bad_id}")