From a96e2b5b9b340fa974945506881d149bbb6122dd Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 8 Jan 2026 10:08:17 +0000 Subject: [PATCH 01/25] feat(medcat-service): Update gradio version --- medcat-service/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat-service/requirements.txt b/medcat-service/requirements.txt index 27b723e0d..e4c0679fb 100644 --- a/medcat-service/requirements.txt +++ b/medcat-service/requirements.txt @@ -9,7 +9,7 @@ requests==2.32.4 fastapi[standard]==0.115.2 pydantic>=2.11.10,<2.12.5 pydantic-settings==2.10.1 -gradio[mcp]==5.38.0 +gradio[mcp]==6.2.0 prometheus-fastapi-instrumentator==7.1.0 opentelemetry-distro[otlp]==0.60b0 opentelemetry-instrumentation==0.60b0 From 476ebff1d6882f316a431fc55e4da7828b44a7e8 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 8 Jan 2026 11:04:49 +0000 Subject: [PATCH 02/25] build(medcat-service): Update fastapi dependency --- medcat-service/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat-service/requirements.txt b/medcat-service/requirements.txt index e4c0679fb..cf81d21c5 100644 --- a/medcat-service/requirements.txt +++ b/medcat-service/requirements.txt @@ -6,7 +6,7 @@ medcat[meta-cat,spacy,deid]~=2.2.0 # pinned because of issues with de-id models and past models (it will not do any de-id) transformers>=4.34.0,<5.0.0 requests==2.32.4 -fastapi[standard]==0.115.2 +fastapi[standard]==0.128.0 pydantic>=2.11.10,<2.12.5 pydantic-settings==2.10.1 gradio[mcp]==6.2.0 From 7a9e413ae24e992036462cdcebb6cec52ed2f990 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 8 Jan 2026 11:08:15 +0000 Subject: [PATCH 03/25] feat(medcat-service): Fix gradio version root path bug --- medcat-service/medcat_service/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/medcat-service/medcat_service/config.py b/medcat-service/medcat_service/config.py index ee12a3601..febb84469 100644 --- a/medcat-service/medcat_service/config.py +++ b/medcat-service/medcat_service/config.py @@ -38,7 +38,7 @@ class Settings(BaseSettings): ) app_root_path: str = Field( - default="/", + default="", description="The Root Path for the FastAPI App", examples=["/medcat-service"], ) From 3f6e38c8aa0360800d6a77bdb06017eb0a3b6872 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 8 Jan 2026 11:56:01 +0000 Subject: [PATCH 04/25] build(medcat-service): Add hot module reloader. Update gradio demo --- medcat-service/medcat_service/demo/gradio_demo.py | 10 +++++----- medcat-service/medcat_service/main.py | 4 +++- medcat-service/requirements-dev.txt | 1 + medcat-service/start_service_debug.sh | 8 +++++++- 4 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 medcat-service/requirements-dev.txt diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index a19bbab92..26ada6a51 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -118,6 +118,8 @@ def perform_named_entity_resolution(input_text: str): entity annotation and its attributes for display purposes. """ + if not input_text or not input_text.strip(): + return None, None processor = get_medcat_processor(get_settings()) input = ProcessAPIInputContent(text=input_text) @@ -136,8 +138,7 @@ def perform_named_entity_resolution(input_text: str): short_example = "John had been diagnosed with acute Kidney Failure the week before" -long_example = """ -Description: Intracerebral hemorrhage (very acute clinical changes occurred immediately). +long_example = """Description: Intracerebral hemorrhage (very acute clinical changes occurred immediately). 
CC: Left hand numbness on presentation; then developed lethargy later that day. HX: On the day of presentation, this 72 y/o RHM suddenly developed generalized weakness and lightheadedness, and could not rise from a chair. Four hours later he experienced sudden left hand numbness lasting two hours. There were no other associated symptoms except for the generalized weakness and lightheadedness. He denied vertigo. @@ -160,15 +161,14 @@ def perform_named_entity_resolution(input_text: str): io = gr.Interface( fn=perform_named_entity_resolution, - inputs="text", + inputs=gr.Textbox(label="Input Text", lines=6, placeholder="Enter some text and click Annotate..."), outputs=[ gr.HighlightedText(label="Processed Text"), gr.Dataframe(label="Annotations", headers=headers, interactive=False), ], examples=[short_example, long_example], - preload_example=0, title="MedCAT Demo", - description="Enter some text and click Annotate.", flagging_mode="never", article=article_footer, + submit_btn="Annotate", ) diff --git a/medcat-service/medcat_service/main.py b/medcat-service/medcat_service/main.py index 21f843b2a..f9b3adb30 100644 --- a/medcat-service/medcat_service/main.py +++ b/medcat-service/medcat_service/main.py @@ -37,7 +37,9 @@ app.include_router(health.router) app.include_router(process.router) -gr.mount_gradio_app(app, io, path="/demo", mcp_server=True) +theme = gr.themes.Default(primary_hue="blue", secondary_hue="teal") + +app = gr.mount_gradio_app(app, io, path="/demo", theme=theme) def configure_observability(settings: Settings, app: FastAPI): diff --git a/medcat-service/requirements-dev.txt b/medcat-service/requirements-dev.txt new file mode 100644 index 000000000..c4f4d8c64 --- /dev/null +++ b/medcat-service/requirements-dev.txt @@ -0,0 +1 @@ +uvicorn-hmr[all] diff --git a/medcat-service/start_service_debug.sh b/medcat-service/start_service_debug.sh index c0055ed09..1938ae180 100644 --- a/medcat-service/start_service_debug.sh +++ b/medcat-service/start_service_debug.sh @@ 
-13,4 +13,10 @@ fi export APP_ENABLE_METRICS=${APP_ENABLE_METRICS:-True} -fastapi dev medcat_service/main.py +if HOT_MODULE_RELOADING=True; then + # Experimental: Hot module reloading. Need to `pip install -r requirements-dev.txt` + echo "Running medcat-service with hot module reloading" + uvicorn-hmr medcat_service/main:app --refresh --reload-include 'medcat_service' +else + fastapi dev medcat_service/main.py +fi From e452614c63258814693f157ec48490c7b5108fab Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 8 Jan 2026 12:07:25 +0000 Subject: [PATCH 05/25] docs(medcat-service): Add dev readme. refactor gradio to extract the text content --- medcat-service/README.md | 14 +++++++++ .../medcat_service/demo/demo_content.py | 24 +++++++++++++++ .../medcat_service/demo/gradio_demo.py | 29 ++----------------- medcat-service/start_service_debug.sh | 6 ++-- 4 files changed, 44 insertions(+), 29 deletions(-) create mode 100644 medcat-service/medcat_service/demo/demo_content.py diff --git a/medcat-service/README.md b/medcat-service/README.md index d8d0bc1da..4b726ae0f 100644 --- a/medcat-service/README.md +++ b/medcat-service/README.md @@ -389,3 +389,17 @@ The main settings that can be used to improve the performance when querying larg MedCAT parameters are defined in selected `envs/medcat*` file. For details on available MedCAT parameters please refer to [the official GitHub repository](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/). + +## Local development + +For local development, set up a Python virtual environment, install dependencies with pip, and make sure to also install the local MedCAT core library (the `medcat-v2` folder) in editable mode. 
+ +```bash +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt -r requirements-dev.txt +SETUPTOOLS_SCM_PRETEND_VERSION="2.4.0-dev0" pip install -e "../medcat-v2[meta-cat,spacy]" +bash start_service_debug.sh + +# Service will run on localhost:8000 +``` \ No newline at end of file diff --git a/medcat-service/medcat_service/demo/demo_content.py b/medcat-service/medcat_service/demo/demo_content.py new file mode 100644 index 000000000..9b4c62b96 --- /dev/null +++ b/medcat-service/medcat_service/demo/demo_content.py @@ -0,0 +1,24 @@ + +short_example = "John had been diagnosed with acute Kidney Failure the week before" + + +long_example = """Description: Intracerebral hemorrhage (very acute clinical changes occurred immediately). +CC: Left hand numbness on presentation; then developed lethargy later that day. + +HX: On the day of presentation, this 72 y/o RHM suddenly developed generalized weakness and lightheadedness, and could not rise from a chair. Four hours later he experienced sudden left hand numbness lasting two hours. There were no other associated symptoms except for the generalized weakness and lightheadedness. He denied vertigo. + +He had been experiencing falling spells without associated LOC up to several times a month for the past year. + +MEDS: procardia SR, Lasix, Ecotrin, KCL, Digoxin, Colace, Coumadin. + +PMH: 1)8/92 evaluation for presyncope (Echocardiogram showed: AV fibrosis/calcification, AV stenosis/insufficiency, MV stenosis with annular calcification and regurgitation, moderate TR, Decreased LV systolic function, severe LAE. MRI brain: focal areas of increased T2 signal in the left cerebellum and in the brainstem probably representing microvascular ischemic disease. IVG (MUGA scan)revealed: global hypokinesis of the LV and biventricular dysfunction, RV ejection Fx 45% and LV ejection Fx 39%. 
He was subsequently placed on coumadin severe valvular heart disease), 2)HTN, 3)Rheumatic fever and heart disease, 4)COPD, 5)ETOH abuse, 6)colonic polyps, 7)CAD, 8)CHF, 9)Appendectomy, 10)Junctional tachycardia. +""" # noqa: E501 + +article_footer = """ +## Disclaimer +This software is intended solely for the testing purposes and non-commercial use. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED. + +contact@cogstack.com for more information. + +Please note this is a limited version of MedCAT and it is not trained or validated by clinicans. +""" # noqa: E501 diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index 26ada6a51..f9a183a0c 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -1,6 +1,7 @@ import gradio as gr from pydantic import BaseModel +import medcat_service.demo.demo_content as demo_content from medcat_service.dependencies import get_medcat_processor, get_settings from medcat_service.types import ProcessAPIInputContent from medcat_service.types_entities import Entity @@ -135,30 +136,6 @@ def perform_named_entity_resolution(input_text: str): return response.model_dump(), response_datatable_format -short_example = "John had been diagnosed with acute Kidney Failure the week before" - - -long_example = """Description: Intracerebral hemorrhage (very acute clinical changes occurred immediately). -CC: Left hand numbness on presentation; then developed lethargy later that day. - -HX: On the day of presentation, this 72 y/o RHM suddenly developed generalized weakness and lightheadedness, and could not rise from a chair. Four hours later he experienced sudden left hand numbness lasting two hours. There were no other associated symptoms except for the generalized weakness and lightheadedness. He denied vertigo. 
- -He had been experiencing falling spells without associated LOC up to several times a month for the past year. - -MEDS: procardia SR, Lasix, Ecotrin, KCL, Digoxin, Colace, Coumadin. - -PMH: 1)8/92 evaluation for presyncope (Echocardiogram showed: AV fibrosis/calcification, AV stenosis/insufficiency, MV stenosis with annular calcification and regurgitation, moderate TR, Decreased LV systolic function, severe LAE. MRI brain: focal areas of increased T2 signal in the left cerebellum and in the brainstem probably representing microvascular ischemic disease. IVG (MUGA scan)revealed: global hypokinesis of the LV and biventricular dysfunction, RV ejection Fx 45% and LV ejection Fx 39%. He was subsequently placed on coumadin severe valvular heart disease), 2)HTN, 3)Rheumatic fever and heart disease, 4)COPD, 5)ETOH abuse, 6)colonic polyps, 7)CAD, 8)CHF, 9)Appendectomy, 10)Junctional tachycardia. -""" # noqa: E501 - -article_footer = """ -## Disclaimer -This software is intended solely for the testing purposes and non-commercial use. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED. - -contact@cogstack.com for more information. - -Please note this is a limited version of MedCAT and it is not trained or validated by clinicans. 
-""" # noqa: E501 - io = gr.Interface( fn=perform_named_entity_resolution, inputs=gr.Textbox(label="Input Text", lines=6, placeholder="Enter some text and click Annotate..."), @@ -166,9 +143,9 @@ def perform_named_entity_resolution(input_text: str): gr.HighlightedText(label="Processed Text"), gr.Dataframe(label="Annotations", headers=headers, interactive=False), ], - examples=[short_example, long_example], + examples=[demo_content.short_example, demo_content.long_example], title="MedCAT Demo", flagging_mode="never", - article=article_footer, + article=demo_content.article_footer, submit_btn="Annotate", ) diff --git a/medcat-service/start_service_debug.sh b/medcat-service/start_service_debug.sh index 1938ae180..44cce7741 100644 --- a/medcat-service/start_service_debug.sh +++ b/medcat-service/start_service_debug.sh @@ -2,9 +2,9 @@ echo "Starting MedCAT Service" # Optional - Enable DeID mode with: -#export APP_MEDCAT_MODEL_PACK="models/examples/example-deid-model-pack.zip" -#export DEID_MODE=True -#export DEID_REDACT=True +export APP_MEDCAT_MODEL_PACK="models/examples/example-deid-model-pack.zip" +export DEID_MODE=True +export DEID_REDACT=True if [ -z "${APP_MODEL_CDB_PATH}" ] && [ -z "${APP_MODEL_VOCAB_PATH}" ] && [ -z "${APP_MEDCAT_MODEL_PACK}" ]; then export APP_MEDCAT_MODEL_PACK="models/examples/example-medcat-v2-model-pack.zip" From 9e436f29487ae6533cc17b3794ffa30af5ed0a14 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 8 Jan 2026 12:15:50 +0000 Subject: [PATCH 06/25] feat(medcat-service): Add anoncat demo text --- .../medcat_service/demo/demo_content.py | 97 +++++++++++++++++++ .../medcat_service/demo/gradio_demo.py | 44 ++++++--- 2 files changed, 128 insertions(+), 13 deletions(-) diff --git a/medcat-service/medcat_service/demo/demo_content.py b/medcat-service/medcat_service/demo/demo_content.py index 9b4c62b96..3092443b9 100644 --- a/medcat-service/medcat_service/demo/demo_content.py +++ 
b/medcat-service/medcat_service/demo/demo_content.py @@ -22,3 +22,100 @@ Please note this is a limited version of MedCAT and it is not trained or validated by clinicans. """ # noqa: E501 + +anoncat_example = """Patient Information: + +Name: John Parkinson +Date of Birth: February 12, 1958 +Gender: Male +Address: 789 Wellness Lane, Healthville, HV 56789 +Phone: (555) 555-1234 +Email: john.parkinson@email.com +Emergency Contact: + +Name: Mary Parkinson +Relationship: Spouse +Phone: (555) 555-5678 +Insurance Information: + +Insurance Provider: HealthWell Assurance +Policy Number: HW765432109 +Group Number: G876543 +Medical History: + +Allergies: + +None reported +Medications: + +Levodopa/Carbidopa for Parkinson's disease symptoms +Pramipexole for restless legs syndrome +Lisinopril for hypertension +Atorvastatin for hyperlipidemia +Metformin for Type 2 Diabetes +Medical Conditions: + +Parkinson's Disease (diagnosed on June 20, 2015) +Hypertension +Hyperlipidemia +Type 2 Diabetes +Osteoarthritis +Vital Signs: + +Blood Pressure: 130/80 mmHg +Heart Rate: 72 bpm +Temperature: 98.4°F +Respiratory Rate: 18 breaths per minute +Recent Inpatient Stay (Dates: September 1-10, 2023): + +Reason for Admission: Acute exacerbation of Parkinson's symptoms, pneumonia, and uncontrolled diabetes. + +Interventions: + +Neurology Consultation for Parkinson's disease management adjustments. +Antibiotic therapy for pneumonia. +Continuous glucose monitoring and insulin therapy for diabetes control. +Physical therapy sessions to maintain mobility. +Complications: + +Delirium managed with close monitoring and appropriate interventions. +Discharge Plan: + +Medication adjustments for Parkinson's disease. +Follow-up appointments with neurologist, endocrinologist, and primary care. +Home health care for continued physical therapy. +Follow-up Visits: + +Date: October 15, 2023 + +Reason for Visit: Post-discharge Follow-up +Notes: Stable Parkinson's symptoms, pneumonia resolved. 
Adjusted diabetes medications for better control. +Date: December 5, 2023 + +Reason for Visit: Neurology Follow-up +Notes: Fine-tuned Parkinson's medication regimen. Recommended ongoing physical therapy. +""" # noqa: E501 + +anoncat_help_content = """Demo app for the deidentification of private health information using the CogStack AnonCAT model + +Please DO NOT test with any real sensitive PHI data. + +Local validation and fine-tuning available via [MedCATtrainer]( +https://github.com/CogStack/cogstack-nlp/tree/main/medcat-trainer). +Email us, [contact@cogstack.org](mailto:contact@cogstack.org), to discuss model access, +model performance, and your use case. + +The following PHI items have been trained: + +| PHI Item | Description | +|----------|-------------| +| NHS Number | UK National Health Service Numbers. | +| Name | All names, first, middle, last of patients, relatives, care providers etc. Importantly, does not redact conditions that are named after a name, e.g. "Parkinsons's disease". | +| Date of Birth | DOBs. Does not include other dates that may be in the record, i.e. dates of visit etc. | +| Hospital Number | A unique number provided by the hospital. Distinct from the NHS number | +| Address Line | Address lines - first, second, third or fourth | +| Postcode | UK postal codes - 6 or 7 alphanumeric codes as part of addresses | +| Telephone Number | Telephone numbers, extensions, mobile / cell phone numbers | +| Email | Email addresses | +| Initials | Patient, relatives, care provider name initials. 
| +""" # noqa: E501 diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index f9a183a0c..36675634e 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -136,16 +136,34 @@ def perform_named_entity_resolution(input_text: str): return response.model_dump(), response_datatable_format -io = gr.Interface( - fn=perform_named_entity_resolution, - inputs=gr.Textbox(label="Input Text", lines=6, placeholder="Enter some text and click Annotate..."), - outputs=[ - gr.HighlightedText(label="Processed Text"), - gr.Dataframe(label="Annotations", headers=headers, interactive=False), - ], - examples=[demo_content.short_example, demo_content.long_example], - title="MedCAT Demo", - flagging_mode="never", - article=demo_content.article_footer, - submit_btn="Annotate", -) +settings = get_settings() + + +if settings.deid_mode: + io = gr.Interface( + fn=perform_named_entity_resolution, + inputs=gr.Textbox(label="Input Text", lines=6, placeholder="Enter some text and click Annotate..."), + outputs=[ + gr.HighlightedText(label="Processed Text"), + gr.Dataframe(label="Annotations", headers=headers, interactive=False), + ], + examples=[demo_content.short_example, demo_content.anoncat_example], + title="AnonCAT Demo", + flagging_mode="never", + article=demo_content.anoncat_help_content, + submit_btn="Deidentify", + ) +else: + io = gr.Interface( + fn=perform_named_entity_resolution, + inputs=gr.Textbox(label="Input Text", lines=6, placeholder="Enter some text and click Annotate..."), + outputs=[ + gr.HighlightedText(label="Processed Text"), + gr.Dataframe(label="Annotations", headers=headers, interactive=False), + ], + examples=[demo_content.short_example, demo_content.long_example], + title="MedCAT Demo", + flagging_mode="never", + article=demo_content.article_footer, + submit_btn="Annotate", + ) From e7c3a11709913deea881ad851f2707b79c8dfe9f Mon Sep 17 00:00:00 
2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 8 Jan 2026 12:40:32 +0000 Subject: [PATCH 07/25] feat(medcat-service): Move out of main.py. Configure overflow scrollbar in results --- .../medcat_service/demo/gradio_demo.py | 27 ++++++++++++++++--- medcat-service/medcat_service/main.py | 7 ++--- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index 36675634e..2f992d0f6 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -139,12 +139,21 @@ def perform_named_entity_resolution(input_text: str): settings = get_settings() +# CSS to set max height with scrollbar for HighlightedText output +# Target the component container and its content +highlighted_text_css = """ +#highlighted-text-output { + max-height: 460px; + overflow-y: auto; +} +""" + if settings.deid_mode: io = gr.Interface( fn=perform_named_entity_resolution, - inputs=gr.Textbox(label="Input Text", lines=6, placeholder="Enter some text and click Annotate..."), + inputs=gr.Textbox(label="Input Text", lines=3, placeholder="Enter some text and click Annotate..."), outputs=[ - gr.HighlightedText(label="Processed Text"), + gr.HighlightedText(label="Processed Text", elem_id="highlighted-text-output"), gr.Dataframe(label="Annotations", headers=headers, interactive=False), ], examples=[demo_content.short_example, demo_content.anoncat_example], @@ -158,7 +167,7 @@ def perform_named_entity_resolution(input_text: str): fn=perform_named_entity_resolution, inputs=gr.Textbox(label="Input Text", lines=6, placeholder="Enter some text and click Annotate..."), outputs=[ - gr.HighlightedText(label="Processed Text"), + gr.HighlightedText(label="Processed Text", elem_id="highlighted-text-output"), gr.Dataframe(label="Annotations", headers=headers, interactive=False), ], examples=[demo_content.short_example, 
demo_content.long_example], @@ -167,3 +176,15 @@ def perform_named_entity_resolution(input_text: str): article=demo_content.article_footer, submit_btn="Annotate", ) + + +def mount_gradio_app(app, path: str = "/demo") -> None: + """ + Mount the Gradio interface to the FastAPI app with a custom theme. + + Args: + app: The FastAPI application instance + path: The path at which to mount the Gradio app (default: "/demo") + """ + theme = gr.themes.Default(primary_hue="blue", secondary_hue="teal") + gr.mount_gradio_app(app, io, path=path, theme=theme, css=highlighted_text_css) diff --git a/medcat-service/medcat_service/main.py b/medcat-service/medcat_service/main.py index f9b3adb30..1a260f000 100644 --- a/medcat-service/medcat_service/main.py +++ b/medcat-service/medcat_service/main.py @@ -3,12 +3,11 @@ import logging import logging.config -import gradio as gr from fastapi import FastAPI, Request from fastapi.responses import JSONResponse from medcat_service.config import Settings -from medcat_service.demo.gradio_demo import io +from medcat_service.demo.gradio_demo import mount_gradio_app from medcat_service.dependencies import get_settings from medcat_service.log_config import log_config from medcat_service.routers import admin, health, process @@ -37,9 +36,7 @@ app.include_router(health.router) app.include_router(process.router) -theme = gr.themes.Default(primary_hue="blue", secondary_hue="teal") - -app = gr.mount_gradio_app(app, io, path="/demo", theme=theme) +mount_gradio_app(app, path="/demo") def configure_observability(settings: Settings, app: FastAPI): From eecf7b92777228e9a79c2d696fd3461098fd71cd Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 8 Jan 2026 14:04:25 +0000 Subject: [PATCH 08/25] test(medcat-service): Create gradio logic tests. 
Split into its own file --- .../medcat_service/demo/demo_logic.py | 142 ++++++++++++++ .../medcat_service/demo/gradio_demo.py | 138 +------------- .../medcat_service/test/demo/__init__.py | 0 .../test/demo/test_demo_logic.py | 179 ++++++++++++++++++ 4 files changed, 324 insertions(+), 135 deletions(-) create mode 100644 medcat-service/medcat_service/demo/demo_logic.py create mode 100644 medcat-service/medcat_service/test/demo/__init__.py create mode 100644 medcat-service/medcat_service/test/demo/test_demo_logic.py diff --git a/medcat-service/medcat_service/demo/demo_logic.py b/medcat-service/medcat_service/demo/demo_logic.py new file mode 100644 index 000000000..391577b25 --- /dev/null +++ b/medcat-service/medcat_service/demo/demo_logic.py @@ -0,0 +1,142 @@ +""" +This module provides conversion utilities between the MedCAT output format +and the exact format expected by Gradio components, specifically aligning +with the output schema of Hugging Face Transformers pipelines (e.g., for +NER highlighting). Use these definitions and helper functions to bridge +MedCAT's annotation results and Gradio's interactive demo expectations. 
+""" + +from pydantic import BaseModel + +from medcat_service.dependencies import get_medcat_processor, get_settings +from medcat_service.types import ProcessAPIInputContent +from medcat_service.types_entities import Entity + + +class EntityAnnotation(BaseModel): + """ + Expected data format for NER in gradio + """ + + entity: str + score: float + index: int + word: str + start: int + end: int + + +headers = ["Pretty Name", "Identifier", "Confidence Score", "Start Index", "End Index", "ID"] + + +class EntityAnnotationDisplay(BaseModel): + """ + DIsplay data format for use in a datatable + """ + + pretty_name: str + identifier: str + score: float + start: int + end: int + id: int + # Misisng Meta Anns + + +class EntityResponse(BaseModel): + """ + Expected data format of gradio highlightedtext component + """ + + entities: list[EntityAnnotation] + text: str + + +def convert_annotation_to_ner_model(entity: Entity, index: int) -> EntityAnnotation: + return EntityAnnotation( + entity=entity.get("cui", "UNKNOWN"), + score=entity.get("acc", 0.0), + index=index, + word=entity.get("detected_name", ""), + start=entity.get("start", -1), + end=entity.get("end", -1), + ) + + +def convert_annotation_to_display_model(entity: Entity) -> EntityAnnotationDisplay: + return EntityAnnotationDisplay( + pretty_name=entity.get("pretty_name", ""), + identifier=entity.get("cui", "UNKNOWN"), + score=entity.get("acc", 0.0), + start=entity.get("start", -1), + end=entity.get("end", -1), + id=entity.get("id", -1), + # medcat-demo-app/webapp/demo/views.py + # if key == 'meta_anns': + # meta_anns=ent.get("meta_anns", {}) + # if meta_anns: + # for meta_ann in meta_anns.keys(): + # new_ent[meta_ann]=meta_anns[meta_ann]['value'] + ) + + +def convert_entity_dict_to_annotations(entity_dict_list: list[dict[str, Entity]]) -> list[EntityAnnotation]: + annotations: list[EntityAnnotation] = [] + for entity_dict in entity_dict_list: + for key, entity in entity_dict.items(): + 
annotations.append(convert_annotation_to_ner_model(entity, index=int(key))) + return annotations + + +def convert_entity_dict_to_display_model(entity_dict_list: list[dict[str, Entity]]) -> list[EntityAnnotationDisplay]: + annotations: list[EntityAnnotationDisplay] = [] + for entity_dict in entity_dict_list: + for key, entity in entity_dict.items(): + annotations.append(convert_annotation_to_display_model(entity)) + return annotations + + +def convert_display_model_to_list_of_lists(entity_display_model: list[EntityAnnotationDisplay]) -> list[list[str]]: + return [[str(getattr(entity, field)) for field in entity.model_fields] for entity in entity_display_model] + + +def perform_named_entity_resolution(input_text: str): + """ + Performs clinical coding by processing the input text with MedCAT to extract and + annotate medical concepts (entities). + + Returns: + 1. A dictionary following the NER response model (EntityResponse), containing the original text + and the list of detected entities. + 2. A datatable-compatible list of lists, where each sublist represents an entity annotation and + its attributes for display purposes. + + This method is used as the main function for the Gradio MedCAT demo and MCP server, + enabling users to input free text and receive automatic annotation and coding of clinical entities. + + Args: + input_text (str): The input text to be processed and annotated for medical entities by MedCAT. + + Returns: + Tuple: + - dict: A dictionary following the NER response model (EntityResponse), containing the + original text and the list of detected entities. + - list[list[str]]: A datatable-compatible list of lists, where each sublist represents an + entity annotation and its attributes for display purposes. 
+ + """ + if not input_text or not input_text.strip(): + return None, None + + processor = get_medcat_processor(get_settings()) + input = ProcessAPIInputContent(text=input_text) + + result = processor.process_content(input.model_dump()) + + entity_ner_format: list[EntityAnnotation] = convert_entity_dict_to_annotations(result.annotations) + + annotations_as_display_format = convert_entity_dict_to_display_model(result.annotations) + response_datatable_format = convert_display_model_to_list_of_lists(annotations_as_display_format) + + response: EntityResponse = EntityResponse(entities=entity_ner_format, text=input_text) + return response.model_dump(), response_datatable_format diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index 2f992d0f6..b5b2cd420 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -1,144 +1,11 @@ import gradio as gr -from pydantic import BaseModel import medcat_service.demo.demo_content as demo_content -from medcat_service.dependencies import get_medcat_processor, get_settings -from medcat_service.types import ProcessAPIInputContent -from medcat_service.types_entities import Entity - - -class EntityAnnotation(BaseModel): - """ - Expected data format for NER in gradio - """ - - entity: str - score: float - index: int - word: str - start: int - end: int - +from medcat_service.demo.demo_logic import perform_named_entity_resolution +from medcat_service.dependencies import get_settings headers = ["Pretty Name", "Identifier", "Confidence Score", "Start Index", "End Index", "ID"] - -class EntityAnnotationDisplay(BaseModel): - """ - DIsplay data format for use in a datatable - """ - - pretty_name: str - identifier: str - score: float - start: int - end: int - id: int - # Misisng Meta Anns - - -class EntityResponse(BaseModel): - """ - Expected data format of gradio highlightedtext component - """ - - entities: 
list[EntityAnnotation] - text: str - - -def convert_annotation_to_ner_model(entity: Entity, index: int) -> EntityAnnotation: - return EntityAnnotation( - entity=entity.get("cui", "UNKNOWN"), - score=entity.get("acc", 0.0), - index=index, - word=entity.get("detected_name", ""), - start=entity.get("start", -1), - end=entity.get("end", -1), - ) - - -def convert_annotation_to_display_model(entity: Entity) -> EntityAnnotationDisplay: - return EntityAnnotationDisplay( - pretty_name=entity.get("pretty_name", ""), - identifier=entity.get("cui", "UNKNOWN"), - score=entity.get("acc", 0.0), - start=entity.get("start", -1), - end=entity.get("end", -1), - id=entity.get("id", -1), - # medcat-demo-app/webapp/demo/views.py - # if key == 'meta_anns': - # meta_anns=ent.get("meta_anns", {}) - # if meta_anns: - # for meta_ann in meta_anns.keys(): - # new_ent[meta_ann]=meta_anns[meta_ann]['value'] - ) - - -def convert_entity_dict_to_annotations(entity_dict_list: list[dict[str, Entity]]) -> list[EntityAnnotation]: - annotations: list[EntityAnnotation] = [] - for entity_dict in entity_dict_list: - for key, entity in entity_dict.items(): - annotations.append(convert_annotation_to_ner_model(entity, index=int(key))) - return annotations - - -def convert_entity_dict_to_display_model(entity_dict_list: list[dict[str, Entity]]) -> list[EntityAnnotationDisplay]: - annotations: list[EntityAnnotationDisplay] = [] - for entity_dict in entity_dict_list: - for key, entity in entity_dict.items(): - annotations.append(convert_annotation_to_display_model(entity)) - return annotations - - -def convert_display_model_to_list_of_lists(entity_display_model: list[EntityAnnotationDisplay]) -> list[list[str]]: - return [[str(getattr(entity, field)) for field in entity.model_fields] for entity in entity_display_model] - - -def perform_named_entity_resolution(input_text: str): - """ - Performs clinical coding by processing the input text with MedCAT to extract and - annotate medical concepts (entities). 
- - Returns: - 1. A dictionary following the NER response model (EntityResponse), containing the original text - and the list of detected entities. - 2. A datatable-compatible list of lists, where each sublist represents an entity annotation and - its attributes for display purposes. - - This method is used as the main function for the Gradio MedCAT demo and MCP server, - enabling users to input free text and receive automatic annotation and coding of clinical entities. - - Args: - input_text (str): The input text to be processed and annotated for medical entities by MedCAT. - - Returns: - Tuple: - - dict: A dictionary following the NER response model (EntityResponse), containing the - original text and the list of detected entities. - - list[list[str]]: A datatable-compatible list of lists, where each sublist represents an - entity annotation and its attributes for display purposes. - - """ - if not input_text or not input_text.strip(): - return None, None - - processor = get_medcat_processor(get_settings()) - input = ProcessAPIInputContent(text=input_text) - - result = processor.process_content(input.model_dump()) - - entity_ner_format: list[EntityAnnotation] = convert_entity_dict_to_annotations(result.annotations) - - annotations_as_display_format = convert_entity_dict_to_display_model(result.annotations) - response_datatable_format = convert_display_model_to_list_of_lists(annotations_as_display_format) - - response: EntityResponse = EntityResponse(entities=entity_ner_format, text=input_text) - return response.model_dump(), response_datatable_format - - -settings = get_settings() - - # CSS to set max height with scrollbar for HighlightedText output # Target the component container and its content highlighted_text_css = """ @@ -147,6 +14,7 @@ def perform_named_entity_resolution(input_text: str): overflow-y: auto; } """ +settings = get_settings() if settings.deid_mode: io = gr.Interface( diff --git a/medcat-service/medcat_service/test/demo/__init__.py 
b/medcat-service/medcat_service/test/demo/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/medcat-service/medcat_service/test/demo/test_demo_logic.py b/medcat-service/medcat_service/test/demo/test_demo_logic.py new file mode 100644 index 000000000..9716cdab5 --- /dev/null +++ b/medcat-service/medcat_service/test/demo/test_demo_logic.py @@ -0,0 +1,179 @@ +""" +Unit tests for demo logic functions, specifically perform_named_entity_resolution. +""" +import unittest +from unittest.mock import patch + +from medcat_service.config import Settings +from medcat_service.demo.demo_logic import EntityResponse, perform_named_entity_resolution +from medcat_service.nlp_processor import MedCatProcessor +from medcat_service.test.common import ( + get_example_long_document, + get_example_short_document, + setup_medcat_processor, +) + + +class TestDemoLogic(unittest.TestCase): + """ + Test cases for demo logic functions. + """ + + processor: MedCatProcessor + + @classmethod + def setUpClass(cls): + """Set up test fixtures once before all test methods.""" + setup_medcat_processor() + cls.processor = MedCatProcessor(Settings()) + + def setUp(self): + """Set up test fixtures before each test method.""" + self.test_text = get_example_short_document() + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_with_valid_text(self, mock_get_processor, mock_get_settings): + """Test perform_named_entity_resolution with valid input text.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + # Execute + result_dict, result_table = perform_named_entity_resolution(self.test_text) + + # Assert + self.assertIsNotNone(result_dict) + self.assertIsNotNone(result_table) + assert result_dict is not None # Type narrowing for type checker + assert result_table is not None # Type narrowing for type 
checker + self.assertIn("text", result_dict) + self.assertIn("entities", result_dict) + self.assertEqual(result_dict["text"], self.test_text) + self.assertIsInstance(result_dict["entities"], list) + self.assertIsInstance(result_table, list) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_with_empty_string(self, mock_get_processor, mock_get_settings): + """Test perform_named_entity_resolution with empty string.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + # Execute + result_dict, result_table = perform_named_entity_resolution("") + + # Assert + self.assertIsNone(result_dict) + self.assertIsNone(result_table) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_with_whitespace_only(self, mock_get_processor, mock_get_settings): + """Test perform_named_entity_resolution with whitespace-only string.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + # Execute + result_dict, result_table = perform_named_entity_resolution(" \n\t ") + + # Assert + self.assertIsNone(result_dict) + self.assertIsNone(result_table) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_response_structure(self, mock_get_processor, mock_get_settings): + """Test that the response has the correct structure.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + # Execute + result_dict, result_table = perform_named_entity_resolution(self.test_text) + + # Assert structure + self.assertIsNotNone(result_dict) + assert result_dict is not 
None # Type narrowing for type checker + self.assertIn("text", result_dict) + self.assertIn("entities", result_dict) + self.assertEqual(result_dict["text"], self.test_text) + + # Check entity structure if entities exist + if result_dict["entities"]: + entity = result_dict["entities"][0] + self.assertIn("entity", entity) + self.assertIn("score", entity) + self.assertIn("index", entity) + self.assertIn("word", entity) + self.assertIn("start", entity) + self.assertIn("end", entity) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_table_format(self, mock_get_processor, mock_get_settings): + """Test that the table format is correct.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + # Execute + result_dict, result_table = perform_named_entity_resolution(self.test_text) + + # Assert table structure + self.assertIsNotNone(result_table) + self.assertIsInstance(result_table, list) + # If there are annotations, check the structure + if result_table: + self.assertIsInstance(result_table[0], list) + # Should have 6 columns based on headers + if result_table[0]: + self.assertEqual(len(result_table[0]), 6) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_with_long_text(self, mock_get_processor, mock_get_settings): + """Test perform_named_entity_resolution with longer text.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + long_text = get_example_long_document() + + # Execute + result_dict, result_table = perform_named_entity_resolution(long_text) + + # Assert + self.assertIsNotNone(result_dict) + self.assertIsNotNone(result_table) + assert result_dict is not None # Type narrowing for type checker 
+ self.assertEqual(result_dict["text"], long_text) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_returns_entity_response_format( + self, mock_get_processor, mock_get_settings + ): + """Test that the result can be validated as EntityResponse format.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + # Execute + result_dict, result_table = perform_named_entity_resolution(self.test_text) + + # Assert - validate the dict can be converted to EntityResponse + self.assertIsNotNone(result_dict) + assert result_dict is not None # Type narrowing for type checker + try: + response = EntityResponse(**result_dict) + self.assertEqual(response.text, self.test_text) + self.assertIsInstance(response.entities, list) + except Exception as e: + self.fail(f"Result dict should be valid EntityResponse format: {e}") + + +if __name__ == "__main__": + unittest.main() From c0550921d4547b8d4b3e7ad7fad377ed719d35f8 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 8 Jan 2026 16:06:17 +0000 Subject: [PATCH 09/25] refactor(medcat-service): Update start_service_debug.sh for clarity and improve test coverage in test_demo_logic.py --- .../test/demo/test_demo_logic.py | 116 +++++++++++++++++- medcat-service/start_service_debug.sh | 9 +- 2 files changed, 120 insertions(+), 5 deletions(-) diff --git a/medcat-service/medcat_service/test/demo/test_demo_logic.py b/medcat-service/medcat_service/test/demo/test_demo_logic.py index 9716cdab5..f56fb677d 100644 --- a/medcat-service/medcat_service/test/demo/test_demo_logic.py +++ b/medcat-service/medcat_service/test/demo/test_demo_logic.py @@ -1,8 +1,9 @@ """ Unit tests for demo logic functions, specifically perform_named_entity_resolution. 
""" +import json import unittest -from unittest.mock import patch +from unittest.mock import MagicMock, patch from medcat_service.config import Settings from medcat_service.demo.demo_logic import EntityResponse, perform_named_entity_resolution @@ -174,6 +175,119 @@ def test_perform_named_entity_resolution_returns_entity_response_format( except Exception as e: self.fail(f"Result dict should be valid EntityResponse format: {e}") + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_with_mocked_get_entities( + self, mock_get_processor, mock_get_settings + ): + """Test perform_named_entity_resolution with mocked get_entities returning JSON data.""" + # Mock entities data inline as JSON string + mock_annotations_json = """ + { + "annotations": [ + { + "1": { + "pretty_name": "Cerebral Hemorrhage", + "cui": "C2937358", + "type_ids": [ + "T046" + ], + "source_value": "Intracerebral hemorrhage", + "detected_name": "intracerebral~hemorrhage", + "acc": 1, + "context_similarity": 1, + "start": 13, + "end": 37, + "id": 1, + "meta_anns": { + "Status": { + "value": "Affirmed", + "confidence": 0.9999077320098877, + "name": "Status" + } + }, + "context_left": [], + "context_center": [], + "context_right": [], + "icd10": [ + { + "chapter": "I61", + "name": "Intracerebral haemorrhage" + }, + { + "chapter": "I61.9", + "name": "Intracerebral haemorrhage, unspecified" + } + ], + "snomed": [ + "S-1508000", + "S-155389003", + "S-155391006", + "S-155394003", + "S-195163003", + "S-195173001", + "S-266313001", + "S-274100004" + ] + } + } + ] + } + """ + mock_annotations_data = json.loads(mock_annotations_json) + + # Create a mock processor + mock_processor = MagicMock(spec=MedCatProcessor) + + # Mock process_content to return a ProcessResult with the expected structure + from medcat_service.types import ProcessResult + + mock_process_result = ProcessResult( + text=self.test_text, + 
annotations=mock_annotations_data["annotations"], + success=True, + timestamp="2024-01-01T00:00:00Z", + elapsed_time=0.1, + ) + mock_processor.process_content.return_value = mock_process_result + + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = mock_processor + + # Expected result as JSON string for readability + expected = json.dumps( + { + "text": self.test_text, + "entities": [ + { + "entity": "C2937358", + "score": 1.0, + "index": 1, + "word": "intracerebral~hemorrhage", + "start": 13, + "end": 37, + } + ], + }, + indent=2, + sort_keys=True, + ) + + # Execute + actual_dict, result_table = perform_named_entity_resolution(self.test_text) + + # Assert + self.assertIsNotNone(actual_dict) + assert actual_dict is not None # Type narrowing for type checker + actual = json.dumps(actual_dict, indent=2, sort_keys=True) + self.assertEqual(expected, actual) + + # Verify process_content was called with correct input + mock_processor.process_content.assert_called_once() + call_args = mock_processor.process_content.call_args[0][0] + self.assertEqual(call_args["text"], self.test_text) + if __name__ == "__main__": unittest.main() diff --git a/medcat-service/start_service_debug.sh b/medcat-service/start_service_debug.sh index 44cce7741..798d66d3f 100644 --- a/medcat-service/start_service_debug.sh +++ b/medcat-service/start_service_debug.sh @@ -2,9 +2,9 @@ echo "Starting MedCAT Service" # Optional - Enable DeID mode with: -export APP_MEDCAT_MODEL_PACK="models/examples/example-deid-model-pack.zip" -export DEID_MODE=True -export DEID_REDACT=True +# export APP_MEDCAT_MODEL_PACK="models/examples/example-deid-model-pack.zip" +# export DEID_MODE=True +# export DEID_REDACT=True if [ -z "${APP_MODEL_CDB_PATH}" ] && [ -z "${APP_MODEL_VOCAB_PATH}" ] && [ -z "${APP_MEDCAT_MODEL_PACK}" ]; then export APP_MEDCAT_MODEL_PACK="models/examples/example-medcat-v2-model-pack.zip" @@ -13,10 +13,11 @@ fi export 
APP_ENABLE_METRICS=${APP_ENABLE_METRICS:-True} -if HOT_MODULE_RELOADING=True; then +if [ "${HOT_MODULE_RELOADING}" = "True" ]; then # Experimental: Hot module reloading. Need to `pip install -r requirements-dev.txt` echo "Running medcat-service with hot module reloading" uvicorn-hmr medcat_service/main:app --refresh --reload-include 'medcat_service' else fastapi dev medcat_service/main.py fi + From d78f2e358fbe316d22f0e40fd45ae3f664312028 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 8 Jan 2026 17:37:46 +0000 Subject: [PATCH 10/25] feat(medcat-service): Enhance Gradio demo layout and add logging for entity resolution --- .../medcat_service/demo/demo_logic.py | 17 +++- .../medcat_service/demo/gradio_demo.py | 85 +++++++++++++------ 2 files changed, 73 insertions(+), 29 deletions(-) diff --git a/medcat-service/medcat_service/demo/demo_logic.py b/medcat-service/medcat_service/demo/demo_logic.py index 391577b25..87e8479ed 100644 --- a/medcat-service/medcat_service/demo/demo_logic.py +++ b/medcat-service/medcat_service/demo/demo_logic.py @@ -6,12 +6,16 @@ MedCAT's annotation results and Gradio's interactive demo expectations. 
""" +import logging + from pydantic import BaseModel from medcat_service.dependencies import get_medcat_processor, get_settings from medcat_service.types import ProcessAPIInputContent from medcat_service.types_entities import Entity +logger = logging.getLogger(__name__) + class EntityAnnotation(BaseModel): """ @@ -89,7 +93,7 @@ def convert_entity_dict_to_annotations(entity_dict_list: list[dict[str, Entity]] def convert_entity_dict_to_display_model(entity_dict_list: list[dict[str, Entity]]) -> list[EntityAnnotationDisplay]: - annotations: list[EntityAnnotationDisplay] = [] + logger.debug("Converting entity dict to display model") annotations: list[EntityAnnotationDisplay] = [] for entity_dict in entity_dict_list: for key, entity in entity_dict.items(): annotations.append(convert_annotation_to_display_model(entity)) @@ -97,7 +101,10 @@ def convert_entity_dict_to_display_model(entity_dict_list: list[dict[str, Entity def convert_display_model_to_list_of_lists(entity_display_model: list[EntityAnnotationDisplay]) -> list[list[str]]: - return [[str(getattr(entity, field)) for field in entity.model_fields] for entity in entity_display_model] + return [ + [str(getattr(entity, field)) for field in EntityAnnotationDisplay.model_fields] + for entity in entity_display_model + ] def perform_named_entity_resolution(input_text: str): @@ -125,6 +132,7 @@ def perform_named_entity_resolution(input_text: str): entity annotation and its attributes for display purposes. 
""" + logger.debug("Performing named entity resolution") if not input_text or not input_text.strip(): return None, None @@ -135,8 +143,11 @@ def perform_named_entity_resolution(input_text: str): entity_ner_format: list[EntityAnnotation] = convert_entity_dict_to_annotations(result.annotations) + logger.debug("Converting entity dict to display model") annotations_as_display_format = convert_entity_dict_to_display_model(result.annotations) response_datatable_format = convert_display_model_to_list_of_lists(annotations_as_display_format) response: EntityResponse = EntityResponse(entities=entity_ner_format, text=input_text) - return response.model_dump(), response_datatable_format + result = response.model_dump(), response_datatable_format + logger.debug("Returning final result") + return result diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index b5b2cd420..f9efc82fe 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -17,33 +17,66 @@ settings = get_settings() if settings.deid_mode: - io = gr.Interface( - fn=perform_named_entity_resolution, - inputs=gr.Textbox(label="Input Text", lines=3, placeholder="Enter some text and click Annotate..."), - outputs=[ - gr.HighlightedText(label="Processed Text", elem_id="highlighted-text-output"), - gr.Dataframe(label="Annotations", headers=headers, interactive=False), - ], - examples=[demo_content.short_example, demo_content.anoncat_example], - title="AnonCAT Demo", - flagging_mode="never", - article=demo_content.anoncat_help_content, - submit_btn="Deidentify", - ) + with gr.Blocks(title="AnonCAT Demo", fill_width=True) as io: + gr.Markdown("# AnonCAT Demo") + with gr.Row(): + with gr.Column(): + input_text = gr.Textbox( + label="Input Text", + lines=3, + placeholder="Enter some text and click Deidentify..." 
+ ) + examples = gr.Examples( + examples=[demo_content.short_example, demo_content.anoncat_example], + inputs=input_text, + ) + with gr.Row(): + clear_btn = gr.Button("Clear", variant="secondary") + deid_btn = gr.Button("Deidentify", variant="primary") + + with gr.Column(): + highlighted = gr.HighlightedText(label="Processed Text", elem_id="highlighted-text-output") + dataframe = gr.Dataframe(label="Annotations", headers=headers, interactive=False, max_chars=4) + deid_btn.click( + perform_named_entity_resolution, + inputs=input_text, + outputs=[highlighted, dataframe] + ) + clear_btn.click( + lambda: ("", None, None), + outputs=[input_text, highlighted, dataframe] + ) + gr.Markdown(demo_content.anoncat_help_content) else: - io = gr.Interface( - fn=perform_named_entity_resolution, - inputs=gr.Textbox(label="Input Text", lines=6, placeholder="Enter some text and click Annotate..."), - outputs=[ - gr.HighlightedText(label="Processed Text", elem_id="highlighted-text-output"), - gr.Dataframe(label="Annotations", headers=headers, interactive=False), - ], - examples=[demo_content.short_example, demo_content.long_example], - title="MedCAT Demo", - flagging_mode="never", - article=demo_content.article_footer, - submit_btn="Annotate", - ) + with gr.Blocks(title="MedCAT Demo", fill_width=True) as io: + gr.Markdown("# MedCAT Demo") + with gr.Row(): + with gr.Column(): + input_text = gr.Textbox( + label="Input Text", + lines=6, + placeholder="Enter some text and click Annotate..." 
+ ) + examples = gr.Examples( + examples=[demo_content.short_example, demo_content.long_example], + inputs=input_text, + ) + with gr.Row(): + clear_btn = gr.Button("Clear", variant="secondary") + annotate_btn = gr.Button("Annotate", variant="primary") + with gr.Column(): + highlighted = gr.HighlightedText(label="Processed Text", elem_id="highlighted-text-output") + dataframe = gr.Dataframe(label="Annotations", headers=headers, interactive=False, max_chars=50) + annotate_btn.click( + perform_named_entity_resolution, + inputs=input_text, + outputs=[highlighted, dataframe] + ) + clear_btn.click( + lambda: ("", None, None), + outputs=[input_text, highlighted, dataframe] + ) + gr.Markdown(demo_content.article_footer) def mount_gradio_app(app, path: str = "/demo") -> None: From 5c443f19c211fb77f78759f9eca28658d8d62625 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 2 Feb 2026 14:44:24 +0000 Subject: [PATCH 11/25] fix(medct-service): fix syntax --- medcat-service/medcat_service/demo/demo_logic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/medcat-service/medcat_service/demo/demo_logic.py b/medcat-service/medcat_service/demo/demo_logic.py index 87e8479ed..2afad8fae 100644 --- a/medcat-service/medcat_service/demo/demo_logic.py +++ b/medcat-service/medcat_service/demo/demo_logic.py @@ -93,7 +93,8 @@ def convert_entity_dict_to_annotations(entity_dict_list: list[dict[str, Entity]] def convert_entity_dict_to_display_model(entity_dict_list: list[dict[str, Entity]]) -> list[EntityAnnotationDisplay]: - logger.debug("Converting entity dict to display model") annotations: list[EntityAnnotationDisplay] = [] + logger.debug("Converting entity dict to display model") + annotations: list[EntityAnnotationDisplay] = [] for entity_dict in entity_dict_list: for key, entity in entity_dict.items(): annotations.append(convert_annotation_to_display_model(entity)) From 760abd18b5c226058c68eab31cd6ff79cc737992 Mon 
Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 2 Feb 2026 14:53:15 +0000 Subject: [PATCH 12/25] build(medcat-service): Update gradio version --- medcat-service/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat-service/requirements.txt b/medcat-service/requirements.txt index cf81d21c5..a1b302f58 100644 --- a/medcat-service/requirements.txt +++ b/medcat-service/requirements.txt @@ -9,7 +9,7 @@ requests==2.32.4 fastapi[standard]==0.128.0 pydantic>=2.11.10,<2.12.5 pydantic-settings==2.10.1 -gradio[mcp]==6.2.0 +gradio[mcp]==6.5.1 prometheus-fastapi-instrumentator==7.1.0 opentelemetry-distro[otlp]==0.60b0 opentelemetry-instrumentation==0.60b0 From 8e179fe35896040add027e0b5e875cf5fbcbde7a Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 2 Feb 2026 15:33:51 +0000 Subject: [PATCH 13/25] refactor(medcat-service): Move example files to txt files --- .../medcat_service/demo/anoncat_example.txt | 71 ++++++++++ .../medcat_service/demo/anoncat_help_content.txt | 23 ++++ .../medcat_service/demo/article_footer.txt | 7 + .../medcat_service/demo/demo_content.py | 129 ++---------------- .../medcat_service/demo/gradio_demo.py | 20 ++- .../medcat_service/demo/long_example.txt | 10 ++ .../medcat_service/demo/short_example.txt | 1 + 7 files changed, 138 insertions(+), 123 deletions(-) create mode 100644 medcat-service/medcat_service/demo/anoncat_example.txt create mode 100644 medcat-service/medcat_service/demo/anoncat_help_content.txt create mode 100644 medcat-service/medcat_service/demo/article_footer.txt create mode 100644 medcat-service/medcat_service/demo/long_example.txt create mode 100644 medcat-service/medcat_service/demo/short_example.txt diff --git a/medcat-service/medcat_service/demo/anoncat_example.txt b/medcat-service/medcat_service/demo/anoncat_example.txt new file mode 100644 index 000000000..961d33ef3 --- /dev/null +++
b/medcat-service/medcat_service/demo/anoncat_example.txt @@ -0,0 +1,71 @@ +Patient Information: + +Name: John Parkinson +Date of Birth: February 12, 1958 +Gender: Male +Address: 789 Wellness Lane, Healthville, HV 56789 +Phone: (555) 555-1234 +Email: john.parkinson@email.com +Emergency Contact: + +Name: Mary Parkinson +Relationship: Spouse +Phone: (555) 555-5678 +Insurance Information: + +Insurance Provider: HealthWell Assurance +Policy Number: HW765432109 +Group Number: G876543 +Medical History: + +Allergies: + +None reported +Medications: + +Levodopa/Carbidopa for Parkinson's disease symptoms +Pramipexole for restless legs syndrome +Lisinopril for hypertension +Atorvastatin for hyperlipidemia +Metformin for Type 2 Diabetes +Medical Conditions: + +Parkinson's Disease (diagnosed on June 20, 2015) +Hypertension +Hyperlipidemia +Type 2 Diabetes +Osteoarthritis +Vital Signs: + +Blood Pressure: 130/80 mmHg +Heart Rate: 72 bpm +Temperature: 98.4°F +Respiratory Rate: 18 breaths per minute +Recent Inpatient Stay (Dates: September 1-10, 2023): + +Reason for Admission: Acute exacerbation of Parkinson's symptoms, pneumonia, and uncontrolled diabetes. + +Interventions: + +Neurology Consultation for Parkinson's disease management adjustments. +Antibiotic therapy for pneumonia. +Continuous glucose monitoring and insulin therapy for diabetes control. +Physical therapy sessions to maintain mobility. +Complications: + +Delirium managed with close monitoring and appropriate interventions. +Discharge Plan: + +Medication adjustments for Parkinson's disease. +Follow-up appointments with neurologist, endocrinologist, and primary care. +Home health care for continued physical therapy. +Follow-up Visits: + +Date: October 15, 2023 + +Reason for Visit: Post-discharge Follow-up +Notes: Stable Parkinson's symptoms, pneumonia resolved. Adjusted diabetes medications for better control. 
+Date: December 5, 2023 + +Reason for Visit: Neurology Follow-up +Notes: Fine-tuned Parkinson's medication regimen. Recommended ongoing physical therapy. \ No newline at end of file diff --git a/medcat-service/medcat_service/demo/anoncat_help_content.txt b/medcat-service/medcat_service/demo/anoncat_help_content.txt new file mode 100644 index 000000000..e96f57d23 --- /dev/null +++ b/medcat-service/medcat_service/demo/anoncat_help_content.txt @@ -0,0 +1,23 @@ +Demo app for the deidentification of private health information using the CogStack AnonCAT model + +Please DO NOT test with any real sensitive PHI data. + +Local validation and fine-tuning available via [MedCATtrainer]( +https://github.com/CogStack/cogstack-nlp/tree/main/medcat-trainer). +Email us, [contact@cogstack.org](mailto:contact@cogstack.org), to discuss model access, +model performance, and your use case. + +The following PHI items have been trained: + +| PHI Item | Description | +|----------|-------------| +| NHS Number | UK National Health Service Numbers. | +| Name | All names, first, middle, last of patients, relatives, care providers etc. Importantly, does not redact conditions that are named after a name, e.g. "Parkinson's disease". | +| Date of Birth | DOBs. Does not include other dates that may be in the record, i.e. dates of visit etc. | +| Hospital Number | A unique number provided by the hospital. Distinct from the NHS number | +| Address Line | Address lines - first, second, third or fourth | +| Postcode | UK postal codes - 6 or 7 alphanumeric codes as part of addresses | +| Telephone Number | Telephone numbers, extensions, mobile / cell phone numbers | +| Email | Email addresses | +| Initials | Patient, relatives, care provider name initials.
| + diff --git a/medcat-service/medcat_service/demo/article_footer.txt b/medcat-service/medcat_service/demo/article_footer.txt new file mode 100644 index 000000000..2c2f13c3e --- /dev/null +++ b/medcat-service/medcat_service/demo/article_footer.txt @@ -0,0 +1,7 @@ +## Disclaimer +This software is intended solely for testing purposes and non-commercial use. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED. + +contact@cogstack.com for more information. + +Please note this is a limited version of MedCAT and it is not trained or validated by clinicians. + diff --git a/medcat-service/medcat_service/demo/demo_content.py b/medcat-service/medcat_service/demo/demo_content.py index 3092443b9..450972881 100644 --- a/medcat-service/medcat_service/demo/demo_content.py +++ b/medcat-service/medcat_service/demo/demo_content.py @@ -1,121 +1,16 @@ +import importlib.resources +from functools import cache -short_example = "John had been diagnosed with acute Kidney Failure the week before" +@cache +def _read_file(filename: str) -> str: + package = importlib.resources.files(__package__ or 'medcat_service.demo') + file_path = package / filename + return file_path.read_text(encoding='utf-8') -long_example = """Description: Intracerebral hemorrhage (very acute clinical changes occurred immediately). -CC: Left hand numbness on presentation; then developed lethargy later that day. -HX: On the day of presentation, this 72 y/o RHM suddenly developed generalized weakness and lightheadedness, and could not rise from a chair. Four hours later he experienced sudden left hand numbness lasting two hours. There were no other associated symptoms except for the generalized weakness and lightheadedness. He denied vertigo. - -He had been experiencing falling spells without associated LOC up to several times a month for the past year. - -MEDS: procardia SR, Lasix, Ecotrin, KCL, Digoxin, Colace, Coumadin.
- -PMH: 1)8/92 evaluation for presyncope (Echocardiogram showed: AV fibrosis/calcification, AV stenosis/insufficiency, MV stenosis with annular calcification and regurgitation, moderate TR, Decreased LV systolic function, severe LAE. MRI brain: focal areas of increased T2 signal in the left cerebellum and in the brainstem probably representing microvascular ischemic disease. IVG (MUGA scan)revealed: global hypokinesis of the LV and biventricular dysfunction, RV ejection Fx 45% and LV ejection Fx 39%. He was subsequently placed on coumadin severe valvular heart disease), 2)HTN, 3)Rheumatic fever and heart disease, 4)COPD, 5)ETOH abuse, 6)colonic polyps, 7)CAD, 8)CHF, 9)Appendectomy, 10)Junctional tachycardia. -""" # noqa: E501 - -article_footer = """ -## Disclaimer -This software is intended solely for the testing purposes and non-commercial use. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED. - -contact@cogstack.com for more information. - -Please note this is a limited version of MedCAT and it is not trained or validated by clinicans. 
-""" # noqa: E501 - -anoncat_example = """Patient Information: - -Name: John Parkinson -Date of Birth: February 12, 1958 -Gender: Male -Address: 789 Wellness Lane, Healthville, HV 56789 -Phone: (555) 555-1234 -Email: john.parkinson@email.com -Emergency Contact: - -Name: Mary Parkinson -Relationship: Spouse -Phone: (555) 555-5678 -Insurance Information: - -Insurance Provider: HealthWell Assurance -Policy Number: HW765432109 -Group Number: G876543 -Medical History: - -Allergies: - -None reported -Medications: - -Levodopa/Carbidopa for Parkinson's disease symptoms -Pramipexole for restless legs syndrome -Lisinopril for hypertension -Atorvastatin for hyperlipidemia -Metformin for Type 2 Diabetes -Medical Conditions: - -Parkinson's Disease (diagnosed on June 20, 2015) -Hypertension -Hyperlipidemia -Type 2 Diabetes -Osteoarthritis -Vital Signs: - -Blood Pressure: 130/80 mmHg -Heart Rate: 72 bpm -Temperature: 98.4°F -Respiratory Rate: 18 breaths per minute -Recent Inpatient Stay (Dates: September 1-10, 2023): - -Reason for Admission: Acute exacerbation of Parkinson's symptoms, pneumonia, and uncontrolled diabetes. - -Interventions: - -Neurology Consultation for Parkinson's disease management adjustments. -Antibiotic therapy for pneumonia. -Continuous glucose monitoring and insulin therapy for diabetes control. -Physical therapy sessions to maintain mobility. -Complications: - -Delirium managed with close monitoring and appropriate interventions. -Discharge Plan: - -Medication adjustments for Parkinson's disease. -Follow-up appointments with neurologist, endocrinologist, and primary care. -Home health care for continued physical therapy. -Follow-up Visits: - -Date: October 15, 2023 - -Reason for Visit: Post-discharge Follow-up -Notes: Stable Parkinson's symptoms, pneumonia resolved. Adjusted diabetes medications for better control. -Date: December 5, 2023 - -Reason for Visit: Neurology Follow-up -Notes: Fine-tuned Parkinson's medication regimen. 
Recommended ongoing physical therapy. -""" # noqa: E501 - -anoncat_help_content = """Demo app for the deidentification of private health information using the CogStack AnonCAT model - -Please DO NOT test with any real sensitive PHI data. - -Local validation and fine-tuning available via [MedCATtrainer]( -https://github.com/CogStack/cogstack-nlp/tree/main/medcat-trainer). -Email us, [contact@cogstack.org](mailto:contact@cogstack.org), to discuss model access, -model performance, and your use case. - -The following PHI items have been trained: - -| PHI Item | Description | -|----------|-------------| -| NHS Number | UK National Health Service Numbers. | -| Name | All names, first, middle, last of patients, relatives, care providers etc. Importantly, does not redact conditions that are named after a name, e.g. "Parkinsons's disease". | -| Date of Birth | DOBs. Does not include other dates that may be in the record, i.e. dates of visit etc. | -| Hospital Number | A unique number provided by the hospital. Distinct from the NHS number | -| Address Line | Address lines - first, second, third or fourth | -| Postcode | UK postal codes - 6 or 7 alphanumeric codes as part of addresses | -| Telephone Number | Telephone numbers, extensions, mobile / cell phone numbers | -| Email | Email addresses | -| Initials | Patient, relatives, care provider name initials. | -""" # noqa: E501 +short_example = _read_file('short_example.txt') +long_example = _read_file('long_example.txt') +anoncat_example = _read_file('anoncat_example.txt') +article_footer = _read_file('article_footer.txt') +anoncat_help_content = _read_file('anoncat_help_content.txt') diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index f9efc82fe..82380e505 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -27,8 +27,11 @@ placeholder="Enter some text and click Deidentify..." 
) examples = gr.Examples( - examples=[demo_content.short_example, demo_content.anoncat_example], + examples=[demo_content.short_example, demo_content.anoncat_example], inputs=input_text, + example_labels=["Short Example", + "Note with personally identifiable information"] + ) with gr.Row(): clear_btn = gr.Button("Clear", variant="secondary") @@ -57,15 +60,20 @@ lines=6, placeholder="Enter some text and click Annotate..." ) - examples = gr.Examples( - examples=[demo_content.short_example, demo_content.long_example], - inputs=input_text, - ) + with gr.Row(): + examples = gr.Examples( + examples=[demo_content.short_example, demo_content.long_example, demo_content.anoncat_example], + inputs=input_text, + example_labels=["Short Example", + "Patient Discharge Summary in Neurology", + "Note with personally identifiable information" ] + ) with gr.Row(): clear_btn = gr.Button("Clear", variant="secondary") annotate_btn = gr.Button("Annotate", variant="primary") with gr.Column(): - highlighted = gr.HighlightedText(label="Processed Text", elem_id="highlighted-text-output") + highlighted = gr.HighlightedText( + label="Processed Text", elem_id="highlighted-text-output") dataframe = gr.Dataframe(label="Annotations", headers=headers, interactive=False, max_chars=50) annotate_btn.click( perform_named_entity_resolution, diff --git a/medcat-service/medcat_service/demo/long_example.txt b/medcat-service/medcat_service/demo/long_example.txt new file mode 100644 index 000000000..80cf4aff2 --- /dev/null +++ b/medcat-service/medcat_service/demo/long_example.txt @@ -0,0 +1,10 @@ +Description: Intracerebral hemorrhage (very acute clinical changes occurred immediately). +CC: Left hand numbness on presentation; then developed lethargy later that day. + +HX: On the day of presentation, this 72 y/o RHM suddenly developed generalized weakness and lightheadedness, and could not rise from a chair. Four hours later he experienced sudden left hand numbness lasting two hours. 
There were no other associated symptoms except for the generalized weakness and lightheadedness. He denied vertigo. + +He had been experiencing falling spells without associated LOC up to several times a month for the past year. + +MEDS: procardia SR, Lasix, Ecotrin, KCL, Digoxin, Colace, Coumadin. + +PMH: 1)8/92 evaluation for presyncope (Echocardiogram showed: AV fibrosis/calcification, AV stenosis/insufficiency, MV stenosis with annular calcification and regurgitation, moderate TR, Decreased LV systolic function, severe LAE. MRI brain: focal areas of increased T2 signal in the left cerebellum and in the brainstem probably representing microvascular ischemic disease. IVG (MUGA scan)revealed: global hypokinesis of the LV and biventricular dysfunction, RV ejection Fx 45% and LV ejection Fx 39%. He was subsequently placed on coumadin severe valvular heart disease), 2)HTN, 3)Rheumatic fever and heart disease, 4)COPD, 5)ETOH abuse, 6)colonic polyps, 7)CAD, 8)CHF, 9)Appendectomy, 10)Junctional tachycardia. 
\ No newline at end of file diff --git a/medcat-service/medcat_service/demo/short_example.txt b/medcat-service/medcat_service/demo/short_example.txt new file mode 100644 index 000000000..1c99c7c78 --- /dev/null +++ b/medcat-service/medcat_service/demo/short_example.txt @@ -0,0 +1 @@ +John had been diagnosed with acute Kidney Failure the week before \ No newline at end of file From f1daebf0c01b097ce000a0a4fea403a34059f215 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 2 Feb 2026 17:20:35 +0000 Subject: [PATCH 14/25] feat(medcat-service): In demo Click on annotation to view details --- .../medcat_service/demo/gradio_demo.py | 74 +++++++++++++++++-- 1 file changed, 68 insertions(+), 6 deletions(-) diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index 82380e505..83b7e0a67 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -16,6 +16,39 @@ """ settings = get_settings() +default_annotation_details = "**No annotation selected**\n\nClick on a highlighted entity to view its details." + + +def format_annotation_details(row, selected_text: str): + """Format a pandas Series row as markdown for display.""" + if row is None: + return "**No annotation selected**\n\nClick on a highlighted entity to view its details." 
+ + pretty_name = row.get('Pretty Name', 'N/A') + identifier = row.get('Identifier', 'N/A') + confidence = row.get('Confidence Score', 0.0) + start_idx = row.get('Start Index', -1) + end_idx = row.get('End Index', -1) + entity_id = row.get('ID', -1) + + confidence_pct = float(confidence) * 100 + + details = f"""## Annotation Details +**Input Text:** {selected_text} + +**Entity Name:** {pretty_name} + +**Identifier (CUI):** `{identifier}` + +**Confidence Score:** {confidence_pct:.2f}% + +**Text Position:** Start: `{start_idx}` → End: `{end_idx}` + +**Entity ID:** `{entity_id}` +""" + return details + + if settings.deid_mode: with gr.Blocks(title="AnonCAT Demo", fill_width=True) as io: gr.Markdown("# AnonCAT Demo") @@ -38,7 +71,8 @@ deid_btn = gr.Button("Deidentify", variant="primary") with gr.Column(): - highlighted = gr.HighlightedText(label="Processed Text", elem_id="highlighted-text-output") + highlighted = gr.HighlightedText( + label="Processed Text", elem_id="highlighted-text-output", interactive=False) dataframe = gr.Dataframe(label="Annotations", headers=headers, interactive=False, max_chars=4) deid_btn.click( perform_named_entity_resolution, @@ -52,6 +86,23 @@ gr.Markdown(demo_content.anoncat_help_content) else: with gr.Blocks(title="MedCAT Demo", fill_width=True) as io: + + def on_select(value, annotation_details, dataframe, evt: gr.SelectData): + """ + Important things to know: Adding the type gr.SelectData actually changes the data passed + + Then the index appears hacky. The highlighted text selected item has indices, but they are not the indices + in the datatable. It looks like index 0 is always '', then it always inserts the text between annotations + as another index. So we need to divide by 2 to get the correct index. 
+ """ + datatable_index = (evt.index - 1) // 2 + selected_text = evt.value[0] + if dataframe is not None and datatable_index < len(dataframe): + row = dataframe.iloc[datatable_index] + return format_annotation_details(row, selected_text) + else: + return "**No annotation selected**\n\nClick on a highlighted entity to view its details." + gr.Markdown("# MedCAT Demo") with gr.Row(): with gr.Column(): @@ -66,23 +117,34 @@ inputs=input_text, example_labels=["Short Example", "Patient Discharge Summary in Neurology", - "Note with personally identifiable information" ] + "Note with personally identifiable information"] ) with gr.Row(): clear_btn = gr.Button("Clear", variant="secondary") annotate_btn = gr.Button("Annotate", variant="primary") with gr.Column(): highlighted = gr.HighlightedText( - label="Processed Text", elem_id="highlighted-text-output") - dataframe = gr.Dataframe(label="Annotations", headers=headers, interactive=False, max_chars=50) + label="Processed Text", elem_id="highlighted-text-output", interactive=False) + annotation_details = gr.Markdown( + label="Annotation Details", + value=default_annotation_details + ) + with gr.Accordion(label="All Annotations", open=False): + dataframe = gr.Dataframe(label="All Annotations", headers=headers, interactive=False, max_chars=50) + highlighted.select(on_select, [highlighted, annotation_details, dataframe], outputs=annotation_details) + annotate_btn.click( + lambda: (default_annotation_details), + outputs=[annotation_details] + ) annotate_btn.click( perform_named_entity_resolution, inputs=input_text, outputs=[highlighted, dataframe] ) + clear_btn.click( - lambda: ("", None, None), - outputs=[input_text, highlighted, dataframe] + lambda: ("", None, None, default_annotation_details), + outputs=[input_text, highlighted, dataframe, annotation_details] ) gr.Markdown(demo_content.article_footer) From a9e9227c77c793e8a3873090d3ea51d3155e9e23 Mon Sep 17 00:00:00 2001 From: alhendrickson 
<159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 2 Feb 2026 17:25:09 +0000 Subject: [PATCH 15/25] feat(medcat-service): In demo Click on annotation to view details - text --- medcat-service/medcat_service/demo/gradio_demo.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index 83b7e0a67..d3c46d885 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -16,7 +16,7 @@ """ settings = get_settings() -default_annotation_details = "**No annotation selected**\n\nClick on a highlighted entity to view its details." +annotation_details_placeholder_text = "Click on a highlighted entity to view its details" def format_annotation_details(row, selected_text: str): @@ -33,7 +33,7 @@ def format_annotation_details(row, selected_text: str): confidence_pct = float(confidence) * 100 - details = f"""## Annotation Details + details = f"""### Annotation Details **Input Text:** {selected_text} **Entity Name:** {pretty_name} @@ -101,7 +101,7 @@ def on_select(value, annotation_details, dataframe, evt: gr.SelectData): row = dataframe.iloc[datatable_index] return format_annotation_details(row, selected_text) else: - return "**No annotation selected**\n\nClick on a highlighted entity to view its details." 
+ return annotation_details_placeholder_text gr.Markdown("# MedCAT Demo") with gr.Row(): @@ -127,13 +127,13 @@ def on_select(value, annotation_details, dataframe, evt: gr.SelectData): label="Processed Text", elem_id="highlighted-text-output", interactive=False) annotation_details = gr.Markdown( label="Annotation Details", - value=default_annotation_details + value=annotation_details_placeholder_text ) with gr.Accordion(label="All Annotations", open=False): dataframe = gr.Dataframe(label="All Annotations", headers=headers, interactive=False, max_chars=50) highlighted.select(on_select, [highlighted, annotation_details, dataframe], outputs=annotation_details) annotate_btn.click( - lambda: (default_annotation_details), + lambda: (annotation_details_placeholder_text), outputs=[annotation_details] ) annotate_btn.click( @@ -143,7 +143,7 @@ def on_select(value, annotation_details, dataframe, evt: gr.SelectData): ) clear_btn.click( - lambda: ("", None, None, default_annotation_details), + lambda: ("", None, None, annotation_details_placeholder_text), outputs=[input_text, highlighted, dataframe, annotation_details] ) gr.Markdown(demo_content.article_footer) From 6d0f7c7acac46949b8cf97b57b43bb3ad575e9d5 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 2 Feb 2026 17:31:48 +0000 Subject: [PATCH 16/25] feat(medcat-service): In demo Click on annotation to view details - text --- .../medcat_service/demo/gradio_demo.py | 104 ++++++++---------- 1 file changed, 44 insertions(+), 60 deletions(-) diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index d3c46d885..130b26095 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -24,12 +24,12 @@ def format_annotation_details(row, selected_text: str): if row is None: return "**No annotation selected**\n\nClick on a highlighted entity to view its 
details." - pretty_name = row.get('Pretty Name', 'N/A') - identifier = row.get('Identifier', 'N/A') - confidence = row.get('Confidence Score', 0.0) - start_idx = row.get('Start Index', -1) - end_idx = row.get('End Index', -1) - entity_id = row.get('ID', -1) + pretty_name = row.get("Pretty Name", "N/A") + identifier = row.get("Identifier", "N/A") + confidence = row.get("Confidence Score", 0.0) + start_idx = row.get("Start Index", -1) + end_idx = row.get("End Index", -1) + entity_id = row.get("ID", -1) confidence_pct = float(confidence) * 100 @@ -49,22 +49,37 @@ def format_annotation_details(row, selected_text: str): return details +def on_select(value, annotation_details, dataframe, evt: gr.SelectData): + """ + On select of annotations in the highlighted text component. + + Important things to know: Adding the type gr.SelectData actually changes the data passed + + Then the index appears hacky. The highlighted text selected item has indices, but they are not the indices + in the datatable. It looks like index 0 is always '', then it always inserts the text between annotations + as another index. So we need to divide by 2 to get the correct index. + """ + datatable_index = (evt.index - 1) // 2 + selected_text = evt.value[0] + if dataframe is not None and datatable_index < len(dataframe): + row = dataframe.iloc[datatable_index] + return format_annotation_details(row, selected_text) + else: + return annotation_details_placeholder_text + + if settings.deid_mode: with gr.Blocks(title="AnonCAT Demo", fill_width=True) as io: gr.Markdown("# AnonCAT Demo") with gr.Row(): with gr.Column(): input_text = gr.Textbox( - label="Input Text", - lines=3, - placeholder="Enter some text and click Deidentify..." + label="Input Text", lines=3, placeholder="Enter some text and click Deidentify..." 
) examples = gr.Examples( - examples=[demo_content.short_example, demo_content.anoncat_example], + examples=[demo_content.short_example, demo_content.anoncat_example], inputs=input_text, - example_labels=["Short Example", - "Note with personally identifiable information"] - + example_labels=["Short Example", "Note with personally identifiable information"], ) with gr.Row(): clear_btn = gr.Button("Clear", variant="secondary") @@ -72,79 +87,48 @@ def format_annotation_details(row, selected_text: str): with gr.Column(): highlighted = gr.HighlightedText( - label="Processed Text", elem_id="highlighted-text-output", interactive=False) + label="Processed Text", elem_id="highlighted-text-output", interactive=False + ) dataframe = gr.Dataframe(label="Annotations", headers=headers, interactive=False, max_chars=4) - deid_btn.click( - perform_named_entity_resolution, - inputs=input_text, - outputs=[highlighted, dataframe] - ) - clear_btn.click( - lambda: ("", None, None), - outputs=[input_text, highlighted, dataframe] - ) + deid_btn.click(perform_named_entity_resolution, inputs=input_text, outputs=[highlighted, dataframe]) + clear_btn.click(lambda: ("", None, None), outputs=[input_text, highlighted, dataframe]) gr.Markdown(demo_content.anoncat_help_content) else: with gr.Blocks(title="MedCAT Demo", fill_width=True) as io: - - def on_select(value, annotation_details, dataframe, evt: gr.SelectData): - """ - Important things to know: Adding the type gr.SelectData actually changes the data passed - - Then the index appears hacky. The highlighted text selected item has indices, but they are not the indices - in the datatable. It looks like index 0 is always '', then it always inserts the text between annotations - as another index. So we need to divide by 2 to get the correct index. 
- """ - datatable_index = (evt.index - 1) // 2 - selected_text = evt.value[0] - if dataframe is not None and datatable_index < len(dataframe): - row = dataframe.iloc[datatable_index] - return format_annotation_details(row, selected_text) - else: - return annotation_details_placeholder_text - gr.Markdown("# MedCAT Demo") with gr.Row(): with gr.Column(): input_text = gr.Textbox( - label="Input Text", - lines=6, - placeholder="Enter some text and click Annotate..." + label="Input Text", lines=6, placeholder="Enter some text and click Annotate..." ) with gr.Row(): examples = gr.Examples( examples=[demo_content.short_example, demo_content.long_example, demo_content.anoncat_example], inputs=input_text, - example_labels=["Short Example", - "Patient Discharge Summary in Neurology", - "Note with personally identifiable information"] + example_labels=[ + "Short Example", + "Patient Discharge Summary in Neurology", + "Note with personally identifiable information", + ], ) with gr.Row(): clear_btn = gr.Button("Clear", variant="secondary") annotate_btn = gr.Button("Annotate", variant="primary") with gr.Column(): highlighted = gr.HighlightedText( - label="Processed Text", elem_id="highlighted-text-output", interactive=False) - annotation_details = gr.Markdown( - label="Annotation Details", - value=annotation_details_placeholder_text + label="Processed Text", elem_id="highlighted-text-output", interactive=False ) + annotation_details = gr.Markdown(label="Annotation Details", value=annotation_details_placeholder_text) with gr.Accordion(label="All Annotations", open=False): dataframe = gr.Dataframe(label="All Annotations", headers=headers, interactive=False, max_chars=50) highlighted.select(on_select, [highlighted, annotation_details, dataframe], outputs=annotation_details) - annotate_btn.click( - lambda: (annotation_details_placeholder_text), - outputs=[annotation_details] - ) - annotate_btn.click( - perform_named_entity_resolution, - inputs=input_text, - outputs=[highlighted, 
dataframe] - ) + + annotate_btn.click(lambda: (annotation_details_placeholder_text), outputs=[annotation_details]) + annotate_btn.click(perform_named_entity_resolution, inputs=input_text, outputs=[highlighted, dataframe]) clear_btn.click( lambda: ("", None, None, annotation_details_placeholder_text), - outputs=[input_text, highlighted, dataframe, annotation_details] + outputs=[input_text, highlighted, dataframe, annotation_details], ) gr.Markdown(demo_content.article_footer) From 3ebfdc1f7c32e7bab20ac086504e4b2b75462d44 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 2 Feb 2026 17:35:29 +0000 Subject: [PATCH 17/25] feat(medcat-service): In demo Click on annotation to view details - ruff --- medcat-service/medcat_service/demo/gradio_demo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index 130b26095..582b1a959 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -122,7 +122,7 @@ def on_select(value, annotation_details, dataframe, evt: gr.SelectData): with gr.Accordion(label="All Annotations", open=False): dataframe = gr.Dataframe(label="All Annotations", headers=headers, interactive=False, max_chars=50) highlighted.select(on_select, [highlighted, annotation_details, dataframe], outputs=annotation_details) - + annotate_btn.click(lambda: (annotation_details_placeholder_text), outputs=[annotation_details]) annotate_btn.click(perform_named_entity_resolution, inputs=input_text, outputs=[highlighted, dataframe]) From 5da1b9d3297806acac41d8e487626d99959692fd Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 2 Feb 2026 18:15:20 +0000 Subject: [PATCH 18/25] feat(medcat-service): In demo move resource txt files to subfolder --- .../medcat_service/demo/demo_content.py | 2 
+- .../medcat_service/demo/demo_logic.py | 16 ++++++++- .../medcat_service/demo/gradio_demo.py | 34 ++++++++++++++----- .../medcat_service/demo/resources/__init__.py | 2 ++ .../demo/{ => resources}/anoncat_example.txt | 0 .../{ => resources}/anoncat_help_content.txt | 0 .../demo/{ => resources}/article_footer.txt | 0 .../demo/{ => resources}/long_example.txt | 0 .../demo/{ => resources}/short_example.txt | 0 9 files changed, 43 insertions(+), 11 deletions(-) create mode 100644 medcat-service/medcat_service/demo/resources/__init__.py rename medcat-service/medcat_service/demo/{ => resources}/anoncat_example.txt (100%) rename medcat-service/medcat_service/demo/{ => resources}/anoncat_help_content.txt (100%) rename medcat-service/medcat_service/demo/{ => resources}/article_footer.txt (100%) rename medcat-service/medcat_service/demo/{ => resources}/long_example.txt (100%) rename medcat-service/medcat_service/demo/{ => resources}/short_example.txt (100%) diff --git a/medcat-service/medcat_service/demo/demo_content.py b/medcat-service/medcat_service/demo/demo_content.py index 450972881..045f1ce2a 100644 --- a/medcat-service/medcat_service/demo/demo_content.py +++ b/medcat-service/medcat_service/demo/demo_content.py @@ -5,7 +5,7 @@ @cache def _read_file(filename: str) -> str: package = importlib.resources.files(__package__ or 'medcat_service.demo') - file_path = package / filename + file_path = package / 'resources' / filename return file_path.read_text(encoding='utf-8') diff --git a/medcat-service/medcat_service/demo/demo_logic.py b/medcat-service/medcat_service/demo/demo_logic.py index 2afad8fae..3073e9103 100644 --- a/medcat-service/medcat_service/demo/demo_logic.py +++ b/medcat-service/medcat_service/demo/demo_logic.py @@ -149,6 +149,20 @@ def perform_named_entity_resolution(input_text: str): response_datatable_format = convert_display_model_to_list_of_lists(annotations_as_display_format) response: EntityResponse = EntityResponse(entities=entity_ner_format, 
text=input_text) - result = response.model_dump(), response_datatable_format + result = response.model_dump(), response_datatable_format, result.text logger.debug("Returning final result") return result + +def medcat_demo_perform_named_entity_resolution(input_text: str): + """ + Performs named entity resolution for the MedCAT demo. + """ + result = perform_named_entity_resolution(input_text) + return result[0], result[1] + +def anoncat_demo_perform_deidentification(input_text: str): + """ + Performs deidentification for the AnonCAT demo. + """ + result = perform_named_entity_resolution(input_text) + return result \ No newline at end of file diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index 582b1a959..aff789170 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -1,7 +1,7 @@ import gradio as gr import medcat_service.demo.demo_content as demo_content -from medcat_service.demo.demo_logic import perform_named_entity_resolution +from medcat_service.demo.demo_logic import medcat_demo_perform_named_entity_resolution, anoncat_demo_perform_deidentification from medcat_service.dependencies import get_settings headers = ["Pretty Name", "Identifier", "Confidence Score", "Start Index", "End Index", "ID"] @@ -83,15 +83,30 @@ def on_select(value, annotation_details, dataframe, evt: gr.SelectData): ) with gr.Row(): clear_btn = gr.Button("Clear", variant="secondary") - deid_btn = gr.Button("Deidentify", variant="primary") + annotate_btn = gr.Button("Deidentify", variant="primary") with gr.Column(): - highlighted = gr.HighlightedText( - label="Processed Text", elem_id="highlighted-text-output", interactive=False - ) - dataframe = gr.Dataframe(label="Annotations", headers=headers, interactive=False, max_chars=4) - deid_btn.click(perform_named_entity_resolution, inputs=input_text, outputs=[highlighted, dataframe]) - clear_btn.click(lambda: ("", 
None, None), outputs=[input_text, highlighted, dataframe]) + with gr.Tab("Deidentification"): + deidentified_text = gr.Textbox(label="Deidentified Text", value="", interactive=False) + with gr.Tab("Details"): + + highlighted = gr.HighlightedText( + label="Processed Text", elem_id="highlighted-text-output", interactive=False + ) + annotation_details = gr.Markdown(label="Annotation Details", + value=annotation_details_placeholder_text) + dataframe = gr.Dataframe(label="All Annotations", headers=headers, interactive=False, max_chars=50) + + highlighted.select(on_select, [highlighted, annotation_details, dataframe], outputs=annotation_details) + + annotate_btn.click(anoncat_demo_perform_deidentification, + inputs=input_text, outputs=[highlighted, dataframe, deidentified_text]) + annotate_btn.click(lambda: (annotation_details_placeholder_text), outputs=[annotation_details]) + + clear_btn.click( + lambda: ("", None, None, annotation_details_placeholder_text), + outputs=[input_text, highlighted, dataframe, annotation_details], + ) gr.Markdown(demo_content.anoncat_help_content) else: with gr.Blocks(title="MedCAT Demo", fill_width=True) as io: @@ -124,7 +139,8 @@ def on_select(value, annotation_details, dataframe, evt: gr.SelectData): highlighted.select(on_select, [highlighted, annotation_details, dataframe], outputs=annotation_details) annotate_btn.click(lambda: (annotation_details_placeholder_text), outputs=[annotation_details]) - annotate_btn.click(perform_named_entity_resolution, inputs=input_text, outputs=[highlighted, dataframe]) + annotate_btn.click(medcat_demo_perform_named_entity_resolution, + inputs=input_text, outputs=[highlighted, dataframe]) clear_btn.click( lambda: ("", None, None, annotation_details_placeholder_text), diff --git a/medcat-service/medcat_service/demo/resources/__init__.py b/medcat-service/medcat_service/demo/resources/__init__.py new file mode 100644 index 000000000..03158aaa5 --- /dev/null +++ 
b/medcat-service/medcat_service/demo/resources/__init__.py @@ -0,0 +1,2 @@ +# Resources directory for demo content files + diff --git a/medcat-service/medcat_service/demo/anoncat_example.txt b/medcat-service/medcat_service/demo/resources/anoncat_example.txt similarity index 100% rename from medcat-service/medcat_service/demo/anoncat_example.txt rename to medcat-service/medcat_service/demo/resources/anoncat_example.txt diff --git a/medcat-service/medcat_service/demo/anoncat_help_content.txt b/medcat-service/medcat_service/demo/resources/anoncat_help_content.txt similarity index 100% rename from medcat-service/medcat_service/demo/anoncat_help_content.txt rename to medcat-service/medcat_service/demo/resources/anoncat_help_content.txt diff --git a/medcat-service/medcat_service/demo/article_footer.txt b/medcat-service/medcat_service/demo/resources/article_footer.txt similarity index 100% rename from medcat-service/medcat_service/demo/article_footer.txt rename to medcat-service/medcat_service/demo/resources/article_footer.txt diff --git a/medcat-service/medcat_service/demo/long_example.txt b/medcat-service/medcat_service/demo/resources/long_example.txt similarity index 100% rename from medcat-service/medcat_service/demo/long_example.txt rename to medcat-service/medcat_service/demo/resources/long_example.txt diff --git a/medcat-service/medcat_service/demo/short_example.txt b/medcat-service/medcat_service/demo/resources/short_example.txt similarity index 100% rename from medcat-service/medcat_service/demo/short_example.txt rename to medcat-service/medcat_service/demo/resources/short_example.txt From ec2ff6c340d11afc8dcfdd11d26a53b5c1703a5d Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 2 Feb 2026 18:21:17 +0000 Subject: [PATCH 19/25] feat(medcat-service): In demo move resource txt files to subfolder - lint --- .../medcat_service/demo/gradio_demo.py | 53 +++++++++++-------- 
.../medcat_service/demo/resources/__init__.py | 2 - 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index aff789170..8a96c7559 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -1,7 +1,10 @@ import gradio as gr import medcat_service.demo.demo_content as demo_content -from medcat_service.demo.demo_logic import medcat_demo_perform_named_entity_resolution, anoncat_demo_perform_deidentification +from medcat_service.demo.demo_logic import ( + anoncat_demo_perform_deidentification, + medcat_demo_perform_named_entity_resolution, +) from medcat_service.dependencies import get_settings headers = ["Pretty Name", "Identifier", "Confidence Score", "Start Index", "End Index", "ID"] @@ -72,35 +75,42 @@ def on_select(value, annotation_details, dataframe, evt: gr.SelectData): with gr.Blocks(title="AnonCAT Demo", fill_width=True) as io: gr.Markdown("# AnonCAT Demo") with gr.Row(): - with gr.Column(): - input_text = gr.Textbox( - label="Input Text", lines=3, placeholder="Enter some text and click Deidentify..." - ) - examples = gr.Examples( - examples=[demo_content.short_example, demo_content.anoncat_example], - inputs=input_text, - example_labels=["Short Example", "Note with personally identifiable information"], - ) - with gr.Row(): - clear_btn = gr.Button("Clear", variant="secondary") - annotate_btn = gr.Button("Deidentify", variant="primary") + with gr.Column(): # noqa + with gr.Tab("Input"): + input_text = gr.Textbox( + label="Input Text", lines=3, placeholder="Enter some text and click Deidentify..." 
+ ) + examples = gr.Examples( + examples=[demo_content.short_example, demo_content.anoncat_example], + inputs=input_text, + example_labels=["Short Example", "Note with personally identifiable information"], + ) + with gr.Row(): + clear_btn = gr.Button("Clear", variant="secondary") + annotate_btn = gr.Button("Deidentify", variant="primary") with gr.Column(): with gr.Tab("Deidentification"): deidentified_text = gr.Textbox(label="Deidentified Text", value="", interactive=False) with gr.Tab("Details"): - highlighted = gr.HighlightedText( label="Processed Text", elem_id="highlighted-text-output", interactive=False ) - annotation_details = gr.Markdown(label="Annotation Details", - value=annotation_details_placeholder_text) - dataframe = gr.Dataframe(label="All Annotations", headers=headers, interactive=False, max_chars=50) + annotation_details = gr.Markdown( + label="Annotation Details", value=annotation_details_placeholder_text + ) + with gr.Accordion(label="All Annotations", open=False): + dataframe = gr.Dataframe( + label="All Annotations", headers=headers, interactive=False, max_chars=50 + ) highlighted.select(on_select, [highlighted, annotation_details, dataframe], outputs=annotation_details) - annotate_btn.click(anoncat_demo_perform_deidentification, - inputs=input_text, outputs=[highlighted, dataframe, deidentified_text]) + annotate_btn.click( + anoncat_demo_perform_deidentification, + inputs=input_text, + outputs=[highlighted, dataframe, deidentified_text], + ) annotate_btn.click(lambda: (annotation_details_placeholder_text), outputs=[annotation_details]) clear_btn.click( @@ -139,8 +149,9 @@ def on_select(value, annotation_details, dataframe, evt: gr.SelectData): highlighted.select(on_select, [highlighted, annotation_details, dataframe], outputs=annotation_details) annotate_btn.click(lambda: (annotation_details_placeholder_text), outputs=[annotation_details]) - annotate_btn.click(medcat_demo_perform_named_entity_resolution, - inputs=input_text, 
outputs=[highlighted, dataframe]) + annotate_btn.click( + medcat_demo_perform_named_entity_resolution, inputs=input_text, outputs=[highlighted, dataframe] + ) clear_btn.click( lambda: ("", None, None, annotation_details_placeholder_text), diff --git a/medcat-service/medcat_service/demo/resources/__init__.py b/medcat-service/medcat_service/demo/resources/__init__.py index 03158aaa5..e69de29bb 100644 --- a/medcat-service/medcat_service/demo/resources/__init__.py +++ b/medcat-service/medcat_service/demo/resources/__init__.py @@ -1,2 +0,0 @@ -# Resources directory for demo content files - From 93d8095f7048ce92f04bb8f3a685f08e7f809568 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 2 Feb 2026 18:26:02 +0000 Subject: [PATCH 20/25] feat(medcat-service): In demo move resource txt files to subfolder - lint --- medcat-service/medcat_service/demo/demo_logic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/medcat-service/medcat_service/demo/demo_logic.py b/medcat-service/medcat_service/demo/demo_logic.py index 3073e9103..fcb097dd8 100644 --- a/medcat-service/medcat_service/demo/demo_logic.py +++ b/medcat-service/medcat_service/demo/demo_logic.py @@ -153,6 +153,7 @@ def perform_named_entity_resolution(input_text: str): logger.debug("Returning final result") return result + def medcat_demo_perform_named_entity_resolution(input_text: str): """ Performs named entity resolution for the MedCAT demo. @@ -160,9 +161,10 @@ def medcat_demo_perform_named_entity_resolution(input_text: str): result = perform_named_entity_resolution(input_text) return result[0], result[1] + def anoncat_demo_perform_deidentification(input_text: str): """ Performs deidentification for the AnonCAT demo. 
""" result = perform_named_entity_resolution(input_text) - return result \ No newline at end of file + return result From cdc4a6a974f9744c3585f4fd920e6d56f7aeec8f Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 2 Feb 2026 18:39:07 +0000 Subject: [PATCH 21/25] feat(medcat-service): Support boolean redact flag in deid processor and demo --- medcat-service/medcat_service/demo/demo_logic.py | 8 ++++---- medcat-service/medcat_service/demo/gradio_demo.py | 3 ++- .../medcat_service/nlp_processor/medcat_processor.py | 7 +++++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/medcat-service/medcat_service/demo/demo_logic.py b/medcat-service/medcat_service/demo/demo_logic.py index fcb097dd8..379fc444e 100644 --- a/medcat-service/medcat_service/demo/demo_logic.py +++ b/medcat-service/medcat_service/demo/demo_logic.py @@ -108,7 +108,7 @@ def convert_display_model_to_list_of_lists(entity_display_model: list[EntityAnno ] -def perform_named_entity_resolution(input_text: str): +def perform_named_entity_resolution(input_text: str, redact: bool | None = None): """ Performs clinical coding by processing the input text with MedCAT to extract and annotate medical concepts (entities). @@ -140,7 +140,7 @@ def perform_named_entity_resolution(input_text: str): processor = get_medcat_processor(get_settings()) input = ProcessAPIInputContent(text=input_text) - result = processor.process_content(input.model_dump()) + result = processor.process_content(input.model_dump(), redact=redact) entity_ner_format: list[EntityAnnotation] = convert_entity_dict_to_annotations(result.annotations) @@ -162,9 +162,9 @@ def medcat_demo_perform_named_entity_resolution(input_text: str): return result[0], result[1] -def anoncat_demo_perform_deidentification(input_text: str): +def anoncat_demo_perform_deidentification(input_text: str, redact: bool): """ Performs deidentification for the AnonCAT demo. 
""" - result = perform_named_entity_resolution(input_text) + result = perform_named_entity_resolution(input_text, redact=redact) return result diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index 8a96c7559..eb0da47d7 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -85,6 +85,7 @@ def on_select(value, annotation_details, dataframe, evt: gr.SelectData): inputs=input_text, example_labels=["Short Example", "Note with personally identifiable information"], ) + redact = gr.Checkbox(label="Redact") with gr.Row(): clear_btn = gr.Button("Clear", variant="secondary") annotate_btn = gr.Button("Deidentify", variant="primary") @@ -108,7 +109,7 @@ def on_select(value, annotation_details, dataframe, evt: gr.SelectData): annotate_btn.click( anoncat_demo_perform_deidentification, - inputs=input_text, + inputs=[input_text, redact], outputs=[highlighted, dataframe, deidentified_text], ) annotate_btn.click(lambda: (annotation_details_placeholder_text), outputs=[annotation_details]) diff --git a/medcat-service/medcat_service/nlp_processor/medcat_processor.py b/medcat-service/medcat_service/nlp_processor/medcat_processor.py index 2573b6069..9d3717669 100644 --- a/medcat-service/medcat_service/nlp_processor/medcat_processor.py +++ b/medcat-service/medcat_service/nlp_processor/medcat_processor.py @@ -136,12 +136,14 @@ def process_entities(self, entities, *args, **kwargs): yield entities @tracer.start_as_current_span("process_content") - def process_content(self, content, *args, **kwargs): + def process_content(self, content, *args, redact: bool | None = None, **kwargs): """Processes a single document extracting the annotations. Args: content (dict): Document to be processed, containing "text" field. *args: Variable length argument list. + redact (bool, optional): Whether to redact entities. If not provided, uses + self.service_settings.deid_redact. 
**kwargs: Arbitrary keyword arguments. meta_anns_filters (List[Tuple[str, List[str]]]): List of task and filter values pairs to filter entities by. Example: meta_anns_filters = [("Presence", ["True"]), @@ -171,7 +173,8 @@ def process_content(self, content, *args, **kwargs): if self.service_settings.deid_mode and isinstance(self.cat, DeIdModel): with tracer.start_as_current_span("cat.deid_text"): - text, entities = self.cat.deid_text_with_entities(text, redact=self.service_settings.deid_redact) + redact_value = redact if redact is not None else self.service_settings.deid_redact + text, entities = self.cat.deid_text_with_entities(text, redact=redact_value) else: if text is not None and len(text.strip()) > 0: with tracer.start_as_current_span("cat.get_entities"): From b708c7c5c94558bed976a4193516e0360abfbf92 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 3 Feb 2026 12:13:02 +0000 Subject: [PATCH 22/25] feat(medcat-service): Fix mypy errors --- medcat-service/medcat_service/demo/demo_logic.py | 16 +++++++++++----- .../nlp_processor/medcat_processor.py | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/medcat-service/medcat_service/demo/demo_logic.py b/medcat-service/medcat_service/demo/demo_logic.py index 379fc444e..1dfdd2540 100644 --- a/medcat-service/medcat_service/demo/demo_logic.py +++ b/medcat-service/medcat_service/demo/demo_logic.py @@ -11,7 +11,7 @@ from pydantic import BaseModel from medcat_service.dependencies import get_medcat_processor, get_settings -from medcat_service.types import ProcessAPIInputContent +from medcat_service.types import ProcessAPIInputContent, ProcessErrorsResult, ProcessResult from medcat_service.types_entities import Entity logger = logging.getLogger(__name__) @@ -140,7 +140,14 @@ def perform_named_entity_resolution(input_text: str, redact: bool | None = None) processor = get_medcat_processor(get_settings()) input = ProcessAPIInputContent(text=input_text) - 
result = processor.process_content(input.model_dump(), redact=redact) + process_result = processor.process_content(input.model_dump(), redact=redact) + + if isinstance(process_result, ProcessErrorsResult): + error_msg = ( + "; ".join(process_result.errors) if process_result.errors else "Unknown error occurred during processing" + ) + raise ValueError(f"Processing failed: {error_msg}") + result: ProcessResult = process_result entity_ner_format: list[EntityAnnotation] = convert_entity_dict_to_annotations(result.annotations) @@ -149,9 +156,8 @@ def perform_named_entity_resolution(input_text: str, redact: bool | None = None) response_datatable_format = convert_display_model_to_list_of_lists(annotations_as_display_format) response: EntityResponse = EntityResponse(entities=entity_ner_format, text=input_text) - result = response.model_dump(), response_datatable_format, result.text - logger.debug("Returning final result") - return result + response_tuple = response.model_dump(), response_datatable_format, result.text + return response_tuple def medcat_demo_perform_named_entity_resolution(input_text: str): diff --git a/medcat-service/medcat_service/nlp_processor/medcat_processor.py b/medcat-service/medcat_service/nlp_processor/medcat_processor.py index 9d3717669..206d8c540 100644 --- a/medcat-service/medcat_service/nlp_processor/medcat_processor.py +++ b/medcat-service/medcat_service/nlp_processor/medcat_processor.py @@ -136,7 +136,7 @@ def process_entities(self, entities, *args, **kwargs): yield entities @tracer.start_as_current_span("process_content") - def process_content(self, content, *args, redact: bool | None = None, **kwargs): + def process_content(self, content, *args, redact = None, **kwargs): """Processes a single document extracting the annotations. 
Args: From 2e9e5c92853e2c71648fbe2c0189b8ac6e6cdfb5 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 3 Feb 2026 12:28:46 +0000 Subject: [PATCH 23/25] feat(medcat-service): Fix mypy errors --- medcat-service/medcat_service/nlp_processor/medcat_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat-service/medcat_service/nlp_processor/medcat_processor.py b/medcat-service/medcat_service/nlp_processor/medcat_processor.py index 206d8c540..b117bb32f 100644 --- a/medcat-service/medcat_service/nlp_processor/medcat_processor.py +++ b/medcat-service/medcat_service/nlp_processor/medcat_processor.py @@ -136,7 +136,7 @@ def process_entities(self, entities, *args, **kwargs): yield entities @tracer.start_as_current_span("process_content") - def process_content(self, content, *args, redact = None, **kwargs): + def process_content(self, content, *args, redact=None, **kwargs): """Processes a single document extracting the annotations. 
Args: From bca7d60a3e1f883e2b89500a104f7a72568d70b3 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 3 Feb 2026 12:51:12 +0000 Subject: [PATCH 24/25] feat(medcat-service): Fix unit tests --- .../medcat_service/demo/demo_logic.py | 2 +- .../test/demo/test_demo_logic.py | 230 +++++++++++++++++- 2 files changed, 222 insertions(+), 10 deletions(-) diff --git a/medcat-service/medcat_service/demo/demo_logic.py b/medcat-service/medcat_service/demo/demo_logic.py index 1dfdd2540..050dd66b7 100644 --- a/medcat-service/medcat_service/demo/demo_logic.py +++ b/medcat-service/medcat_service/demo/demo_logic.py @@ -135,7 +135,7 @@ def perform_named_entity_resolution(input_text: str, redact: bool | None = None) """ logger.debug("Performing named entity resolution") if not input_text or not input_text.strip(): - return None, None + return None, None, None processor = get_medcat_processor(get_settings()) input = ProcessAPIInputContent(text=input_text) diff --git a/medcat-service/medcat_service/test/demo/test_demo_logic.py b/medcat-service/medcat_service/test/demo/test_demo_logic.py index f56fb677d..83200ee7b 100644 --- a/medcat-service/medcat_service/test/demo/test_demo_logic.py +++ b/medcat-service/medcat_service/test/demo/test_demo_logic.py @@ -6,7 +6,12 @@ from unittest.mock import MagicMock, patch from medcat_service.config import Settings -from medcat_service.demo.demo_logic import EntityResponse, perform_named_entity_resolution +from medcat_service.demo.demo_logic import ( + EntityResponse, + anoncat_demo_perform_deidentification, + medcat_demo_perform_named_entity_resolution, + perform_named_entity_resolution, +) from medcat_service.nlp_processor import MedCatProcessor from medcat_service.test.common import ( get_example_long_document, @@ -41,18 +46,22 @@ def test_perform_named_entity_resolution_with_valid_text(self, mock_get_processo mock_get_processor.return_value = TestDemoLogic.processor # Execute - result_dict, 
result_table = perform_named_entity_resolution(self.test_text) + result_dict, result_table, result_text = perform_named_entity_resolution(self.test_text) # Assert self.assertIsNotNone(result_dict) self.assertIsNotNone(result_table) + self.assertIsNotNone(result_text) assert result_dict is not None # Type narrowing for type checker assert result_table is not None # Type narrowing for type checker + assert result_text is not None # Type narrowing for type checker self.assertIn("text", result_dict) self.assertIn("entities", result_dict) self.assertEqual(result_dict["text"], self.test_text) self.assertIsInstance(result_dict["entities"], list) self.assertIsInstance(result_table, list) + self.assertIsInstance(result_text, str) + self.assertEqual(result_text, self.test_text) @patch("medcat_service.demo.demo_logic.get_settings") @patch("medcat_service.demo.demo_logic.get_medcat_processor") @@ -63,11 +72,12 @@ def test_perform_named_entity_resolution_with_empty_string(self, mock_get_proces mock_get_processor.return_value = TestDemoLogic.processor # Execute - result_dict, result_table = perform_named_entity_resolution("") + result_dict, result_table, result_text = perform_named_entity_resolution("") # Assert self.assertIsNone(result_dict) self.assertIsNone(result_table) + self.assertIsNone(result_text) @patch("medcat_service.demo.demo_logic.get_settings") @patch("medcat_service.demo.demo_logic.get_medcat_processor") @@ -78,11 +88,12 @@ def test_perform_named_entity_resolution_with_whitespace_only(self, mock_get_pro mock_get_processor.return_value = TestDemoLogic.processor # Execute - result_dict, result_table = perform_named_entity_resolution(" \n\t ") + result_dict, result_table, result_text = perform_named_entity_resolution(" \n\t ") # Assert self.assertIsNone(result_dict) self.assertIsNone(result_table) + self.assertIsNone(result_text) @patch("medcat_service.demo.demo_logic.get_settings") @patch("medcat_service.demo.demo_logic.get_medcat_processor") @@ -93,14 +104,20 @@ 
def test_perform_named_entity_resolution_response_structure(self, mock_get_proce mock_get_processor.return_value = TestDemoLogic.processor # Execute - result_dict, result_table = perform_named_entity_resolution(self.test_text) + result_dict, result_table, result_text = perform_named_entity_resolution(self.test_text) # Assert structure self.assertIsNotNone(result_dict) + self.assertIsNotNone(result_table) + self.assertIsNotNone(result_text) assert result_dict is not None # Type narrowing for type checker + assert result_table is not None # Type narrowing for type checker + assert result_text is not None # Type narrowing for type checker self.assertIn("text", result_dict) self.assertIn("entities", result_dict) self.assertEqual(result_dict["text"], self.test_text) + self.assertIsInstance(result_text, str) + self.assertEqual(result_text, self.test_text) # Check entity structure if entities exist if result_dict["entities"]: @@ -121,11 +138,15 @@ def test_perform_named_entity_resolution_table_format(self, mock_get_processor, mock_get_processor.return_value = TestDemoLogic.processor # Execute - result_dict, result_table = perform_named_entity_resolution(self.test_text) + result_dict, result_table, result_text = perform_named_entity_resolution(self.test_text) # Assert table structure self.assertIsNotNone(result_table) + self.assertIsNotNone(result_text) + assert result_table is not None # Type narrowing for type checker + assert result_text is not None # Type narrowing for type checker self.assertIsInstance(result_table, list) + self.assertIsInstance(result_text, str) # If there are annotations, check the structure if result_table: self.assertIsInstance(result_table[0], list) @@ -144,13 +165,18 @@ def test_perform_named_entity_resolution_with_long_text(self, mock_get_processor long_text = get_example_long_document() # Execute - result_dict, result_table = perform_named_entity_resolution(long_text) + result_dict, result_table, result_text = 
perform_named_entity_resolution(long_text) # Assert self.assertIsNotNone(result_dict) self.assertIsNotNone(result_table) + self.assertIsNotNone(result_text) assert result_dict is not None # Type narrowing for type checker + assert result_table is not None # Type narrowing for type checker + assert result_text is not None # Type narrowing for type checker self.assertEqual(result_dict["text"], long_text) + self.assertIsInstance(result_text, str) + self.assertEqual(result_text, long_text) @patch("medcat_service.demo.demo_logic.get_settings") @patch("medcat_service.demo.demo_logic.get_medcat_processor") @@ -163,17 +189,23 @@ def test_perform_named_entity_resolution_returns_entity_response_format( mock_get_processor.return_value = TestDemoLogic.processor # Execute - result_dict, result_table = perform_named_entity_resolution(self.test_text) + result_dict, result_table, result_text = perform_named_entity_resolution(self.test_text) # Assert - validate the dict can be converted to EntityResponse self.assertIsNotNone(result_dict) + self.assertIsNotNone(result_table) + self.assertIsNotNone(result_text) assert result_dict is not None # Type narrowing for type checker + assert result_table is not None # Type narrowing for type checker + assert result_text is not None # Type narrowing for type checker try: response = EntityResponse(**result_dict) self.assertEqual(response.text, self.test_text) self.assertIsInstance(response.entities, list) except Exception as e: self.fail(f"Result dict should be valid EntityResponse format: {e}") + self.assertIsInstance(result_text, str) + self.assertEqual(result_text, self.test_text) @patch("medcat_service.demo.demo_logic.get_settings") @patch("medcat_service.demo.demo_logic.get_medcat_processor") @@ -275,19 +307,199 @@ def test_perform_named_entity_resolution_with_mocked_get_entities( ) # Execute - actual_dict, result_table = perform_named_entity_resolution(self.test_text) + actual_dict, result_table, actual_text = 
perform_named_entity_resolution(self.test_text) # Assert self.assertIsNotNone(actual_dict) + self.assertIsNotNone(result_table) + self.assertIsNotNone(actual_text) assert actual_dict is not None # Type narrowing for type checker + assert result_table is not None # Type narrowing for type checker + assert actual_text is not None # Type narrowing for type checker actual = json.dumps(actual_dict, indent=2, sort_keys=True) self.assertEqual(expected, actual) + self.assertIsInstance(actual_text, str) + self.assertEqual(actual_text, self.test_text) # Verify process_content was called with correct input mock_processor.process_content.assert_called_once() call_args = mock_processor.process_content.call_args[0][0] self.assertEqual(call_args["text"], self.test_text) + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_medcat_demo_perform_named_entity_resolution_returns_first_two_values( + self, mock_get_processor, mock_get_settings + ): + """Test that medcat_demo_perform_named_entity_resolution returns the first 2 values.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + # Execute + full_result = perform_named_entity_resolution(self.test_text) + medcat_result = medcat_demo_perform_named_entity_resolution(self.test_text) + + # Assert + self.assertEqual(len(full_result), 3) + self.assertEqual(len(medcat_result), 2) + self.assertEqual(medcat_result[0], full_result[0]) + self.assertEqual(medcat_result[1], full_result[1]) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_anoncat_demo_perform_deidentification_with_redact_true( + self, mock_get_processor, mock_get_settings + ): + """Test anoncat_demo_perform_deidentification with redact=True.""" + # Mock entities data + mock_annotations_json = """ + { + "annotations": [ + { + "1": { + 
"pretty_name": "Test Entity", + "cui": "C123456", + "type_ids": ["T001"], + "source_value": "test entity", + "detected_name": "test~entity", + "acc": 0.95, + "context_similarity": 0.9, + "start": 0, + "end": 11, + "id": 1 + } + } + ] + } + """ + mock_annotations_data = json.loads(mock_annotations_json) + + # Create a mock processor + mock_processor = MagicMock(spec=MedCatProcessor) + + # Mock process_content to return a ProcessResult with redacted text + from medcat_service.types import ProcessResult + + redacted_text = "The patient [***] was prescribed with Aspirin, 4-5 tabs daily" + + mock_process_result = ProcessResult( + text=redacted_text, + annotations=mock_annotations_data["annotations"], + success=True, + timestamp="2024-01-01T00:00:00Z", + elapsed_time=0.1, + ) + mock_processor.process_content.return_value = mock_process_result + + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = mock_processor + + # Execute + result = anoncat_demo_perform_deidentification(self.test_text, redact=True) + + # Assert + self.assertIsNotNone(result) + self.assertEqual(len(result), 3) + result_dict, result_table, result_text = result + self.assertIsNotNone(result_dict) + self.assertIsNotNone(result_table) + self.assertIsNotNone(result_text) + assert result_dict is not None # Type narrowing for type checker + assert result_table is not None # Type narrowing for type checker + assert result_text is not None # Type narrowing for type checker + self.assertIn("text", result_dict) + self.assertIn("entities", result_dict) + self.assertIsInstance(result_dict["entities"], list) + self.assertIsInstance(result_table, list) + self.assertIsInstance(result_text, str) + # Verify the text is redacted + self.assertEqual(result_text, redacted_text) + self.assertEqual(result_dict["text"], redacted_text) # dict still has original text + + # Verify process_content was called with redact=True + mock_processor.process_content.assert_called_once() + 
call_kwargs = mock_processor.process_content.call_args[1] + self.assertEqual(call_kwargs.get("redact"), True) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_anoncat_demo_perform_deidentification_with_redact_false( + self, mock_get_processor, mock_get_settings + ): + """Test anoncat_demo_perform_deidentification with redact=False.""" + # Mock entities data + mock_annotations_json = """ + { + "annotations": [ + { + "1": { + "pretty_name": "Test Entity", + "cui": "C123456", + "type_ids": ["T001"], + "source_value": "test entity", + "detected_name": "test~entity", + "acc": 0.95, + "context_similarity": 0.9, + "start": 0, + "end": 11, + "id": 1 + } + } + ] + } + """ + mock_annotations_data = json.loads(mock_annotations_json) + + # Create a mock processor + mock_processor = MagicMock(spec=MedCatProcessor) + + # Mock process_content to return a ProcessResult with unredacted text + from medcat_service.types import ProcessResult + + deidentified_text = "The patient [name] was prescribed with Aspirin, 4-5 tabs daily" + + mock_process_result = ProcessResult( + text=deidentified_text, + annotations=mock_annotations_data["annotations"], + success=True, + timestamp="2024-01-01T00:00:00Z", + elapsed_time=0.1, + ) + mock_processor.process_content.return_value = mock_process_result + + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = mock_processor + + # Execute + result = anoncat_demo_perform_deidentification(self.test_text, redact=False) + + # Assert + self.assertIsNotNone(result) + self.assertEqual(len(result), 3) + result_dict, result_table, result_text = result + self.assertIsNotNone(result_dict) + self.assertIsNotNone(result_table) + self.assertIsNotNone(result_text) + assert result_dict is not None # Type narrowing for type checker + assert result_table is not None # Type narrowing for type checker + assert result_text is not None # Type 
narrowing for type checker + self.assertIn("text", result_dict) + self.assertIn("entities", result_dict) + self.assertIsInstance(result_dict["entities"], list) + self.assertIsInstance(result_table, list) + self.assertIsInstance(result_text, str) + # Verify the text is not redacted + self.assertEqual(result_text, deidentified_text) + self.assertEqual(result_dict["text"], deidentified_text) # dict still has original text + + # Verify process_content was called with redact=False + mock_processor.process_content.assert_called_once() + call_kwargs = mock_processor.process_content.call_args[1] + self.assertEqual(call_kwargs.get("redact"), False) + if __name__ == "__main__": unittest.main() From aeadb78500f7ba33787ee2a56529119a0d666ebf Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 3 Feb 2026 12:57:13 +0000 Subject: [PATCH 25/25] feat(medcat-service): Fix unit tests --- .../test/demo/test_demo_logic.py | 92 ++++++++----------- 1 file changed, 36 insertions(+), 56 deletions(-) diff --git a/medcat-service/medcat_service/test/demo/test_demo_logic.py b/medcat-service/medcat_service/test/demo/test_demo_logic.py index 83200ee7b..4a55be15c 100644 --- a/medcat-service/medcat_service/test/demo/test_demo_logic.py +++ b/medcat-service/medcat_service/test/demo/test_demo_logic.py @@ -1,6 +1,7 @@ """ Unit tests for demo logic functions, specifically perform_named_entity_resolution. 
""" + import json import unittest from unittest.mock import MagicMock, patch @@ -27,6 +28,28 @@ class TestDemoLogic(unittest.TestCase): processor: MedCatProcessor + # Mock annotations JSON for anoncat tests + mock_annotations_json = """ + { + "annotations": [ + { + "1": { + "pretty_name": "Test Entity", + "cui": "C123456", + "type_ids": ["T001"], + "source_value": "test entity", + "detected_name": "test~entity", + "acc": 0.95, + "context_similarity": 0.9, + "start": 0, + "end": 11, + "id": 1 + } + } + ] + } + """ + @classmethod def setUpClass(cls): """Set up test fixtures once before all test methods.""" @@ -209,9 +232,7 @@ def test_perform_named_entity_resolution_returns_entity_response_format( @patch("medcat_service.demo.demo_logic.get_settings") @patch("medcat_service.demo.demo_logic.get_medcat_processor") - def test_perform_named_entity_resolution_with_mocked_get_entities( - self, mock_get_processor, mock_get_settings - ): + def test_perform_named_entity_resolution_with_mocked_get_entities(self, mock_get_processor, mock_get_settings): """Test perform_named_entity_resolution with mocked get_entities returning JSON data.""" # Mock entities data inline as JSON string mock_annotations_json = """ @@ -348,32 +369,10 @@ def test_medcat_demo_perform_named_entity_resolution_returns_first_two_values( @patch("medcat_service.demo.demo_logic.get_settings") @patch("medcat_service.demo.demo_logic.get_medcat_processor") - def test_anoncat_demo_perform_deidentification_with_redact_true( - self, mock_get_processor, mock_get_settings - ): + def test_anoncat_demo_perform_deidentification_with_redact_true(self, mock_get_processor, mock_get_settings): """Test anoncat_demo_perform_deidentification with redact=True.""" # Mock entities data - mock_annotations_json = """ - { - "annotations": [ - { - "1": { - "pretty_name": "Test Entity", - "cui": "C123456", - "type_ids": ["T001"], - "source_value": "test entity", - "detected_name": "test~entity", - "acc": 0.95, - "context_similarity": 
0.9, - "start": 0, - "end": 11, - "id": 1 - } - } - ] - } - """ - mock_annotations_data = json.loads(mock_annotations_json) + mock_annotations_data = json.loads(self.mock_annotations_json) # Create a mock processor mock_processor = MagicMock(spec=MedCatProcessor) @@ -415,8 +414,10 @@ def test_anoncat_demo_perform_deidentification_with_redact_true( self.assertIsInstance(result_table, list) self.assertIsInstance(result_text, str) # Verify the text is redacted - self.assertEqual(result_text, redacted_text) - self.assertEqual(result_dict["text"], redacted_text) # dict still has original text + self.assertEqual(result_text, redacted_text, "output contains redacted text") + self.assertEqual( + result_dict["text"], self.test_text, "dict still has original text for use by highlighted text viewer" + ) # Verify process_content was called with redact=True mock_processor.process_content.assert_called_once() @@ -425,32 +426,10 @@ def test_anoncat_demo_perform_deidentification_with_redact_true( @patch("medcat_service.demo.demo_logic.get_settings") @patch("medcat_service.demo.demo_logic.get_medcat_processor") - def test_anoncat_demo_perform_deidentification_with_redact_false( - self, mock_get_processor, mock_get_settings - ): + def test_anoncat_demo_perform_deidentification_with_redact_false(self, mock_get_processor, mock_get_settings): """Test anoncat_demo_perform_deidentification with redact=False.""" # Mock entities data - mock_annotations_json = """ - { - "annotations": [ - { - "1": { - "pretty_name": "Test Entity", - "cui": "C123456", - "type_ids": ["T001"], - "source_value": "test entity", - "detected_name": "test~entity", - "acc": 0.95, - "context_similarity": 0.9, - "start": 0, - "end": 11, - "id": 1 - } - } - ] - } - """ - mock_annotations_data = json.loads(mock_annotations_json) + mock_annotations_data = json.loads(self.mock_annotations_json) # Create a mock processor mock_processor = MagicMock(spec=MedCatProcessor) @@ -491,9 +470,10 @@ def 
test_anoncat_demo_perform_deidentification_with_redact_false( self.assertIsInstance(result_dict["entities"], list) self.assertIsInstance(result_table, list) self.assertIsInstance(result_text, str) - # Verify the text is not redacted - self.assertEqual(result_text, deidentified_text) - self.assertEqual(result_dict["text"], deidentified_text) # dict still has original text + # Verify the text is deidentified + self.assertEqual(result_text, deidentified_text, "output contains deidentified text") + # dict still has original text for use by highlighted text viewer + self.assertEqual(result_dict["text"], self.test_text) # Verify process_content was called with redact=False mock_processor.process_content.assert_called_once()