sunlabuiuc · sgolara2 · Apr 7, 2026 · Apr 10, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst
@@ -224,6 +224,7 @@ Available Datasets
     datasets/pyhealth.datasets.SampleDataset
     datasets/pyhealth.datasets.MIMIC3Dataset
     datasets/pyhealth.datasets.MIMIC4Dataset
+    datasets/pyhealth.datasets.MRIDataset
     datasets/pyhealth.datasets.MedicalTranscriptionsDataset
     datasets/pyhealth.datasets.CardiologyDataset
     datasets/pyhealth.datasets.eICUDataset

diff --git a/docs/api/datasets/pyhealth.datasets.MRIDataset.rst b/docs/api/datasets/pyhealth.datasets.MRIDataset.rst
@@ -0,0 +1,21 @@
+pyhealth.datasets.MRIDataset
+===================================
+
+The dataset used is the OASIS MRI dataset (https://sites.wustl.edu/oasisbrains/), which consists of 80,000 brain MRI images. The images have been divided into four classes based on Alzheimer's progression. The dataset aims to provide a valuable resource for analyzing and detecting early signs of Alzheimer's disease.
+
+To make the dataset accessible, the original .img and .hdr files were converted to NIfTI format (.nii) using FSL (FMRIB Software Library). The converted MRI images of 461 patients have been uploaded to a GitHub repository, where they are available in multiple parts.
+
+For the neural network training, 2D images were used as input. The brain images were sliced along the z-axis into 256 pieces, and slices ranging from 100 to 160 were selected from each patient. This approach resulted in a comprehensive dataset for analysis.
+
+Patient classification was performed based on the provided metadata and Clinical Dementia Rating (CDR) values, resulting in four classes: demented, very mild demented, mild demented, and non-demented. These classes enable the detection and study of different stages of Alzheimer's disease progression.
+
+During the dataset preparation, the .nii MRI scans were converted to .jpg files. Although this conversion presented some challenges, the files were successfully processed using appropriate tools. The resulting dataset size is 1.3 GB.
+
+With this comprehensive dataset, the project aims to explore various neural network models and achieve optimal results in Alzheimer's disease detection and analysis.
+
+Refer to the `Kaggle dataset page <https://www.kaggle.com/datasets/ninadaithal/imagesoasis>`_ for more information.
+
+.. autoclass:: pyhealth.datasets.MRIDataset
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/api/tasks.rst b/docs/api/tasks.rst
@@ -216,6 +216,7 @@ Available Tasks
     Medical Transcriptions Classification <tasks/pyhealth.tasks.MedicalTranscriptionsClassification>
     Mortality Prediction (Next Visit) <tasks/pyhealth.tasks.mortality_prediction>
     Mortality Prediction (StageNet MIMIC-IV) <tasks/pyhealth.tasks.mortality_prediction_stagenet_mimic4>
+    MRI Binary Classification <tasks/pyhealth.tasks.MRIBinaryClassification>
     Patient Linkage (MIMIC-III) <tasks/pyhealth.tasks.patient_linkage_mimic3_fn>
     Readmission Prediction <tasks/pyhealth.tasks.readmission_prediction>
     Sleep Staging <tasks/pyhealth.tasks.sleep_staging>

diff --git a/docs/api/tasks/pyhealth.tasks.MRIBinaryClassification.rst b/docs/api/tasks/pyhealth.tasks.MRIBinaryClassification.rst
@@ -0,0 +1,7 @@
+pyhealth.tasks.MRIBinaryClassification
+=======================================
+
+.. autoclass:: pyhealth.tasks.MRIBinaryClassification
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/examples/mri_alzheimers.py b/examples/mri_alzheimers.py
@@ -0,0 +1,49 @@
+import os
+import tempfile
+
+from pyhealth.datasets import MRIDataset, get_dataloader, split_by_sample
+from pyhealth.models import CNN
+from pyhealth.processors import NiftiImageProcessor
+from pyhealth.tasks import MRIBinaryClassification
+from pyhealth.trainer import Trainer
+
+# Since PyHealth uses multiprocessing, it is best practice to use a main guard.
+if __name__ == "__main__":
+    # Context managers remove both temp directories even if a step below fails.
+    with tempfile.TemporaryDirectory() as dataset_dir, \
+            tempfile.TemporaryDirectory() as cache_dir:
+        dataset = MRIDataset(
+            root=dataset_dir,
+            cache_dir=cache_dir,
+            download=True,
+            partial=True,
+        )
+        dataset.stats()
+
+        task = MRIBinaryClassification(disease="alzheimer")
+        samples = dataset.set_task(
+            task,
+            input_processors={"image": NiftiImageProcessor()},
+        )
+        try:
+            train_dataset, val_dataset, test_dataset = split_by_sample(
+                samples, [0.7, 0.1, 0.2]
+            )
+
+            train_loader = get_dataloader(train_dataset, batch_size=8, shuffle=True)
+            val_loader = get_dataloader(val_dataset, batch_size=8, shuffle=False)
+            test_loader = get_dataloader(test_dataset, batch_size=8, shuffle=False)
+
+            model = CNN(dataset=samples)
+            # Default to CPU to avoid CUDA runtime mismatch on unsupported GPUs.
+            device = os.environ.get("PYHEALTH_DEVICE", "cpu")
+            trainer = Trainer(model=model, device=device)
+            trainer.train(
+                train_dataloader=train_loader,
+                val_dataloader=val_loader,
+                epochs=1,
+            )
+            trainer.evaluate(test_loader)
+        finally:
+            # Always close the samples, and do so before the temp dirs are removed.
+            samples.close()