From f132830996f523783b3532da710d38317e1bd09a Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Wed, 18 Feb 2026 14:40:03 -0800 Subject: [PATCH 1/6] Remove duplicated code that misconfigures documentation --- .../python/tutorial_scripts/schema_operations.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/docs/tutorials/python/tutorial_scripts/schema_operations.py b/docs/tutorials/python/tutorial_scripts/schema_operations.py index 419ae08a4..5be5dfe9b 100644 --- a/docs/tutorials/python/tutorial_scripts/schema_operations.py +++ b/docs/tutorials/python/tutorial_scripts/schema_operations.py @@ -27,21 +27,11 @@ syn = Synapse() syn.login() -schemas, file_paths = generate_jsonschema( - data_model_source=DATA_MODEL_SOURCE, - output=OUTPUT_DIRECTORY, - data_types=DATA_TYPE, - synapse_client=syn, -) - -print(schemas[0]) - - # Create JSON Schemas for multiple data types schemas, file_paths = generate_jsonschema( data_model_source=DATA_MODEL_SOURCE, output=OUTPUT_DIRECTORY, - data_types=["Patient", "Biospecimen"], + data_types=DATA_TYPE, synapse_client=syn, ) From e838fb93bd1e904feef0ebb9eac0835041650e9b Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Wed, 18 Feb 2026 15:06:08 -0800 Subject: [PATCH 2/6] Shift documentation to extensions --- .../extensions/curator}/schema_operations.md | 125 ++++++++++++------ .../tutorial_scripts/schema_operations.py | 85 ------------ mkdocs.yml | 3 +- 3 files changed, 90 insertions(+), 123 deletions(-) rename docs/{tutorials/python => guides/extensions/curator}/schema_operations.md (53%) delete mode 100644 docs/tutorials/python/tutorial_scripts/schema_operations.py diff --git a/docs/tutorials/python/schema_operations.md b/docs/guides/extensions/curator/schema_operations.md similarity index 53% rename from docs/tutorials/python/schema_operations.md rename to docs/guides/extensions/curator/schema_operations.md index fc3c8e3b0..1b115596f 100644 --- a/docs/tutorials/python/schema_operations.md +++ 
b/docs/guides/extensions/curator/schema_operations.md @@ -1,3 +1,5 @@ +# How to Generate JSONschemas from Curator CSV data models + JSON Schema is a tool used to validate data. In Synapse, JSON Schemas can be used to validate the metadata applied to an entity such as project, file, folder, table, or view, including the [annotations](https://help.synapse.org/docs/Annotating-Data-With-Metadata.2667708522.html) applied to it. To learn more about JSON Schemas, check out [JSON-Schema.org](https://json-schema.org/). Synapse supports a subset of features from [json-schema-draft-07](https://json-schema.org/draft-07). To see the list of features currently supported, see the [JSON Schema object definition](https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/schema/JsonSchema.html) from Synapse's REST API Documentation. @@ -15,13 +17,37 @@ This tutorial uses the Python client as a library. To use the CLI tool, see the ## Prerequisites -* You have a working [installation](../installation.md) of the Synapse Python Client. +* You have a working [installation](../../../tutorials/installation.md) of the Synapse Python Client. You must install the Curator extensions package. * You have a data model, see this [data model_documentation](../../explanations/curator_data_model.md). ## 1. 
Initial set up ```python -{!docs/tutorials/python/tutorial_scripts/schema_operations.py!lines=1-18} +from synapseclient import Synapse +from synapseclient.extensions.curator import ( + bind_jsonschema, + generate_jsonschema, + register_jsonschema, +) + +# Path or URL to your data model (CSV or JSONLD format) +# Example: "path/to/my_data_model.csv" or "https://raw.githubusercontent.com/example.csv" +DATA_MODEL_SOURCE = "tests/unit/synapseclient/extensions/schema_files/example.model.csv" +# List of component names/data types to create schemas for, or None for all components/data types +# Example: ["Patient", "Biospecimen"] or None +DATA_TYPE = ["Patient"] +# Directory where JSON Schema files will be saved +OUTPUT_DIRECTORY = "temp" +# Path to a generated JSON Schema file for registration +SCHEMA_PATH = "temp/Patient.json" +# Your Synapse organization name for schema registration +ORGANIZATION_NAME = "my.organization" +# Name for the schema +SCHEMA_NAME = "patient.schema" +# Version number for the schema +SCHEMA_VERSION = "0.0.1" +# Synapse entity ID to bind the schema to (file, folder, etc.) +ENTITY_ID = "syn12345678" ``` To create a JSON Schema you need a data model, and the data types you want to create. @@ -30,74 +56,102 @@ The data model must be in either CSV or JSON-LD form. The data model may be a lo The data types must exist in your data model. This can be a list of data types, or `None` to create all data types in the data model. -## 2. Create a JSON Schema +## 2. Create JSON Schemas -Create a JSON Schema +Create multiple JSON Schema ```python -{!docs/tutorials/python/tutorial_scripts/schema_operations.py!lines=20-27} +# Create JSON Schemas for multiple data types +schemas, file_paths = generate_jsonschema( + data_model_source=DATA_MODEL_SOURCE, + output=OUTPUT_DIRECTORY, + data_types=DATA_TYPE, + synapse_client=syn, +) ``` -You should see the first JSON Schema for the datatype you selected printed. 
-It will look like [this schema](https://repo-prod.prod.sagebase.org/repo/v1/schema/type/registered/dpetest-test.schematic.Patient). -By setting the `output` parameter as path to a "temp" directory, the file will be created as "temp/Patient.json". - -## 3. Create multiple JSON Schema - -Create multiple JSON Schema +The JSONschema looks like [this](https://repo-prod.prod.sagebase.org/repo/v1/schema/type/registered/dpetest-test.schematic.Patient). -```python -{!docs/tutorials/python/tutorial_scripts/schema_operations.py!lines=30-36} -``` +By setting the `output` parameter as path to a "temp" directory, the file will be created as "temp/Patient.json". The `data_types` parameter is a list and can have multiple data types. -## 4. Create every JSON Schema +## 3. Create every JSON Schema Create every JSON Schema ```python -{!docs/tutorials/python/tutorial_scripts/schema_operations.py!lines=38-43} +# Create JSON Schemas for all data types +schemas, file_paths = generate_jsonschema( + data_model_source=DATA_MODEL_SOURCE, + output=OUTPUT_DIRECTORY, + synapse_client=syn, +) ``` If you don't set a `data_types` parameter a JSON Schema will be created for every data type in the data model. -## 5. Create a JSON Schema with a certain path +## 4. Create a JSON Schema with a certain path Create a JSON Schema ```python -{!docs/tutorials/python/tutorial_scripts/schema_operations.py!lines=45-51} +# Specify path for JSON Schema +schemas, file_paths = generate_jsonschema( + data_model_source=DATA_MODEL_SOURCE, + data_types=DATA_TYPE, + output="test.json", + synapse_client=syn, +) ``` If you have only one data type and set the `output` parameter to a file path(ending in.json), the JSON Schema file will have that path. -## 6. Create a JSON Schema in the current working directory +## 5. 
Create a JSON Schema in the current working directory Create a JSON Schema ```python -{!docs/tutorials/python/tutorial_scripts/schema_operations.py!lines=53-58} +# Create JSON Schema in cwd +schemas, file_paths = generate_jsonschema( + data_model_source=DATA_MODEL_SOURCE, + data_types=DATA_TYPE, + synapse_client=syn, +) ``` If you don't set `output` parameter the JSON Schema file will be created in the current working directory. -## 7. Create a JSON Schema using display names +## 6. Create a JSON Schema using display names Create a JSON Schema ```python -{!docs/tutorials/python/tutorial_scripts/schema_operations.py!lines=60-66} +# Create JSON Schema using display names for both properties names and valid values +schemas, file_paths = generate_jsonschema( + data_model_source=DATA_MODEL_SOURCE, + data_types=DATA_TYPE, + data_model_labels="display_label", + synapse_client=syn, +) ``` You can have Curator format the property names and valid values in the JSON Schema. This will remove whitespace and special characters. -## 8. Register a JSON Schema to Synapse +## 7. Register a JSON Schema to Synapse Once you've created a JSON Schema file, you can register it to a Synapse organization. ```python -{!docs/tutorials/python/tutorial_scripts/schema_operations.py!lines=68-76} +# Register a JSON Schema to Synapse +json_schema = register_jsonschema( + schema_path=SCHEMA_PATH, + organization_name=ORGANIZATION_NAME, + schema_name=SCHEMA_NAME, + schema_version=SCHEMA_VERSION, + synapse_client=syn, +) +print(f"Registered schema URI: {json_schema.uri}") ``` The `register_jsonschema` function: @@ -106,12 +160,19 @@ The `register_jsonschema` function: - Returns the schema URI and a success message - You can optionally specify a version (e.g., "0.0.1") or let it auto-generate -## 9. Bind a JSON Schema to a Synapse Entity +## 8. Bind a JSON Schema to a Synapse Entity After registering a schema, you can bind it to Synapse entities (files, folders, etc.) for metadata validation. 
```python -{!docs/tutorials/python/tutorial_scripts/schema_operations.py!lines=78-85} +# Bind a JSON Schema to a Synapse entity +result = bind_jsonschema( + entity_id=ENTITY_ID, + json_schema_uri=json_schema.uri, + enable_derived_annotations=True, + synapse_client=syn, +) +print(f"Successfully bound schema to entity: {result}") ``` The `bind_jsonschema` function: @@ -120,16 +181,6 @@ The `bind_jsonschema` function: - Optionally enables derived annotations to auto-populate metadata - Returns binding details -## Source Code for this Tutorial - -
- Click to show me - -```python -{!docs/tutorials/python/tutorial_scripts/schema_operations.py!} -``` -
- ## Reference - [JSON Schema Object Definition](https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/schema/JsonSchema.html) diff --git a/docs/tutorials/python/tutorial_scripts/schema_operations.py b/docs/tutorials/python/tutorial_scripts/schema_operations.py deleted file mode 100644 index 5be5dfe9b..000000000 --- a/docs/tutorials/python/tutorial_scripts/schema_operations.py +++ /dev/null @@ -1,85 +0,0 @@ -from synapseclient import Synapse -from synapseclient.extensions.curator import ( - bind_jsonschema, - generate_jsonschema, - register_jsonschema, -) - -# Path or URL to your data model (CSV or JSONLD format) -# Example: "path/to/my_data_model.csv" or "https://raw.githubusercontent.com/example.csv" -DATA_MODEL_SOURCE = "tests/unit/synapseclient/extensions/schema_files/example.model.csv" -# List of component names/data types to create schemas for, or None for all components/data types -# Example: ["Patient", "Biospecimen"] or None -DATA_TYPE = ["Patient"] -# Directory where JSON Schema files will be saved -OUTPUT_DIRECTORY = "temp" -# Path to a generated JSON Schema file for registration -SCHEMA_PATH = "temp/Patient.json" -# Your Synapse organization name for schema registration -ORGANIZATION_NAME = "my.organization" -# Name for the schema -SCHEMA_NAME = "patient.schema" -# Version number for the schema -SCHEMA_VERSION = "0.0.1" -# Synapse entity ID to bind the schema to (file, folder, etc.) 
-ENTITY_ID = "syn12345678" - -syn = Synapse() -syn.login() - -# Create JSON Schemas for multiple data types -schemas, file_paths = generate_jsonschema( - data_model_source=DATA_MODEL_SOURCE, - output=OUTPUT_DIRECTORY, - data_types=DATA_TYPE, - synapse_client=syn, -) - -# Create JSON Schemas for all data types -schemas, file_paths = generate_jsonschema( - data_model_source=DATA_MODEL_SOURCE, - output=OUTPUT_DIRECTORY, - synapse_client=syn, -) - -# Specify path for JSON Schema -schemas, file_paths = generate_jsonschema( - data_model_source=DATA_MODEL_SOURCE, - data_types=DATA_TYPE, - output="test.json", - synapse_client=syn, -) - -# Create JSON Schema in cwd -schemas, file_paths = generate_jsonschema( - data_model_source=DATA_MODEL_SOURCE, - data_types=DATA_TYPE, - synapse_client=syn, -) - -# Create JSON Schema using display names for both properties names and valid values -schemas, file_paths = generate_jsonschema( - data_model_source=DATA_MODEL_SOURCE, - data_types=DATA_TYPE, - data_model_labels="display_label", - synapse_client=syn, -) - -# Register a JSON Schema to Synapse -json_schema = register_jsonschema( - schema_path=SCHEMA_PATH, - organization_name=ORGANIZATION_NAME, - schema_name=SCHEMA_NAME, - schema_version=SCHEMA_VERSION, - synapse_client=syn, -) -print(f"Registered schema URI: {json_schema.uri}") - -# Bind a JSON Schema to a Synapse entity -result = bind_jsonschema( - entity_id=ENTITY_ID, - json_schema_uri=json_schema.uri, - enable_derived_annotations=True, - synapse_client=syn, -) -print(f"Successfully bound schema to entity: {result}") diff --git a/mkdocs.yml b/mkdocs.yml index 85a237d0c..be5da7b4c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -45,7 +45,7 @@ nav: # - Team: tutorials/python/team.md - Upload data in bulk: tutorials/python/upload_data_in_bulk.md - Download data in bulk: tutorials/python/download_data_in_bulk.md - - Creating JSON Schema: tutorials/python/schema_operations.md + # - Creating JSON Schema: tutorials/python/schema_operations.md 
- Working with JSON Schema: tutorials/python/json_schema.md # - Move Files and Folders: tutorials/python/move_files_and_folders.md # - Migrate data to other storage locations: tutorials/python/migrate_data_to_other_storage_locations.md @@ -59,6 +59,7 @@ nav: - Data Storage: guides/data_storage.md - Access the REST API: guides/accessing_the_rest_api.md - Extensions: + - Curator Data Model: guides/extensions/curator/schema_operations.md - Curator: guides/extensions/curator/metadata_curation.md # - Expermental Features: # - Validating Annotations: guides/validate_annotations.md From 2fa39d3e6b00acbb4a3f0ef8b1b687fd7d1ed4c9 Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Wed, 18 Feb 2026 15:06:45 -0800 Subject: [PATCH 3/6] Rename --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index be5da7b4c..eda90ac08 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -59,7 +59,7 @@ nav: - Data Storage: guides/data_storage.md - Access the REST API: guides/accessing_the_rest_api.md - Extensions: - - Curator Data Model: guides/extensions/curator/schema_operations.md + - Curator JSONschemas: guides/extensions/curator/schema_operations.md - Curator: guides/extensions/curator/metadata_curation.md # - Expermental Features: # - Validating Annotations: guides/validate_annotations.md From acb55ee4402d7fa2abbf1c14f0c8eed04e126038 Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Wed, 18 Feb 2026 15:11:57 -0800 Subject: [PATCH 4/6] Add documentation --- docs/guides/extensions/curator/metadata_curation.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index 65149d87e..6b04833b1 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -18,6 +18,7 @@ By following this guide, you will: - Python environment with synapseclient and the `curator` extension installed (ie. 
`pip install --upgrade "synapseclient[curator]"`) - An existing Synapse project and folder where you want to manage metadata - A JSON Schema registered in Synapse (many schemas are already available for Sage-affiliated projects, or you can register your own by following the [JSON Schema tutorial](../../../tutorials/python/json_schema.md)) + - If you are leveraging the [Curator CSV data model](../../../explanations/curator_data_model.md), you can create JSON schemas by following this [tutorial](../../extensions/curator/schema_operations.md) - (Optional) An existing Synapse team if you want multiple users to collaborate on the same Grid session. Pass the team's ID as `assignee_principal_id` when creating the curation task. ## Step 1: Authenticate and import required functions From e254be82c4e5beabfbc6cf8ba665ad3eabe551f6 Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Wed, 18 Feb 2026 15:21:10 -0800 Subject: [PATCH 5/6] Add space to ensure numbering --- docs/guides/extensions/curator/schema_operations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/guides/extensions/curator/schema_operations.md b/docs/guides/extensions/curator/schema_operations.md index 1b115596f..059aa678a 100644 --- a/docs/guides/extensions/curator/schema_operations.md +++ b/docs/guides/extensions/curator/schema_operations.md @@ -9,6 +9,7 @@ In this tutorial, you will learn how to create, register, and bind JSON Schemas ## Tutorial Purpose You will learn the complete JSON Schema workflow: + 1. **Generate** JSON schemas from your data model 2. **Register** schemas to a Synapse organization 3. 
**Bind** schemas to Synapse entities for metadata validation From 3ce74175b4f1dd223da77d0a81d0d06e9377847f Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Wed, 18 Feb 2026 15:25:50 -0800 Subject: [PATCH 6/6] Reorganize documentation --- .../extensions/curator/schema_operations.md | 59 ++++++++----------- 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/docs/guides/extensions/curator/schema_operations.md b/docs/guides/extensions/curator/schema_operations.md index 059aa678a..730b530cf 100644 --- a/docs/guides/extensions/curator/schema_operations.md +++ b/docs/guides/extensions/curator/schema_operations.md @@ -59,7 +59,9 @@ The data types must exist in your data model. This can be a list of data types, ## 2. Create JSON Schemas -Create multiple JSON Schema +### Create multiple JSON Schema + +The JSONschema looks like [this](https://repo-prod.prod.sagebase.org/repo/v1/schema/type/registered/dpetest-test.schematic.Patient). By setting the `output` parameter as path to a "temp" directory, the file will be created as "temp/Patient.json". The `data_types` parameter is a list and can have multiple data types. ```python # Create JSON Schemas for multiple data types @@ -71,15 +73,9 @@ schemas, file_paths = generate_jsonschema( ) ``` -The JSONschema looks like [this](https://repo-prod.prod.sagebase.org/repo/v1/schema/type/registered/dpetest-test.schematic.Patient). - -By setting the `output` parameter as path to a "temp" directory, the file will be created as "temp/Patient.json". - -The `data_types` parameter is a list and can have multiple data types. +### Create every JSON schema -## 3. Create every JSON Schema - -Create every JSON Schema +If you don't set a `data_types` parameter a JSON Schema will be created for every data type in the data model. 
```python # Create JSON Schemas for all data types @@ -90,11 +86,9 @@ schemas, file_paths = generate_jsonschema( ) ``` -If you don't set a `data_types` parameter a JSON Schema will be created for every data type in the data model. - -## 4. Create a JSON Schema with a certain path +### Create a JSON Schema with a certain path -Create a JSON Schema +If you have only one data type and set the `output` parameter to a file path (ending in `.json`), the JSON Schema file will have that path. ```python # Specify path for JSON Schema @@ -106,14 +100,11 @@ schemas, file_paths = generate_jsonschema( ) ``` -If you have only one data type and set the `output` parameter to a file path(ending in.json), the JSON Schema file will have that path. - -## 5. Create a JSON Schema in the current working directory +### Create a JSON Schema in the current working directory -Create a JSON Schema +If you don't set the `output` parameter, the JSON Schema file will be created in the current working directory. ```python -# Create JSON Schema in cwd schemas, file_paths = generate_jsonschema( data_model_source=DATA_MODEL_SOURCE, data_types=DATA_TYPE, @@ -121,11 +112,10 @@ schemas, file_paths = generate_jsonschema( ) ``` -If you don't set `output` parameter the JSON Schema file will be created in the current working directory. -## 6. Create a JSON Schema using display names +### Create a JSON Schema using display names -Create a JSON Schema +You can have Curator format the property names and valid values in the JSON Schema. This will remove whitespace and special characters. ```python # Create JSON Schema using display names for both properties names and valid values @@ -137,12 +127,17 @@ schemas, file_paths = generate_jsonschema( ) ``` -You can have Curator format the property names and valid values in the JSON Schema. This will remove whitespace and special characters. -## 7. Register a JSON Schema to Synapse +## 3. 
Register a JSON Schema to Synapse Once you've created a JSON Schema file, you can register it to a Synapse organization. +The `register_jsonschema` function: +- Takes a path to your generated JSON Schema file +- Registers it with the specified organization in Synapse +- Returns the schema URI and a success message +- You can optionally specify a version (e.g., "0.0.1") or let it auto-generate + ```python # Register a JSON Schema to Synapse json_schema = register_jsonschema( @@ -155,16 +150,17 @@ json_schema = register_jsonschema( print(f"Registered schema URI: {json_schema.uri}") ``` -The `register_jsonschema` function: -- Takes a path to your generated JSON Schema file -- Registers it with the specified organization in Synapse -- Returns the schema URI and a success message -- You can optionally specify a version (e.g., "0.0.1") or let it auto-generate ## 8. Bind a JSON Schema to a Synapse Entity After registering a schema, you can bind it to Synapse entities (files, folders, etc.) for metadata validation. +The `bind_jsonschema` function: +- Takes a Synapse entity ID (e.g., "syn12345678") +- Binds the registered schema URI to that entity +- Optionally enables derived annotations to auto-populate metadata +- Returns binding details + ```python # Bind a JSON Schema to a Synapse entity result = bind_jsonschema( @@ -176,13 +172,6 @@ result = bind_jsonschema( print(f"Successfully bound schema to entity: {result}") ``` -The `bind_jsonschema` function: -- Takes a Synapse entity ID (e.g., "syn12345678") -- Binds the registered schema URI to that entity -- Optionally enables derived annotations to auto-populate metadata -- Returns binding details - - ## Reference - [JSON Schema Object Definition](https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/schema/JsonSchema.html) - [JSON Schema Draft 7](https://json-schema.org/draft-07)