From 9663b60dabf9738d44eb3de30ac57f089c76b273 Mon Sep 17 00:00:00 2001 From: Kyoichi Iwasaki Date: Sat, 2 Oct 2021 17:50:14 +0900 Subject: [PATCH 1/4] Omit TOC due to GitHub feature change (we don't need a TOC now) --- docs/how-to/FAQ.md | 2 -- docs/how-to/GeneralDocumentation.md | 2 -- 2 files changed, 4 deletions(-) diff --git a/docs/how-to/FAQ.md b/docs/how-to/FAQ.md index 24d28aa..1803f95 100644 --- a/docs/how-to/FAQ.md +++ b/docs/how-to/FAQ.md @@ -1,7 +1,5 @@ # Frequently Asked Question -[[_TOC_]] - ## When running the ADO training pipeline, the pipeline fails at the _invoke_ step. What's the error ? If you see the error below. You have to ensure that the service connection is created at the Azure Machine Learning Workspace level and not Subscription level diff --git a/docs/how-to/GeneralDocumentation.md b/docs/how-to/GeneralDocumentation.md index 973265f..f792c9e 100644 --- a/docs/how-to/GeneralDocumentation.md +++ b/docs/how-to/GeneralDocumentation.md @@ -1,7 +1,5 @@ # General Documentation -[[_TOC_]] - ## Data Science Lifecycle Base Repo The base project structure was inspired by the following [dslp repo](https://github.com/dslp/dslp-repo-template). We readapted it to support minimal MLOps principles. 
From 59025a5b6f66a06970bb7dd41da87a06698383c6 Mon Sep 17 00:00:00 2001 From: Kyoichi Iwasaki Date: Sat, 2 Oct 2021 23:28:50 +0900 Subject: [PATCH 2/4] Add sample notebook & modify column name of sample data --- notebooks/tutorial.ipynb | 103 ++++++++++++++++++++++++++++++++++++++ src/train_1_classifier.py | 2 +- src/train_n_classifier.py | 2 +- 3 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 notebooks/tutorial.ipynb diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb new file mode 100644 index 0000000..8a73440 --- /dev/null +++ b/notebooks/tutorial.ipynb @@ -0,0 +1,103 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "# Get data & train ML model\n", + "\n", + "We show how to use our scripts with sample data. Please adjust the settings to fit your situation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1633182532724 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "## Script `train_1_classifier.py` in ../src executes retrieving data, splitting them \n", + "## and generating model with RandomForest algorithm\n", + "!python ../src/train_1_classifier.py \\\n", + " --dataset-name \"../docs/data/Two_class.csv\" ## Specify your data here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1633183686133 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "## script `train_n_classifier.py` in ../src executes retrieving data, splitting them \n", + "## and generating model with various algorithms and pick up the best.\n", + "!python ../src/train_n_classifier.py \\\n", + " 
--dataset-name \"../docs/data/Two_class.csv\" ## Specify your data here" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3-azureml" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "microsoft": { + "host": { + "AzureML": { + "notebookHasBeenCompleted": true + } + } + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/train_1_classifier.py b/src/train_1_classifier.py index 6ccf60b..dc1c2ae 100644 --- a/src/train_1_classifier.py +++ b/src/train_1_classifier.py @@ -134,7 +134,7 @@ def parse_args(args_list=None): parser.add_argument('--model-name', type=str, default='two_class.pkl') parser.add_argument('--model-metric-name', type=str, default='mse', help='The name of the evaluation metric used in Train step') - parser.add_argument('--keep-columns', type=str, default='Helpfulness Score|Score|Text|Target') + parser.add_argument('--keep-columns', type=str, default='Helpfulness_Score|Score|Text|Target') parser.add_argument('--target-column', type=str, default='Target') parser.add_argument('--target-values', type=str, default='toys games|not a toy/game') parser.add_argument('--text-columns', type=str, default='Text') diff --git a/src/train_n_classifier.py b/src/train_n_classifier.py index 0f74043..6e11a7d 100644 --- a/src/train_n_classifier.py +++ b/src/train_n_classifier.py @@ -176,7 +176,7 @@ def parse_args(args_list=None): parser.add_argument('--model-name', type=str, default='two_class.pkl') parser.add_argument('--model-metric-name', type=str, default='Recall', help='The name of the evaluation metric used in Train step') - parser.add_argument('--keep-columns', type=str, 
default='Helpfulness Score|Score|Text|Target') + parser.add_argument('--keep-columns', type=str, default='Helpfulness_Score|Score|Text|Target') parser.add_argument('--target-column', type=str, default='Target') parser.add_argument('--target-values', type=str, default='toys games|not a toy/game') parser.add_argument('--text-columns', type=str, default='Text') From 4434acf54299221189a0ecadf9c7f9f902b02d25 Mon Sep 17 00:00:00 2001 From: Kyoichi Iwasaki Date: Sat, 2 Oct 2021 23:35:55 +0900 Subject: [PATCH 3/4] Clean up notebook output --- notebooks/tutorial.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb index 8a73440..084d888 100644 --- a/notebooks/tutorial.ipynb +++ b/notebooks/tutorial.ipynb @@ -85,7 +85,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.9.5" }, "microsoft": { "host": { From 96420663e14ef0eb3a4ce0bbea2d8c0d724d0f93 Mon Sep 17 00:00:00 2001 From: Kyoichi Iwasaki Date: Sun, 3 Oct 2021 10:11:46 +0900 Subject: [PATCH 4/4] Omit train_n_classifier.py due to long training time --- notebooks/tutorial.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb index 084d888..e518357 100644 --- a/notebooks/tutorial.ipynb +++ b/notebooks/tutorial.ipynb @@ -37,7 +37,7 @@ "## Script `train_1_classifier.py` in ../src executes retrieving data, splitting them \n", "## and generating model with RandomForest algorithm\n", "!python ../src/train_1_classifier.py \\\n", - " --dataset-name \"../docs/data/Two_class.csv\" ## Specify your data here" + " --dataset-name \"../docs/data/Two_class.csv\"" ] }, { @@ -61,8 +61,8 @@ "source": [ "## script `train_n_classifier.py` in ../src executes retrieving data, splitting them \n", "## and generating model with various algorithms and pick up the best.\n", - "!python ../src/train_n_classifier.py \\\n", - " 
--dataset-name \"../docs/data/Two_class.csv\" ## Specify your data here" + "#!python ../src/train_n_classifier.py \\\n", + "# --dataset-name \"../docs/data/Two_class.csv\"" ] } ], @@ -71,7 +71,7 @@ "name": "python3-azureml" }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -85,7 +85,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.7.10" }, "microsoft": { "host": {