diff --git a/docs/how-to/FAQ.md b/docs/how-to/FAQ.md index 24d28aa..1803f95 100644 --- a/docs/how-to/FAQ.md +++ b/docs/how-to/FAQ.md @@ -1,7 +1,5 @@ # Frequently Asked Question -[[_TOC_]] - ## When running the ADO training pipeline, the pipeline fails at the _invoke_ step. What's the error ? If you see the error below. You have to ensure that the service connection is created at the Azure Machine Learning Workspace level and not Subscription level diff --git a/docs/how-to/GeneralDocumentation.md b/docs/how-to/GeneralDocumentation.md index 973265f..f792c9e 100644 --- a/docs/how-to/GeneralDocumentation.md +++ b/docs/how-to/GeneralDocumentation.md @@ -1,7 +1,5 @@ # General Documentation -[[_TOC_]] - ## Data Science Lifecycle Base Repo The base project structure was inspired by the following [dslp repo](https://github.com/dslp/dslp-repo-template). We readapted it to support minimal MLOps principles. diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb new file mode 100644 index 0000000..e518357 --- /dev/null +++ b/notebooks/tutorial.ipynb @@ -0,0 +1,103 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "# Get data & train ML model\n", + "\n", + "We show how to use our scripts with sample data. Please adjust the settings to match your own environment." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1633182532724 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "## The script `train_1_classifier.py` in ../src retrieves the data, splits it,\n", + "## and generates a model with the RandomForest algorithm.\n", + "!python ../src/train_1_classifier.py \\\n", + " --dataset-name \"../docs/data/Two_class.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "gather": { + "logged": 1633183686133 + }, + "jupyter": { + "outputs_hidden": false, + "source_hidden": false + }, + "nteract": { + "transient": { + "deleting": false + } + } + }, + "outputs": [], + "source": [ + "## The script `train_n_classifier.py` in ../src retrieves the data, splits it,\n", + "## generates models with various algorithms, and picks the best one.\n", + "#!python ../src/train_n_classifier.py \\\n", + "# --dataset-name \"../docs/data/Two_class.csv\"" + ] + } + ], + "metadata": { + "kernel_info": { + "name": "python3-azureml" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + }, + "microsoft": { + "host": { + "AzureML": { + "notebookHasBeenCompleted": true + } + } + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/train_1_classifier.py b/src/train_1_classifier.py index 6ccf60b..dc1c2ae 100644 --- a/src/train_1_classifier.py +++ b/src/train_1_classifier.py @@ -134,7 +134,7 @@ def parse_args(args_list=None): parser.add_argument('--model-name', 
type=str, default='two_class.pkl') parser.add_argument('--model-metric-name', type=str, default='mse', help='The name of the evaluation metric used in Train step') - parser.add_argument('--keep-columns', type=str, default='Helpfulness Score|Score|Text|Target') + parser.add_argument('--keep-columns', type=str, default='Helpfulness_Score|Score|Text|Target') parser.add_argument('--target-column', type=str, default='Target') parser.add_argument('--target-values', type=str, default='toys games|not a toy/game') parser.add_argument('--text-columns', type=str, default='Text') diff --git a/src/train_n_classifier.py b/src/train_n_classifier.py index 0f74043..6e11a7d 100644 --- a/src/train_n_classifier.py +++ b/src/train_n_classifier.py @@ -176,7 +176,7 @@ def parse_args(args_list=None): parser.add_argument('--model-name', type=str, default='two_class.pkl') parser.add_argument('--model-metric-name', type=str, default='Recall', help='The name of the evaluation metric used in Train step') - parser.add_argument('--keep-columns', type=str, default='Helpfulness Score|Score|Text|Target') + parser.add_argument('--keep-columns', type=str, default='Helpfulness_Score|Score|Text|Target') parser.add_argument('--target-column', type=str, default='Target') parser.add_argument('--target-values', type=str, default='toys games|not a toy/game') parser.add_argument('--text-columns', type=str, default='Text')