diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9603fa31..7396c5d0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,3 +23,9 @@ repos: rev: 5.13.2 hooks: - id: isort + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v3.1.0 + hooks: + - id: prettier + args: [--tab-width=4, --print-width=120] + files: ^configs/.*\.json$ diff --git a/configs/common/svm.json b/configs/common/svm.json index ca1e8f2d..68cde6d9 100644 --- a/configs/common/svm.json +++ b/configs/common/svm.json @@ -10,7 +10,7 @@ { "algorithm": { "library": "cuml", - "estimator_methods": {"inference": "predict"}, + "estimator_methods": { "inference": "predict" }, "estimator_params": { "verbose": false, "multiclass_strategy": "ovr" } }, "data": { "format": "cupy" } @@ -27,7 +27,7 @@ { "algorithm": { "library": "cuml", - "estimator_methods": {"inference": "predict"}, + "estimator_methods": { "inference": "predict" }, "estimator_params": { "multiclass_strategy": "ovr" } }, "data": { "format": "cupy" } @@ -43,7 +43,7 @@ { "algorithm": { "library": "cuml", - "estimator_methods": {"inference": "predict"} + "estimator_methods": { "inference": "predict" } }, "data": { "format": "cupy" } } diff --git a/configs/common/xgboost.json b/configs/common/xgboost.json index 66f1a708..fdb637a1 100644 --- a/configs/common/xgboost.json +++ b/configs/common/xgboost.json @@ -43,7 +43,7 @@ "algorithm": { "library": "xgboost", "estimator": "XGBRegressor", - "estimator_methods": {"inference": "predict"}, + "estimator_methods": { "inference": "predict" }, "estimator_params": { "objective": "reg:squarederror" } } } diff --git a/configs/regular/dbscan.json b/configs/regular/dbscan.json index 1e684212..1b801b65 100644 --- a/configs/regular/dbscan.json +++ b/configs/regular/dbscan.json @@ -35,12 +35,7 @@ ] }, "cuml dbscan": { - "SETS": [ - "cuml implementation", - "common dbscan parameters", - "cuml dbscan parameters", - "dbscan datasets" - ] + "SETS": ["cuml implementation", "common dbscan parameters", "cuml dbscan parameters", "dbscan datasets"] } } } diff --git a/configs/regular/kmeans.json b/configs/regular/kmeans.json index 4d9497ca..31acee45 100644 --- a/configs/regular/kmeans.json +++ b/configs/regular/kmeans.json @@ -45,7 +45,6 @@ "split_kwargs": { "train_size": 0.2, "test_size": null } } } - ] }, "TEMPLATES": { @@ -58,12 +57,7 @@ ] }, "cuml kmeans": { - "SETS": [ - "cuml implementation", - "common kmeans parameters", - "cuml kmeans parameters", - "kmeans datasets" - ] + "SETS": ["cuml implementation", "common kmeans parameters", "cuml kmeans parameters", "kmeans datasets"] } } } diff --git a/configs/regular/knn.json b/configs/regular/knn.json index c4c971ec..21626dd1 100644 --- a/configs/regular/knn.json +++ b/configs/regular/knn.json @@ -18,16 +18,28 @@ "source": "make_classification", "generation_kwargs": [ { - "n_classes": 5, "n_samples": 400000, "n_features": 4, - "n_redundant": 0, "n_repeated": 0, "n_informative": 4 + "n_classes": 5, + "n_samples": 400000, + "n_features": 4, + "n_redundant": 0, + "n_repeated": 0, + "n_informative": 4 }, { - "n_classes": 5, "n_samples": 200000, "n_features": 8, - "n_redundant": 2, "n_repeated": 2, "n_informative": 4 + "n_classes": 5, + "n_samples": 200000, + "n_features": 8, + "n_redundant": 2, + "n_repeated": 2, + "n_informative": 4 }, { - "n_classes": 5, "n_samples": 100000, "n_features": 16, - "n_redundant": 6, "n_repeated": 6, "n_informative": 4 + "n_classes": 5, + "n_samples": 100000, + "n_features": 16, + "n_redundant": 6, + "n_repeated": 6, + "n_informative": 4 } ], "split_kwargs": { "train_size": 0.5, "test_size": 0.5 } @@ -48,7 +60,7 @@ }, { "source": "make_regression", - "generation_kwargs":[ + "generation_kwargs": [ { "n_samples": 400000, "n_features": 4, "noise": 1.0 }, { "n_samples": 200000, "n_features": 8, "noise": 1.5 }, { "n_samples": 100000, "n_features": 16, "noise": 2.0 } diff --git a/configs/regular/lightgbm.json b/configs/regular/lightgbm.json index 2271c49b..13b16fd4 100644 --- a/configs/regular/lightgbm.json +++ b/configs/regular/lightgbm.json @@ -1,32 +1,15 @@ { - "INCLUDE": [ - "../common/lightgbm.json", - "xgboost_binary.json", - "xgboost_multi.json", - "xgboost_regression.json" - ], + "INCLUDE": ["../common/lightgbm.json", "xgboost_binary.json", "xgboost_multi.json", "xgboost_regression.json"], "PARAMETERS_SETS": {}, "TEMPLATES": { "lightgbm binary classification": { - "SETS": [ - "lightgbm binary classification", - "lightgbm implementations", - "gbt binary classification data" - ] + "SETS": ["lightgbm binary classification", "lightgbm implementations", "gbt binary classification data"] }, "lightgbm multi classification": { - "SETS": [ - "lightgbm multi classification", - "lightgbm implementations", - "gbt multi classification data" - ] + "SETS": ["lightgbm multi classification", "lightgbm implementations", "gbt multi classification data"] }, "lightgbm regression": { - "SETS": [ - "lightgbm regression", - "lightgbm implementations", - "gbt regression data" - ] + "SETS": ["lightgbm regression", "lightgbm implementations", "gbt regression data"] } } } diff --git a/configs/regular/linear_model.json b/configs/regular/linear_model.json index 154f9004..3b53a04c 100644 --- a/configs/regular/linear_model.json +++ b/configs/regular/linear_model.json @@ -8,16 +8,22 @@ "split_kwargs": { "train_size": 0.2, "test_size": 0.8 }, "generation_kwargs": [ { - "n_samples": 5000000, "n_features": 50, - "n_informative": 5, "noise": 20.0 + "n_samples": 5000000, + "n_features": 50, + "n_informative": 5, + "noise": 20.0 }, { - "n_samples": 500000, "n_features": 400, - "n_informative": 5, "noise": 40.0 + "n_samples": 500000, + "n_features": 400, + "n_informative": 5, + "noise": 40.0 }, { - "n_samples": 100000, "n_features": 2000, - "n_informative": 5, "noise": 60.0 + "n_samples": 100000, + "n_features": 2000, + "n_informative": 5, + "noise": 60.0 } ] } @@ -55,50 +61,22 @@ ] }, "sklearn lasso": { - "SETS": [ - "sklearn-ex[cpu] implementations", - "common lasso parameters", - "regression datasets" - ] + "SETS": ["sklearn-ex[cpu] implementations", "common lasso parameters", "regression datasets"] }, "sklearn elasticnet": { - "SETS": [ - "sklearn-ex[cpu] implementations", - "common elasticnet parameters", - "regression datasets" - ] + "SETS": ["sklearn-ex[cpu] implementations", "common elasticnet parameters", "regression datasets"] }, "cuml linear": { - "SETS": [ - "cuml implementation", - "common linear parameters", - "cuml L2 parameters", - "regression datasets" - ] + "SETS": ["cuml implementation", "common linear parameters", "cuml L2 parameters", "regression datasets"] }, "cuml ridge": { - "SETS": [ - "cuml implementation", - "common ridge parameters", - "cuml L2 parameters", - "regression datasets" - ] + "SETS": ["cuml implementation", "common ridge parameters", "cuml L2 parameters", "regression datasets"] }, "cuml lasso": { - "SETS": [ - "cuml implementation", - "common lasso parameters", - "cuml L1 parameters", - "regression datasets" - ] + "SETS": ["cuml implementation", "common lasso parameters", "cuml L1 parameters", "regression datasets"] }, "cuml elasticnet": { - "SETS": [ - "cuml implementation", - "common elasticnet parameters", - "cuml L1 parameters", - "regression datasets" - ] + "SETS": ["cuml implementation", "common elasticnet parameters", "cuml L1 parameters", "regression datasets"] } } } diff --git a/configs/regular/logreg.json b/configs/regular/logreg.json index cd7dfba1..9defa24a 100644 --- a/configs/regular/logreg.json +++ b/configs/regular/logreg.json @@ -45,39 +45,39 @@ "test_size": 0.95 } }, - "algorithm": {"estimator_params": {"C": 1e-6}} + "algorithm": { "estimator_params": { "C": 1e-6 } } }, { "data": { "dataset": "mnist", "split_kwargs": { "train_size": 10000, "test_size": null } }, - "algorithm": {"estimator_params": {"C": 1e-8}} + "algorithm": { "estimator_params": { "C": 1e-8 } } }, { "data": { "dataset": "susy", "split_kwargs": { "train_size": 0.1, "test_size": null } }, - "algorithm": { "estimator_params": {"C": 1e-2} } + "algorithm": { "estimator_params": { "C": 1e-2 } } }, { "data": { "dataset": "hepmass", "split_kwargs": { "train_size": 0.1, "test_size": null } }, - "algorithm": { "estimator_params": {"C": 1e-5} } + "algorithm": { "estimator_params": { "C": 1e-5 } } }, { "data": { "dataset": "cifar", "split_kwargs": { "train_size": 0.1, "test_size": null } }, - "algorithm": { "estimator_params": {"C": 1e-9} } + "algorithm": { "estimator_params": { "C": 1e-9 } } }, { "data": { "dataset": "gisette", "split_kwargs": { "train_size": 2000, "test_size": null } }, - "algorithm": { "estimator_params": {"C": 1e1} } + "algorithm": { "estimator_params": { "C": 1e1 } } } ] }, @@ -91,12 +91,7 @@ ] }, "cuml logreg": { - "SETS": [ - "cuml implementation", - "common logreg parameters", - "cuml logreg parameters", - "logreg datasets" - ] + "SETS": ["cuml implementation", "common logreg parameters", "cuml logreg parameters", "logreg datasets"] } } } diff --git a/configs/regular/pca.json b/configs/regular/pca.json index 1e46a13e..77dbdbb3 100644 --- a/configs/regular/pca.json +++ b/configs/regular/pca.json @@ -28,19 +28,10 @@ }, "TEMPLATES": { "sklearn pca": { - "SETS": [ - "sklearn-ex[cpu,gpu] implementations", - "pca parameters", - "pca datasets" - ] + "SETS": ["sklearn-ex[cpu,gpu] implementations", "pca parameters", "pca datasets"] }, "cuml pca": { - "SETS": [ - "cuml implementation", - "pca parameters", - "cuml pca parameters", - "pca datasets" - ] + "SETS": ["cuml implementation", "pca parameters", "cuml pca parameters", "pca datasets"] } } } diff --git a/configs/regular/svm.json b/configs/regular/svm.json index babfdb9a..d81a37d8 100644 --- a/configs/regular/svm.json +++ b/configs/regular/svm.json @@ -3,7 +3,7 @@ "PARAMETERS_SETS": { "svc binary datasets": [ { - "data": { "dataset": "a9a", "split_kwargs": { "train_size": 5000, "test_size": null } }, + "data": { "dataset": "a9a", "split_kwargs": { "train_size": 5000, "test_size": null } }, "algorithm": { "estimator_params": { "C": 1.0, "kernel": "linear" } } }, { @@ -63,7 +63,7 @@ ], "nusvc datasets": [ { - "data": { "dataset": "a9a", "split_kwargs": { "train_size": 5000, "test_size": null } }, + "data": { "dataset": "a9a", "split_kwargs": { "train_size": 5000, "test_size": null } }, "algorithm": { "estimator_params": { "nu": 0.1, "kernel": ["poly", "rbf"] } } }, { @@ -121,17 +121,12 @@ "multi svc implementations", "common svm parameters", "svm clsf parameters", - "svc parameters", + "svc parameters", "svc multiclass datasets" ] }, "svr": { - "SETS": [ - "svr implementations", - "common svm parameters", - "svr parameters", - "svr datasets" - ] + "SETS": ["svr implementations", "common svm parameters", "svr parameters", "svr datasets"] }, "nusvc": { "SETS": [ @@ -143,12 +138,7 @@ ] }, "nusvr": { - "SETS": [ - "nusvm implementations", - "common svm parameters", - "nusvr parameters", - "nusvr datasets" - ] + "SETS": ["nusvm implementations", "common svm parameters", "nusvr parameters", "nusvr datasets"] } } } diff --git a/configs/regular/train_test_split.json b/configs/regular/train_test_split.json index a55b6e51..f17b80d5 100644 --- a/configs/regular/train_test_split.json +++ b/configs/regular/train_test_split.json @@ -4,14 +4,7 @@ "train_test_split datasets": [ { "data": { - "dataset": [ - "road_network", - "codrnanorm", - "susy", - "sift", - "gist", - "svhn" - ] + "dataset": ["road_network", "codrnanorm", "susy", "sift", "gist", "svhn"] } }, { @@ -29,18 +22,10 @@ }, "TEMPLATES": { "sklearn train_test_split": { - "SETS": [ - "sklearn-ex[cpu] implementations", - "train_test_split parameters", - "train_test_split datasets" - ] + "SETS": ["sklearn-ex[cpu] implementations", "train_test_split parameters", "train_test_split datasets"] }, "cuml train_test_split": { - "SETS": [ - "cuml implementation", - "train_test_split parameters", - "train_test_split datasets" - ] + "SETS": ["cuml implementation", "train_test_split parameters", "train_test_split datasets"] } } } diff --git a/configs/regular/tsne.json b/configs/regular/tsne.json index 0be7a890..206051a5 100644 --- a/configs/regular/tsne.json +++ b/configs/regular/tsne.json @@ -32,12 +32,7 @@ ] }, "cuml tsne": { - "SETS": [ - "cuml implementation", - "common tsne parameters", - "cuml parameters", - "tsne datasets" - ] + "SETS": ["cuml implementation", "common tsne parameters", "cuml parameters", "tsne datasets"] } } } diff --git a/configs/regular/xgboost_binary.json b/configs/regular/xgboost_binary.json index bd1ac2c3..fa86cd00 100644 --- a/configs/regular/xgboost_binary.json +++ b/configs/regular/xgboost_binary.json @@ -64,11 +64,7 @@ }, "TEMPLATES": { "binary classification": { - "SETS": [ - "xgboost binary classification", - "xgboost implementations", - "gbt binary classification data" - ] + "SETS": ["xgboost binary classification", "xgboost implementations", "gbt binary classification data"] } } } diff --git a/configs/regular/xgboost_multi.json b/configs/regular/xgboost_multi.json index d56e9220..d51c1be7 100644 --- a/configs/regular/xgboost_multi.json +++ b/configs/regular/xgboost_multi.json @@ -63,11 +63,7 @@ }, "TEMPLATES": { "multi classification": { - "SETS": [ - "xgboost multiclassification", - "xgboost implementations", - "gbt multi classification data" - ] + "SETS": ["xgboost multiclassification", "xgboost implementations", "gbt multi classification data"] } } } diff --git a/configs/regular/xgboost_regression.json b/configs/regular/xgboost_regression.json index 5ad5f02f..01046df0 100644 --- a/configs/regular/xgboost_regression.json +++ b/configs/regular/xgboost_regression.json @@ -94,11 +94,7 @@ }, "TEMPLATES": { "regression": { - "SETS": [ - "xgboost regression", - "xgboost implementations", - "gbt regression data" - ] + "SETS": ["xgboost regression", "xgboost implementations", "gbt regression data"] } } } diff --git a/configs/sklearn_example.json b/configs/sklearn_example.json index b840b9e6..90d68bdc 100644 --- a/configs/sklearn_example.json +++ b/configs/sklearn_example.json @@ -1,87 +1,85 @@ -{ - "PARAMETERS_SETS": { - "common": { - "algorithm": { - "library": ["sklearn", "sklearnex"], - "device": "cpu", - "sklearn_context": { "assume_finite": true } - }, - "bench": { "n_runs": 10, "time_limit": 60 } - }, - "blobs data": { - "data": { - "source": "make_blobs", - "generation_kwargs": { - "centers": 2, - "n_samples": 5000, - "n_features": [16, 64] - }, - "split_kwargs": { "ignore": true } - } - }, - "binary clsf data": { - "data": [ - { - "source": "make_classification", - "generation_kwargs": { - "n_classes": 2, - "n_samples": 2000, - "n_features": "[RANGE]pow:2:5:6", - "n_informative": "[SPECIAL_VALUE]0.5" - }, - "split_kwargs": { "test_size": 0.2 } - } - ] - }, - "regression data": { - "data": { - "source": "make_regression", - "generation_kwargs": { - "n_samples": 10000, - "n_features": 100, - "n_informative": 50, - "noise": 0.1 - }, - "split_kwargs": { "test_size": 0.2 } - } - }, - "regression algorithms": { - "algorithm": [ - { "estimator": "LinearRegression" } - ] - }, - "unsupervised algorithms": { - "algorithm": [ - { - "estimator": "KMeans", - "estimator_params": { "n_init": 10, "n_clusters": "[SPECIAL_VALUE]auto" }, - "estimator_methods": { "inference": "predict" } - }, - { - "estimator": "DBSCAN", - "estimator_params": { "eps": 3, "min_samples": 5 } - } - ] - }, - "supervised algorithms": { - "algorithm": [ - { - "estimator": ["KNeighborsClassifier", "KNeighborsRegressor"], - "estimator_params": { "n_neighbors": 5, "algorithm": "kd_tree" } - }, - { "estimator": "SVC" } - ] - } - }, - "TEMPLATES": { - "supervised": { - "SETS": ["common", "binary clsf data", "supervised algorithms"] - }, - "unsupervised": { - "SETS": ["common", "blobs data", "unsupervised algorithms"] - }, - "regression": { - "SETS": ["common", "regression data", "regression algorithms"] - } - } -} +{ + "PARAMETERS_SETS": { + "common": { + "algorithm": { + "library": ["sklearn", "sklearnex"], + "device": "cpu", + "sklearn_context": { "assume_finite": true } + }, + "bench": { "n_runs": 10, "time_limit": 60 } + }, + "blobs data": { + "data": { + "source": "make_blobs", + "generation_kwargs": { + "centers": 2, + "n_samples": 5000, + "n_features": [16, 64] + }, + "split_kwargs": { "ignore": true } + } + }, + "binary clsf data": { + "data": [ + { + "source": "make_classification", + "generation_kwargs": { + "n_classes": 2, + "n_samples": 2000, + "n_features": "[RANGE]pow:2:5:6", + "n_informative": "[SPECIAL_VALUE]0.5" + }, + "split_kwargs": { "test_size": 0.2 } + } + ] + }, + "regression data": { + "data": { + "source": "make_regression", + "generation_kwargs": { + "n_samples": 10000, + "n_features": 100, + "n_informative": 50, + "noise": 0.1 + }, + "split_kwargs": { "test_size": 0.2 } + } + }, + "regression algorithms": { + "algorithm": [{ "estimator": "LinearRegression" }] + }, + "unsupervised algorithms": { + "algorithm": [ + { + "estimator": "KMeans", + "estimator_params": { "n_init": 10, "n_clusters": "[SPECIAL_VALUE]auto" }, + "estimator_methods": { "inference": "predict" } + }, + { + "estimator": "DBSCAN", + "estimator_params": { "eps": 3, "min_samples": 5 } + } + ] + }, + "supervised algorithms": { + "algorithm": [ + { + "estimator": ["KNeighborsClassifier", "KNeighborsRegressor"], + "estimator_params": { "n_neighbors": 5, "algorithm": "kd_tree" } + }, + { "estimator": "SVC" } + ] + } + }, + "TEMPLATES": { + "supervised": { + "SETS": ["common", "binary clsf data", "supervised algorithms"] + }, + "unsupervised": { + "SETS": ["common", "blobs data", "unsupervised algorithms"] + }, + "regression": { + "SETS": ["common", "regression data", "regression algorithms"] + } + } +} diff --git a/configs/spmd/ensemble.json b/configs/spmd/ensemble.json index c36e67c6..f8c6d2f4 100644 --- a/configs/spmd/ensemble.json +++ b/configs/spmd/ensemble.json @@ -24,8 +24,10 @@ { "dataset": "road_network", "split_kwargs": { - "train_size": 200000, "test_size": null, - "shuffle": true, "random_state": 42 + "train_size": 200000, + "test_size": null, + "shuffle": true, + "random_state": 42 } }, { "dataset": "creditcard", "split_kwargs": { "train_size": 100000, "test_size": null } }, diff --git a/configs/spmd_example.json b/configs/spmd_example.json index ea8548fe..32816aca 100644 --- a/configs/spmd_example.json +++ b/configs/spmd_example.json @@ -48,7 +48,7 @@ "random forest regression": { "algorithm": { "estimator": "RandomForestRegressor", - "estimator_params": { + "estimator_params": { "criterion": "squared_error", "max_features": 1.0, "n_estimators": 10, diff --git a/configs/testing/azure-pipelines-ci.json b/configs/testing/azure-pipelines-ci.json index 7edc55d4..da5535cf 100644 --- a/configs/testing/azure-pipelines-ci.json +++ b/configs/testing/azure-pipelines-ci.json @@ -31,8 +31,7 @@ ] }, "datasets": { - "data": - [ + "data": [ { "source": "make_classification", "generation_kwargs": { @@ -76,17 +75,17 @@ { "algorithm": { "estimator": [ - "RandomForestClassifier", "ExtraTreesClassifier", - "RandomForestRegressor", "ExtraTreesRegressor" + "RandomForestClassifier", + "ExtraTreesClassifier", + "RandomForestRegressor", + "ExtraTreesRegressor" ], "estimator_params": { "n_estimators": 20 } } }, { "algorithm": { - "estimator": [ - "KNeighborsClassifier", "KNeighborsRegressor" - ], + "estimator": ["KNeighborsClassifier", "KNeighborsRegressor"], "estimator_params": { "algorithm": ["brute", "kd_tree"] } } }, @@ -121,13 +120,7 @@ }, "TEMPLATES": { "test": { - "SETS": [ - "sklearn-ex[cpu] implementations", - "common parameters", - "data formats", - "datasets", - "algorithms" - ] + "SETS": ["sklearn-ex[cpu] implementations", "common parameters", "data formats", "datasets", "algorithms"] } } } diff --git a/configs/weekly/knn.json b/configs/weekly/knn.json index dfc2864f..0e4570d0 100644 --- a/configs/weekly/knn.json +++ b/configs/weekly/knn.json @@ -2,9 +2,7 @@ "INCLUDE": ["../common/sklearn.json", "../common/knn.json"], "PARAMETERS_SETS": { "high-load brute knn classification datasets": { - "data": [ - { "dataset": "susy", "split_kwargs": { "train_size": 500000, "test_size": 500000 } } - ] + "data": [{ "dataset": "susy", "split_kwargs": { "train_size": 500000, "test_size": 500000 } }] }, "high-load kd_tree knn classification datasets": { "data": [ @@ -12,16 +10,28 @@ "source": "make_classification", "generation_kwargs": [ { - "n_classes": 5, "n_samples": 4000000, "n_features": 4, - "n_redundant": 0, "n_repeated": 0, "n_informative": 4 + "n_classes": 5, + "n_samples": 4000000, + "n_features": 4, + "n_redundant": 0, + "n_repeated": 0, + "n_informative": 4 }, { - "n_classes": 5, "n_samples": 2000000, "n_features": 8, - "n_redundant": 2, "n_repeated": 2, "n_informative": 4 + "n_classes": 5, + "n_samples": 2000000, + "n_features": 8, + "n_redundant": 2, + "n_repeated": 2, + "n_informative": 4 }, { - "n_classes": 5, "n_samples": 1000000, "n_features": 16, - "n_redundant": 6, "n_repeated": 6, "n_informative": 4 + "n_classes": 5, + "n_samples": 1000000, + "n_features": 16, + "n_redundant": 6, + "n_repeated": 6, + "n_informative": 4 } ], "split_kwargs": { "ignore": true } @@ -29,15 +39,13 @@ ] }, "high-load brute knn regression datasets": { - "data": [ - { "dataset": "year_prediction_msd", "split_kwargs": { "ignore": true } } - ] + "data": [{ "dataset": "year_prediction_msd", "split_kwargs": { "ignore": true } }] }, "high-load kd_tree knn regression datasets": { "data": [ { "source": "make_regression", - "generation_kwargs":[ + "generation_kwargs": [ { "n_samples": 10000000, "n_features": 4, "noise": 1.0 }, { "n_samples": 4000000, "n_features": 8, "noise": 1.5 }, { "n_samples": 2000000, "n_features": 16, "noise": 2.0 } diff --git a/configs/weekly/linear_model.json b/configs/weekly/linear_model.json index 9cb0bd58..2913ae04 100644 --- a/configs/weekly/linear_model.json +++ b/configs/weekly/linear_model.json @@ -8,28 +8,29 @@ "split_kwargs": { "train_size": 0.5, "test_size": 0.5 }, "generation_kwargs": [ { - "n_samples": 20000000, "n_features": 50, - "n_informative": 5, "noise": 20.0 + "n_samples": 20000000, + "n_features": 50, + "n_informative": 5, + "noise": 20.0 }, { - "n_samples": 2000000, "n_features": 400, - "n_informative": 5, "noise": 40.0 + "n_samples": 2000000, + "n_features": 400, + "n_informative": 5, + "noise": 40.0 }, { - "n_samples": 200000, "n_features": 5000, - "n_informative": 5, "noise": 80.0 + "n_samples": 200000, + "n_features": 5000, + "n_informative": 5, + "noise": 80.0 } ] } }, { "data": { - "dataset": [ - "epsilon", - "yolanda", - "hepmass", - "susy" - ], + "dataset": ["epsilon", "yolanda", "hepmass", "susy"], "preprocessing_kwargs": { "normalize": true }, @@ -56,50 +57,22 @@ ] }, "sklearn lasso": { - "SETS": [ - "sklearn-ex[cpu] implementations", - "common lasso parameters", - "regression datasets" - ] + "SETS": ["sklearn-ex[cpu] implementations", "common lasso parameters", "regression datasets"] }, "sklearn elasticnet": { - "SETS": [ - "sklearn-ex[cpu] implementations", - "common elasticnet parameters", - "regression datasets" - ] + "SETS": ["sklearn-ex[cpu] implementations", "common elasticnet parameters", "regression datasets"] }, "cuml linear": { - "SETS": [ - "cuml implementation", - "common linear parameters", - "cuml L2 parameters", - "regression datasets" - ] + "SETS": ["cuml implementation", "common linear parameters", "cuml L2 parameters", "regression datasets"] }, "cuml ridge": { - "SETS": [ - "cuml implementation", - "common ridge parameters", - "cuml L2 parameters", - "regression datasets" - ] + "SETS": ["cuml implementation", "common ridge parameters", "cuml L2 parameters", "regression datasets"] }, "cuml lasso": { - "SETS": [ - "cuml implementation", - "common lasso parameters", - "cuml L1 parameters", - "regression datasets" - ] + "SETS": ["cuml implementation", "common lasso parameters", "cuml L1 parameters", "regression datasets"] }, "cuml elasticnet": { - "SETS": [ - "cuml implementation", - "common elasticnet parameters", - "cuml L1 parameters", - "regression datasets" - ] + "SETS": ["cuml implementation", "common elasticnet parameters", "cuml L1 parameters", "regression datasets"] } } } diff --git a/configs/weekly/logreg.json b/configs/weekly/logreg.json index d4576a4e..7f4fa76f 100644 --- a/configs/weekly/logreg.json +++ b/configs/weekly/logreg.json @@ -1,9 +1,7 @@ { "INCLUDE": ["../common/sklearn.json", "../common/logreg.json", "../regular/logreg.json"], "PARAMETERS_SETS": { - "high-load logreg datasets": [ - { "data": { "split_kwargs": { "ignore": true } } } - ] + "high-load logreg datasets": [{ "data": { "split_kwargs": { "ignore": true } } }] }, "TEMPLATES": { "sklearn logreg": { diff --git a/configs/weekly/pca.json b/configs/weekly/pca.json index e88b9592..1fb41feb 100644 --- a/configs/weekly/pca.json +++ b/configs/weekly/pca.json @@ -23,19 +23,10 @@ }, "TEMPLATES": { "sklearn pca": { - "SETS": [ - "sklearn-ex[cpu,gpu] implementations", - "pca parameters", - "high-load pca datasets" - ] + "SETS": ["sklearn-ex[cpu,gpu] implementations", "pca parameters", "high-load pca datasets"] }, "cuml pca": { - "SETS": [ - "cuml implementation", - "pca parameters", - "cuml pca parameters", - "high-load pca datasets" - ] + "SETS": ["cuml implementation", "pca parameters", "cuml pca parameters", "high-load pca datasets"] } } } diff --git a/configs/weekly/svm.json b/configs/weekly/svm.json index 93f73961..b3296afc 100644 --- a/configs/weekly/svm.json +++ b/configs/weekly/svm.json @@ -3,7 +3,7 @@ "PARAMETERS_SETS": { "high-load svc binary datasets": [ { - "data": { "dataset": "a9a", "split_kwargs": { "ignore": true } }, + "data": { "dataset": "a9a", "split_kwargs": { "ignore": true } }, "algorithm": { "estimator_params": { "C": 1.0, "kernel": "linear" } } }, { @@ -35,7 +35,10 @@ ], "high-load svr datasets": [ { - "data": { "dataset": "year_prediction_msd", "split_kwargs": { "train_size": 50000, "test_size": null } }, + "data": { + "dataset": "year_prediction_msd", + "split_kwargs": { "train_size": 50000, "test_size": null } + }, "algorithm": { "estimator_params": { "C": 1.0, "kernel": "rbf" } } }, { @@ -57,7 +60,7 @@ ], "high-load nusvc datasets": [ { - "data": { "dataset": "a9a", "split_kwargs": { "ignore": true } }, + "data": { "dataset": "a9a", "split_kwargs": { "ignore": true } }, "algorithm": { "estimator_params": { "nu": 0.1, "kernel": ["poly", "rbf"] } } }, { @@ -71,7 +74,10 @@ ], "high-load nusvr datasets": [ { - "data": { "dataset": "year_prediction_msd", "split_kwargs": { "train_size": 50000, "test_size": null } }, + "data": { + "dataset": "year_prediction_msd", + "split_kwargs": { "train_size": 50000, "test_size": null } + }, "algorithm": { "estimator_params": { "C": 1.0, "kernel": "rbf" } } }, { @@ -116,12 +122,7 @@ ] }, "svr": { - "SETS": [ - "svr implementations", - "common svm parameters", - "svr parameters", - "high-load svr datasets" - ] + "SETS": ["svr implementations", "common svm parameters", "svr parameters", "high-load svr datasets"] }, "nusvc": { "SETS": [ @@ -133,12 +134,7 @@ ] }, "nusvr": { - "SETS": [ - "nusvm implementations", - "common svm parameters", - "nusvr parameters", - "high-load nusvr datasets" - ] + "SETS": ["nusvm implementations", "common svm parameters", "nusvr parameters", "high-load nusvr datasets"] } } } diff --git a/configs/weekly/train_test_split.json b/configs/weekly/train_test_split.json index 4cd5e5a5..cbab6330 100644 --- a/configs/weekly/train_test_split.json +++ b/configs/weekly/train_test_split.json @@ -4,18 +4,13 @@ "high-load train_test_split datasets": [ { "data": { - "dataset": [ - "airline_depdelay", - "higgs" - ] + "dataset": ["airline_depdelay", "higgs"] } }, { "data": { "source": "make_regression", - "generation_kwargs": [ - { "n_samples": 200000000, "n_features": 5 } - ] + "generation_kwargs": [{ "n_samples": 200000000, "n_features": 5 }] } } ] @@ -29,11 +24,7 @@ ] }, "cuml train_test_split": { - "SETS": [ - "cuml implementation", - "train_test_split parameters", - "high-load train_test_split datasets" - ] + "SETS": ["cuml implementation", "train_test_split parameters", "high-load train_test_split datasets"] } } } diff --git a/configs/weekly/tsne.json b/configs/weekly/tsne.json index e743ddea..354cd2fa 100644 --- a/configs/weekly/tsne.json +++ b/configs/weekly/tsne.json @@ -32,12 +32,7 @@ ] }, "cuml tsne": { - "SETS": [ - "cuml implementation", - "common tsne parameters", - "cuml parameters", - "high-load tsne datasets" - ] + "SETS": ["cuml implementation", "common tsne parameters", "cuml parameters", "high-load tsne datasets"] } } } diff --git a/configs/weekly/xgboost_binary.json b/configs/weekly/xgboost_binary.json index 96bc5e82..b454fd90 100644 --- a/configs/weekly/xgboost_binary.json +++ b/configs/weekly/xgboost_binary.json @@ -18,11 +18,7 @@ }, "TEMPLATES": { "regression": { - "SETS": [ - "xgboost binary classification", - "xgboost implementations", - "xgboost data" - ] + "SETS": ["xgboost binary classification", "xgboost implementations", "xgboost data"] } } } diff --git a/configs/weekly/xgboost_regression.json b/configs/weekly/xgboost_regression.json index 7ead5652..c56c21be 100644 --- a/configs/weekly/xgboost_regression.json +++ b/configs/weekly/xgboost_regression.json @@ -2,49 +2,47 @@ "INCLUDE": ["../common/xgboost.json"], "PARAMETERS_SETS": { "xgboost data": [ - { - "data": { - "source": "make_regression", - "generation_kwargs": [ - { - "n_samples": 2097152, "n_features": 256 - }, - { - "n_samples": 2097152, "n_features": 32 - } - ], - "split_kwargs": { "train_size": 0.5, "test_size": 0.5 } - }, - "algorithm": { - "estimator_params": [ - { - "n_estimators": 128, - "max_depth": 8 - }, - { - "n_estimators": 128, - "max_depth": 3 - }, - { - "n_estimators": 32, - "max_depth": 8 - }, - { - "n_estimators": 32, - "max_depth": 3 - } - ] - } - } + { + "data": { + "source": "make_regression", + "generation_kwargs": [ + { + "n_samples": 2097152, + "n_features": 256 + }, + { + "n_samples": 2097152, + "n_features": 32 + } + ], + "split_kwargs": { "train_size": 0.5, "test_size": 0.5 } + }, + "algorithm": { + "estimator_params": [ + { + "n_estimators": 128, + "max_depth": 8 + }, + { + "n_estimators": 128, + "max_depth": 3 + }, + { + "n_estimators": 32, + "max_depth": 8 + }, + { + "n_estimators": 32, + "max_depth": 3 + } + ] + } + } ] }, "TEMPLATES": { "regression": { - "SETS": [ - "xgboost regression", - "xgboost implementations", - "xgboost data" - ] + "SETS": ["xgboost regression", "xgboost implementations", "xgboost data"] } } } diff --git a/configs/xgboost_example.json b/configs/xgboost_example.json index de06d647..fce53afd 100644 --- a/configs/xgboost_example.json +++ b/configs/xgboost_example.json @@ -1,45 +1,45 @@ -{ - "PARAMETERS_SETS": { - "common": { - "algorithm": { - "device": "cpu", - "library": "xgboost", - "estimator_params": { "max_depth": [3, 5] }, - "enable_modelbuilders": [true, false] - }, - "bench": { "n_runs": 5, "time_limit": 60 } - }, - "classification": { - "algorithm": { - "estimator": "XGBClassifier", - "estimator_params": { "scale_pos_weight": "[SPECIAL_VALUE]auto" } - }, - "data": { - "source": "make_classification", - "generation_kwargs": { - "n_classes": [2, 4], - "n_samples": 1250, - "n_features": 8, - "n_informative": "[SPECIAL_VALUE]0.75" - }, - "split_kwargs": { "test_size": 0.2 } - } - }, - "regression": { - "algorithm": { "estimator": "XGBRegressor" }, - "data": { - "source": "make_regression", - "generation_kwargs": { "n_samples": 1000, "n_features": 8 }, - "split_kwargs": { "ignore": true } - } - } - }, - "TEMPLATES": { - "classification": { - "SETS": ["common", "classification"] - }, - "regression": { - "SETS": ["common", "regression"] - } - } -} +{ + "PARAMETERS_SETS": { + "common": { + "algorithm": { + "device": "cpu", + "library": "xgboost", + "estimator_params": { "max_depth": [3, 5] }, + "enable_modelbuilders": [true, false] + }, + "bench": { "n_runs": 5, "time_limit": 60 } + }, + "classification": { + "algorithm": { + "estimator": "XGBClassifier", + "estimator_params": { "scale_pos_weight": "[SPECIAL_VALUE]auto" } + }, + "data": { + "source": "make_classification", + "generation_kwargs": { + "n_classes": [2, 4], + "n_samples": 1250, + "n_features": 8, + "n_informative": "[SPECIAL_VALUE]0.75" + }, + "split_kwargs": { "test_size": 0.2 } + } + }, + "regression": { + "algorithm": { "estimator": "XGBRegressor" }, + "data": { + "source": "make_regression", + "generation_kwargs": { "n_samples": 1000, "n_features": 8 }, + "split_kwargs": { "ignore": true } + } + } + }, + "TEMPLATES": { + "classification": { + "SETS": ["common", "classification"] + }, + "regression": { + "SETS": ["common", "regression"] + } + } +}