diff --git a/Auto-SKLearn_AutoML/Classification.ipynb b/Auto-SKLearn_AutoML/Classification.ipynb index 0585d146407718f0c1f47c6321d846e3922e5a29..14c5640829dd24121a4c03d5e481f86e4940e8c5 100644 --- a/Auto-SKLearn_AutoML/Classification.ipynb +++ b/Auto-SKLearn_AutoML/Classification.ipynb @@ -12,26 +12,29 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 12, "id": "c69433ce", "metadata": {}, "outputs": [], "source": [ + "# imports\n", "from pprint import pprint\n", "\n", "import sklearn.datasets\n", "import sklearn.metrics\n", + "import pickle\n", "\n", "import autosklearn.classification" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "2b1e1930", "metadata": {}, "outputs": [], "source": [ + "# split the dataset\n", "X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)\n", "X_train, X_test, y_train, y_test = \\\n", " sklearn.model_selection.train_test_split(X, y, random_state=1)" @@ -39,11 +42,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "15e5f821", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/mnt/home/buckl113/anaconda3/envs/auto_sklearn_env/lib/python3.9/site-packages/autosklearn/metalearning/metalearning/meta_base.py:68: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " self.metafeatures = self.metafeatures.append(metafeatures)\n", + "/mnt/home/buckl113/anaconda3/envs/auto_sklearn_env/lib/python3.9/site-packages/autosklearn/metalearning/metalearning/meta_base.py:72: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. 
Use pandas.concat instead.\n", + " self.algorithm_runs[metric].append(runs)\n" + ] + }, + { + "data": { + "text/plain": [ + "AutoSklearnClassifier(per_run_time_limit=30, time_left_for_this_task=120,\n", + " tmp_folder='/tmp/autosklearn_classification_example_tmp')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ + "# Fit the classifier\n", "automl = autosklearn.classification.AutoSklearnClassifier(\n", " time_left_for_this_task=120,\n", " per_run_time_limit=30,\n", @@ -54,34 +80,406 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "2d4e4d9f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " rank ensemble_weight type cost duration\n", + "model_id \n", + "7 1 0.08 extra_trees 0.014184 2.037889\n", + "16 2 0.04 gradient_boosting 0.021277 1.420893\n", + "2 3 0.02 random_forest 0.028369 2.230037\n", + "3 4 0.02 mlp 0.028369 2.112077\n", + "26 5 0.04 extra_trees 0.028369 2.946061\n", + "6 6 0.06 mlp 0.028369 1.590191\n", + "22 7 0.04 gradient_boosting 0.028369 1.567744\n", + "10 8 0.06 random_forest 0.028369 2.733248\n", + "11 9 0.04 random_forest 0.028369 3.057115\n", + "19 10 0.02 extra_trees 0.028369 3.806663\n", + "13 11 0.04 gradient_boosting 0.028369 2.005229\n", + "14 12 0.04 mlp 0.028369 3.018905\n", + "18 13 0.02 random_forest 0.035461 2.909402\n", + "17 14 0.02 gradient_boosting 0.035461 2.358046\n", + "12 15 0.02 gradient_boosting 0.035461 2.129865\n", + "5 16 0.10 random_forest 0.035461 2.896027\n", + "9 17 0.02 extra_trees 0.042553 2.569632\n", + "25 18 0.02 adaboost 0.042553 2.708276\n", + "27 19 0.04 extra_trees 0.042553 2.473708\n", + "23 20 0.06 mlp 0.049645 3.621525\n", + "32 21 0.04 extra_trees 0.063830 2.219691\n", + "24 22 0.06 random_forest 0.070922 2.306794\n", + "20 23 0.02 passive_aggressive 0.078014 0.973551\n", + "30 24 0.02 adaboost 0.078014 1.630544\n", + "29 25 0.06 gaussian_nb 
0.141844 1.153570\n" + ] + } + ], "source": [ + "# Different Models run by autosklearn\n", "print(automl.leaderboard())" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "72e580e7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{ 2: { 'balancing': Balancing(random_state=1),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3ac8f13d0>,\n", + " 'cost': 0.028368794326241176,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3ac688a90>,\n", + " 'ensemble_weight': 0.02,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3ac8f1310>,\n", + " 'model_id': 2,\n", + " 'rank': 3,\n", + " 'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=512, n_jobs=1,\n", + " random_state=1, warm_start=True)},\n", + " 3: { 'balancing': Balancing(random_state=1),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3ac95b820>,\n", + " 'cost': 0.028368794326241176,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3ac6541f0>,\n", + " 'ensemble_weight': 0.02,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3ac95b6a0>,\n", + " 'model_id': 3,\n", + " 'rank': 4,\n", + " 'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.0001363185819149026, beta_1=0.999,\n", + " beta_2=0.9, early_stopping=True,\n", + " hidden_layer_sizes=(115, 115, 115),\n", + " learning_rate_init=0.00018009776276177523, max_iter=32,\n", + " n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},\n", + " 5: { 'balancing': Balancing(random_state=1, strategy='weighting'),\n", + " 'classifier': 
<autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3b0500820>,\n", + " 'cost': 0.03546099290780147,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3affaeeb0>,\n", + " 'ensemble_weight': 0.1,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3b034d700>,\n", + " 'model_id': 5,\n", + " 'rank': 13,\n", + " 'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=3, min_samples_leaf=2,\n", + " n_estimators=512, n_jobs=1, random_state=1,\n", + " warm_start=True)},\n", + " 6: { 'balancing': Balancing(random_state=1, strategy='weighting'),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3af75d4c0>,\n", + " 'cost': 0.028368794326241176,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3ac8477c0>,\n", + " 'ensemble_weight': 0.06,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3af75d3a0>,\n", + " 'model_id': 6,\n", + " 'rank': 5,\n", + " 'sklearn_classifier': MLPClassifier(alpha=0.0017940473175767063, beta_1=0.999, beta_2=0.9,\n", + " early_stopping=True, hidden_layer_sizes=(101, 101),\n", + " learning_rate_init=0.0004684917334431039, max_iter=32,\n", + " n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},\n", + " 7: { 'balancing': Balancing(random_state=1),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3ac829e50>,\n", + " 'cost': 0.014184397163120588,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3ac1c6c40>,\n", + " 'ensemble_weight': 0.08,\n", + " 'feature_preprocessor': 
<autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3ac829ee0>,\n", + " 'model_id': 7,\n", + " 'rank': 1,\n", + " 'sklearn_classifier': ExtraTreesClassifier(max_features=34, min_samples_leaf=3, min_samples_split=11,\n", + " n_estimators=512, n_jobs=1, random_state=1,\n", + " warm_start=True)},\n", + " 9: { 'balancing': Balancing(random_state=1, strategy='weighting'),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3afaa6100>,\n", + " 'cost': 0.04255319148936165,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3b0717b80>,\n", + " 'ensemble_weight': 0.02,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3b0809c10>,\n", + " 'model_id': 9,\n", + " 'rank': 17,\n", + " 'sklearn_classifier': ExtraTreesClassifier(max_features=9, min_samples_split=10, n_estimators=512,\n", + " n_jobs=1, random_state=1, warm_start=True)},\n", + " 10: { 'balancing': Balancing(random_state=1, strategy='weighting'),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3af765670>,\n", + " 'cost': 0.028368794326241176,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3ac75f1c0>,\n", + " 'ensemble_weight': 0.06,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3af765550>,\n", + " 'model_id': 10,\n", + " 'rank': 6,\n", + " 'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=4, min_samples_split=6,\n", + " n_estimators=512, n_jobs=1, random_state=1,\n", + " warm_start=True)},\n", + " 11: { 'balancing': Balancing(random_state=1),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 
0x2ad3af85dac0>,\n", + " 'cost': 0.028368794326241176,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3ae0ae9a0>,\n", + " 'ensemble_weight': 0.04,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3af85d940>,\n", + " 'model_id': 11,\n", + " 'rank': 7,\n", + " 'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=23, min_samples_leaf=7,\n", + " n_estimators=512, n_jobs=1, random_state=1,\n", + " warm_start=True)},\n", + " 12: { 'balancing': Balancing(random_state=1),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3b04ebca0>,\n", + " 'cost': 0.03546099290780147,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3b0045ca0>,\n", + " 'ensemble_weight': 0.02,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3b04eb820>,\n", + " 'model_id': 12,\n", + " 'rank': 14,\n", + " 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,\n", + " l2_regularization=0.005326508887463406,\n", + " learning_rate=0.060800813211425456, max_iter=512,\n", + " max_leaf_nodes=6, min_samples_leaf=5,\n", + " n_iter_no_change=5, random_state=1,\n", + " validation_fraction=None, warm_start=True)},\n", + " 13: { 'balancing': Balancing(random_state=1, strategy='weighting'),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3afa53d60>,\n", + " 'cost': 0.028368794326241176,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3af75db20>,\n", + " 'ensemble_weight': 0.04,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 
0x2ad3afa53a60>,\n", + " 'model_id': 13,\n", + " 'rank': 8,\n", + " 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=False,\n", + " l2_regularization=1.0647401999412075e-10,\n", + " learning_rate=0.08291320147381159, max_iter=512,\n", + " max_leaf_nodes=39, n_iter_no_change=0,\n", + " random_state=1, validation_fraction=None,\n", + " warm_start=True)},\n", + " 14: { 'balancing': Balancing(random_state=1),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3affae4f0>,\n", + " 'cost': 0.028368794326241176,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3af7a2340>,\n", + " 'ensemble_weight': 0.04,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3afc8fc10>,\n", + " 'model_id': 14,\n", + " 'rank': 9,\n", + " 'sklearn_classifier': MLPClassifier(activation='tanh', alpha=2.5550223982458062e-06, beta_1=0.999,\n", + " beta_2=0.9, hidden_layer_sizes=(54, 54, 54),\n", + " learning_rate_init=0.00027271287919467994, max_iter=256,\n", + " n_iter_no_change=32, random_state=1, validation_fraction=0.0,\n", + " verbose=0, warm_start=True)},\n", + " 16: { 'balancing': Balancing(random_state=1, strategy='weighting'),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3ac655700>,\n", + " 'cost': 0.021276595744680882,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3ac1c60a0>,\n", + " 'ensemble_weight': 0.04,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3ac6552b0>,\n", + " 'model_id': 16,\n", + " 'rank': 2,\n", + " 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,\n", + " l2_regularization=3.387912939529945e-10,\n", + " 
learning_rate=0.30755227194768237, max_iter=128,\n", + " max_leaf_nodes=60, min_samples_leaf=39,\n", + " n_iter_no_change=18, random_state=1,\n", + " validation_fraction=None, warm_start=True)},\n", + " 17: { 'balancing': Balancing(random_state=1),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3b09a1e50>,\n", + " 'cost': 0.03546099290780147,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3b02b41f0>,\n", + " 'ensemble_weight': 0.02,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3b072a580>,\n", + " 'model_id': 17,\n", + " 'rank': 15,\n", + " 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,\n", + " l2_regularization=0.4635442279519353,\n", + " learning_rate=0.09809681787962342, max_iter=512,\n", + " max_leaf_nodes=328, min_samples_leaf=2,\n", + " n_iter_no_change=2, random_state=1,\n", + " validation_fraction=None, warm_start=True)},\n", + " 18: { 'balancing': Balancing(random_state=1, strategy='weighting'),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3b0782700>,\n", + " 'cost': 0.03546099290780147,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3b045f0d0>,\n", + " 'ensemble_weight': 0.02,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3b09a1610>,\n", + " 'model_id': 18,\n", + " 'rank': 16,\n", + " 'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=3, n_estimators=512,\n", + " n_jobs=1, random_state=1, warm_start=True)},\n", + " 19: { 'balancing': Balancing(random_state=1, strategy='weighting'),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 
0x2ad3aff3ceb0>,\n", + " 'cost': 0.028368794326241176,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3af9b7310>,\n", + " 'ensemble_weight': 0.02,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3aff3ccd0>,\n", + " 'model_id': 19,\n", + " 'rank': 10,\n", + " 'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=448, min_samples_leaf=2,\n", + " min_samples_split=20, n_estimators=512, n_jobs=1,\n", + " random_state=1, warm_start=True)},\n", + " 20: { 'balancing': Balancing(random_state=1),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3b13f13a0>,\n", + " 'cost': 0.07801418439716312,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3b0f96070>,\n", + " 'ensemble_weight': 0.02,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3b13f10a0>,\n", + " 'model_id': 20,\n", + " 'rank': 23,\n", + " 'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=32, random_state=1,\n", + " tol=0.0002600768160857831, warm_start=True)},\n", + " 22: { 'balancing': Balancing(random_state=1),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3b018d370>,\n", + " 'cost': 0.028368794326241176,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3afc8f250>,\n", + " 'ensemble_weight': 0.04,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3affd3370>,\n", + " 'model_id': 22,\n", + " 'rank': 11,\n", + " 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,\n", + " 
l2_regularization=8.057778875694463e-05,\n", + " learning_rate=0.09179220974965213, max_iter=256,\n", + " max_leaf_nodes=200, n_iter_no_change=18,\n", + " random_state=1,\n", + " validation_fraction=0.14295295806077554,\n", + " warm_start=True)},\n", + " 23: { 'balancing': Balancing(random_state=1, strategy='weighting'),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3b0f28160>,\n", + " 'cost': 0.049645390070921946,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3b076e8b0>,\n", + " 'ensemble_weight': 0.06,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3b0f15b20>,\n", + " 'model_id': 23,\n", + " 'rank': 20,\n", + " 'sklearn_classifier': MLPClassifier(alpha=0.02847755502162456, beta_1=0.999, beta_2=0.9,\n", + " hidden_layer_sizes=(123, 123),\n", + " learning_rate_init=0.000421568792103947, max_iter=256,\n", + " n_iter_no_change=32, random_state=1, validation_fraction=0.0,\n", + " verbose=0, warm_start=True)},\n", + " 24: { 'balancing': Balancing(random_state=1),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3b12175e0>,\n", + " 'cost': 0.07092198581560283,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3b0eacd60>,\n", + " 'ensemble_weight': 0.06,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3b1195910>,\n", + " 'model_id': 24,\n", + " 'rank': 22,\n", + " 'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=16, n_estimators=512,\n", + " n_jobs=1, random_state=1, warm_start=True)},\n", + " 25: { 'balancing': Balancing(random_state=1),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 
0x2ad3b0d9c820>,\n", + " 'cost': 0.04255319148936165,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3b09421c0>,\n", + " 'ensemble_weight': 0.02,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3afab93d0>,\n", + " 'model_id': 25,\n", + " 'rank': 18,\n", + " 'sklearn_classifier': AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=3),\n", + " learning_rate=0.046269426995092074, n_estimators=406,\n", + " random_state=1)},\n", + " 26: { 'balancing': Balancing(random_state=1),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3b01788b0>,\n", + " 'cost': 0.028368794326241176,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3afeae8e0>,\n", + " 'ensemble_weight': 0.04,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3b01784f0>,\n", + " 'model_id': 26,\n", + " 'rank': 12,\n", + " 'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=414, min_samples_leaf=2,\n", + " min_samples_split=19, n_estimators=512, n_jobs=1,\n", + " random_state=1, warm_start=True)},\n", + " 27: { 'balancing': Balancing(random_state=1, strategy='weighting'),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3b0d6b9d0>,\n", + " 'cost': 0.04255319148936165,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3b0b09b80>,\n", + " 'ensemble_weight': 0.04,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3b0d626d0>,\n", + " 'model_id': 27,\n", + " 'rank': 19,\n", + " 'sklearn_classifier': ExtraTreesClassifier(bootstrap=True, 
criterion='entropy', max_features=24,\n", + " min_samples_leaf=20, min_samples_split=5, n_estimators=512,\n", + " n_jobs=1, random_state=1, warm_start=True)},\n", + " 29: { 'balancing': Balancing(random_state=1, strategy='weighting'),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3b17611c0>,\n", + " 'cost': 0.14184397163120566,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3b12ac2e0>,\n", + " 'ensemble_weight': 0.06,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3b154bdc0>,\n", + " 'model_id': 29,\n", + " 'rank': 25,\n", + " 'sklearn_classifier': GaussianNB()},\n", + " 30: { 'balancing': Balancing(random_state=1, strategy='weighting'),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3b15c7ee0>,\n", + " 'cost': 0.07801418439716312,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3b10e40a0>,\n", + " 'ensemble_weight': 0.02,\n", + " 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3b159f4f0>,\n", + " 'model_id': 30,\n", + " 'rank': 24,\n", + " 'sklearn_classifier': AdaBoostClassifier(algorithm='SAMME',\n", + " base_estimator=DecisionTreeClassifier(max_depth=7),\n", + " learning_rate=0.02920859465461962, n_estimators=96,\n", + " random_state=1)},\n", + " 32: { 'balancing': Balancing(random_state=1, strategy='weighting'),\n", + " 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x2ad3b107ad30>,\n", + " 'cost': 0.06382978723404253,\n", + " 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x2ad3adf57af0>,\n", + " 'ensemble_weight': 0.04,\n", + " 'feature_preprocessor': 
<autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x2ad3b107a970>,\n", + " 'model_id': 32,\n", + " 'rank': 21,\n", + " 'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=1, min_samples_leaf=17,\n", + " min_samples_split=4, n_estimators=512, n_jobs=1,\n", + " random_state=1, warm_start=True)}}\n" + ] + } + ], "source": [ + "# Show the different models\n", "pprint(automl.show_models(), indent=4)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "027039cd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy score: 0.951048951048951\n" + ] + } + ], "source": [ + "# Predict the test labels\n", "predictions = automl.predict(X_test)\n", "print(\"Accuracy score:\", sklearn.metrics.accuracy_score(y_test, predictions))" ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "acd372ea", + "metadata": {}, + "outputs": [], + "source": [ + "# Export the rank-1 model's final sklearn estimator (its preprocessor entries are not pickled)\n", + "clf = min(automl.show_models().values(), key=lambda m: m['rank'])['sklearn_classifier']\n", + "with open('model.pickle', 'wb') as f:\n    pickle.dump(clf, f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3324782", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "021b7159", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {