diff --git a/tpot_tutorial.ipynb b/tpot_tutorial.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..7e28f42e8f6f5aa231b349623253761ecdb3d430 --- /dev/null +++ b/tpot_tutorial.ipynb @@ -0,0 +1,958 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "29cb46d7", + "metadata": {}, + "source": [ + "Install command\n", + "%pip install tpot" + ] + }, + { + "cell_type": "markdown", + "id": "60aa22cd", + "metadata": {}, + "source": [ + "What is TPOT?\n", + "* TPOT is an automated machine learning tool that uses genetic programming to optimize machine learning pipelines. It is essentially an assistant for tree-based pipeline optimization. <br/>\n", + "<br/>\n", + "\n", + "What/Who is it good for? \n", + "* TPOT takes the most tedious part of machine learning off your hands. It does this by exploring many candidate pipelines and finding the best one for the data you are working with. \n", + "* This AutoML tool is a real asset when you want competitive classification accuracy without hand-tuning every model. Beyond model selection, it can identify artificial feature constructors and novel pipeline operators that further improve accuracy. TPOT chains these operators together into a sequence of operations that act on the given dataset. <br/>\n", + "<br/>\n", + "\n", + "How to Install\n", + "* We installed TPOT with `pip install tpot`. PyTorch may be installed as well, but it is not necessary; it can be added with `conda install pytorch`. <br/>\n", + "<br/>\n", + "\n", + "Rename target to class – important step" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "022ab1ce", + "metadata": {}, + "outputs": [], + "source": [ + "from tpot import TPOTClassifier\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.datasets import load_iris\n", + "import pandas as pd \n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2898aaa9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([[5.1, 3.5, 1.4, 0.2],\n", + " [4.9, 3. , 1.4, 0.2],\n", + " [4.7, 3.2, 1.3, 0.2],\n", + " [4.6, 3.1, 1.5, 0.2],\n", + " [5. 
, 3.6, 1.4, 0.2]]),\n", + " array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", + " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", + " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]))" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris = load_iris()\n", + "iris.data[0:5], iris.target" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1d1ac27a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((112, 4), (38, 4), (112,), (38,))" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, train_size=0.75, test_size=0.25)\n", + "X_train.shape, X_test.shape, y_train.shape, y_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "015515d0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warning: xgboost.XGBClassifier is not available and will not be used by TPOT.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "31f16c0878af4057a2347e6042c56a75", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Optimization Progress: 0%| | 0/100 [00:00<?, ?pipeline/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "2.41 minutes have elapsed. TPOT will close down.\n", + "TPOT closed during evaluation in one generation.\n", + "WARNING: TPOT may not provide a good pipeline if TPOT is stopped/interrupted in a early generation.\n", + "\n", + "\n", + "TPOT closed prematurely. 
Will use the current best pipeline.\n", + "\n", + "Best pipeline: MLPClassifier(input_matrix, alpha=0.001, learning_rate_init=0.001)\n", + "0.9736842105263158\n" + ] + } + ], + "source": [ + "tpot = TPOTClassifier(verbosity=2, max_time_mins=2)\n", + "tpot.fit(X_train, y_train)\n", + "print(tpot.score(X_test, y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "fedcae2c", + "metadata": {}, + "outputs": [], + "source": [ + "tpot.export('tpot_iris_pipeline.py')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "f2ac0eda", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Survived</th>\n", + " <th>Pclass</th>\n", + " <th>Name</th>\n", + " <th>Sex</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Ticket</th>\n", + " <th>Fare</th>\n", + " <th>Cabin</th>\n", + " <th>Embarked</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Braund, Mr. Owen Harris</td>\n", + " <td>male</td>\n", + " <td>22.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>A/5 21171</td>\n", + " <td>7.2500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n", + " <td>female</td>\n", + " <td>38.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>PC 17599</td>\n", + " <td>71.2833</td>\n", + " <td>C85</td>\n", + " <td>C</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>Heikkinen, Miss. Laina</td>\n", + " <td>female</td>\n", + " <td>26.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>STON/O2. 3101282</td>\n", + " <td>7.9250</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n", + " <td>female</td>\n", + " <td>35.0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>113803</td>\n", + " <td>53.1000</td>\n", + " <td>C123</td>\n", + " <td>S</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>5</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>Allen, Mr. William Henry</td>\n", + " <td>male</td>\n", + " <td>35.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>373450</td>\n", + " <td>8.0500</td>\n", + " <td>NaN</td>\n", + " <td>S</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. 
William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic = pd.read_csv('titanic_train.csv')\n", + "titanic.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "30eeb3aa", + "metadata": {}, + "outputs": [], + "source": [ + "#rename target to class\n", + "titanic.rename(columns={'Survived': 'class'}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "bcc561a3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of levels in category 'Name': \b 891.00 \n", + "Number of levels in category 'Sex': \b 2.00 \n", + "Number of levels in category 'Ticket': \b 681.00 \n", + "Number of levels in category 'Cabin': \b 148.00 \n", + "Number of levels in category 'Embarked': \b 4.00 \n" + ] + } + ], + "source": [ + "for cat in ['Name', 'Sex', 'Ticket', 'Cabin', 'Embarked']:\n", + " print(\"Number of levels in category '{0}': \\b {1:2.2f} \".format(cat, titanic[cat].unique().size))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "5be7251f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Levels for category 'Sex': ['male' 'female']\n", + "Levels for category 'Embarked': ['S' 'C' 'Q' nan]\n" + ] + } + ], + "source": [ + "for cat in ['Sex', 'Embarked']:\n", + " print(\"Levels for category '{0}': {1}\".format(cat, titanic[cat].unique()))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "8bb50fa8", + "metadata": {}, + "outputs": [], + "source": [ + "titanic['Sex'] = titanic['Sex'].map({'male':0,'female':1})\n", + "titanic['Embarked'] = titanic['Embarked'].map({'S':0,'C':1,'Q':2})" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "11c06d01", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PassengerId False\n", + "class False\n", + "Pclass False\n", + "Name False\n", + "Sex False\n", + "Age False\n", + "SibSp False\n", + "Parch False\n", + "Ticket False\n", + "Fare False\n", + "Cabin False\n", + "Embarked False\n", + "dtype: bool" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic = titanic.fillna(-999)\n", + "pd.isnull(titanic).any()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "aa017a2a", + "metadata": {}, + "outputs": [], + "source": [ + "# one-hot encode the Cabin column: each unique cabin value becomes a binary feature\n", + "from sklearn.preprocessing import MultiLabelBinarizer\n", + "mlb = MultiLabelBinarizer()\n", + "CabinTrans = mlb.fit_transform([{str(val)} for val in titanic['Cabin'].values])" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "34567d07", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [1, 0, 0, ..., 0, 0, 0],\n", + " ...,\n", + " [1, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [1, 0, 0, ..., 0, 0, 0]])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic_new = 
titanic.drop(['Name','Ticket','Cabin','class'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "e8ccd33c", + "metadata": {}, + "outputs": [], + "source": [ + "assert (len(titanic['Cabin'].unique()) == len(mlb.classes_)), \"Not Equal\" #check correct encoding done" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "594776f6", + "metadata": {}, + "outputs": [], + "source": [ + "titanic_new = np.hstack((titanic_new.values,CabinTrans))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "e8d47aef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.isnan(titanic_new).any()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "6fb6b7df", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "156" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic_new[0].size" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "09fe6803", + "metadata": {}, + "outputs": [], + "source": [ + "titanic_class = titanic['class'].values" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "a14bb5fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(668, 223)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "training_indices, validation_indices = train_test_split(titanic.index, stratify = titanic_class, train_size=0.75, test_size=0.25)\n", + "training_indices.size, validation_indices.size" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "227863a0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warning: xgboost.XGBClassifier is not available and will not be used by TPOT.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Optimization Progress: 0%| | 0/40 [00:00<?, ?pipeline/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Generation 1 - Current best internal CV score: 0.8023903041185052\n", + "\n", + "2.03 minutes have elapsed. TPOT will close down.\n", + "TPOT closed during evaluation in one generation.\n", + "WARNING: TPOT may not provide a good pipeline if TPOT is stopped/interrupted in a early generation.\n", + "\n", + "\n", + "TPOT closed prematurely. 
Will use the current best pipeline.\n", + "\n", + "Best pipeline: GradientBoostingClassifier(input_matrix, learning_rate=0.1, max_depth=6, max_features=0.3, min_samples_leaf=20, min_samples_split=15, n_estimators=100, subsample=0.45)\n" + ] + }, + { + "data": { + "text/plain": [ + "TPOTClassifier(max_eval_time_mins=0.04, max_time_mins=2, population_size=40,\n", + " verbosity=2)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tpot = TPOTClassifier(verbosity=2, max_time_mins=2, max_eval_time_mins=0.04, population_size=40)\n", + "tpot.fit(titanic_new[training_indices], titanic_class[training_indices])" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "ca18f35b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8251121076233184" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tpot.score(titanic_new[validation_indices], titanic.loc[validation_indices, 'class'].values)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "4d710e07", + "metadata": {}, + "outputs": [], + "source": [ + "tpot.export('tpot_titanic_pipeline.py')" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "d7ff45ed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PassengerId</th>\n", + " <th>Pclass</th>\n", + " <th>Age</th>\n", + " <th>SibSp</th>\n", + " <th>Parch</th>\n", + " <th>Fare</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>count</th>\n", + " <td>418.000000</td>\n", + " <td>418.000000</td>\n", + " <td>332.000000</td>\n", + " <td>418.000000</td>\n", + " <td>418.000000</td>\n", + " <td>417.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mean</th>\n", + " <td>1100.500000</td>\n", + " <td>2.265550</td>\n", + " <td>30.272590</td>\n", + " <td>0.447368</td>\n", + " <td>0.392344</td>\n", + " <td>35.627188</td>\n", + " </tr>\n", + " <tr>\n", + " <th>std</th>\n", + " <td>120.810458</td>\n", + " <td>0.841838</td>\n", + " <td>14.181209</td>\n", + " <td>0.896760</td>\n", + " <td>0.981429</td>\n", + " <td>55.907576</td>\n", + " </tr>\n", + " <tr>\n", + " <th>min</th>\n", + " <td>892.000000</td>\n", + " <td>1.000000</td>\n", + " <td>0.170000</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25%</th>\n", + " <td>996.250000</td>\n", + " <td>1.000000</td>\n", + " <td>21.000000</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " <td>7.895800</td>\n", + " </tr>\n", + " <tr>\n", + " <th>50%</th>\n", + " <td>1100.500000</td>\n", + " <td>3.000000</td>\n", + " <td>27.000000</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " <td>14.454200</td>\n", + " </tr>\n", + " <tr>\n", + " <th>75%</th>\n", + " <td>1204.750000</td>\n", + " <td>3.000000</td>\n", + " <td>39.000000</td>\n", + " <td>1.000000</td>\n", + " <td>0.000000</td>\n", + " <td>31.500000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>max</th>\n", + " <td>1309.000000</td>\n", + " <td>3.000000</td>\n", + " 
<td>76.000000</td>\n", + " <td>8.000000</td>\n", + " <td>9.000000</td>\n", + " <td>512.329200</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " PassengerId Pclass Age SibSp Parch Fare\n", + "count 418.000000 418.000000 332.000000 418.000000 418.000000 417.000000\n", + "mean 1100.500000 2.265550 30.272590 0.447368 0.392344 35.627188\n", + "std 120.810458 0.841838 14.181209 0.896760 0.981429 55.907576\n", + "min 892.000000 1.000000 0.170000 0.000000 0.000000 0.000000\n", + "25% 996.250000 1.000000 21.000000 0.000000 0.000000 7.895800\n", + "50% 1100.500000 3.000000 27.000000 0.000000 0.000000 14.454200\n", + "75% 1204.750000 3.000000 39.000000 1.000000 0.000000 31.500000\n", + "max 1309.000000 3.000000 76.000000 8.000000 9.000000 512.329200" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic_sub = pd.read_csv('titanic_test.csv')\n", + "titanic_sub.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "8264d2ed", + "metadata": {}, + "outputs": [], + "source": [ + "for var in ['Cabin']: #,'Name','Ticket']:\n", + " new = list(set(titanic_sub[var]) - set(titanic[var]))\n", + " titanic_sub.loc[titanic_sub[var].isin(new), var] = -999" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "fe8198e3", + "metadata": {}, + "outputs": [], + "source": [ + "titanic_sub['Sex'] = titanic_sub['Sex'].map({'male':0,'female':1})\n", + "titanic_sub['Embarked'] = titanic_sub['Embarked'].map({'S':0,'C':1,'Q':2})" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "13204313", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PassengerId False\n", + "Pclass False\n", + "Name False\n", + "Sex False\n", + "Age False\n", + "SibSp False\n", + "Parch False\n", + "Ticket False\n", + "Fare False\n", + "Cabin False\n", + "Embarked False\n", + "dtype: bool" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic_sub = titanic_sub.fillna(-999)\n", + "pd.isnull(titanic_sub).any()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "82e8d3fb", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import MultiLabelBinarizer\n", + "mlb = MultiLabelBinarizer()\n", + "SubCabinTrans = mlb.fit([{str(val)} for val in titanic['Cabin'].values]).transform([{str(val)} for val in titanic_sub['Cabin'].values])\n", + "titanic_sub = titanic_sub.drop(['Name','Ticket','Cabin'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "185ba8c1", + "metadata": {}, + "outputs": [], + "source": [ + "titanic_sub_new = np.hstack((titanic_sub.values,SubCabinTrans))" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "359c8b6b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.any(np.isnan(titanic_sub_new))" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "e73a0c32", + "metadata": {}, + "outputs": [], + "source": [ + "assert (titanic_new.shape[1] == titanic_sub_new.shape[1]), \"Not Equal\" " + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "d868e452", + "metadata": {}, + "outputs": [], + "source": [ + "submission = tpot.predict(titanic_sub_new)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "3d91d737", + 
"metadata": {}, + "outputs": [], + "source": [ + "final = pd.DataFrame({'PassengerId': titanic_sub['PassengerId'], 'Survived': submission})\n", + "final.to_csv('data/submission.csv', index = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "240feb73", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(418, 2)" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final.shape" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}