diff --git a/Diabetes_prediction.ipynb b/Diabetes_prediction.ipynb new file mode 100644 index 0000000..0bd35eb --- /dev/null +++ b/Diabetes_prediction.ipynb @@ -0,0 +1,1621 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "LnPbntVRnfvV" + }, + "source": [ + "Importing the Dependencies" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-71UtHzNVWjB" + }, + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn import svm\n", + "from sklearn.metrics import accuracy_score" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bmfOfG8joBBy" + }, + "source": [ + "Data Collection and Analysis\n", + "\n", + "PIMA Diabetes Dataset" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Xpw6Mj_pn_TL" + }, + "source": [ + "# loading the diabetes dataset to a pandas DataFrame\n", + "diabetes_dataset = pd.read_csv('/content/diabetes.csv')" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "eupSUC7yoo9M" + }, + "source": [ + "pd.read_csv?" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "-tjO09ncovoh", + "outputId": "be1717b1-cd50-419d-a3fa-16b4e8fec9a6" + }, + "source": [ + "# printing the first 5 rows of the dataset\n", + "diabetes_dataset.head()" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", + "0 6 148 72 35 0 33.6 \n", + "1 1 85 66 29 0 26.6 \n", + "2 8 183 64 0 0 23.3 \n", + "3 1 89 66 23 94 28.1 \n", + "4 0 137 40 35 168 43.1 \n", + "\n", + " DiabetesPedigreeFunction Age Outcome \n", + "0 0.627 50 1 \n", + "1 0.351 31 0 \n", + "2 0.672 32 1 \n", + "3 0.167 21 0 \n", + "4 2.288 33 1 " + ], + "text/html": [ + "\n", + "\n", + "
| \n", + " | Pregnancies\n", + " | Glucose\n", + " | BloodPressure\n", + " | SkinThickness\n", + " | Insulin\n", + " | BMI\n", + " | DiabetesPedigreeFunction\n", + " | Age\n", + " | Outcome\n", + " | 
|---|---|---|---|---|---|---|---|---|---|
| 0\n", + " | 6\n", + " | 148\n", + " | 72\n", + " | 35\n", + " | 0\n", + " | 33.6\n", + " | 0.627\n", + " | 50\n", + " | 1\n", + " | 
| 1\n", + " | 1\n", + " | 85\n", + " | 66\n", + " | 29\n", + " | 0\n", + " | 26.6\n", + " | 0.351\n", + " | 31\n", + " | 0\n", + " | 
| 2\n", + " | 8\n", + " | 183\n", + " | 64\n", + " | 0\n", + " | 0\n", + " | 23.3\n", + " | 0.672\n", + " | 32\n", + " | 1\n", + " | 
| 3\n", + " | 1\n", + " | 89\n", + " | 66\n", + " | 23\n", + " | 94\n", + " | 28.1\n", + " | 0.167\n", + " | 21\n", + " | 0\n", + " | 
| 4\n", + " | 0\n", + " | 137\n", + " | 40\n", + " | 35\n", + " | 168\n", + " | 43.1\n", + " | 2.288\n", + " | 33\n", + " | 1\n", + " | 
| \n", + " | Pregnancies\n", + " | Glucose\n", + " | BloodPressure\n", + " | SkinThickness\n", + " | Insulin\n", + " | BMI\n", + " | DiabetesPedigreeFunction\n", + " | Age\n", + " | Outcome\n", + " | 
|---|---|---|---|---|---|---|---|---|---|
| count\n", + " | 768.000000\n", + " | 768.000000\n", + " | 768.000000\n", + " | 768.000000\n", + " | 768.000000\n", + " | 768.000000\n", + " | 768.000000\n", + " | 768.000000\n", + " | 768.000000\n", + " | 
| mean\n", + " | 3.845052\n", + " | 120.894531\n", + " | 69.105469\n", + " | 20.536458\n", + " | 79.799479\n", + " | 31.992578\n", + " | 0.471876\n", + " | 33.240885\n", + " | 0.348958\n", + " | 
| std\n", + " | 3.369578\n", + " | 31.972618\n", + " | 19.355807\n", + " | 15.952218\n", + " | 115.244002\n", + " | 7.884160\n", + " | 0.331329\n", + " | 11.760232\n", + " | 0.476951\n", + " | 
| min\n", + " | 0.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 0.078000\n", + " | 21.000000\n", + " | 0.000000\n", + " | 
| 25%\n", + " | 1.000000\n", + " | 99.000000\n", + " | 62.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 27.300000\n", + " | 0.243750\n", + " | 24.000000\n", + " | 0.000000\n", + " | 
| 50%\n", + " | 3.000000\n", + " | 117.000000\n", + " | 72.000000\n", + " | 23.000000\n", + " | 30.500000\n", + " | 32.000000\n", + " | 0.372500\n", + " | 29.000000\n", + " | 0.000000\n", + " | 
| 75%\n", + " | 6.000000\n", + " | 140.250000\n", + " | 80.000000\n", + " | 32.000000\n", + " | 127.250000\n", + " | 36.600000\n", + " | 0.626250\n", + " | 41.000000\n", + " | 1.000000\n", + " | 
| max\n", + " | 17.000000\n", + " | 199.000000\n", + " | 122.000000\n", + " | 99.000000\n", + " | 846.000000\n", + " | 67.100000\n", + " | 2.420000\n", + " | 81.000000\n", + " | 1.000000\n", + " | 
| \n", + " | Pregnancies\n", + " | Glucose\n", + " | BloodPressure\n", + " | SkinThickness\n", + " | Insulin\n", + " | BMI\n", + " | DiabetesPedigreeFunction\n", + " | Age\n", + " | 
|---|---|---|---|---|---|---|---|---|
| Outcome\n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | 
| 0\n", + " | 3.298000\n", + " | 109.980000\n", + " | 68.184000\n", + " | 19.664000\n", + " | 68.792000\n", + " | 30.304200\n", + " | 0.429734\n", + " | 31.190000\n", + " | 
| 1\n", + " | 4.865672\n", + " | 141.257463\n", + " | 70.824627\n", + " | 22.164179\n", + " | 100.335821\n", + " | 35.142537\n", + " | 0.550500\n", + " | 37.067164\n", + " | 
StandardScaler()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
StandardScaler()
SVC(kernel='linear')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SVC(kernel='linear')
| \n", + " | MODELYEAR\n", + " | MAKE\n", + " | MODEL\n", + " | VEHICLECLASS\n", + " | ENGINESIZE\n", + " | CYLINDERS\n", + " | TRANSMISSION\n", + " | FUELTYPE\n", + " | FUELCONSUMPTION_CITY\n", + " | FUELCONSUMPTION_HWY\n", + " | FUELCONSUMPTION_COMB\n", + " | FUELCONSUMPTION_COMB_MPG\n", + " | CO2EMISSIONS\n", + " | 
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0\n", + " | 2014\n", + " | ACURA\n", + " | ILX\n", + " | COMPACT\n", + " | 2.0\n", + " | 4\n", + " | AS5\n", + " | Z\n", + " | 9.9\n", + " | 6.7\n", + " | 8.5\n", + " | 33\n", + " | 196\n", + " | 
| 1\n", + " | 2014\n", + " | ACURA\n", + " | ILX\n", + " | COMPACT\n", + " | 2.4\n", + " | 4\n", + " | M6\n", + " | Z\n", + " | 11.2\n", + " | 7.7\n", + " | 9.6\n", + " | 29\n", + " | 221\n", + " | 
| 2\n", + " | 2014\n", + " | ACURA\n", + " | ILX HYBRID\n", + " | COMPACT\n", + " | 1.5\n", + " | 4\n", + " | AV7\n", + " | Z\n", + " | 6.0\n", + " | 5.8\n", + " | 5.9\n", + " | 48\n", + " | 136\n", + " | 
| 3\n", + " | 2014\n", + " | ACURA\n", + " | MDX 4WD\n", + " | SUV - SMALL\n", + " | 3.5\n", + " | 6\n", + " | AS6\n", + " | Z\n", + " | 12.7\n", + " | 9.1\n", + " | 11.1\n", + " | 25\n", + " | 255\n", + " | 
| 4\n", + " | 2014\n", + " | ACURA\n", + " | RDX AWD\n", + " | SUV - SMALL\n", + " | 3.5\n", + " | 6\n", + " | AS6\n", + " | Z\n", + " | 12.1\n", + " | 8.7\n", + " | 10.6\n", + " | 27\n", + " | 244\n", + " | 
| ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | 
| 1062\n", + " | 2014\n", + " | VOLVO\n", + " | XC60 AWD\n", + " | SUV - SMALL\n", + " | 3.0\n", + " | 6\n", + " | AS6\n", + " | X\n", + " | 13.4\n", + " | 9.8\n", + " | 11.8\n", + " | 24\n", + " | 271\n", + " | 
| 1063\n", + " | 2014\n", + " | VOLVO\n", + " | XC60 AWD\n", + " | SUV - SMALL\n", + " | 3.2\n", + " | 6\n", + " | AS6\n", + " | X\n", + " | 13.2\n", + " | 9.5\n", + " | 11.5\n", + " | 25\n", + " | 264\n", + " | 
| 1064\n", + " | 2014\n", + " | VOLVO\n", + " | XC70 AWD\n", + " | SUV - SMALL\n", + " | 3.0\n", + " | 6\n", + " | AS6\n", + " | X\n", + " | 13.4\n", + " | 9.8\n", + " | 11.8\n", + " | 24\n", + " | 271\n", + " | 
| 1065\n", + " | 2014\n", + " | VOLVO\n", + " | XC70 AWD\n", + " | SUV - SMALL\n", + " | 3.2\n", + " | 6\n", + " | AS6\n", + " | X\n", + " | 12.9\n", + " | 9.3\n", + " | 11.3\n", + " | 25\n", + " | 260\n", + " | 
| 1066\n", + " | 2014\n", + " | VOLVO\n", + " | XC90 AWD\n", + " | SUV - STANDARD\n", + " | 3.2\n", + " | 6\n", + " | AS6\n", + " | X\n", + " | 14.9\n", + " | 10.2\n", + " | 12.8\n", + " | 22\n", + " | 294\n", + " | 
1067 rows × 13 columns
\n", + "| \n", + " | MODELYEAR\n", + " | ENGINESIZE\n", + " | CYLINDERS\n", + " | FUELCONSUMPTION_CITY\n", + " | FUELCONSUMPTION_HWY\n", + " | FUELCONSUMPTION_COMB\n", + " | FUELCONSUMPTION_COMB_MPG\n", + " | CO2EMISSIONS\n", + " | 
|---|---|---|---|---|---|---|---|---|
| count\n", + " | 1067.0\n", + " | 1067.000000\n", + " | 1067.000000\n", + " | 1067.000000\n", + " | 1067.000000\n", + " | 1067.000000\n", + " | 1067.000000\n", + " | 1067.000000\n", + " | 
| mean\n", + " | 2014.0\n", + " | 3.346298\n", + " | 5.794752\n", + " | 13.296532\n", + " | 9.474602\n", + " | 11.580881\n", + " | 26.441425\n", + " | 256.228679\n", + " | 
| std\n", + " | 0.0\n", + " | 1.415895\n", + " | 1.797447\n", + " | 4.101253\n", + " | 2.794510\n", + " | 3.485595\n", + " | 7.468702\n", + " | 63.372304\n", + " | 
| min\n", + " | 2014.0\n", + " | 1.000000\n", + " | 3.000000\n", + " | 4.600000\n", + " | 4.900000\n", + " | 4.700000\n", + " | 11.000000\n", + " | 108.000000\n", + " | 
| 25%\n", + " | 2014.0\n", + " | 2.000000\n", + " | 4.000000\n", + " | 10.250000\n", + " | 7.500000\n", + " | 9.000000\n", + " | 21.000000\n", + " | 207.000000\n", + " | 
| 50%\n", + " | 2014.0\n", + " | 3.400000\n", + " | 6.000000\n", + " | 12.600000\n", + " | 8.800000\n", + " | 10.900000\n", + " | 26.000000\n", + " | 251.000000\n", + " | 
| 75%\n", + " | 2014.0\n", + " | 4.300000\n", + " | 8.000000\n", + " | 15.550000\n", + " | 10.850000\n", + " | 13.350000\n", + " | 31.000000\n", + " | 294.000000\n", + " | 
| max\n", + " | 2014.0\n", + " | 8.400000\n", + " | 12.000000\n", + " | 30.200000\n", + " | 20.500000\n", + " | 25.800000\n", + " | 60.000000\n", + " | 488.000000\n", + " | 
| \n", + " | ENGINESIZE\n", + " | CYLINDERS\n", + " | FUELCONSUMPTION_COMB\n", + " | FUELCONSUMPTION_CITY\n", + " | FUELCONSUMPTION_HWY\n", + " | CO2EMISSIONS\n", + " | 
|---|---|---|---|---|---|---|
| 0\n", + " | 2.0\n", + " | 4\n", + " | 8.5\n", + " | 9.9\n", + " | 6.7\n", + " | 196\n", + " | 
| 1\n", + " | 2.4\n", + " | 4\n", + " | 9.6\n", + " | 11.2\n", + " | 7.7\n", + " | 221\n", + " | 
| 2\n", + " | 1.5\n", + " | 4\n", + " | 5.9\n", + " | 6.0\n", + " | 5.8\n", + " | 136\n", + " | 
| 3\n", + " | 3.5\n", + " | 6\n", + " | 11.1\n", + " | 12.7\n", + " | 9.1\n", + " | 255\n", + " | 
| 4\n", + " | 3.5\n", + " | 6\n", + " | 10.6\n", + " | 12.1\n", + " | 8.7\n", + " | 244\n", + " | 
| \n", + " | age\n", + " | sex\n", + " | cp\n", + " | trestbps\n", + " | chol\n", + " | fbs\n", + " | restecg\n", + " | thalach\n", + " | exang\n", + " | oldpeak\n", + " | slope\n", + " | ca\n", + " | thal\n", + " | target\n", + " | 
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0\n", + " | 63\n", + " | 1\n", + " | 3\n", + " | 145\n", + " | 233\n", + " | 1\n", + " | 0\n", + " | 150\n", + " | 0\n", + " | 2.3\n", + " | 0\n", + " | 0\n", + " | 1\n", + " | 1\n", + " | 
| 1\n", + " | 37\n", + " | 1\n", + " | 2\n", + " | 130\n", + " | 250\n", + " | 0\n", + " | 1\n", + " | 187\n", + " | 0\n", + " | 3.5\n", + " | 0\n", + " | 0\n", + " | 2\n", + " | 1\n", + " | 
| 2\n", + " | 41\n", + " | 0\n", + " | 1\n", + " | 130\n", + " | 204\n", + " | 0\n", + " | 0\n", + " | 172\n", + " | 0\n", + " | 1.4\n", + " | 2\n", + " | 0\n", + " | 2\n", + " | 1\n", + " | 
| 3\n", + " | 56\n", + " | 1\n", + " | 1\n", + " | 120\n", + " | 236\n", + " | 0\n", + " | 1\n", + " | 178\n", + " | 0\n", + " | 0.8\n", + " | 2\n", + " | 0\n", + " | 2\n", + " | 1\n", + " | 
| 4\n", + " | 57\n", + " | 0\n", + " | 0\n", + " | 120\n", + " | 354\n", + " | 0\n", + " | 1\n", + " | 163\n", + " | 1\n", + " | 0.6\n", + " | 2\n", + " | 0\n", + " | 2\n", + " | 1\n", + " | 
| \n", + " | age\n", + " | sex\n", + " | cp\n", + " | trestbps\n", + " | chol\n", + " | fbs\n", + " | restecg\n", + " | thalach\n", + " | exang\n", + " | oldpeak\n", + " | slope\n", + " | ca\n", + " | thal\n", + " | target\n", + " | 
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 298\n", + " | 57\n", + " | 0\n", + " | 0\n", + " | 140\n", + " | 241\n", + " | 0\n", + " | 1\n", + " | 123\n", + " | 1\n", + " | 0.2\n", + " | 1\n", + " | 0\n", + " | 3\n", + " | 0\n", + " | 
| 299\n", + " | 45\n", + " | 1\n", + " | 3\n", + " | 110\n", + " | 264\n", + " | 0\n", + " | 1\n", + " | 132\n", + " | 0\n", + " | 1.2\n", + " | 1\n", + " | 0\n", + " | 3\n", + " | 0\n", + " | 
| 300\n", + " | 68\n", + " | 1\n", + " | 0\n", + " | 144\n", + " | 193\n", + " | 1\n", + " | 1\n", + " | 141\n", + " | 0\n", + " | 3.4\n", + " | 1\n", + " | 2\n", + " | 3\n", + " | 0\n", + " | 
| 301\n", + " | 57\n", + " | 1\n", + " | 0\n", + " | 130\n", + " | 131\n", + " | 0\n", + " | 1\n", + " | 115\n", + " | 1\n", + " | 1.2\n", + " | 1\n", + " | 1\n", + " | 3\n", + " | 0\n", + " | 
| 302\n", + " | 57\n", + " | 0\n", + " | 1\n", + " | 130\n", + " | 236\n", + " | 0\n", + " | 0\n", + " | 174\n", + " | 0\n", + " | 0.0\n", + " | 1\n", + " | 1\n", + " | 2\n", + " | 0\n", + " | 
| \n", + " | age\n", + " | sex\n", + " | cp\n", + " | trestbps\n", + " | chol\n", + " | fbs\n", + " | restecg\n", + " | thalach\n", + " | exang\n", + " | oldpeak\n", + " | slope\n", + " | ca\n", + " | thal\n", + " | target\n", + " | 
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 
| mean\n", + " | 54.366337\n", + " | 0.683168\n", + " | 0.966997\n", + " | 131.623762\n", + " | 246.264026\n", + " | 0.148515\n", + " | 0.528053\n", + " | 149.646865\n", + " | 0.326733\n", + " | 1.039604\n", + " | 1.399340\n", + " | 0.729373\n", + " | 2.313531\n", + " | 0.544554\n", + " | 
| std\n", + " | 9.082101\n", + " | 0.466011\n", + " | 1.032052\n", + " | 17.538143\n", + " | 51.830751\n", + " | 0.356198\n", + " | 0.525860\n", + " | 22.905161\n", + " | 0.469794\n", + " | 1.161075\n", + " | 0.616226\n", + " | 1.022606\n", + " | 0.612277\n", + " | 0.498835\n", + " | 
| min\n", + " | 29.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 94.000000\n", + " | 126.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 71.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 
| 25%\n", + " | 47.500000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 120.000000\n", + " | 211.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 133.500000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 1.000000\n", + " | 0.000000\n", + " | 2.000000\n", + " | 0.000000\n", + " | 
| 50%\n", + " | 55.000000\n", + " | 1.000000\n", + " | 1.000000\n", + " | 130.000000\n", + " | 240.000000\n", + " | 0.000000\n", + " | 1.000000\n", + " | 153.000000\n", + " | 0.000000\n", + " | 0.800000\n", + " | 1.000000\n", + " | 0.000000\n", + " | 2.000000\n", + " | 1.000000\n", + " | 
| 75%\n", + " | 61.000000\n", + " | 1.000000\n", + " | 2.000000\n", + " | 140.000000\n", + " | 274.500000\n", + " | 0.000000\n", + " | 1.000000\n", + " | 166.000000\n", + " | 1.000000\n", + " | 1.600000\n", + " | 2.000000\n", + " | 1.000000\n", + " | 3.000000\n", + " | 1.000000\n", + " | 
| max\n", + " | 77.000000\n", + " | 1.000000\n", + " | 3.000000\n", + " | 200.000000\n", + " | 564.000000\n", + " | 1.000000\n", + " | 2.000000\n", + " | 202.000000\n", + " | 1.000000\n", + " | 6.200000\n", + " | 2.000000\n", + " | 4.000000\n", + " | 3.000000\n", + " | 1.000000\n", + " | 
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression()
| \n", + " | age\n", + " | sex\n", + " | cp\n", + " | trestbps\n", + " | chol\n", + " | fbs\n", + " | restecg\n", + " | thalach\n", + " | exang\n", + " | oldpeak\n", + " | slope\n", + " | ca\n", + " | thal\n", + " | target\n", + " | 
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 303.000000\n", + " | 
| mean\n", + " | 54.366337\n", + " | 0.683168\n", + " | 0.966997\n", + " | 131.623762\n", + " | 246.264026\n", + " | 0.148515\n", + " | 0.528053\n", + " | 149.646865\n", + " | 0.326733\n", + " | 1.039604\n", + " | 1.399340\n", + " | 0.729373\n", + " | 2.313531\n", + " | 0.544554\n", + " | 
| std\n", + " | 9.082101\n", + " | 0.466011\n", + " | 1.032052\n", + " | 17.538143\n", + " | 51.830751\n", + " | 0.356198\n", + " | 0.525860\n", + " | 22.905161\n", + " | 0.469794\n", + " | 1.161075\n", + " | 0.616226\n", + " | 1.022606\n", + " | 0.612277\n", + " | 0.498835\n", + " | 
| min\n", + " | 29.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 94.000000\n", + " | 126.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 71.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 
| 25%\n", + " | 47.500000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 120.000000\n", + " | 211.000000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 133.500000\n", + " | 0.000000\n", + " | 0.000000\n", + " | 1.000000\n", + " | 0.000000\n", + " | 2.000000\n", + " | 0.000000\n", + " | 
| 50%\n", + " | 55.000000\n", + " | 1.000000\n", + " | 1.000000\n", + " | 130.000000\n", + " | 240.000000\n", + " | 0.000000\n", + " | 1.000000\n", + " | 153.000000\n", + " | 0.000000\n", + " | 0.800000\n", + " | 1.000000\n", + " | 0.000000\n", + " | 2.000000\n", + " | 1.000000\n", + " | 
| 75%\n", + " | 61.000000\n", + " | 1.000000\n", + " | 2.000000\n", + " | 140.000000\n", + " | 274.500000\n", + " | 0.000000\n", + " | 1.000000\n", + " | 166.000000\n", + " | 1.000000\n", + " | 1.600000\n", + " | 2.000000\n", + " | 1.000000\n", + " | 3.000000\n", + " | 1.000000\n", + " | 
| max\n", + " | 77.000000\n", + " | 1.000000\n", + " | 3.000000\n", + " | 200.000000\n", + " | 564.000000\n", + " | 1.000000\n", + " | 2.000000\n", + " | 202.000000\n", + " | 1.000000\n", + " | 6.200000\n", + " | 2.000000\n", + " | 4.000000\n", + " | 3.000000\n", + " | 1.000000\n", + " | 
| \n", + " | Item_Identifier\n", + " | Item_Weight\n", + " | Item_Fat_Content\n", + " | Item_Visibility\n", + " | Item_Type\n", + " | Item_MRP\n", + " | Outlet_Identifier\n", + " | Outlet_Establishment_Year\n", + " | Outlet_Size\n", + " | Outlet_Location_Type\n", + " | Outlet_Type\n", + " | Item_Outlet_Sales\n", + " | 
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0\n", + " | FDA15\n", + " | 9.30\n", + " | Low Fat\n", + " | 0.016047\n", + " | Dairy\n", + " | 249.8092\n", + " | OUT049\n", + " | 1999\n", + " | Medium\n", + " | Tier 1\n", + " | Supermarket Type1\n", + " | 3735.1380\n", + " | 
| 1\n", + " | DRC01\n", + " | 5.92\n", + " | Regular\n", + " | 0.019278\n", + " | Soft Drinks\n", + " | 48.2692\n", + " | OUT018\n", + " | 2009\n", + " | Medium\n", + " | Tier 3\n", + " | Supermarket Type2\n", + " | 443.4228\n", + " | 
| 2\n", + " | FDN15\n", + " | 17.50\n", + " | Low Fat\n", + " | 0.016760\n", + " | Meat\n", + " | 141.6180\n", + " | OUT049\n", + " | 1999\n", + " | Medium\n", + " | Tier 1\n", + " | Supermarket Type1\n", + " | 2097.2700\n", + " | 
| 3\n", + " | FDX07\n", + " | 19.20\n", + " | Regular\n", + " | 0.000000\n", + " | Fruits and Vegetables\n", + " | 182.0950\n", + " | OUT010\n", + " | 1998\n", + " | NaN\n", + " | Tier 3\n", + " | Grocery Store\n", + " | 732.3800\n", + " | 
| 4\n", + " | NCD19\n", + " | 8.93\n", + " | Low Fat\n", + " | 0.000000\n", + " | Household\n", + " | 53.8614\n", + " | OUT013\n", + " | 1987\n", + " | High\n", + " | Tier 3\n", + " | Supermarket Type1\n", + " | 994.7052\n", + " | 
| \n", + " | Item_Weight\n", + " | Item_Visibility\n", + " | Item_MRP\n", + " | Outlet_Establishment_Year\n", + " | Item_Outlet_Sales\n", + " | 
|---|---|---|---|---|---|
| count\n", + " | 8523.000000\n", + " | 8523.000000\n", + " | 8523.000000\n", + " | 8523.000000\n", + " | 8523.000000\n", + " | 
| mean\n", + " | 12.857645\n", + " | 0.066132\n", + " | 140.992782\n", + " | 1997.831867\n", + " | 2181.288914\n", + " | 
| std\n", + " | 4.226124\n", + " | 0.051598\n", + " | 62.275067\n", + " | 8.371760\n", + " | 1706.499616\n", + " | 
| min\n", + " | 4.555000\n", + " | 0.000000\n", + " | 31.290000\n", + " | 1985.000000\n", + " | 33.290000\n", + " | 
| 25%\n", + " | 9.310000\n", + " | 0.026989\n", + " | 93.826500\n", + " | 1987.000000\n", + " | 834.247400\n", + " | 
| 50%\n", + " | 12.857645\n", + " | 0.053931\n", + " | 143.012800\n", + " | 1999.000000\n", + " | 1794.331000\n", + " | 
| 75%\n", + " | 16.000000\n", + " | 0.094585\n", + " | 185.643700\n", + " | 2004.000000\n", + " | 3101.296400\n", + " | 
| max\n", + " | 21.350000\n", + " | 0.328391\n", + " | 266.888400\n", + " | 2009.000000\n", + " | 13086.964800\n", + " | 
| \n", + " | Item_Identifier\n", + " | Item_Weight\n", + " | Item_Fat_Content\n", + " | Item_Visibility\n", + " | Item_Type\n", + " | Item_MRP\n", + " | Outlet_Identifier\n", + " | Outlet_Establishment_Year\n", + " | Outlet_Size\n", + " | Outlet_Location_Type\n", + " | Outlet_Type\n", + " | Item_Outlet_Sales\n", + " | 
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0\n", + " | FDA15\n", + " | 9.30\n", + " | Low Fat\n", + " | 0.016047\n", + " | Dairy\n", + " | 249.8092\n", + " | OUT049\n", + " | 1999\n", + " | Medium\n", + " | Tier 1\n", + " | Supermarket Type1\n", + " | 3735.1380\n", + " | 
| 1\n", + " | DRC01\n", + " | 5.92\n", + " | Regular\n", + " | 0.019278\n", + " | Soft Drinks\n", + " | 48.2692\n", + " | OUT018\n", + " | 2009\n", + " | Medium\n", + " | Tier 3\n", + " | Supermarket Type2\n", + " | 443.4228\n", + " | 
| 2\n", + " | FDN15\n", + " | 17.50\n", + " | Low Fat\n", + " | 0.016760\n", + " | Meat\n", + " | 141.6180\n", + " | OUT049\n", + " | 1999\n", + " | Medium\n", + " | Tier 1\n", + " | Supermarket Type1\n", + " | 2097.2700\n", + " | 
| 3\n", + " | FDX07\n", + " | 19.20\n", + " | Regular\n", + " | 0.000000\n", + " | Fruits and Vegetables\n", + " | 182.0950\n", + " | OUT010\n", + " | 1998\n", + " | Small\n", + " | Tier 3\n", + " | Grocery Store\n", + " | 732.3800\n", + " | 
| 4\n", + " | NCD19\n", + " | 8.93\n", + " | Low Fat\n", + " | 0.000000\n", + " | Household\n", + " | 53.8614\n", + " | OUT013\n", + " | 1987\n", + " | High\n", + " | Tier 3\n", + " | Supermarket Type1\n", + " | 994.7052\n", + " | 
| \n", + " | Item_Identifier\n", + " | Item_Weight\n", + " | Item_Fat_Content\n", + " | Item_Visibility\n", + " | Item_Type\n", + " | Item_MRP\n", + " | Outlet_Identifier\n", + " | Outlet_Establishment_Year\n", + " | Outlet_Size\n", + " | Outlet_Location_Type\n", + " | Outlet_Type\n", + " | Item_Outlet_Sales\n", + " | 
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0\n", + " | 156\n", + " | 9.30\n", + " | 0\n", + " | 0.016047\n", + " | 4\n", + " | 249.8092\n", + " | 9\n", + " | 1999\n", + " | 1\n", + " | 0\n", + " | 1\n", + " | 3735.1380\n", + " | 
| 1\n", + " | 8\n", + " | 5.92\n", + " | 1\n", + " | 0.019278\n", + " | 14\n", + " | 48.2692\n", + " | 3\n", + " | 2009\n", + " | 1\n", + " | 2\n", + " | 2\n", + " | 443.4228\n", + " | 
| 2\n", + " | 662\n", + " | 17.50\n", + " | 0\n", + " | 0.016760\n", + " | 10\n", + " | 141.6180\n", + " | 9\n", + " | 1999\n", + " | 1\n", + " | 0\n", + " | 1\n", + " | 2097.2700\n", + " | 
| 3\n", + " | 1121\n", + " | 19.20\n", + " | 1\n", + " | 0.000000\n", + " | 6\n", + " | 182.0950\n", + " | 0\n", + " | 1998\n", + " | 2\n", + " | 2\n", + " | 0\n", + " | 732.3800\n", + " | 
| 4\n", + " | 1297\n", + " | 8.93\n", + " | 0\n", + " | 0.000000\n", + " | 9\n", + " | 53.8614\n", + " | 1\n", + " | 1987\n", + " | 0\n", + " | 2\n", + " | 1\n", + " | 994.7052\n", + " | 
XGBRegressor(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=None, n_jobs=None,\n", + " num_parallel_tree=None, random_state=None, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
XGBRegressor(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=None, n_jobs=None,\n", + " num_parallel_tree=None, random_state=None, ...)
| \n", + " | Date\n", + " | Open\n", + " | High\n", + " | Low\n", + " | Close\n", + " | Adj Close\n", + " | Volume\n", + " | 
|---|---|---|---|---|---|---|---|
| 0\n", + " | 2010-01-04\n", + " | 377.399994\n", + " | 379.450012\n", + " | 374.524994\n", + " | 375.825012\n", + " | 286.916779\n", + " | 1963682\n", + " | 
| 1\n", + " | 2010-01-05\n", + " | 377.500000\n", + " | 379.774994\n", + " | 373.274994\n", + " | 375.924988\n", + " | 286.993042\n", + " | 2014488\n", + " | 
| 2\n", + " | 2010-01-06\n", + " | 375.924988\n", + " | 376.100006\n", + " | 366.500000\n", + " | 367.424988\n", + " | 280.503876\n", + " | 3349176\n", + " | 
| 3\n", + " | 2010-01-07\n", + " | 367.750000\n", + " | 369.700012\n", + " | 355.500000\n", + " | 357.200012\n", + " | 272.697754\n", + " | 6474892\n", + " | 
| 4\n", + " | 2010-01-08\n", + " | 358.000000\n", + " | 359.250000\n", + " | 348.250000\n", + " | 349.899994\n", + " | 267.124725\n", + " | 6048178\n", + " | 
| ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | ...\n", + " | 
| 3204\n", + " | 2022-12-26\n", + " | 3228.350098\n", + " | 3272.000000\n", + " | 3225.050049\n", + " | 3252.899902\n", + " | 3156.570068\n", + " | 870157\n", + " | 
| 3205\n", + " | 2022-12-27\n", + " | 3269.199951\n", + " | 3273.800049\n", + " | 3231.500000\n", + " | 3259.500000\n", + " | 3162.974854\n", + " | 835883\n", + " | 
| 3206\n", + " | 2022-12-28\n", + " | 3249.800049\n", + " | 3266.100098\n", + " | 3226.000000\n", + " | 3257.100098\n", + " | 3160.645996\n", + " | 910795\n", + " | 
| 3207\n", + " | 2022-12-29\n", + " | 3231.100098\n", + " | 3271.500000\n", + " | 3228.000000\n", + " | 3268.750000\n", + " | 3171.950928\n", + " | 1037927\n", + " | 
| 3208\n", + " | 2022-12-30\n", + " | 3286.050049\n", + " | 3299.000000\n", + " | 3246.199951\n", + " | 3256.699951\n", + " | 3160.257568\n", + " | 1163131\n", + " | 
3209 rows × 7 columns
\n", + "| \n", + " | Close\n", + " | Predictions\n", + " | 
|---|---|---|
| Date\n", + " | \n", + " | \n", + " | 
| 2020-06-09\n", + " | 2072.050049\n", + " | 2008.057007\n", + " | 
| 2020-06-10\n", + " | 2108.750000\n", + " | 2016.563110\n", + " | 
| 2020-06-11\n", + " | 2067.649902\n", + " | 2026.163086\n", + " | 
| 2020-06-12\n", + " | 2039.500000\n", + " | 2033.527466\n", + " | 
| 2020-06-15\n", + " | 2029.900024\n", + " | 2037.387573\n", + " | 
| ...\n", + " | ...\n", + " | ...\n", + " | 
| 2022-12-26\n", + " | 3252.899902\n", + " | 3173.970947\n", + " | 
| 2022-12-27\n", + " | 3259.500000\n", + " | 3166.584229\n", + " | 
| 2022-12-28\n", + " | 3257.100098\n", + " | 3161.392822\n", + " | 
| 2022-12-29\n", + " | 3268.750000\n", + " | 3157.791992\n", + " | 
| 2022-12-30\n", + " | 3256.699951\n", + " | 3156.115967\n", + " | 
641 rows × 2 columns
\n", + "