diff options
Diffstat (limited to 'dslr_notebook.ipynb')
| -rw-r--r-- | dslr_notebook.ipynb | 481 |
1 file changed, 481 insertions, 0 deletions
diff --git a/dslr_notebook.ipynb b/dslr_notebook.ipynb new file mode 100644 index 0000000..c85b8c6 --- /dev/null +++ b/dslr_notebook.ipynb @@ -0,0 +1,481 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# dslr\n", + "\n", + "Data science project for School 42" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Index</th>\n", + " <th>Hogwarts House</th>\n", + " <th>First Name</th>\n", + " <th>Last Name</th>\n", + " <th>Birthday</th>\n", + " <th>Best Hand</th>\n", + " <th>Arithmancy</th>\n", + " <th>Astronomy</th>\n", + " <th>Herbology</th>\n", + " <th>Defense Against the Dark Arts</th>\n", + " <th>Divination</th>\n", + " <th>Muggle Studies</th>\n", + " <th>Ancient Runes</th>\n", + " <th>History of Magic</th>\n", + " <th>Transfiguration</th>\n", + " <th>Potions</th>\n", + " <th>Care of Magical Creatures</th>\n", + " <th>Charms</th>\n", + " <th>Flying</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>Ravenclaw</td>\n", + " <td>Tamara</td>\n", + " <td>Hsu</td>\n", + " <td>2000-03-30</td>\n", + " <td>Left</td>\n", + " <td>58384.0</td>\n", + " <td>-487.886086</td>\n", + " <td>5.727180</td>\n", + " <td>4.878861</td>\n", + " <td>4.722</td>\n", + " <td>272.035831</td>\n", + " <td>532.484226</td>\n", + " <td>5.231058</td>\n", + " <td>1039.788281</td>\n", + " <td>3.790369</td>\n", + " <td>0.715939</td>\n", + " <td>-232.79405</td>\n", + " <td>-26.89</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>1</th>\n", + " <td>1</td>\n", + " <td>Slytherin</td>\n", + " <td>Erich</td>\n", + " <td>Paredes</td>\n", + " <td>1999-10-14</td>\n", + " <td>Right</td>\n", + " <td>67239.0</td>\n", + " <td>-552.060507</td>\n", + " <td>-5.987446</td>\n", + " <td>5.520605</td>\n", + " <td>-5.612</td>\n", + " <td>-487.340557</td>\n", + " <td>367.760303</td>\n", + " <td>4.107170</td>\n", + " <td>1058.944592</td>\n", + " <td>7.248742</td>\n", + " <td>0.091674</td>\n", + " <td>-252.18425</td>\n", + " <td>-113.45</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>Ravenclaw</td>\n", + " <td>Stephany</td>\n", + " <td>Braun</td>\n", + " <td>1999-11-03</td>\n", + " <td>Left</td>\n", + " <td>23702.0</td>\n", + " <td>-366.076117</td>\n", + " <td>7.725017</td>\n", + " <td>3.660761</td>\n", + " <td>6.140</td>\n", + " <td>664.893521</td>\n", + " <td>602.585284</td>\n", + " <td>3.555579</td>\n", + " <td>1088.088348</td>\n", + " <td>8.728531</td>\n", + " <td>-0.515327</td>\n", + " <td>-227.34265</td>\n", + " <td>30.42</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>Gryffindor</td>\n", + " <td>Vesta</td>\n", + " <td>Mcmichael</td>\n", + " <td>2000-08-19</td>\n", + " <td>Left</td>\n", + " <td>32667.0</td>\n", + " <td>697.742809</td>\n", + " <td>-6.497214</td>\n", + " <td>-6.977428</td>\n", + " <td>4.026</td>\n", + " <td>-537.001128</td>\n", + " <td>523.982133</td>\n", + " <td>-4.809637</td>\n", + " <td>920.391449</td>\n", + " <td>0.821911</td>\n", + " <td>-0.014040</td>\n", + " <td>-256.84675</td>\n", + " <td>200.64</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>Gryffindor</td>\n", + " <td>Gaston</td>\n", + " <td>Gibbs</td>\n", + " <td>1998-09-27</td>\n", + " <td>Left</td>\n", + " <td>60158.0</td>\n", + " <td>436.775204</td>\n", + " <td>-7.820623</td>\n", + " <td>NaN</td>\n", + " <td>2.236</td>\n", + " <td>-444.262537</td>\n", + " <td>599.324514</td>\n", + " <td>-3.444377</td>\n", + " 
<td>937.434724</td>\n", + " <td>4.311066</td>\n", + " <td>-0.264070</td>\n", + " <td>-256.38730</td>\n", + " <td>157.98</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Index Hogwarts House First Name Last Name Birthday Best Hand \\\n", + "0 0 Ravenclaw Tamara Hsu 2000-03-30 Left \n", + "1 1 Slytherin Erich Paredes 1999-10-14 Right \n", + "2 2 Ravenclaw Stephany Braun 1999-11-03 Left \n", + "3 3 Gryffindor Vesta Mcmichael 2000-08-19 Left \n", + "4 4 Gryffindor Gaston Gibbs 1998-09-27 Left \n", + "\n", + " Arithmancy Astronomy Herbology Defense Against the Dark Arts \\\n", + "0 58384.0 -487.886086 5.727180 4.878861 \n", + "1 67239.0 -552.060507 -5.987446 5.520605 \n", + "2 23702.0 -366.076117 7.725017 3.660761 \n", + "3 32667.0 697.742809 -6.497214 -6.977428 \n", + "4 60158.0 436.775204 -7.820623 NaN \n", + "\n", + " Divination Muggle Studies Ancient Runes History of Magic \\\n", + "0 4.722 272.035831 532.484226 5.231058 \n", + "1 -5.612 -487.340557 367.760303 4.107170 \n", + "2 6.140 664.893521 602.585284 3.555579 \n", + "3 4.026 -537.001128 523.982133 -4.809637 \n", + "4 2.236 -444.262537 599.324514 -3.444377 \n", + "\n", + " Transfiguration Potions Care of Magical Creatures Charms Flying \n", + "0 1039.788281 3.790369 0.715939 -232.79405 -26.89 \n", + "1 1058.944592 7.248742 0.091674 -252.18425 -113.45 \n", + "2 1088.088348 8.728531 -0.515327 -227.34265 30.42 \n", + "3 920.391449 0.821911 -0.014040 -256.84675 200.64 \n", + "4 937.434724 4.311066 -0.264070 -256.38730 157.98 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "df = pd.read_csv(\"./datasets/dataset_train.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " 
.dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Index</th>\n", + " <th>Arithmancy</th>\n", + " <th>Astronomy</th>\n", + " <th>Herbology</th>\n", + " <th>Defense Against the Dark Arts</th>\n", + " <th>Divination</th>\n", + " <th>Muggle Studies</th>\n", + " <th>Ancient Runes</th>\n", + " <th>History of Magic</th>\n", + " <th>Transfiguration</th>\n", + " <th>Potions</th>\n", + " <th>Care of Magical Creatures</th>\n", + " <th>Charms</th>\n", + " <th>Flying</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>count</th>\n", + " <td>1600.00000</td>\n", + " <td>1566.000000</td>\n", + " <td>1568.000000</td>\n", + " <td>1567.000000</td>\n", + " <td>1569.000000</td>\n", + " <td>1561.000000</td>\n", + " <td>1565.000000</td>\n", + " <td>1565.000000</td>\n", + " <td>1557.000000</td>\n", + " <td>1566.000000</td>\n", + " <td>1570.000000</td>\n", + " <td>1560.000000</td>\n", + " <td>1600.000000</td>\n", + " <td>1600.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mean</th>\n", + " <td>799.50000</td>\n", + " <td>49634.570243</td>\n", + " <td>39.797131</td>\n", + " <td>1.141020</td>\n", + " <td>-0.387863</td>\n", + " <td>3.153910</td>\n", + " <td>-224.589915</td>\n", + " <td>495.747970</td>\n", + " <td>2.963095</td>\n", + " <td>1030.096946</td>\n", + " <td>5.950373</td>\n", + " <td>-0.053427</td>\n", + " <td>-243.374409</td>\n", + " <td>21.958012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>std</th>\n", + " <td>462.02453</td>\n", + " <td>16679.806036</td>\n", + " <td>520.298268</td>\n", + " <td>5.219682</td>\n", + " <td>5.212794</td>\n", + " <td>4.155301</td>\n", + " <td>486.344840</td>\n", + " <td>106.285165</td>\n", + " 
<td>4.425775</td>\n", + " <td>44.125116</td>\n", + " <td>3.147854</td>\n", + " <td>0.971457</td>\n", + " <td>8.783640</td>\n", + " <td>97.631602</td>\n", + " </tr>\n", + " <tr>\n", + " <th>min</th>\n", + " <td>0.00000</td>\n", + " <td>-24370.000000</td>\n", + " <td>-966.740546</td>\n", + " <td>-10.295663</td>\n", + " <td>-10.162119</td>\n", + " <td>-8.727000</td>\n", + " <td>-1086.496835</td>\n", + " <td>283.869609</td>\n", + " <td>-8.858993</td>\n", + " <td>906.627320</td>\n", + " <td>-4.697484</td>\n", + " <td>-3.313676</td>\n", + " <td>-261.048920</td>\n", + " <td>-181.470000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25%</th>\n", + " <td>399.75000</td>\n", + " <td>38511.500000</td>\n", + " <td>-489.551387</td>\n", + " <td>-4.308182</td>\n", + " <td>-5.259095</td>\n", + " <td>3.099000</td>\n", + " <td>-577.580096</td>\n", + " <td>397.511047</td>\n", + " <td>2.218653</td>\n", + " <td>1026.209993</td>\n", + " <td>3.646785</td>\n", + " <td>-0.671606</td>\n", + " <td>-250.652600</td>\n", + " <td>-41.870000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>50%</th>\n", + " <td>799.50000</td>\n", + " <td>49013.500000</td>\n", + " <td>260.289446</td>\n", + " <td>3.469012</td>\n", + " <td>-2.589342</td>\n", + " <td>4.624000</td>\n", + " <td>-419.164294</td>\n", + " <td>463.918305</td>\n", + " <td>4.378176</td>\n", + " <td>1045.506996</td>\n", + " <td>5.874837</td>\n", + " <td>-0.044811</td>\n", + " <td>-244.867765</td>\n", + " <td>-2.515000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>75%</th>\n", + " <td>1199.25000</td>\n", + " <td>60811.250000</td>\n", + " <td>524.771949</td>\n", + " <td>5.419183</td>\n", + " <td>4.904680</td>\n", + " <td>5.667000</td>\n", + " <td>254.994857</td>\n", + " <td>597.492230</td>\n", + " <td>5.825242</td>\n", + " <td>1058.436410</td>\n", + " <td>8.248173</td>\n", + " <td>0.589919</td>\n", + " <td>-232.552305</td>\n", + " <td>50.560000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>max</th>\n", + " <td>1599.00000</td>\n", + " 
<td>104956.000000</td>\n", + " <td>1016.211940</td>\n", + " <td>11.612895</td>\n", + " <td>9.667405</td>\n", + " <td>10.032000</td>\n", + " <td>1092.388611</td>\n", + " <td>745.396220</td>\n", + " <td>11.889713</td>\n", + " <td>1098.958201</td>\n", + " <td>13.536762</td>\n", + " <td>3.056546</td>\n", + " <td>-225.428140</td>\n", + " <td>279.070000</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Index Arithmancy Astronomy Herbology \\\n", + "count 1600.00000 1566.000000 1568.000000 1567.000000 \n", + "mean 799.50000 49634.570243 39.797131 1.141020 \n", + "std 462.02453 16679.806036 520.298268 5.219682 \n", + "min 0.00000 -24370.000000 -966.740546 -10.295663 \n", + "25% 399.75000 38511.500000 -489.551387 -4.308182 \n", + "50% 799.50000 49013.500000 260.289446 3.469012 \n", + "75% 1199.25000 60811.250000 524.771949 5.419183 \n", + "max 1599.00000 104956.000000 1016.211940 11.612895 \n", + "\n", + " Defense Against the Dark Arts Divination Muggle Studies \\\n", + "count 1569.000000 1561.000000 1565.000000 \n", + "mean -0.387863 3.153910 -224.589915 \n", + "std 5.212794 4.155301 486.344840 \n", + "min -10.162119 -8.727000 -1086.496835 \n", + "25% -5.259095 3.099000 -577.580096 \n", + "50% -2.589342 4.624000 -419.164294 \n", + "75% 4.904680 5.667000 254.994857 \n", + "max 9.667405 10.032000 1092.388611 \n", + "\n", + " Ancient Runes History of Magic Transfiguration Potions \\\n", + "count 1565.000000 1557.000000 1566.000000 1570.000000 \n", + "mean 495.747970 2.963095 1030.096946 5.950373 \n", + "std 106.285165 4.425775 44.125116 3.147854 \n", + "min 283.869609 -8.858993 906.627320 -4.697484 \n", + "25% 397.511047 2.218653 1026.209993 3.646785 \n", + "50% 463.918305 4.378176 1045.506996 5.874837 \n", + "75% 597.492230 5.825242 1058.436410 8.248173 \n", + "max 745.396220 11.889713 1098.958201 13.536762 \n", + "\n", + " Care of Magical Creatures Charms Flying \n", + "count 1560.000000 1600.000000 1600.000000 \n", + "mean 
-0.053427 -243.374409 21.958012 \n", + "std 0.971457 8.783640 97.631602 \n", + "min -3.313676 -261.048920 -181.470000 \n", + "25% -0.671606 -250.652600 -41.870000 \n", + "50% -0.044811 -244.867765 -2.515000 \n", + "75% 0.589919 -232.552305 50.560000 \n", + "max 3.056546 -225.428140 279.070000 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} |
