aboutsummaryrefslogtreecommitdiff
path: root/dslr_notebook.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'dslr_notebook.ipynb')
-rw-r--r--dslr_notebook.ipynb481
1 files changed, 481 insertions, 0 deletions
diff --git a/dslr_notebook.ipynb b/dslr_notebook.ipynb
new file mode 100644
index 0000000..c85b8c6
--- /dev/null
+++ b/dslr_notebook.ipynb
@@ -0,0 +1,481 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# dslr\n",
+ "\n",
+ "Data science project from School 42."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>Index</th>\n",
+ " <th>Hogwarts House</th>\n",
+ " <th>First Name</th>\n",
+ " <th>Last Name</th>\n",
+ " <th>Birthday</th>\n",
+ " <th>Best Hand</th>\n",
+ " <th>Arithmancy</th>\n",
+ " <th>Astronomy</th>\n",
+ " <th>Herbology</th>\n",
+ " <th>Defense Against the Dark Arts</th>\n",
+ " <th>Divination</th>\n",
+ " <th>Muggle Studies</th>\n",
+ " <th>Ancient Runes</th>\n",
+ " <th>History of Magic</th>\n",
+ " <th>Transfiguration</th>\n",
+ " <th>Potions</th>\n",
+ " <th>Care of Magical Creatures</th>\n",
+ " <th>Charms</th>\n",
+ " <th>Flying</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>0</td>\n",
+ " <td>Ravenclaw</td>\n",
+ " <td>Tamara</td>\n",
+ " <td>Hsu</td>\n",
+ " <td>2000-03-30</td>\n",
+ " <td>Left</td>\n",
+ " <td>58384.0</td>\n",
+ " <td>-487.886086</td>\n",
+ " <td>5.727180</td>\n",
+ " <td>4.878861</td>\n",
+ " <td>4.722</td>\n",
+ " <td>272.035831</td>\n",
+ " <td>532.484226</td>\n",
+ " <td>5.231058</td>\n",
+ " <td>1039.788281</td>\n",
+ " <td>3.790369</td>\n",
+ " <td>0.715939</td>\n",
+ " <td>-232.79405</td>\n",
+ " <td>-26.89</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>1</td>\n",
+ " <td>Slytherin</td>\n",
+ " <td>Erich</td>\n",
+ " <td>Paredes</td>\n",
+ " <td>1999-10-14</td>\n",
+ " <td>Right</td>\n",
+ " <td>67239.0</td>\n",
+ " <td>-552.060507</td>\n",
+ " <td>-5.987446</td>\n",
+ " <td>5.520605</td>\n",
+ " <td>-5.612</td>\n",
+ " <td>-487.340557</td>\n",
+ " <td>367.760303</td>\n",
+ " <td>4.107170</td>\n",
+ " <td>1058.944592</td>\n",
+ " <td>7.248742</td>\n",
+ " <td>0.091674</td>\n",
+ " <td>-252.18425</td>\n",
+ " <td>-113.45</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>2</td>\n",
+ " <td>Ravenclaw</td>\n",
+ " <td>Stephany</td>\n",
+ " <td>Braun</td>\n",
+ " <td>1999-11-03</td>\n",
+ " <td>Left</td>\n",
+ " <td>23702.0</td>\n",
+ " <td>-366.076117</td>\n",
+ " <td>7.725017</td>\n",
+ " <td>3.660761</td>\n",
+ " <td>6.140</td>\n",
+ " <td>664.893521</td>\n",
+ " <td>602.585284</td>\n",
+ " <td>3.555579</td>\n",
+ " <td>1088.088348</td>\n",
+ " <td>8.728531</td>\n",
+ " <td>-0.515327</td>\n",
+ " <td>-227.34265</td>\n",
+ " <td>30.42</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>3</td>\n",
+ " <td>Gryffindor</td>\n",
+ " <td>Vesta</td>\n",
+ " <td>Mcmichael</td>\n",
+ " <td>2000-08-19</td>\n",
+ " <td>Left</td>\n",
+ " <td>32667.0</td>\n",
+ " <td>697.742809</td>\n",
+ " <td>-6.497214</td>\n",
+ " <td>-6.977428</td>\n",
+ " <td>4.026</td>\n",
+ " <td>-537.001128</td>\n",
+ " <td>523.982133</td>\n",
+ " <td>-4.809637</td>\n",
+ " <td>920.391449</td>\n",
+ " <td>0.821911</td>\n",
+ " <td>-0.014040</td>\n",
+ " <td>-256.84675</td>\n",
+ " <td>200.64</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>4</td>\n",
+ " <td>Gryffindor</td>\n",
+ " <td>Gaston</td>\n",
+ " <td>Gibbs</td>\n",
+ " <td>1998-09-27</td>\n",
+ " <td>Left</td>\n",
+ " <td>60158.0</td>\n",
+ " <td>436.775204</td>\n",
+ " <td>-7.820623</td>\n",
+ " <td>NaN</td>\n",
+ " <td>2.236</td>\n",
+ " <td>-444.262537</td>\n",
+ " <td>599.324514</td>\n",
+ " <td>-3.444377</td>\n",
+ " <td>937.434724</td>\n",
+ " <td>4.311066</td>\n",
+ " <td>-0.264070</td>\n",
+ " <td>-256.38730</td>\n",
+ " <td>157.98</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " Index Hogwarts House First Name Last Name Birthday Best Hand \\\n",
+ "0 0 Ravenclaw Tamara Hsu 2000-03-30 Left \n",
+ "1 1 Slytherin Erich Paredes 1999-10-14 Right \n",
+ "2 2 Ravenclaw Stephany Braun 1999-11-03 Left \n",
+ "3 3 Gryffindor Vesta Mcmichael 2000-08-19 Left \n",
+ "4 4 Gryffindor Gaston Gibbs 1998-09-27 Left \n",
+ "\n",
+ " Arithmancy Astronomy Herbology Defense Against the Dark Arts \\\n",
+ "0 58384.0 -487.886086 5.727180 4.878861 \n",
+ "1 67239.0 -552.060507 -5.987446 5.520605 \n",
+ "2 23702.0 -366.076117 7.725017 3.660761 \n",
+ "3 32667.0 697.742809 -6.497214 -6.977428 \n",
+ "4 60158.0 436.775204 -7.820623 NaN \n",
+ "\n",
+ " Divination Muggle Studies Ancient Runes History of Magic \\\n",
+ "0 4.722 272.035831 532.484226 5.231058 \n",
+ "1 -5.612 -487.340557 367.760303 4.107170 \n",
+ "2 6.140 664.893521 602.585284 3.555579 \n",
+ "3 4.026 -537.001128 523.982133 -4.809637 \n",
+ "4 2.236 -444.262537 599.324514 -3.444377 \n",
+ "\n",
+ " Transfiguration Potions Care of Magical Creatures Charms Flying \n",
+ "0 1039.788281 3.790369 0.715939 -232.79405 -26.89 \n",
+ "1 1058.944592 7.248742 0.091674 -252.18425 -113.45 \n",
+ "2 1088.088348 8.728531 -0.515327 -227.34265 30.42 \n",
+ "3 920.391449 0.821911 -0.014040 -256.84675 200.64 \n",
+ "4 937.434724 4.311066 -0.264070 -256.38730 157.98 "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Load the training set from the local datasets folder and preview its first five rows.\n",
+ "df = pd.read_csv(filepath_or_buffer=\"./datasets/dataset_train.csv\")\n",
+ "df.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>Index</th>\n",
+ " <th>Arithmancy</th>\n",
+ " <th>Astronomy</th>\n",
+ " <th>Herbology</th>\n",
+ " <th>Defense Against the Dark Arts</th>\n",
+ " <th>Divination</th>\n",
+ " <th>Muggle Studies</th>\n",
+ " <th>Ancient Runes</th>\n",
+ " <th>History of Magic</th>\n",
+ " <th>Transfiguration</th>\n",
+ " <th>Potions</th>\n",
+ " <th>Care of Magical Creatures</th>\n",
+ " <th>Charms</th>\n",
+ " <th>Flying</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>count</th>\n",
+ " <td>1600.00000</td>\n",
+ " <td>1566.000000</td>\n",
+ " <td>1568.000000</td>\n",
+ " <td>1567.000000</td>\n",
+ " <td>1569.000000</td>\n",
+ " <td>1561.000000</td>\n",
+ " <td>1565.000000</td>\n",
+ " <td>1565.000000</td>\n",
+ " <td>1557.000000</td>\n",
+ " <td>1566.000000</td>\n",
+ " <td>1570.000000</td>\n",
+ " <td>1560.000000</td>\n",
+ " <td>1600.000000</td>\n",
+ " <td>1600.000000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>mean</th>\n",
+ " <td>799.50000</td>\n",
+ " <td>49634.570243</td>\n",
+ " <td>39.797131</td>\n",
+ " <td>1.141020</td>\n",
+ " <td>-0.387863</td>\n",
+ " <td>3.153910</td>\n",
+ " <td>-224.589915</td>\n",
+ " <td>495.747970</td>\n",
+ " <td>2.963095</td>\n",
+ " <td>1030.096946</td>\n",
+ " <td>5.950373</td>\n",
+ " <td>-0.053427</td>\n",
+ " <td>-243.374409</td>\n",
+ " <td>21.958012</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>std</th>\n",
+ " <td>462.02453</td>\n",
+ " <td>16679.806036</td>\n",
+ " <td>520.298268</td>\n",
+ " <td>5.219682</td>\n",
+ " <td>5.212794</td>\n",
+ " <td>4.155301</td>\n",
+ " <td>486.344840</td>\n",
+ " <td>106.285165</td>\n",
+ " <td>4.425775</td>\n",
+ " <td>44.125116</td>\n",
+ " <td>3.147854</td>\n",
+ " <td>0.971457</td>\n",
+ " <td>8.783640</td>\n",
+ " <td>97.631602</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>min</th>\n",
+ " <td>0.00000</td>\n",
+ " <td>-24370.000000</td>\n",
+ " <td>-966.740546</td>\n",
+ " <td>-10.295663</td>\n",
+ " <td>-10.162119</td>\n",
+ " <td>-8.727000</td>\n",
+ " <td>-1086.496835</td>\n",
+ " <td>283.869609</td>\n",
+ " <td>-8.858993</td>\n",
+ " <td>906.627320</td>\n",
+ " <td>-4.697484</td>\n",
+ " <td>-3.313676</td>\n",
+ " <td>-261.048920</td>\n",
+ " <td>-181.470000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>25%</th>\n",
+ " <td>399.75000</td>\n",
+ " <td>38511.500000</td>\n",
+ " <td>-489.551387</td>\n",
+ " <td>-4.308182</td>\n",
+ " <td>-5.259095</td>\n",
+ " <td>3.099000</td>\n",
+ " <td>-577.580096</td>\n",
+ " <td>397.511047</td>\n",
+ " <td>2.218653</td>\n",
+ " <td>1026.209993</td>\n",
+ " <td>3.646785</td>\n",
+ " <td>-0.671606</td>\n",
+ " <td>-250.652600</td>\n",
+ " <td>-41.870000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>50%</th>\n",
+ " <td>799.50000</td>\n",
+ " <td>49013.500000</td>\n",
+ " <td>260.289446</td>\n",
+ " <td>3.469012</td>\n",
+ " <td>-2.589342</td>\n",
+ " <td>4.624000</td>\n",
+ " <td>-419.164294</td>\n",
+ " <td>463.918305</td>\n",
+ " <td>4.378176</td>\n",
+ " <td>1045.506996</td>\n",
+ " <td>5.874837</td>\n",
+ " <td>-0.044811</td>\n",
+ " <td>-244.867765</td>\n",
+ " <td>-2.515000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>75%</th>\n",
+ " <td>1199.25000</td>\n",
+ " <td>60811.250000</td>\n",
+ " <td>524.771949</td>\n",
+ " <td>5.419183</td>\n",
+ " <td>4.904680</td>\n",
+ " <td>5.667000</td>\n",
+ " <td>254.994857</td>\n",
+ " <td>597.492230</td>\n",
+ " <td>5.825242</td>\n",
+ " <td>1058.436410</td>\n",
+ " <td>8.248173</td>\n",
+ " <td>0.589919</td>\n",
+ " <td>-232.552305</td>\n",
+ " <td>50.560000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>max</th>\n",
+ " <td>1599.00000</td>\n",
+ " <td>104956.000000</td>\n",
+ " <td>1016.211940</td>\n",
+ " <td>11.612895</td>\n",
+ " <td>9.667405</td>\n",
+ " <td>10.032000</td>\n",
+ " <td>1092.388611</td>\n",
+ " <td>745.396220</td>\n",
+ " <td>11.889713</td>\n",
+ " <td>1098.958201</td>\n",
+ " <td>13.536762</td>\n",
+ " <td>3.056546</td>\n",
+ " <td>-225.428140</td>\n",
+ " <td>279.070000</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " Index Arithmancy Astronomy Herbology \\\n",
+ "count 1600.00000 1566.000000 1568.000000 1567.000000 \n",
+ "mean 799.50000 49634.570243 39.797131 1.141020 \n",
+ "std 462.02453 16679.806036 520.298268 5.219682 \n",
+ "min 0.00000 -24370.000000 -966.740546 -10.295663 \n",
+ "25% 399.75000 38511.500000 -489.551387 -4.308182 \n",
+ "50% 799.50000 49013.500000 260.289446 3.469012 \n",
+ "75% 1199.25000 60811.250000 524.771949 5.419183 \n",
+ "max 1599.00000 104956.000000 1016.211940 11.612895 \n",
+ "\n",
+ " Defense Against the Dark Arts Divination Muggle Studies \\\n",
+ "count 1569.000000 1561.000000 1565.000000 \n",
+ "mean -0.387863 3.153910 -224.589915 \n",
+ "std 5.212794 4.155301 486.344840 \n",
+ "min -10.162119 -8.727000 -1086.496835 \n",
+ "25% -5.259095 3.099000 -577.580096 \n",
+ "50% -2.589342 4.624000 -419.164294 \n",
+ "75% 4.904680 5.667000 254.994857 \n",
+ "max 9.667405 10.032000 1092.388611 \n",
+ "\n",
+ " Ancient Runes History of Magic Transfiguration Potions \\\n",
+ "count 1565.000000 1557.000000 1566.000000 1570.000000 \n",
+ "mean 495.747970 2.963095 1030.096946 5.950373 \n",
+ "std 106.285165 4.425775 44.125116 3.147854 \n",
+ "min 283.869609 -8.858993 906.627320 -4.697484 \n",
+ "25% 397.511047 2.218653 1026.209993 3.646785 \n",
+ "50% 463.918305 4.378176 1045.506996 5.874837 \n",
+ "75% 597.492230 5.825242 1058.436410 8.248173 \n",
+ "max 745.396220 11.889713 1098.958201 13.536762 \n",
+ "\n",
+ " Care of Magical Creatures Charms Flying \n",
+ "count 1560.000000 1600.000000 1600.000000 \n",
+ "mean -0.053427 -243.374409 21.958012 \n",
+ "std 0.971457 8.783640 97.631602 \n",
+ "min -3.313676 -261.048920 -181.470000 \n",
+ "25% -0.671606 -250.652600 -41.870000 \n",
+ "50% -0.044811 -244.867765 -2.515000 \n",
+ "75% 0.589919 -232.552305 50.560000 \n",
+ "max 3.056546 -225.428140 279.070000 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}