diff options
Diffstat (limited to 'dslr_notebook.ipynb')
| -rw-r--r-- | dslr_notebook.ipynb | 587 |
1 files changed, 136 insertions, 451 deletions
diff --git a/dslr_notebook.ipynb b/dslr_notebook.ipynb index 929bb71..78781b2 100644 --- a/dslr_notebook.ipynb +++ b/dslr_notebook.ipynb @@ -11,45 +11,13 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 1, "metadata": { "scrolled": false }, "outputs": [ { "data": { - "text/plain": [ - "1251" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "df = pd.read_csv(\"./datasets/dataset_train.csv\")\n", - "df.drop(columns=[\"Index\"], inplace=True)\n", - "df.dropna(inplace=True)\n", - "df.columns = df.columns.str.lower()\n", - "df.columns = df.columns.str.replace(' ', '_')\n", - "df.rename(columns={'hogwarts_house': 'house'}, inplace=True)\n", - "df.describe()\n", - "df['arithmancy'][df['arithmancy'] == 48793.000000]\n", - "len(df['arithmancy'])" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { "text/html": [ "<div>\n", "<style scoped>\n", @@ -69,24 +37,34 @@ " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", - " <th>Arithmancy</th>\n", - " <th>Astronomy</th>\n", - " <th>Herbology</th>\n", - " <th>Defense Against the Dark Arts</th>\n", - " <th>Divination</th>\n", - " <th>Muggle Studies</th>\n", - " <th>Ancient Runes</th>\n", - " <th>History of Magic</th>\n", - " <th>Transfiguration</th>\n", - " <th>Potions</th>\n", - " <th>Care of Magical Creatures</th>\n", - " <th>Charms</th>\n", - " <th>Flying</th>\n", + " <th>house</th>\n", + " <th>first_name</th>\n", + " <th>last_name</th>\n", + " <th>birthday</th>\n", + " <th>best_hand</th>\n", + " <th>arithmancy</th>\n", + " <th>astronomy</th>\n", + " <th>herbology</th>\n", + " <th>defense_against_the_dark_arts</th>\n", + " <th>divination</th>\n", + " <th>muggle_studies</th>\n", + " <th>ancient_runes</th>\n", + " <th>history_of_magic</th>\n", + " <th>transfiguration</th>\n", + " <th>potions</th>\n", + " <th>care_of_magical_creatures</th>\n", + " <th>charms</th>\n", + " <th>flying</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", + " <td>Ravenclaw</td>\n", + " <td>Tamara</td>\n", + " <td>Hsu</td>\n", + " <td>2000-03-30</td>\n", + " <td>Left</td>\n", " <td>58384.0</td>\n", " <td>-487.886086</td>\n", " <td>5.727180</td>\n", @@ -102,7 +80,33 @@ " <td>-26.89</td>\n", " </tr>\n", " <tr>\n", + " <th>1</th>\n", + " <td>Slytherin</td>\n", + " <td>Erich</td>\n", + " <td>Paredes</td>\n", + " <td>1999-10-14</td>\n", + " <td>Right</td>\n", + " <td>67239.0</td>\n", + " <td>-552.060507</td>\n", + " <td>-5.987446</td>\n", + " <td>5.520605</td>\n", + " <td>-5.612</td>\n", + " <td>-487.340557</td>\n", + " <td>367.760303</td>\n", + " <td>4.107170</td>\n", + " <td>1058.944592</td>\n", + " <td>7.248742</td>\n", + " <td>0.091674</td>\n", + " <td>-252.18425</td>\n", + " <td>-113.45</td>\n", + " </tr>\n", + " <tr>\n", " <th>2</th>\n", + " <td>Ravenclaw</td>\n", + " <td>Stephany</td>\n", + " <td>Braun</td>\n", + " <td>1999-11-03</td>\n", + " <td>Left</td>\n", " <td>23702.0</td>\n", " <td>-366.076117</td>\n", " <td>7.725017</td>\n", @@ -118,392 +122,120 @@ " <td>30.42</td>\n", " </tr>\n", " <tr>\n", - " <th>14</th>\n", - " <td>44791.0</td>\n", - " <td>-197.527318</td>\n", - " <td>2.742444</td>\n", - " <td>1.975273</td>\n", - " <td>6.603</td>\n", - " <td>527.356323</td>\n", - " <td>605.590600</td>\n", - " <td>5.480097</td>\n", - " <td>1063.522361</td>\n", - " <td>9.407484</td>\n", - " <td>-1.112649</td>\n", - " <td>-232.65964</td>\n", - " <td>-19.94</td>\n", + " <th>3</th>\n", + " <td>Gryffindor</td>\n", + " <td>Vesta</td>\n", + " <td>Mcmichael</td>\n", + " <td>2000-08-19</td>\n", + " <td>Left</td>\n", + " <td>32667.0</td>\n", + " <td>697.742809</td>\n", + " <td>-6.497214</td>\n", + " <td>-6.977428</td>\n", + " <td>4.026</td>\n", + " <td>-537.001128</td>\n", + " <td>523.982133</td>\n", + " <td>-4.809637</td>\n", + " <td>920.391449</td>\n", + " <td>0.821911</td>\n", + " <td>-0.014040</td>\n", + " <td>-256.84675</td>\n", + " <td>200.64</td>\n", " </tr>\n", " <tr>\n", - " <th>15</th>\n", - " <td>21630.0</td>\n", - " <td>-447.649812</td>\n", - " <td>4.046727</td>\n", - " <td>4.476498</td>\n", - " <td>4.949</td>\n", - " <td>810.154483</td>\n", - " <td>615.531088</td>\n", - " <td>3.653495</td>\n", - " <td>1075.853850</td>\n", - " <td>9.622899</td>\n", - " <td>0.567096</td>\n", - " <td>-229.38229</td>\n", - " <td>17.00</td>\n", - " </tr>\n", - " <tr>\n", - " <th>18</th>\n", - " <td>15578.0</td>\n", - " <td>-151.088209</td>\n", - " <td>6.871857</td>\n", - " <td>1.510882</td>\n", - " <td>7.906</td>\n", - " <td>645.447451</td>\n", - " <td>607.442729</td>\n", - " <td>6.317446</td>\n", - " <td>1091.054831</td>\n", - " <td>11.708777</td>\n", - " <td>0.260556</td>\n", - " <td>-226.76896</td>\n", - " <td>11.22</td>\n", + " <th>5</th>\n", + " <td>Slytherin</td>\n", + " <td>Corrine</td>\n", + " <td>Hammond</td>\n", + " <td>1999-04-04</td>\n", + " <td>Right</td>\n", + " <td>21209.0</td>\n", + " <td>-613.687160</td>\n", + " <td>-4.289197</td>\n", + " <td>6.136872</td>\n", + " <td>-6.592</td>\n", + " <td>-440.997704</td>\n", + " <td>396.201804</td>\n", + " <td>5.380286</td>\n", + " <td>1052.845164</td>\n", + " <td>11.751212</td>\n", + " <td>1.049894</td>\n", + " <td>-247.94549</td>\n", + " <td>-34.69</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " Arithmancy Astronomy Herbology Defense Against the Dark Arts \\\n", - "0 58384.0 -487.886086 5.727180 4.878861 \n", - "2 23702.0 -366.076117 7.725017 3.660761 \n", - "14 44791.0 -197.527318 2.742444 1.975273 \n", - "15 21630.0 -447.649812 4.046727 4.476498 \n", - "18 15578.0 -151.088209 6.871857 1.510882 \n", + " house first_name last_name birthday best_hand arithmancy \\\n", + "0 Ravenclaw Tamara Hsu 2000-03-30 Left 58384.0 \n", + "1 Slytherin Erich Paredes 1999-10-14 Right 67239.0 \n", + "2 Ravenclaw Stephany Braun 1999-11-03 Left 23702.0 \n", + "3 Gryffindor Vesta Mcmichael 2000-08-19 Left 32667.0 \n", + "5 Slytherin Corrine Hammond 1999-04-04 Right 21209.0 \n", "\n", - " Divination Muggle Studies Ancient Runes History of Magic \\\n", - "0 4.722 272.035831 532.484226 5.231058 \n", - "2 6.140 664.893521 602.585284 3.555579 \n", - "14 6.603 527.356323 605.590600 5.480097 \n", - "15 4.949 810.154483 615.531088 3.653495 \n", - "18 7.906 645.447451 607.442729 6.317446 \n", + " astronomy herbology defense_against_the_dark_arts divination \\\n", + "0 -487.886086 5.727180 4.878861 4.722 \n", + "1 -552.060507 -5.987446 5.520605 -5.612 \n", + "2 -366.076117 7.725017 3.660761 6.140 \n", + "3 697.742809 -6.497214 -6.977428 4.026 \n", + "5 -613.687160 -4.289197 6.136872 -6.592 \n", "\n", - " Transfiguration Potions Care of Magical Creatures Charms Flying \n", - "0 1039.788281 3.790369 0.715939 -232.79405 -26.89 \n", - "2 1088.088348 8.728531 -0.515327 -227.34265 30.42 \n", - "14 1063.522361 9.407484 -1.112649 -232.65964 -19.94 \n", - "15 1075.853850 9.622899 0.567096 -229.38229 17.00 \n", - "18 1091.054831 11.708777 0.260556 -226.76896 11.22 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def house_scores_df(house_name):\n", - " house_filt = (df['Hogwarts House'] == house_name)\n", - " return df[house_filt].loc[:, 'Arithmancy':'Flying']\n", - "\n", - "houses_df = {key: house_scores_df(key) for key in df['Hogwarts House'].unique()}\n", - "houses_df['Ravenclaw'].head()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", + " muggle_studies ancient_runes history_of_magic transfiguration \\\n", + "0 272.035831 532.484226 5.231058 1039.788281 \n", + "1 -487.340557 367.760303 4.107170 1058.944592 \n", + "2 664.893521 602.585284 3.555579 1088.088348 \n", + "3 -537.001128 523.982133 -4.809637 920.391449 \n", + "5 -440.997704 396.201804 5.380286 1052.845164 \n", "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>Arithmancy</th>\n", - " <th>Astronomy</th>\n", - " <th>Herbology</th>\n", - " <th>Defense Against the Dark Arts</th>\n", - " <th>Divination</th>\n", - " <th>Muggle Studies</th>\n", - " <th>Ancient Runes</th>\n", - " <th>History of Magic</th>\n", - " <th>Transfiguration</th>\n", - " <th>Potions</th>\n", - " <th>Care of Magical Creatures</th>\n", - " <th>Charms</th>\n", - " <th>Flying</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>0.070306</td>\n", - " <td>-0.005636</td>\n", - " <td>0.043022</td>\n", - " <td>0.005636</td>\n", - " <td>-0.015935</td>\n", - " <td>-0.117316</td>\n", - " <td>-0.163066</td>\n", - " <td>0.019460</td>\n", - " <td>-0.077827</td>\n", - " <td>-0.252914</td>\n", - " <td>0.124483</td>\n", - " <td>-0.056668</td>\n", - " <td>-0.074683</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>-0.197869</td>\n", - " <td>0.068820</td>\n", - " <td>0.166863</td>\n", - " <td>-0.068820</td>\n", - " <td>0.075970</td>\n", - " <td>0.103740</td>\n", - " <td>0.013395</td>\n", - " <td>-0.092922</td>\n", - " <td>0.281957</td>\n", - " <td>0.135497</td>\n", - " <td>-0.091151</td>\n", - " <td>0.126031</td>\n", - " <td>0.106128</td>\n", - " </tr>\n", - " <tr>\n", - " <th>14</th>\n", - " <td>-0.034800</td>\n", - " <td>0.171845</td>\n", - " <td>-0.141995</td>\n", - " <td>-0.171845</td>\n", - " <td>0.105978</td>\n", - " <td>0.026350</td>\n", - " <td>0.020960</td>\n", - " <td>0.036164</td>\n", - " <td>0.098966</td>\n", - " <td>0.188900</td>\n", - " <td>-0.195761</td>\n", - " <td>-0.052163</td>\n", - " <td>-0.052756</td>\n", - " </tr>\n", - " <tr>\n", - " <th>15</th>\n", - " <td>-0.213890</td>\n", - " <td>0.018958</td>\n", - " <td>-0.061146</td>\n", - " <td>-0.018958</td>\n", - " <td>-0.001222</td>\n", - " <td>0.185477</td>\n", - " <td>0.045982</td>\n", - " <td>-0.086354</td>\n", - " <td>0.190823</td>\n", - " <td>0.205844</td>\n", - " <td>0.098415</td>\n", - " <td>0.057674</td>\n", - " <td>0.063788</td>\n", - " </tr>\n", - " <tr>\n", - " <th>18</th>\n", - " <td>-0.260687</td>\n", - " <td>0.200230</td>\n", - " <td>0.113977</td>\n", - " <td>-0.200230</td>\n", - " <td>0.190430</td>\n", - " <td>0.092798</td>\n", - " <td>0.025622</td>\n", - " <td>0.092329</td>\n", - " <td>0.304054</td>\n", - " <td>0.369908</td>\n", - " <td>0.044731</td>\n", - " <td>0.145258</td>\n", - " <td>0.045552</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " Arithmancy Astronomy Herbology Defense Against the Dark Arts \\\n", - "0 0.070306 -0.005636 0.043022 0.005636 \n", - "2 -0.197869 0.068820 0.166863 -0.068820 \n", - "14 -0.034800 0.171845 -0.141995 -0.171845 \n", - "15 -0.213890 0.018958 -0.061146 -0.018958 \n", - "18 -0.260687 0.200230 0.113977 -0.200230 \n", - "\n", - " Divination Muggle Studies Ancient Runes History of Magic \\\n", - "0 -0.015935 -0.117316 -0.163066 0.019460 \n", - "2 0.075970 0.103740 0.013395 -0.092922 \n", - "14 0.105978 0.026350 0.020960 0.036164 \n", - "15 -0.001222 0.185477 0.045982 -0.086354 \n", - "18 0.190430 0.092798 0.025622 0.092329 \n", - "\n", - " Transfiguration Potions Care of Magical Creatures Charms Flying \n", - "0 -0.077827 -0.252914 0.124483 -0.056668 -0.074683 \n", - "2 0.281957 0.135497 -0.091151 0.126031 0.106128 \n", - "14 0.098966 0.188900 -0.195761 -0.052163 -0.052756 \n", - "15 0.190823 0.205844 0.098415 0.057674 0.063788 \n", - "18 0.304054 0.369908 0.044731 0.145258 0.045552 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def normalize_house_scores_df(house_scores_df):\n", - " return (house_scores_df - house_scores_df.mean()) / (house_scores_df.max() - house_scores_df.min())\n", - "\n", - "for k, v in houses_df.items():\n", - " houses_df[k] = normalize_house_scores_df(v)\n", - "houses_df['Ravenclaw'].head()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([-0.65609001, -0.55609001, -0.45609001, -0.35609001, -0.25609001,\n", - " -0.15609001, -0.05609001, 0.04390999, 0.14390999, 0.24390999,\n", - " 0.34390999, 0.44390999, 0.54390999])" + " potions care_of_magical_creatures charms flying \n", + "0 3.790369 0.715939 -232.79405 -26.89 \n", + "1 7.248742 0.091674 -252.18425 -113.45 \n", + "2 8.728531 -0.515327 -227.34265 30.42 \n", + "3 0.821911 -0.014040 -256.84675 200.64 \n", + "5 11.751212 1.049894 -247.94549 -34.69 " ] }, - "execution_count": 4, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "scores_df = normalize_house_scores_df(df.loc[:, 'Arithmancy':'Flying'])\n", - "\n", - "bins = np.arange(scores_df.min().min(), scores_df.max().max(), 0.1)\n", - "bins" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD8CAYAAAB3u9PLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xt8VNW5//HPQ4gCBouawEEQggoVIhAggtaGpiKIlSIqglQLSgv4A7FStT+tPRWttlr1QP2V0h9WilcuRUXqsRZEFBQRCUTKReXSeEyK3CJICkgCz/ljduIEcpkkM5MQv+/Xa16ZWXvtvZ41ITyz9l6ztrk7IiIijeo6ABERqR+UEEREBFBCEBGRgBKCiIgASggiIhJQQhAREUAJQUREAkoIIiICKCGIiEigcV0HUJXk5GRPTU2t6zBERE4Y2dnZu909pbr71fuEkJqayurVq+s6DBGRE4aZfVKT/XTKSEREACUEEREJKCGIiAhwAlxDEJETU1FREXl5eRw6dKiuQ2mwmjRpQtu2bUlMTIzK8ZQQRCQm8vLyaN68OampqZhZXYfT4Lg7e/bsIS8vjw4dOkTlmDplJCIxcejQIc444wwlgxgxM84444yojsCUEEQkZpQMYiva768SgoiIALqGICJxsuOL6F5cbnVqkyrrJCQk0LVrV4qLi+nQoQPPPPMMLVq0iGoc1ZGbm8ugQYNYv359ncVQmSoTgpnNBAYBO939/KBsLvDNoEoLYK+7p5tZKrAJ+CjYttLdbw726QXMApoCrwI/cXePWk9E4qxox86Yt5HYqmXM22jImjZtSk5ODgCjRo1i2rRp3HPPPXUcVf0VySmjWcDA8AJ3H+7u6e6eDrwAvBi2eWvJtpJkEJgOjAE6Bo8yxxQ50ewuPBTzh0TPRRddRH5+PgCFhYX069ePnj170rVrV15++WUA7rrrLqZNm1a6z+TJk3n00UcBeOSRR7jgggvo1q0b9957LxD6xN+5c2fGjBlDWloaAwYM4ODBgwBs2bKFSy+9lO7du9OzZ0+2bt1aJp7c3FwyMzPp2bMnPXv2ZMWKFQBMmDCBhQsXAnDVVVcxevRoAGbOnBnzZFZlQnD3ZUBBedssdEVjGDC7smOYWWvgVHdfGYwKngaGVD9cEZHqO3LkCEuWLGHw4MFAaP7+Sy+9xJo1a1i6dCm333477s7w4cOZN29e6X7z5s1j+PDhLFq0iM2bN7Nq1SpycnLIzs5m2bJlAGzevJkJEyawYcMGWrRowQsvvADA9ddfz4QJE/jggw9YsWIFrVu3LhNTy5YtWbx4MWvWrGHu3LnceuutAGRmZrJ8+XIA8vPz2bhxIwDLly+nb9++MX2fansNIRPY4e6bw8o6mNla4AvgF+6+HGgD5IXVyQvKRERi5uDBg6Snp5Ofn0/nzp3p378/EJrD//Of/5xly5bRqFEj8vPz2bFjBz169GDnzp3861//YteuXZx22mmcddZZ/O53v2PRokX06NEDCI0wNm/eTLt27ejQoQPp6ekA9OrVi9zcXPbv309+fj5XXXUVEEpAxyoqKuKWW24hJyeHhIQEPv74YyCUEKZOncrGjRvp0qULn3/+Odu3b+fdd9/l8ccfj+n7VduEMIKyo4PtQDt33xNcM1hgZmnVPaiZjQXGArRr166WIYrI11XJNYQDBw5w2WWXMW3aNG699Vaee+45du3aRXZ2NomJiaSmppbO57/22muZP38+n332GcOHDwdCCeTuu+9m3LhxZY6fm5vLySefXPo6ISGh9JRRVaZMmUKrVq344IMPOHr0aGnSaNOmDXv37uW1116jb9++FBQUMG/ePJKSkmjevHk03pYK1XjaqZk1Bq4G5paUufuX7r4neJ4NbAU6AflA27Dd2wZl5XL3Ge6e4e4ZKSnVXtJbRKSMZs2a8fjjj/PYY49RXFzMvn37aNmyJYmJiSxdupRPPvlqtejhw4czZ84c5s+fz7XXXgvAZZddxsyZMyksLARCp3J27qx4UkHz5s1p27YtCxYsAODLL7/kwIEDZers27eP1q1b06hRI5555hmOHDlSuu3CCy9k6tSp9O3bl8zMTB599FEyMzOj9n5UpDYjhEuBD9299FSQmaUABe5+xMzOJnTxeJu7F5jZF2Z2IfAeMBL4f7UJXEROLJFME42lHj160K1bN2bPns3111/P97//fbp27UpGRgbnnXdeab20tDT2799PmzZtSs/7DxgwgE2bNnHRRRcBkJSUxLPPPktCQkKF7T3zzDOMGzeOX/7ylyQmJvKXv/yFRo2++gw+fvx4rrnmGp5++mkGDhzIKaecUrotMzOTRYsWce6559K+fXsKCgrikhCsqpmfZjYbyAKSgR3Ave7+pJnNIjSt9I9hda8B7geKgKNB3b8G2zL4atrp34CJkUw7zcjIcN0gR+qj7Vv/J+ZttD7nxD1lumnTJjp37lzXYTR45b3PZpbt7hnVPVaVIwR3H1FB+Y3llL1AaBpqefVXA+dXMz4REYkTLV0hIiKAEoKIiASUEEREBFBCEBGRgBKCiIgASggiEi9fbI/uI0IPPvggaWlpdOvWjfT0dN577z2ysrKobDr73r17+cMf/lD6+s0332TQoEG16v4f//hHnn766VodI9Z0PwQRabDeffddXnnlFdasWcPJJ5/M7t27OXz4cJX7lSSE8ePHRyWO4uJibr755qor1jElBBFpsLZv305ycnLpekPJycllts+cOZN169YxdepUAJ544gk2btzI9u3b2bp1K+np6fTv358rrriCwsJChg4dyvr16+nVqxfPPvssZkZ2djY//elPKSwsJDk5mVmzZtG6dWuysrJIT0/n7bffZsSIEezfv5+kpCTuuOMOsrKy6NOnD0uXLmXv3r08+eSTcfkmclV0ykhEGqwBAwbw6aef0qlTJ8aPH89bb71VZvuwYcP461//SlFREQB//vOfGT16NA899BDnnHMOOTk5PPLIIwCsXbu2dBXSbdu28c4771BUVMTEiROZP38+2dnZjB49usw9Cw4fPszq1au5/fbbj4utuLiYVatWMXXqVO67774YvguR0whBRBqspKQksrOzWb58OUuXLmX48OE89NBDZbZfcsklvPLKK3Tu3JmioiK6du1Kbm7uccfq3bs3bduG1uhMT08nNzeXFi1asH79+tJltY8cOVLmvgclq6WW5+qrrwa+WjK7PlBCEJEGLSEhgaysLLKysujatStPPfVUme0//vGP+fWvf815553HTTfdVOFxjl3muri4GHcnLS2Nd999t9x9whesq+h4JceqD3TKSEQarI8++ojNm7+6f1dOTg7t27cvU6dPnz58+umnPP/884wYEVq6rXnz5uzfv7/K43/zm99k165dpQmhqKiIDRs2RLEH8aURgojEx6mtq64TZYWFhUycOJG9e/fSuHFjzj33XGbMmMHQoUPL1Bs2bBg5OTmcdtppAJxxxhlcfPHFnH/++Vx++eVcccUV5R7/pJNOYv78+dx6663s27eP4uJibrvtNtLSqn1fsHqhyuWv65qWv5b6SstfV+5EWv560KBBTJo0iX79+tV1KNUWzeWvdcpIRL629u7dS6dOnWjatOkJmQyiTaeMRORrq0WLFqU3txeNEEREJKCEICIigBKCiIgElBBERASIICGY2Uwz22lm68PKJptZvpnlBI/vhW2728y2mNlHZnZZWPnAoGyLmd0V/a6ISH2288DOqD4itWPHDn7wgx9w9tln06tXLy666CJeeumlasU+YsQIunXrxpQpU/jwww9JT0+nR48ebN26lW9961vVOlY0ltKOlUhGCLOAgeWUT3H39ODxKoCZdQGuA9KCff5gZglmlgBMAy4HugAjgroiIjHj7gwZMoS+ffuybds2srOzmTNnDnl5eWXqVbZ0xGeffcb777/PunXrmDRpEgsWLGDo0KGsXbuWc845hxUrVsS0D/Fc1qLKaafuvszMUiM83pXAHHf/EvinmW0Begfbtrj7NgAzmxPU3VjtiEVEIvTGG29w0kknlbkXQfv27Zk4cSKzZs3ixRdfpLCwkCNHjtC+fXuuvvpqhgwZAsD111/PsGHD+M///E/y8/NJT0/nqquuYvr06SQkJLBkyRKWLl1KUlIShYWFvPnmm0yePJnk5OTjlsh+7bXXuO2222jWrBnf/va3S2MpKChg9OjRbNu2jWbNmjFjxgy6devG5MmT2bp1K9u2baNdu3bMnj07Lu9Xbb6HcIuZjQRWA7e7++dAG2BlWJ28oAzg02PK+9SibRGRKm3YsIGePXtWuH3NmjWsW7eO008/nbfeeospU6YwZMgQ9u3bx4oVK3jqqafo3r07gwYNIicnBwiNOkrua3CstWvXsmHDBs4880wuvvhi3nnnHTIyMhgzZgxvvPEG5557bpkVUO+991569OjBggULeOONNxg5cmRpOxs3buTtt9+madOmUX5XKlbTi8rTgXOAdGA78FjUIgLMbKyZrTaz1bt27YrmoUXka2zChAl0796dCy64AID+/ftz+umnA/Cd73yHzZs3s2vXLmbPns0111xD48bV+8xcskR2o0aNSpfI/vDDD+nQoQMdO3bEzLjhhhtK67/99tv88Ic/BOCSSy5hz549fPHFFwAMHjw4rskAapgQ3H2Hux9x96PAE3x1WigfOCusatugrKLyio4/w90z3D0jJSWlJiGKiJCWlsaaNWtKX0+bNo0lS5ZQ8kHz2OWpR44cybPPPlt6o5zqKm+J7JqqbOnsWKlRQjCz8GULrwJKZiAtBK4zs5PNrAPQEVgFvA90NLMOZnYSoQvPC2setohI1S655BIOHTrE9OnTS8sOHDhQYf0bb7yx9HaaXbpEZ97LeeedR25uLlu3bgUocz0gMzOT5557DgjNPkpOTubUU0+NSrs1UeV4yMxmA1lAspnlAfcCWWaWDjiQC4wDcPcNZjaP0MXiYmCCux8JjnML8HcgAZjp7ifuouEiUm0tm7WMe5tmxoIFC5g0aRK//e1vSUlJ4ZRTTuHhhx/m4MGDx9Vv1aoVnTt3Lr2wHA1NmjRhxowZXHHFFTRr1ozMzMzSey1MnjyZ0aNH061bN5o1a3bczXviTctfi9SQlr+u3Im0/HWJAwcO0LVrV9asWcM3vvGNug4nIlr+WkQkyl5//XU6d+7MxIkTT5hkEG1a/lpEBLj00kv55JNP6jqMOqURgoiIAEoIIiISUEIQERFACUFERAJKCCISF0U7dkb1EYmkpKQyr2fNmsUtt9xS5X533nknaWlp3HnnnezatYs+ffrQo0cPli9fTmpqKrt3767Re1CZypbYjhfNMhIROcaMGTMoKCggISGBOXPm0LVrV/70pz/FrL2SJba3bNkCwEMPPcTQoUP5xS9+EbM2y6MRgoh8Ld14443Mnz+/9HXJaGLw4MEUFhbSq1cvHn74YX72s5/x8ssvk56efty3m5999ll69+5Neno648aN48iRI6XHmjRpEmlpafTr16907aSsrCxKvmi7e/duUlNTARgwYEDpEtv33XcfU6dOZfr06Xz3u9+N9dtQhkYIItJgHTx4kPT09NLXBQUFDB48uNJ9Fi5cSFJSUuky1K1atWL16tX8/ve/L1Nv06ZNzJ07l3feeYfExETGjx/Pc889x8iRI/n3v/9NRkYGU6ZM4f777+e+++47bv9j24x0ie1YUkIQkQaradOmpf/JQugaQrSWwlmyZAnZ2dmlS2kfPHiQli1D6zU1atSo9L4HN9xwA1dffXVU2ow1JQQR+Vpq3LgxR48eBeDo0aMcPny4Wvu7O6NGjeI3v/lNlXXN7Lg2Dx06VM2IY0/XEETkayk1NZXs7GwgdMqmqKioWvv369eP+fPns3NnaMZTQUFB6dIXR48eLb0+8fzzz5feNjO8zfDrF/WFRggiEheJreK//HVlxowZw5VXXkn37t0ZOHBgtW9I06VLFx544AEGDBjA0aNHSUxMZNq0abRv355TTjmFVatW8cADD9CyZUvmzp0LwB133MGwYcNKl8Oub7T8tUgNafnryp2Iy19HS1JSEoWFhXFpS8tfi4hI1CkhiIhEWbxGB9GmhCAiMVPfT0mf6KL9/iohiEhMNGnShD179igpxIi7s2fPHpo0aRK1Y2qWkYjERNu2bcnLyytdtkGir0mTJrRt2zZqx6syIZjZTGAQsNPdzw/KHgG+DxwGtgI3ufteM0sFNgEfBbuvdPebg316AbOApsCrwE9cHx1EGqzExEQ6dOhQ12FINURyymgWMPCYssXA+e7eDfgYuDts21Z3Tw8eN4eVTwfGAB2Dx7HHFBGROlRlQnD3ZUDBMWWL3L04eLkSqHTMYmatgVPdfWUwKngaGFKzkEVEJBaicVF5NPC3sNcdzGytmb1lZplBWRsgL6xOXlAmIiL1RK0uKpvZPUAx8FxQtB1o5+57gmsGC8wsrQbHHQuMBWjX7sT9pqaIyImkxiMEM7uR0MXm60suDrv7l+6+J3ieTeiCcycgn7KnldoGZeVy9xnunuHuGSkpKTUNUUREqqFGCcHMBgI/Awa7+4Gw8hQzSwien03o4vE2d98OfGFmF1poHdiRwMu1jl5ERKImkmmns4EsINnM8oB7Cc0qOhlYHKzzXTK9tC9wv5kVAUeBm9295IL0eL6advo3yl53EBGROlZlQnD3EeUUP1lB3ReAFyrYtho4v1rRiYhI3GjpChERAZQQREQkoIQgIiKAFrcTqdeKduyMSzv17faWUjc0QhAREUAJQUREAkoIIiICKCGIiEhACUFERADNMpIGKl6zc0QaEiUEkRoqOFRQdaVaSk46M+ZtiJRQQhCpx3YXHopLO61bxaUZqed0DUFERAAlBBERCSghiIgIoIQgIiIBJQQREQGUEEREJKCEICIigBKCiIgEIkoIZjbTzHaa2fqwstPNbLGZbQ5+nhaUm5k9bmZbzGydmfUM22dUUH+zmY2KfndERKSmIh0hzAIGHlN2F7DE3TsCS4LXAJcDHYPHWGA6hBIIcC/QB+gN3FuSREREpO5FlBDcfRlw7MItVwJPBc+fAoaElT/tISuBFmbWGrgMWOzuBe7+ObCY45OMiIjUkdpcQ2jl7tuD558BJauhtAE+DauXF5RVVC4iIvVAVC4qu7sDHo1jAZjZWDNbbWard+3aFa3DiohIJWqTEHYEp4IIfpYsQJ8PnBVWr21QVlH5cdx9hrtnuHtGSkpKLUIUEZFI1SYhLARKZgqNAl4OKx8ZzDa6ENgXnFr6OzDAzE4LLiYPCMpERKQeiOh+CGY2G8gCks0sj9BsoYeAeWb2I+ATYFhQ/VXge8AW4ABwE4C7F5jZr4D3g3r3u3vs7zAiIiIRiSghuPuICjb1K6euAxMqOM5MYGbE0YmISNzom8oiIgIoIYiISEAJQUREACUEEREJRHRRWUSO1+jA7tg30uT02LchEtAIQUREACUEEREJKCGIiAighCAiIgFdVJYGaXfhoboOQeSEoxGCiIgASggiIhJQQhAREUAJQUREAkoIIiICKCGIiEhACUFERAAlBBERCSghiIgIoIQgIiKBGi9dYWbfBOaGFZ0N/BJoAYwBdgXlP3f3V4N97gZ+BBwBbnX3v9e0fZHKFBwqiHkb+jQlDU2NE4K7fwSkA5hZApAPvATcBExx90fD65tZF+A6IA04E3jdzDq5+5GaxiAiItETrQ85/YCt7v5JJXWuBOa4+5fu/k9gC9A7Su2LiEgtRSshXAfMDnt9i5mtM7OZZnZaUNYG+DSsTl5QJiIi9UCtl782s5OAwcDdQdF04FeABz8fA0ZX85hjgbEA7dq1q22IIieseFwLAWiN/s4kOiOEy4E17r4DwN13uPsRdz8KPMFXp4XygbPC9msblB3H3We4e4a7Z6SkpEQhRBERqUo0EsIIwk4XmVnrsG1XAeuD5wuB68zsZDPrAHQEVkWhfRERiYJanTIys1OA/sC4sOLfmlk6oVNGuSXb3H2Dmc0DNgLFwATNMBIRqT9qlRDc/d/AGceU/bCS+g8CD9amTRERiQ19t0ZERAAlBBERCSghiIgIoIQgIiIBJQQREQGUEEREJKCEICIiQBTWMhKpjxod2F3XIYiccDRCEBERQAlBREQCOmUkUo/p1JfEk0YIIiICKCGIiEhACUFERAAlBBERCSghiIgIoIQgIiIBJQQREQGUEEREJKCEICIiQBQSgpnlmtk/zCzHzFYHZaeb2WIz2xz8PC0oNzN73My2mNk6M+tZ2/ZFRCQ6ojVC+K67p7t7RvD6LmCJu3cElgSvAS4HOgaPscD0KLUvIiK1FKu1jK4EsoLnTwFvAv83KH/a3R1YaWYtzKy1u2+PURwiEoGiHTtj3kZiq5Yxb0NqJxojBAcWmVm2mY0NylqF/Sf/GdAqeN4G+DRs37ygTERE6lg0Rgjfdvd8M2sJLDazD8M3urubmVfngEFiGQvQrl27KIQoIiJVqfUIwd3zg587gZeA3sAOM2sNEPwsGY/mA2eF7d42KDv2mDPcPcPdM1JSUmobooiIRKBWCcHMTjGz5iXPgQHAemAhMCqoNgp4OXi+EBgZzDa6ENin6wciIvVDbU8ZtQJeMrOSYz3v7q+Z2fvAPDP7EfAJMCyo/yrwPWALcAC4qZbti4hIlNQqIbj7NqB7OeV7gH7llDswoTZtiohIbOibyiIiAighiIhIIFZfTBMpVzy+ACUiNaMRgoiIAEoIIiISUEIQERFACUFERAJKCCIiAighiIhIQAlBREQAJQQREQkoIYiICKCEICIiAS1dISLsLjwU8zZat6q6jtQtjRBERARQQhARkYASgoiIAEoIIiISUEIQERFACUFERAI1TghmdpaZLTWzjWa2wcx+EpRPNrN8M8sJHt8L2+duM9tiZh+Z2WXR6ICIiERHbb6HUAzc7u5rzKw5kG1mi4NtU9z90fDKZtYFuA5IA84EXjezTu5+pBYxiEgUFBwqiHkbrWkX8zakdmqcENx9O7A9eL7fzDYBbSrZ5Upgjrt/CfzTzLYAvYF3axqDnHji8QUoEamZqHxT2cxSgR7Ae8DFwC1mNhJYTWgU8TmhZLEybLc8Kk8gIhInjQ7srusQpB6odUIwsyTgBeA2d//CzKYDvwI8+PkYMLqaxxwLjAVo107DzIYkHqcmQLMlRGqiVn83ZpZIKBk85+4vArj7Dnc/4u5HgScInRYCyAfOCtu9bVB2HHef4e4Z7p6RkpJSmxBFRCRCtZllZMCTwCZ3/6+w8tZh1a4C1gfPFwLXmdnJZtYB6Aisqmn7IiISXbU5ZXQx8EPgH2aWE5T9HBhhZumEThnlAuMA3H2Dmc0DNhKaoTRBM4y+fnSuWqT+qs0so7cBK2fTq5Xs8yDwYE3bFBGR2NG1NxERAZQQREQkoIQgIiKAEoKIiASUEEREBFBCEBGRgBKCiIgASggiIhKIymqnIiJVKdqxM+ZtJLZqGfM2GjKNEEREBNAIQcLE4xOciNRfGiGIiAighCAiIgElBBERAXQNQUTipXBH7NvQLKNaUUKQr8TjD1a+tvYc+jzmbfxHzFto2HTKSEREAI0QJMyWAo0QJHY+P3A45m1ohFA7GiGIiAighCAiIoG4nzIys4HA74AE4E/u/lC8YzgR6VvEIlUr2vqPuLSTeE7XuLQTb3FNCGaWAEwD+gN5wPtmttDdN8YzjhOSZgCJVGnPv2N/nQKg0YHYf0Br2Sz+U2jjPULoDWxx920AZjYHuBKISULY8cWhWBy2ThTogq9IlT7/MvZTWwHO4Ky4tBNv8U4IbYBPw17nAX3iHENUFXz0dl2HICJxtmfDupi30fKCS2PexrHq5bRTMxsLjA1eFprZR3EOIRnYHec2Y6Wh9KWh9APUl/qqofQlGWhfkx3jnRDyocxYq21QVoa7zwBmxCuoY5nZanfPqKv2o6mh9KWh9APUl/qqofQl6EdqTfaN97TT94GOZtbBzE4CrgMWxjkGEREpR1xHCO5ebGa3AH8nNO10prtviGcMIiJSvrhfQ3D3V4FX491uNdXZ6aoYaCh9aSj9APWlvmoofalxP8zdoxmIiIicoLR0hYiIAEoIAJjZ6Wa22Mw2Bz9Pq6BeOzNbZGabzGyjmaXGN9KqRdqXoO6pZpZnZr+PZ4yRiKQfZpZuZu+a2QYzW2dmw+si1oqY2UAz+8jMtpjZXeVsP9nM5gbb36uP/55KRNCXnwZ/E+vMbImZ1WjaY6xV1Y+weteYmZtZvZ11FElfzGxY8HvZYGbPV3lQd//aP4DfAncFz+8CHq6g3ptA/+B5EtCsrmOvaV+C7b8Dngd+X9dx16QfQCegY/D8TGA70KKuYw/iSQC2AmcDJwEfAF2OqTMe+GPw/Dpgbl3HXYu+fLfk7wH4P/WxL5H0I6jXHFgGrAQy6jruWvxOOgJrgdOC1y2rOq5GCCFXAk8Fz58Chhxbwcy6AI3dfTGAuxe6+4H4hRixKvsCYGa9gFbAojjFVV1V9sPdP3b3zcHzfwE7gZS4RVi50mVa3P0wULJMS7jwPs4H+pmZxTHGSFXZF3dfGvb3sJLQd4zqm0h+JwC/Ah4G6vPaN5H0ZQwwzd0/B3D3KhdgUkIIaeXu24PnnxH6j/JYnYC9Zvaima01s0eCxfrqmyr7YmaNgMeAO+IZWDVF8jspZWa9CX1S2hrrwCJU3jItbSqq4+7FwD7gjLhEVz2R9CXcj4C/xTSimqmyH2bWEzjL3f87noHVQCS/k05AJzN7x8xWBitNV6peLl0RC2b2OuXfUOme8Bfu7mZW3tSrxkAm0AP4H2AucCPwZHQjrVoU+jIeeNXd8+ryA2kU+lFynNbAM8Aodz8a3SilOszsBiAD+E5dx1JdwQel/yL0d90QNCZ02iiL0IhtmZl1dfe9le3wteDuFa4UZWY7zKy1u28P/nMpb2iVB+T4Vyu1LgAupA4SQhT6chGQaWbjCV0LOcnMCt29wotssRCFfmBmpwL/Ddzj7itjFGpNRLJMS0mdPDNrDHwD2BOf8KoloiVnzOxSQsn8O+7+ZZxiq46q+tEcOB94M/ig9B/AQjMb7O6r4xZlZCL5neQB77l7EfBPM/uYUII7jJ+8AAABO0lEQVR4v6KD6pRRyEJgVPB8FPByOXXeB1qYWck56kuI0bLdtVRlX9z9endv56H1Tu4Ano53MohAlf0Ilj95iVD88+MYWyQiWaYlvI9DgTc8uPpXz1TZFzPrAfx/YHAk56rrSKX9cPd97p7s7qnB38ZKQv2pb8kAIvv3tYDQ6AAzSyZ0CmlbpUet66vl9eFB6LztEmAz8DpwelCeQeiubiX1+gPrgH8As4CT6jr2mvYlrP6N1M9ZRlX2A7gBKAJywh7pdR17WB++B3xM6LrGPUHZ/YT+kwFoAvwF2AKsAs6u65hr0ZfXgR1hv4eFdR1zTfpxTN03qaezjCL8nRihU2Abg/+zrqvqmPqmsoiIADplJCIiASUEEREBlBBERCSghCAiIoASgoiIBJQQREQEUEIQEZGAEoKIiADwv//QWuBycVMmAAAAAElFTkSuQmCC\n", - "text/plain": [ - "<Figure size 432x288 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "houses_scores_series = {k: pd.Series() for k in houses_df.keys()}\n", - "for k, v in houses_df.items():\n", - " for col in houses_df[k]:\n", - " houses_scores_series[k] = houses_scores_series[k].append(houses_df[k][col])\n", - " \n", - "#print(houses_scores_series)\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", "\n", - "for k, v in houses_scores_series.items():\n", - " plt.hist(v, bins, histtype='bar', rwidth=1, alpha=0.1, label=k)\n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD8CAYAAACfF6SlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAE1hJREFUeJzt3X+QnWV1wPHvCQQQBAIkxpgfxIzBlqltoDsRsY4GpEVaxRkBqYjRoc1MgY4t7SjWP2p/jdJOpVgEmymOwdECUpGgWIkhjFIaSBgwCFRYUimJ+QWGECYmZMnpH/eNXtMN9+7d++69u8/3M7Oz7/vc9557ntnsydnnvvtsZCaSpLJM6nUCkqSxZ/GXpAJZ/CWpQBZ/SSqQxV+SCmTxl6QCWfwlqUAWf0kqkMVfkgp0aK8TAJg6dWrOnTu312lI0rjy4IMPPpuZ0zp5bl8U/7lz57J27dpepyFJ40pEPN3pc132kaQCWfwlqUAWf0kqkMVfkgpk8ZekAln8JalAFn9JKpDFX5IKZPGXpAL1xW/4SoIH7lhfW+yF755XW2yNT211/hHx44h4JCIejoi11djxEbEiIp6sPh9XjUdEfC4iBiNiXUScWucEJEkjN5Jln0WZuSAzB6rzK4GVmTkfWFmdA7wLmF99LAGu71aykqTuGM2a/7nAsup4GfDepvEbs2E1MCUiZozidSRJXdZu8U/groh4MCKWVGPTM3NTdbwZmF4dzwSeaXruhmrsl0TEkohYGxFrt23b1kHqkqROtfuG729l5saIeA2wIiL+u/nBzMyIyJG8cGYuBZYCDAwMjOi5kqTRaavzz8yN1eetwG3AQmDL/uWc6vPW6vKNwOymp8+qxiRJfaJl8Y+IoyLi6P3HwG8DPwSWA4uryxYDt1fHy4EPVXf9nAbsaFoekiT1gXaWfaYDt0XE/uu/mpn/ERFrgFsi4hLgaeCC6vo7gXOAQWAX8JGuZy1JGpWWxT8z1wO/Mcz4c8CZw4wncFlXspMk1cLtHSSpQBZ/SSqQxV+SCmTxl6QCWfwlqUAWf0kqkMVfkgpk8ZekAln8JalAFn9JKpDFX5IKZPGXpAJZ/CWpQBZ/SSpQu3/GUVLN9ky+scbon6oxtsYjO39JKpDFX5IKZPGXpAJZ/CWpQBZ/SSqQxV+SCuStnlKfeG7n5l6noILY+UtSgSz+klQgi78kFcjiL0kFsvhLUoEs/pJUIIu/JBXI4i9JBbL4S1KB2i7+EXFIRDwUEd+szl8fEfdHxGBE3BwRh1Xjh1fng9Xjc+tJXZLUqZF0/h8FHm86vwq4OjPfAGwHLqnGLwG2V+NXV9dJkvpIW3v7RMQs4HeBvwOuiIgAzgA+UF2yjMbfibseOJdf/M24W4FrIyIyM7uXttRw39e+Ulvs08+/qLbYUq+12/n/E/AxYF91fgLwfGYOVecbgJnV8UzgGYDq8R3V9ZKkPtGy+EfE7wFbM/PBbr5wRCyJiLURsXbbtm3dDC1JaqGdzv+twHsi4sfATTSWe64BpkTE/mWjWcDG6ngjMBugevxY4LkDg2bm0swcyMyBadOmjWoSkqSRaVn8M/MTmTkrM+cCFwJ3Z+ZFwCrgvOqyxcDt1fHy6pzq8btd75ek/jKa+/w/TuPN30Eaa/o3VOM3ACdU41cAV44uRUlSt43oL3ll5j3APdXxemDhMNfsBs7vQm6SpJr4G76SVCCLvyQVyOIvSQWy+EtSgSz+klQgi78kFWhEt3pKpbp6xRO1xf7Ts06qLbZ0MHb+klQgi78kFcjiL0kFsvhLUoEs/pJUIIu/JBXI4i9JBbL4S1KB/CUvjWt3vPT92mKfzkW1xZZ6zc5fkgpk5y8V5rqHr6st9qULLq0ttrrLzl+SCmTxl6QCWfwlqUAWf0kqkG/4Sm1Yvf65XqcgdZWdvyQVyOIvSQVy2Ue12bHi6dpiH3vWibXFlkpg5y9JBbL4S1KBLP6SVCCLvyQVyOIvSQXybh+pDeft/HKN0d9SY2xpeC07/4g4IiIeiIgfRMSjEfFX1fjrI+L+iBiMiJsj4rBq/PDqfLB6fG69U5AkjVQ7nf8e4IzMfDEiJgP3RsS3gSuAqzPzpoj4AnAJcH31eXtmviEiLgSuAt5fU/7qY7seeKC22N7nL41Oy+KfmQm8WJ1Orj4SOAP4QDW+DPgUjeJ/bnUMcCtwbUREFUdSAbb987W1xp/2x5fXGr8Eba35R8QhwIPAG4DPA08Bz2fmUHXJBmBmdTwTeAYgM4ciYgdwAvDsATGXAEsA5syZM7pZSBPAC7v39joFFaSt4p+ZLwMLImIKcBvwK6N94cxcCiwFGBgY8KcC9bV7jqxvq4rza4ssHdyI7vbJzOcjYhWN2xOmRMShVfc/C9hYXbYRmA1siIhDgWMB98PtJ6s+XV/sRZ+oL7a64o2PvKa+4AvqC63uaudun2lVx09EvAo4C3gcWAWcV122GLi9Ol5enVM9frfr/ZLUX9rp/GcAy6p1/0nALZn5zYh4DLgpIv4WeAi4obr+BuDLETEI/BS4sIa8JUmj0M7dPuuAU4YZXw8sHGZ8Ny5jSlJfc3sHSSqQxV+SCuTePqrN5h0/rS32jNoiF2DyrTUGP6/1JeoLdv6SVCCLvyQVyOIvSQVyzV/j2jPbf9brFKRxyc5fkgpk5y+p+378/ZpfwC2dR8vOX5IKZPGXpAJZ/CWpQK75S33i7q2/WlvsD9UWWeOVnb8kFcjOXyrM83uHWl+kCc/iL7VhD/4xOk0sFn+NaxuHjuh1CtK45Jq/JBXIzl8qzdBhvc5AfcDOX5IKZPGXpAK57FOgbd98uLbY0xbVFnpYOw4fozd893m3jyYWO39JKpDFX5IKZPGXpAJZ/CWpQBZ/SSqQd/tIfeKpw2f2OgUVxM5fkgpk5y+1IXPfGLzGy7W/hrSfnb8kFahl8Y+I2RGxKiIei4hHI+Kj1fjxEbEiIp6sPh9XjUdEfC4iBiNiXUScWvckJEkj007nPwT8WWaeDJwGXBYRJwNXAiszcz6wsjoHeBcwv/pYAlzf9awlSaPScs0/MzcBm6rjnRHxODATOBd4R3XZMuAe4OPV+I2ZmcDqiJgSETOqOJIKMDRpYa9TUAsjWvOPiLnAKcD9wPSmgr4ZmF4dzwSeaXrahmpMktQn2r7bJyJeDfw78CeZ+UJE/PyxzMyIGNG2hxGxhMayEHPmzBnJUzVKa3ZvqS32ObVFloax6tP1xV70ifpi94G2Ov+ImEyj8H8lM79eDW+JiBnV4zOArdX4RmB209NnVWO/JDOXZuZAZg5Mmzat0/wlSR1o2flHo8W/AXg8Mz/b9NByYDHwmerz7U3jl0fETcCbgR2u9xdq945eZyDpINpZ9nkrcDHwSETs/ysgf0Gj6N8SEZcATwMXVI/dSeOn/0FgF/CRrmYsSRq1du72uReIgzx85jDXJ3DZKPOS2nLq0OO9TkEal/wNX0kqkMVfkgrkxm6qzV1zltcW+xQ+VltsqQQWf6kNkyYN9ToFqatc9pGkAln8JalAFn9JKpBr/lJhvjXlrbXFfl9tkdVtFn9JXfeToRdrjT+j1uhlsPhLhYl9I9qAVxOUa/6SVCA7/wLNP9JNVqXS2flLUoHs/FWbLa86otcpaBjrN07pdQrqAxb/Ah11yNxepyCpx1z2kaQCWfwlqUAu+0jquhmH39frFNSCnb8kFcjOX1LX7d1nX9nvLP59ZNWqVbXFXrRoUW2xi5BuiaCJxeKv2uwail6nIOkg/NlMkgpk569xbRL+dFGy655fV1vsS2uL3B8s/lJhfPtC4LKPJBXJzl+1scGU+pfFX9K4tePZLb1OYdxy2UeSCmTxl6QCuezTR9ZsXlNb7EX4G76SfsHOX5IK1LL4R8QXI2JrRPywaez4iFgREU9Wn4+rxiMiPhcRgxGxLiJOrTN5SVJn2un8vwScfcDYlcDKzJwPrKzOAd4FzK8+lgDXdydNSVI3tSz+mfk94KcHDJ8LLKuOlwHvbRq/MRtWA1MiYka3kpUkdUenb/hOz8xN1fFmYHp1PBN4pum6DdXYJg4QEUto/HTAnDlzOkxjYplx1NpepyCpEKN+wzczkw5+mTMzl2bmQGYOTJs2bbRpqFCxL2v7kCayTjv/LRExIzM3Vcs6W6vxjcDsputmVWOSCvLo1BNqjX9irdHL0GnnvxxYXB0vBm5vGv9QddfPacCOpuUhSVKfaNn5R8S/Ae8ApkbEBuAvgc8At0TEJcDTwAXV5XcC5wCDwC7gIzXkPHHtnNzrDCQVomXxz8zfP8hDZw5zbQKXjTapfvTAHetri73w3fNqiy1Jw3F7B0nj1ssv7Ox1CuOWxV/qE+FfQNAYsvgX6JETptYW+3W1RZ74fnb4vTVGv7jG2BqP3NhNkgpk5y+p61YctbnW+OfUGr0MFv827Zl8Y43RP1Vj7P/vOzV+Y/5ObZEldZPLPpJUIDt/qU9Myuh1CiqInb8kFcjOv0B7bTCl4tn5S1KBLP6SVCCXffrJ0O4xeZl0FwHVLCfv6XUKasHOX5IKZOcvqev2eVNB37P4F2jPkD/wSaWz+LfpuZ317lUijZ363/TZvce/StfvLP4Fikkv9zoFST3mz/+SVCA7f0nj1xjdHn3f175SW+zTz7+ottivxM5fkgpk599Hfn37U71OQRpXJh9y7Ji8zh0vfb+22KfTm87f4q9xbV/468pSJyz+fWTSkIVspLxzSeqMa/6SVKBx3/mvX39NbbHnzftobbGHs/7IebXFflNtkaXe2fyqI3qdwrhl5y9JBRr3nf9Y2fnSzl6noAluwd4nep2CCmLxl9rgJpX9adeQX5lOWfylwuw64t4ao19cY2x107gv/huf2F5b7Hn1vf86rO0vTKw3r9zTXepftRT/iDgbuAY4BPjXzPxMHa+j/uY9+KqbDUbnul78I+IQ4PPAWcAGYE1ELM/Mx7r9WhPNfa/eUlvsD9cWWZr4jtk18d6Mr6PzXwgMZuZ6gIi4CTgXsPhLGp9+tqPXGXRdHcV/JvBM0/kG4M01vA4A1+2eWldo3tZ0PGV9fe8tSGNpwd4ne51C14zV0uKze48Zk9cZS5HZ3f1kIuI84OzM/IPq/GLgzZl5+QHXLQGWVKdvBH7U1URe2VTg2TF8vbpNpPk4l/7kXPrTGzPz6E6eWEfnvxGY3XQ+qxr7JZm5FFhaw+u3FBFrM3OgF69dh4k0H+fSn5xLf4qItZ0+t47tHdYA8yPi9RFxGHAhsLyG15EkdajrnX9mDkXE5cB3aNzq+cXMfLTbryNJ6lwt9/ln5p3AnXXE7pKeLDfVaCLNx7n0J+fSnzqeS9ff8JUk9T+3dJakAhVR/CPi+IhYERFPVp+PO8h1cyLiroh4PCIei4i5Y5tpe9qdT3XtMRGxISKuHcsc29XOXCJiQUT8V0Q8GhHrIuL9vcj1YCLi7Ij4UUQMRsSVwzx+eETcXD1+f7/+u4K25nJF9b2xLiJWRsSJvcizHa3m0nTd+yIiI6Jv7wBqZy4RcUH1tXk0Ir7aMmhmTvgP4O+BK6vjK4GrDnLdPcBZ1fGrgSN7nfto5lM9fg3wVeDaXufd6VyAk4D51fHrgE3AlF7nXuVzCPAUMA84DPgBcPIB11wKfKE6vhC4udd5j2Iui/Z/XwB/NJ7nUl13NPA9YDUw0Ou8R/F1mQ88BBxXnb+mVdwiOn8a20ssq46XAe898IKIOBk4NDNXAGTmi5m5a+xSHJGW8wGIiN8EpgN3jVFenWg5l8x8IjOfrI5/AmwFpo1Zhq/s59uZZOZLwP7tTJo1z/FW4MyI6MctyVrOJTNXNX1frKbxezz9qJ2vC8DfAFcBu8cyuRFqZy5/CHw+M7cDZObWVkFLKf7TM3NTdbyZRkE80EnA8xHx9Yh4KCL+odqkrh+1nE9ETAL+EfjzsUysA+18bX4uIhbS6H6eqjuxNg23ncnMg12TmUPADuCEMcluZNqZS7NLgG/XmlHnWs4lIk4FZmfmt8YysQ6083U5CTgpIv4zIlZXOyu/onG/n/9+EfFd4LXDPPTJ5pPMzIgY7hanQ2ls53MK8L/AzTQ2w7yhu5m2pwvzuRS4MzM39LrJ7MJc9seZAXwZWJyZ+7qbpUYiIj4IDABv73Uunaiao88ycTa8PZTG0s87aPw09r2IeFNmPv9KT5gQMvOdB3ssIrZExIzM3FQVkOF+JNoAPJy/2I30G8Bp9Kj4d2E+bwHeFhGX0nj/4rCIeDEzD/rGV126MBci4hjgW8AnM3N1Tal2op3tTPZfsyEiDgWOBZ4bm/RGpK2tWSLinTT+4357Zu4Zo9xGqtVcjgZ+Dbinao5eCyyPiPdkZsdbJtSkna/LBuD+zNwL/E9EPEHjP4M1BwtayrLPcmBxdbwYuH2Ya9YAUyJi/1ryGfTvNtQt55OZF2XmnMycS2Pp58ZeFP42tJxLtU3IbTTmcOsY5taOdrYzaZ7jecDdWb0r12daziUiTgH+BXhPO+vKPfSKc8nMHZk5NTPnVt8jq2nMqd8KP7T3b+wbNLp+ImIqjWWg9a8YtdfvZI/FB4311ZXAk8B3geOr8QEaf2ls/3VnAeuAR4AvAYf1OvfRzKfp+g/Tv3f7tJwL8EFgL/Bw08eCXufeNIdzgCdovA/xyWrsr2kUE4AjgK8Bg8ADwLxe5zyKuXwX2NL0dVje65w7ncsB195Dn97t0+bXJWgsYz1W1a8LW8X0N3wlqUClLPtIkppY/CWpQBZ/SSqQxV+SCmTxl6QCWfwlqUAWf0kqkMVfkgr0f9KPeckwNEN8AAAAAElFTkSuQmCC\n", - "text/plain": [ - "<Figure size 432x288 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "for col in scores_df:\n", - " plt.hist(scores_df[col], bins, rwidth=0.9, alpha=0.5, label=col)\n", - "#plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Arithmancy 0.082528\n", - "Astronomy 0.512377\n", - "Herbology 0.747755\n", - "Defense Against the Dark Arts 0.591140\n", - "Divination 0.549033\n", - "Muggle Studies 0.853373\n", - "Ancient Runes 0.566074\n", - "History of Magic 0.846511\n", - "Transfiguration 0.846511\n", - "Potions 0.598582\n", - "Care of Magical Creatures 0.063826\n", - "Charms 0.853373\n", - "Flying 0.566074\n", - "dtype: float64" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.corr()[df.corr() < 1.0].max()" + "df = pd.read_csv(\"./datasets/dataset_train.csv\")\n", + "df.drop(columns=[\"Index\"], inplace=True)\n", + "df.dropna(inplace=True)\n", + "df.columns = df.columns.str.lower()\n", + "df.columns = df.columns.str.replace(' ', '_')\n", + "df.rename(columns={'hogwarts_house': 'house'}, inplace=True)\n", + "df.head()" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "<matplotlib.collections.PathCollection at 0x7f9f1e8610f0>" + "<seaborn.axisgrid.PairGrid at 0x7f93b0d82b38>" ] }, - "execution_count": 23, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAD8CAYAAACCRVh7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJztvXt8XNV96PtdljWgB7H1QvFDtixZ2JHzMQYEuMbGMTI0bn1M2huTpj0HH05bw21DfRJOb+OW05SEftKeHEKdnN4Gp7fUnJ487Jyb4kuhBDsOWDg2yERQrGAkWTKyTMxoJDtIIxhLXvePPWtrzZ49oxm9Zkb6fT8ff0baz7X3WOu3fm+ltUYQBEEQ/JiT6QEIgiAI2YsICUEQBCEhIiQEQRCEhIiQEARBEBIiQkIQBEFIiAgJQRAEISEiJARBEISEiJAQBEEQEiJCQhAEQUjI3EwPYKKUl5fr6urqTA9DEAQhZygvL+f5559/Xmv9ybGOzXkhUV1dTXNzc6aHIQiCkFMopcpTOU7MTYIgCEJCREgIgiAICREhIQiCICREhIQgCIKQEBESgiAIQkJESAiCIAgJESEhCIIgJESEhCAIAtA3GOGJFzvoG4xkeihZhQgJQRAE4EBzN1997i0ONHdneihZRc5nXAuCIEwG2xuqYj4FBxESgiAIQGlRgPs31mZ6GFmHmJsEQRCEhIiQEARBEBIyKUJCKfUPSqn3lFJvWttKlVIvKKXaop8l0e1KKfUNpVS7UuoNpdSN1jk7ose3KaV2TMbYBEEQhPEzWZrEPwLeuuRfBA5rreuAw9HfAbYAddF/O4G/A0eoAF8CbgVuAb5kBIsgCIKQGSZFSGitXwL6PJvvBvZFf94HfMra/pR2OA7MV0otAH4VeEFr3ae17gdeIF7wCIIgpIXkP0yMqfRJVGqt343+/AugMvrzIsAORD4X3ZZouyAIwriR/IeJMS0hsFprrZTSk3U9pdROHFMVS5YsmazLCoIwA5H8h4kxlZrEhagZiejne9HtPYD9bS2Obku0PQ6t9V6tdYPWuqGiomLSBy4IwszB5D+UFgUyPZScZCqFxEHARCjtAJ62tt8bjXJaC1yKmqWeB+5SSpVEHdZ3RbcJgiAIGWKyQmC/C/wUWKGUOqeU+l3gr4A7lVJtwObo7wDPAmeAduDbwB8AaK37gK8Ar0b/fTm6TRAEYVoQJ3c8k+KT0Fp/NsGuRp9jNfCHCa7zD8A/TMaYBEEQ0sU4uQEp0RFFMq4FQZi1eDWH7Q1V7N6yMsbJbY7pCA7MSi1DCvwJgjBr8WoOfkX+zDHHz4Q4cjroHjtbECEhCMKsJZXwWLNvc30la2suTCiUtm8wwoHmbrY3VOVMtJUICUEQZi2plAe3j6ndWDyh++Wiz0OEhCAIwhhMlgaQi4l94rgWBEEYg8kq7ZGLiX0iJARBmDUky4NIts8v6mm2IEJCEIRZQzKNINm+XNQAJgvxSQiCMGtI5hPIRX/BdKCcBOjcpaGhQTc3N2d6GIIgCDmFUuqk1rphrOPE3CQIwowmnXpMUrspHhESgiDMaNKJTBrr2LGEyEwUMuKTEARhRpOOr8HOrn7ixY64vAi7RMdj96xxt5njvPtngqNbNAlBEGY06UQmmWMPtV7w1Si2N1SxaUUFR04H2Xesk4f2t8QcZ++fKe1SRUgIgjDr6QgOcN+Tr9ARHAAcTWLTigo211fGHFdaFODhrfVsWlHBUOQKR04H2bSiwtVASosCPHbPGnY1LiccGXHNTrlshhIhIQjCjCaVCfrRZ1o5cjrIo8+0AnCo9QJHTgc51Hoh7lizryCQx+4tK+PMSqVFAQoDc9lzuM3VJsabsZ0NwkV8EoIgzGj8iup5azE9vLUeaI1+OprE8TOhOE0CYn0cpUUBdyK3/RdeP8j2hirCkWFXu0jVV5ENBQFFSAiCMKPxc1x7HcwlhQHW1pRRUuhM3kZbMILjUOsFVwh4K8eaa+1v7mbvvQ3UVhTHHWO0i68+9xaFgbyUJ/xsSPATISEIwozGrxz49oYqt4mQMQHZK3Z7P7QmbTa0vaGK/c3ddAQHefSZVp687xbfcYxnwk+llPlUI0JCEIRZh3EwG5NTfzjC0bZeevrDPP7C2+xYV+3u31xfyerF5wlHhhOaiho/VslHP3LJNVcluqc94XcEB3j0GUdTqa2YWJ+KqUSEhCAIsxJ70j7Q3E1Tey9N7c4+YxIy+wsDeVFT0Vzf9qZ7XzrD7i0r05rsjbMcEmsf2YAICUEQZjxjNQ1yHMsjDEWGKQjMdU1C5jzjwJ7MwoBeZ3m2IkJCEIQZQTJB4I0S8h5bWhTg83deF3OtJ17sIBwZYc/hNvc8P8brN6itKM5qDcIgeRKCIMwIkuUibK6v5LbaMnr6h3j8hdPsO9aZNG/B7B+KDMc0GzLCoyM4kPH8helCNAlBEGYEycw+h1ov8HJHiJc7QgDsaqyLyYq2NY++wQgnz14EoMDjg7BDZ/0iniarF3Y2IUJCEIScxp6YE5l9TDLb0OUrFOTPYce66hgTVGEgzxUuD+1voam9l00rKtixrjruOuBoJmtrLsQJpFST33JJmIiQEAQhp0llYnZ8DivcyRlGJ/xwZNg9H3DrMT28tZ4Dzd3cXF3KN3/c5oaqbm+oSjjBp+rEzoZM6lQRISEIQk6TTnSRmZzDkZEY7aEwMJfN9ZUcbOlh5+01FOTP4WBLD3sOt1NdVkhXKMzlkVP80+/dOqYTPJVJ32/M2apdiJAQBCGnSWVi9oay2tqDCX/9839+k5c7Qm6p712NdezespKe/jBdoXeoX3ANfYMRwpERdjUudyf48ZqY/PItslG7ECEhCMKMZ1SDGAYUQ5ERdjXWuaYjE+ZqzExray6wub6SQ60X2HHbMhaVFLK5vpKH9rdw5HSQ3VtWAvDEix0xORTphOF6yYY6TX6IkBAEIefwTsa2pmAX4zOM+h9G8x52NS6ntCjA5vpKjrb1UlNeRElRPiWFAbY3VPFH3/0ZTe29hCMjfP7O63jixQ6OnA5SW1HEzdWlrsA42hbkpqUlQGJB4Gggw65g8iMb6jT5IUJCEISsIB2bvHcy9oamhiPDFEYzp+1rbVuzkJNn+2lq72UocsVNmGtq7yU/T/HU8SAnz15Ea+2Gy4IGYosCfvPHbRw5HWRpaSFN7SGa2p1jhyJXWL+8LK7EuKOttLN7y8qs8jekgggJQRCygnRs8rZppm8wQmggwq3LSqgqKWRXYx2ArxAB+MZnb+BAczfhyAhffe4t7v2VpdRWFLHjV6oB3PyHksJ8/t3qBexYt8y97+rF81m9eB7b1ixi9eLzHD8T4mxfmFuXlXDy7EWa2nsBJy+jduNoHadsNSWlgggJQRCygnQm0tKigOtPCEeG2Xv0DAAnOvtdv0JhII/N9ZVxfgNj1ukbjFAYyOOlt4N0BAf59tEzNFSXUFVayMGW8/SHL9PdP+Su/I3vwsmfWEZhII8TnX1sWlHB6sXz2HO4nfXLy7hpaUncM2SrKSkVREgIgpAVpDuRGu3AyZ6uYygyTOu773PkdJC1NRe4f2MtT7zYEdNcyNtm9P6 |
