diff --git a/compare_datasets_10k.png b/compare_datasets_10k.png
new file mode 100644
index 0000000..4b887e6
Binary files /dev/null and b/compare_datasets_10k.png differ
diff --git a/compare_datasets_2k.png b/compare_datasets_2k.png
new file mode 100644
index 0000000..a9b54eb
Binary files /dev/null and b/compare_datasets_2k.png differ
diff --git a/compare_models_10k.png b/compare_models_10k.png
new file mode 100644
index 0000000..40fa4ef
Binary files /dev/null and b/compare_models_10k.png differ
diff --git a/compare_models_10k.py b/compare_models_10k.py
index 1f28b02..d1bb368 100644
--- a/compare_models_10k.py
+++ b/compare_models_10k.py
@@ -120,13 +120,14 @@ datasets = [
 ]
 estimators = {
     #"RidgeClassifier": RidgeClassifier(random_state=0, max_iter=max_iter),
-    #"PassiveAggressiveClassifier": PassiveAggressiveClassifier(random_state=0, max_iter=max_iter),
+    "PassiveAggressiveClassifier": PassiveAggressiveClassifier(random_state=0, max_iter=max_iter),
     #"Perceptron": Perceptron(random_state=0, max_iter=max_iter),
     #"SGDClassifier": SGDClassifier(random_state=0, max_iter=max_iter),
     #"NearestCentroid": NearestCentroid(),
-    #"LinearSVC": LinearSVC(random_state=0, max_iter=max_iter),
+    "LinearSVC": LinearSVC(random_state=0, max_iter=max_iter),
+    #"AdaBoost": AdaBoostClassifier(),
     #"GradientBoostingClassifier": GradientBoostingClassifier(random_state=0),
-    "HistGradientBoostingClassifier": HistGradientBoostingClassifier(random_state=0, max_iter=max_iter),
+    #"HistGradientBoostingClassifier": HistGradientBoostingClassifier(random_state=0, max_iter=max_iter),
     #"LinearDiscriminantAnalysis": LinearDiscriminantAnalysis(),
     #"MLPClassifier": MLPClassifier(random_state=0, max_iter=int(max_iter/20), early_stopping=True),
 }
diff --git a/compare_models_10k_3.png b/compare_models_10k_3.png
new file mode 100644
index 0000000..37470e0
Binary files /dev/null and b/compare_models_10k_3.png differ
diff --git a/compare_models_2k.png b/compare_models_2k.png
index 5e46552..15b1fe8 100644
Binary files a/compare_models_2k.png and b/compare_models_2k.png differ
diff --git a/notebook.ipynb b/notebook.ipynb
index aede9bb..8d439c5 100644
--- a/notebook.ipynb
+++ b/notebook.ipynb
@@ -16,7 +16,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": null,
    "id": "3116b75f",
    "metadata": {
     "jupyter": {
@@ -90,7 +90,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
    "id": "d159117377f3633c",
    "metadata": {},
    "outputs": [],
@@ -113,7 +113,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "id": "986fbb31a7ae0d8b",
    "metadata": {
     "jupyter": {
@@ -170,7 +170,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": null,
    "id": "44239f6b7fd23cde",
    "metadata": {},
    "outputs": [],
@@ -197,7 +197,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": null,
    "id": "ebc5a24e9bc87fdd",
    "metadata": {},
    "outputs": [
@@ -233,7 +233,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": null,
    "id": "d2c3527a5fc876bf",
    "metadata": {},
    "outputs": [
@@ -285,7 +285,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": null,
    "id": "4e8b407c",
    "metadata": {},
    "outputs": [
@@ -330,7 +330,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": null,
    "id": "86d9da42f4df8e49",
    "metadata": {},
    "outputs": [],
@@ -348,12 +348,12 @@
     "\n",
     "####  Removing unpredicatble Datapoints\n",
     "\n",
-    "Some genres have too little datapoints to be predictable. The 10k Dataset has 12 Classes that have less than 5 Datapoints, usually only 1 oder 2. These have too big of a probability that they will fall into only the train or test data and therefore will be removed.  "
+    "Some genres have too few datapoints to be predictable. The 10k Dataset has 14 Classes that have fewer than 10 Datapoints, usually only 1 to 4. For these, the probability that all of their datapoints fall into only the train or only the test data is too high, so they will be removed."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": null,
    "id": "e1bc73d4",
    "metadata": {},
    "outputs": [
@@ -368,7 +368,7 @@
    ],
    "source": [
     "# remove genres that have less than min_entries entries -> probability of broken split to big\n",
-    "mask = (y == 1).sum() >= 5\n",
+    "mask = (y == 1).sum() >= 10\n",
     "print(\"Before\" + str(y.shape))\n",
     "y_prep = y.loc[:, mask]\n",
     "print(\"After\" + str(y_prep.shape))"
@@ -385,7 +385,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": null,
    "id": "4919bf1b37d171a7",
    "metadata": {},
    "outputs": [
@@ -417,7 +417,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": null,
    "id": "cfbf3787",
    "metadata": {
     "jupyter": {
@@ -441,17 +441,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": null,
    "id": "0b0a46a4",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "1905"
+       "99"
       ]
      },
-     "execution_count": 30,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -502,49 +502,47 @@
    "source": [
     "# Excursion: Choosing a classification Model\n",
     "``sklearn`` has many different classification Models to choose from, but we only have limited time and computing power.\n",
-    "As such, we tested many different models on the 2k Dataset and chose the 5 best performing ones for the big dataset.\n",
+    "As such, we tested many different models on the small dataset and chose the best performing ones for the big dataset.\n",
     "\n",
     "### Initial Comparison\n",
     "We won't put the comparison script in this notebook, but you can find it in the ``compare_models_2k.py`` file and try it out yourself.\n",
     "There were some rules as a baseline for comparison:\n",
     "- All Hyperparameters are set to default\n",
-    "- All iteration limits are set to 3000 (exception: MLPClassifier with 300, where i-limit are epochs instead of iterations )\n",
+    "- All iteration limits are set to 3000 (exception: MLPClassifier with 300, where i-limit are epochs instead of iterations)\n",
     "- All ``random_state``s are set to 0\n",
     "\n",
     "Running all models with that configuration yields the following weighted F1-Scores (results as seen in the ``games_march2025_cleaned_2k_i3k`` folder): \n",
     "\n",
     "![Comparison Image 2k](./compare_models_2k.png)\n",
     "\n",
-    "If we also compare Micro/Macro values, we see that all models have a much lower Macro-F1 than Micro/Weighted-F1. That is because the 2k Dataset does not contain enough datapoints for every class (test data for 2 classes is 0), so we should proceed to the 10k Dataset before making major choices.\n",
+    "If we also compare Micro/Macro values, we see that all models have a much lower Macro-F1 than Micro/Weighted-F1. That is because the Dataset does not contain enough datapoints for every class (in the 2k dataset, 2 classes have no test datapoints at all), so we should proceed to the 10k Dataset.\n",
     "\n",
     "![Comparison Image 2k Micro/Macro/Weighted](./compare_models_2k_3.png)\n",
     "\n",
     "The 10 best performing models which will run on the 10k Dataset with the same rules as before:\n",
-    "1. NearestCentroid\n",
+    "1. PassiveAggressiveClassifier \n",
     "2. Perceptron\n",
-    "3. PassiveAggressiveClassifier\n",
-    "4. LinearSVC\n",
-    "5. SDGClassifer\n",
-    "6. HistGradientBoostingClassifier\n",
+    "3. LinearSVC\n",
+    "4. SGDClassifier\n",
+    "5. HistGradientBoostingClassifier\n",
+    "6. NearestCentroid\n",
     "7. MLPClassifier\n",
-    "8. RidgeClassifier\n",
-    "9. GradientBoostingClassifier\n",
-    "10. LinearDiscriminationAnalysis\n",
+    "8. GradientBoostingClassifier \n",
+    "9. RidgeClassifier\n",
+    "10. AdaBoostClassifier (because of an evaluation mistake, we used LinearDiscriminantAnalysis instead)\n",
+    "\n",
+    "That gave us the following results:\n",
     "\n",
     "![Comparison Image 10k](./compare_models_10k.png)\n",
+    "![Comparison Image 10k Micro/Macro/Weighted](./compare_models_10k_3.png)\n",
     "\n",
-    "We can also compare these models between datasets, to see if a bigger dataset always improves the performance.\n",
+    "The top 5 are the same, with the sole exception of Perceptron, which falls behind the RidgeClassifier.\n",
+    "When comparing these models between datasets, it is evident that a bigger dataset yields better performance (at a considerably higher compute and time cost). Only NearestCentroid lost performance on the bigger dataset.\n",
     "\n",
-    "![Comparison Image between 2k and 10k](./compare_models_2k_10k.png)\n",
+    "![Comparison Image between 2k and 10k](./compare_datasets_2k.png)\n",
+    "![Comparison Image between 2k and 10k, only 10k Models](./compare_datasets_10k.png)\n",
     "\n",
-    "The final contenders are:\n",
-    "1.\n",
-    "2.\n",
-    "3.\n",
-    "4.\n",
-    "5.\n",
-    "\n",
-    "..."
+    "The final contenders are LinearSVC and PassiveAggressiveClassifier. Ideally we would compare them against each other using k-fold cross validation with different hyperparameters, but since training the models on the dataset takes a lot of time and puts a big strain on our computers, we will stop here and use the LinearSVC classifier."
    ]
   },
   {
@@ -553,23 +551,27 @@
    "metadata": {},
    "source": [
     "## Model Selection\n",
-    "**TODO Deciding which model to use for this task**\n",
     "\n",
-    "As a game can have multiple genres, our Model(s) has to be capable of multi-label-classification. sklearn's ``MultiOutputClassifier`` can do this. As a backend for ``MultiOutputClassifier`` we use ``LogisticRegression``"
+    "As a game can have multiple genres, our model has to be capable of multi-label classification. sklearn's ``MultiOutputClassifier`` can do this. As the base estimator for ``MultiOutputClassifier`` we use ``LinearSVC``."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": null,
    "id": "8c1d72c4532bd509",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": []
+    }
+   ],
    "source": [
-    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.svm import LinearSVC\n",
     "from sklearn.multioutput import MultiOutputClassifier\n",
     "\n",
-    "# n_jobs=1 since there seems to be some multithreading join issue in sklearn (or my pc is to bad)\n",
-    "multi_target_clf = MultiOutputClassifier(LogisticRegression(max_iter=1337, random_state=0), n_jobs=1)\n",
+    "multi_target_clf = MultiOutputClassifier(LinearSVC(max_iter=1337, random_state=0), n_jobs=1)\n",
     "\n",
     "multi_target_clf.fit(X_train, y_train)\n",
     "\n",
@@ -587,7 +589,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": null,
    "id": "e2ebea6945193e07",
    "metadata": {},
    "outputs": [
@@ -597,23 +599,23 @@
      "text": [
       "              precision    recall  f1-score   support\n",
       "\n",
-      "           0       0.78      0.91      0.84       300\n",
-      "           1       0.78      0.62      0.69       216\n",
-      "           2       1.00      0.03      0.07        86\n",
-      "           3       0.00      0.00      0.00        46\n",
-      "           4       1.00      0.04      0.07        83\n",
-      "           5       0.79      0.81      0.80       245\n",
-      "           6       0.00      0.00      0.00        42\n",
-      "           7       0.90      0.34      0.49       127\n",
-      "           8       0.00      0.00      0.00        12\n",
-      "           9       0.89      0.25      0.39       127\n",
-      "          10       0.00      0.00      0.00        14\n",
-      "          11       0.88      0.14      0.24       106\n",
+      "           0       0.84      0.86      0.85       300\n",
+      "           1       0.74      0.63      0.68       216\n",
+      "           2       0.77      0.31      0.45        86\n",
+      "           3       0.50      0.04      0.08        46\n",
+      "           4       0.69      0.33      0.44        83\n",
+      "           5       0.79      0.80      0.79       245\n",
+      "           6       0.69      0.26      0.38        42\n",
+      "           7       0.74      0.62      0.68       127\n",
+      "           8       1.00      0.67      0.80        12\n",
+      "           9       0.80      0.57      0.67       127\n",
+      "          10       1.00      0.50      0.67        14\n",
+      "          11       0.79      0.46      0.58       106\n",
       "\n",
-      "   micro avg       0.79      0.50      0.61      1404\n",
-      "   macro avg       0.58      0.26      0.30      1404\n",
-      "weighted avg       0.77      0.50      0.53      1404\n",
-      " samples avg       0.77      0.56      0.60      1404\n",
+      "   micro avg       0.79      0.62      0.69      1404\n",
+      "   macro avg       0.78      0.51      0.59      1404\n",
+      "weighted avg       0.77      0.62      0.67      1404\n",
+      " samples avg       0.80      0.68      0.70      1404\n",
       "\n"
      ]
     }
@@ -633,15 +635,6 @@
     "**TODO optimize the model based on the test results**"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "79b20645",
-   "metadata": {},
-   "source": [
-    "# Validation\n",
-    "**TODO Predict actual values**"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "3b709fb7",