mismades were taken

2025-08-18 14:14:34 +02:00
parent 3975cdf7e8
commit 28df88c0bf
38 changed files with 70 additions and 878 deletions
--- a/compare_dataset_sizes.png
+++ b/compare_dataset_sizes.png
--- a/compare_dataset_sizes.py
+++ b/compare_dataset_sizes.py
@@ -31,7 +31,7 @@ plt.bar(x, [results["cleaned_2k"][m] for m in models], width=0.25, label="cleane
 plt.bar([i + 0.25 for i in x], [results["cleaned_10k"][m] for m in models], width=0.25, label="cleaned_10k")

 plt.xticks(x, models, rotation=45)
-plt.ylabel("Weighted F1-Score")
+plt.ylabel("F1-Score")
 plt.title("Model Performance across Datasets")
 plt.legend()
 plt.tight_layout()
--- a/compare_models_2k.png
+++ b/compare_models_2k.png
--- a/games_march2025_cleaned/BernoulliNB.txt
+++ b/games_march2025_cleaned/BernoulliNB.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.75      0.90      0.82       300
-           1       0.72      0.68      0.70       216
-           2       0.50      0.08      0.14        86
-           3       0.27      0.07      0.11        46
-           4       0.40      0.07      0.12        83
-           5       0.00      0.00      0.00         0
-           6       0.77      0.82      0.79       245
-           7       0.33      0.10      0.15        42
-           8       0.67      0.40      0.50       127
-           9       0.00      0.00      0.00        12
-          10       0.71      0.37      0.49       127
-          11       0.00      0.00      0.00        14
-          12       0.49      0.31      0.38       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.70      0.55      0.62      1404
-   macro avg       0.40      0.27      0.30      1404
-weighted avg       0.64      0.55      0.56      1404
- samples avg       0.73      0.59      0.61      1404
--- a/games_march2025_cleaned/DecisionTreeClassifier.txt
+++ b/games_march2025_cleaned/DecisionTreeClassifier.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.76      0.73      0.75       300
-           1       0.56      0.53      0.54       216
-           2       0.36      0.33      0.34        86
-           3       0.33      0.26      0.29        46
-           4       0.40      0.46      0.43        83
-           5       0.00      0.00      0.00         0
-           6       0.65      0.61      0.63       245
-           7       0.39      0.40      0.40        42
-           8       0.59      0.57      0.58       127
-           9       0.60      0.25      0.35        12
-          10       0.56      0.51      0.53       127
-          11       0.39      0.50      0.44        14
-          12       0.52      0.49      0.50       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.58      0.55      0.57      1404
-   macro avg       0.44      0.40      0.41      1404
-weighted avg       0.58      0.55      0.57      1404
- samples avg       0.59      0.59      0.55      1404
--- a/games_march2025_cleaned/GaussianNB.txt
+++ b/games_march2025_cleaned/GaussianNB.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.76      0.80      0.78       300
-           1       0.62      0.51      0.56       216
-           2       0.63      0.14      0.23        86
-           3       0.17      0.02      0.04        46
-           4       0.42      0.10      0.16        83
-           5       0.00      0.00      0.00         0
-           6       0.68      0.66      0.67       245
-           7       0.56      0.12      0.20        42
-           8       0.55      0.33      0.41       127
-           9       0.67      0.17      0.27        12
-          10       0.65      0.31      0.42       127
-          11       1.00      0.14      0.25        14
-          12       0.53      0.29      0.38       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.66      0.47      0.55      1404
-   macro avg       0.52      0.26      0.31      1404
-weighted avg       0.62      0.47      0.51      1404
- samples avg       0.67      0.53      0.55      1404
--- a/games_march2025_cleaned/GradientBoostingClassifier.txt
+++ b/games_march2025_cleaned/GradientBoostingClassifier.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.85      0.80      0.83       300
-           1       0.77      0.61      0.68       216
-           2       0.55      0.13      0.21        86
-           3       0.42      0.11      0.17        46
-           4       0.68      0.33      0.44        83
-           5       0.00      0.00      0.00         0
-           6       0.71      0.76      0.74       245
-           7       0.61      0.26      0.37        42
-           8       0.81      0.50      0.61       127
-           9       0.75      0.25      0.38        12
-          10       0.81      0.54      0.65       127
-          11       0.40      0.43      0.41        14
-          12       0.69      0.42      0.53       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.76      0.57      0.65      1404
-   macro avg       0.57      0.37      0.43      1404
-weighted avg       0.74      0.57      0.63      1404
- samples avg       0.76      0.63      0.65      1404
--- a/games_march2025_cleaned/LinearSVC-i5000.txt
+++ b/games_march2025_cleaned/LinearSVC-i5000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.85      0.87      0.86       300
-           1       0.76      0.66      0.70       216
-           2       0.77      0.20      0.31        86
-           3       0.00      0.00      0.00        46
-           4       0.76      0.27      0.39        83
-           5       0.00      0.00      0.00         0
-           6       0.78      0.81      0.79       245
-           7       0.89      0.19      0.31        42
-           8       0.77      0.60      0.67       127
-           9       1.00      0.58      0.74        12
-          10       0.85      0.54      0.66       127
-          11       1.00      0.29      0.44        14
-          12       0.82      0.42      0.56       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.80      0.61      0.69      1404
-   macro avg       0.66      0.39      0.46      1404
-weighted avg       0.78      0.61      0.66      1404
- samples avg       0.81      0.67      0.69      1404
--- a/games_march2025_cleaned/LogisticRegression-i1000.txt
+++ b/games_march2025_cleaned/LogisticRegression-i1000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.78      0.91      0.84       300
-           1       0.78      0.62      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.04      0.07        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.81      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.90      0.34      0.49       127
-           9       0.00      0.00      0.00        12
-          10       0.89      0.25      0.39       127
-          11       0.00      0.00      0.00        14
-          12       0.88      0.14      0.24       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.50      0.61      1404
-   macro avg       0.50      0.22      0.26      1404
-weighted avg       0.77      0.50      0.53      1404
- samples avg       0.77      0.56      0.60      1404
--- a/games_march2025_cleaned/LogisticRegression-i10000.txt
+++ b/games_march2025_cleaned/LogisticRegression-i10000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.78      0.91      0.84       300
-           1       0.78      0.62      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.04      0.07        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.81      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.90      0.34      0.49       127
-           9       0.00      0.00      0.00        12
-          10       0.89      0.25      0.39       127
-          11       0.00      0.00      0.00        14
-          12       0.88      0.14      0.24       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.50      0.61      1404
-   macro avg       0.50      0.22      0.26      1404
-weighted avg       0.77      0.50      0.53      1404
- samples avg       0.77      0.56      0.60      1404
--- a/games_march2025_cleaned/MLPClassifier-i10000.txt
+++ b/games_march2025_cleaned/MLPClassifier-i10000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.84      0.85      0.84       300
-           1       0.73      0.67      0.70       216
-           2       0.74      0.30      0.43        86
-           3       0.50      0.02      0.04        46
-           4       0.69      0.24      0.36        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.79      0.79       245
-           7       0.86      0.14      0.24        42
-           8       0.76      0.63      0.69       127
-           9       1.00      0.33      0.50        12
-          10       0.81      0.52      0.63       127
-          11       1.00      0.14      0.25        14
-          12       0.75      0.41      0.53       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.60      0.68      1404
-   macro avg       0.68      0.36      0.43      1404
-weighted avg       0.78      0.60      0.65      1404
- samples avg       0.80      0.66      0.68      1404
--- a/games_march2025_cleaned/MultinomialNB.txt
+++ b/games_march2025_cleaned/MultinomialNB.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.64      0.99      0.78       300
-           1       0.85      0.24      0.37       216
-           2       0.60      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       0.80      0.05      0.09        83
-           5       0.00      0.00      0.00         0
-           6       0.78      0.80      0.79       245
-           7       0.40      0.05      0.09        42
-           8       1.00      0.04      0.08       127
-           9       0.00      0.00      0.00        12
-          10       0.20      0.01      0.02       127
-          11       0.00      0.00      0.00        14
-          12       1.00      0.05      0.09       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.69      0.40      0.51      1404
-   macro avg       0.45      0.16      0.17      1404
-weighted avg       0.68      0.40      0.39      1404
- samples avg       0.70      0.44      0.50      1404
--- a/games_march2025_cleaned/RandomForestClassifier.txt
+++ b/games_march2025_cleaned/RandomForestClassifier.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.80      0.88      0.84       300
-           1       0.78      0.55      0.64       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.06      0.11        83
-           5       0.00      0.00      0.00         0
-           6       0.74      0.78      0.76       245
-           7       0.00      0.00      0.00        42
-           8       0.84      0.24      0.38       127
-           9       0.00      0.00      0.00        12
-          10       0.91      0.24      0.38       127
-          11       1.00      0.14      0.25        14
-          12       1.00      0.25      0.39       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.48      0.59      1404
-   macro avg       0.58      0.23      0.27      1404
-weighted avg       0.78      0.48      0.52      1404
- samples avg       0.77      0.54      0.60      1404
--- a/games_march2025_cleaned/SVC-RBF-i10000.txt
+++ b/games_march2025_cleaned/SVC-RBF-i10000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.81      0.90      0.85       300
-           1       0.76      0.63      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.05      0.09        83
-           5       0.00      0.00      0.00         0
-           6       0.77      0.83      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.84      0.40      0.54       127
-           9       1.00      0.17      0.29        12
-          10       0.90      0.34      0.49       127
-          11       1.00      0.14      0.25        14
-          12       0.92      0.21      0.34       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.80      0.53      0.63      1404
-   macro avg       0.64      0.26      0.32      1404
-weighted avg       0.79      0.53      0.56      1404
- samples avg       0.79      0.59      0.63      1404
--- a/games_march2025_cleaned_10k/BernoulliNB.txt
+++ b/games_march2025_cleaned_10k/BernoulliNB.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.75      0.90      0.82       300
-           1       0.72      0.68      0.70       216
-           2       0.50      0.08      0.14        86
-           3       0.27      0.07      0.11        46
-           4       0.40      0.07      0.12        83
-           5       0.00      0.00      0.00         0
-           6       0.77      0.82      0.79       245
-           7       0.33      0.10      0.15        42
-           8       0.67      0.40      0.50       127
-           9       0.00      0.00      0.00        12
-          10       0.71      0.37      0.49       127
-          11       0.00      0.00      0.00        14
-          12       0.49      0.31      0.38       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.70      0.55      0.62      1404
-   macro avg       0.40      0.27      0.30      1404
-weighted avg       0.64      0.55      0.56      1404
- samples avg       0.73      0.59      0.61      1404
--- a/games_march2025_cleaned_10k/DecisionTreeClassifier.txt
+++ b/games_march2025_cleaned_10k/DecisionTreeClassifier.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.76      0.73      0.75       300
-           1       0.56      0.53      0.54       216
-           2       0.36      0.33      0.34        86
-           3       0.33      0.26      0.29        46
-           4       0.40      0.46      0.43        83
-           5       0.00      0.00      0.00         0
-           6       0.65      0.61      0.63       245
-           7       0.39      0.40      0.40        42
-           8       0.59      0.57      0.58       127
-           9       0.60      0.25      0.35        12
-          10       0.56      0.51      0.53       127
-          11       0.39      0.50      0.44        14
-          12       0.52      0.49      0.50       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.58      0.55      0.57      1404
-   macro avg       0.44      0.40      0.41      1404
-weighted avg       0.58      0.55      0.57      1404
- samples avg       0.59      0.59      0.55      1404
--- a/games_march2025_cleaned_10k/GaussianNB.txt
+++ b/games_march2025_cleaned_10k/GaussianNB.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.76      0.80      0.78       300
-           1       0.62      0.51      0.56       216
-           2       0.63      0.14      0.23        86
-           3       0.17      0.02      0.04        46
-           4       0.42      0.10      0.16        83
-           5       0.00      0.00      0.00         0
-           6       0.68      0.66      0.67       245
-           7       0.56      0.12      0.20        42
-           8       0.55      0.33      0.41       127
-           9       0.67      0.17      0.27        12
-          10       0.65      0.31      0.42       127
-          11       1.00      0.14      0.25        14
-          12       0.53      0.29      0.38       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.66      0.47      0.55      1404
-   macro avg       0.52      0.26      0.31      1404
-weighted avg       0.62      0.47      0.51      1404
- samples avg       0.67      0.53      0.55      1404
--- a/games_march2025_cleaned_10k/GradientBoostingClassifier.txt
+++ b/games_march2025_cleaned_10k/GradientBoostingClassifier.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.85      0.80      0.83       300
-           1       0.77      0.61      0.68       216
-           2       0.55      0.13      0.21        86
-           3       0.42      0.11      0.17        46
-           4       0.68      0.33      0.44        83
-           5       0.00      0.00      0.00         0
-           6       0.71      0.76      0.74       245
-           7       0.61      0.26      0.37        42
-           8       0.81      0.50      0.61       127
-           9       0.75      0.25      0.38        12
-          10       0.81      0.54      0.65       127
-          11       0.40      0.43      0.41        14
-          12       0.69      0.42      0.53       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.76      0.57      0.65      1404
-   macro avg       0.57      0.37      0.43      1404
-weighted avg       0.74      0.57      0.63      1404
- samples avg       0.76      0.63      0.65      1404
--- a/games_march2025_cleaned_10k/LinearSVC-i5000.txt
+++ b/games_march2025_cleaned_10k/LinearSVC-i5000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.85      0.87      0.86       300
-           1       0.76      0.66      0.70       216
-           2       0.77      0.20      0.31        86
-           3       0.00      0.00      0.00        46
-           4       0.76      0.27      0.39        83
-           5       0.00      0.00      0.00         0
-           6       0.78      0.81      0.79       245
-           7       0.89      0.19      0.31        42
-           8       0.77      0.60      0.67       127
-           9       1.00      0.58      0.74        12
-          10       0.85      0.54      0.66       127
-          11       1.00      0.29      0.44        14
-          12       0.82      0.42      0.56       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.80      0.61      0.69      1404
-   macro avg       0.66      0.39      0.46      1404
-weighted avg       0.78      0.61      0.66      1404
- samples avg       0.81      0.67      0.69      1404
--- a/games_march2025_cleaned_10k/LogisticRegression-i1000.txt
+++ b/games_march2025_cleaned_10k/LogisticRegression-i1000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.78      0.91      0.84       300
-           1       0.78      0.62      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.04      0.07        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.81      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.90      0.34      0.49       127
-           9       0.00      0.00      0.00        12
-          10       0.89      0.25      0.39       127
-          11       0.00      0.00      0.00        14
-          12       0.88      0.14      0.24       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.50      0.61      1404
-   macro avg       0.50      0.22      0.26      1404
-weighted avg       0.77      0.50      0.53      1404
- samples avg       0.77      0.56      0.60      1404
--- a/games_march2025_cleaned_10k/LogisticRegression-i10000.txt
+++ b/games_march2025_cleaned_10k/LogisticRegression-i10000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.78      0.91      0.84       300
-           1       0.78      0.62      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.04      0.07        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.81      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.90      0.34      0.49       127
-           9       0.00      0.00      0.00        12
-          10       0.89      0.25      0.39       127
-          11       0.00      0.00      0.00        14
-          12       0.88      0.14      0.24       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.50      0.61      1404
-   macro avg       0.50      0.22      0.26      1404
-weighted avg       0.77      0.50      0.53      1404
- samples avg       0.77      0.56      0.60      1404
--- a/games_march2025_cleaned_10k/MLPClassifier-i10000.txt
+++ b/games_march2025_cleaned_10k/MLPClassifier-i10000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.84      0.85      0.84       300
-           1       0.73      0.67      0.70       216
-           2       0.74      0.30      0.43        86
-           3       0.50      0.02      0.04        46
-           4       0.69      0.24      0.36        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.79      0.79       245
-           7       0.86      0.14      0.24        42
-           8       0.76      0.63      0.69       127
-           9       1.00      0.33      0.50        12
-          10       0.81      0.52      0.63       127
-          11       1.00      0.14      0.25        14
-          12       0.75      0.41      0.53       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.60      0.68      1404
-   macro avg       0.68      0.36      0.43      1404
-weighted avg       0.78      0.60      0.65      1404
- samples avg       0.80      0.66      0.68      1404
--- a/games_march2025_cleaned_10k/MultinomialNB.txt
+++ b/games_march2025_cleaned_10k/MultinomialNB.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.64      0.99      0.78       300
-           1       0.85      0.24      0.37       216
-           2       0.60      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       0.80      0.05      0.09        83
-           5       0.00      0.00      0.00         0
-           6       0.78      0.80      0.79       245
-           7       0.40      0.05      0.09        42
-           8       1.00      0.04      0.08       127
-           9       0.00      0.00      0.00        12
-          10       0.20      0.01      0.02       127
-          11       0.00      0.00      0.00        14
-          12       1.00      0.05      0.09       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.69      0.40      0.51      1404
-   macro avg       0.45      0.16      0.17      1404
-weighted avg       0.68      0.40      0.39      1404
- samples avg       0.70      0.44      0.50      1404
--- a/games_march2025_cleaned_10k/RandomForestClassifier.txt
+++ b/games_march2025_cleaned_10k/RandomForestClassifier.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.80      0.88      0.84       300
-           1       0.78      0.55      0.64       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.06      0.11        83
-           5       0.00      0.00      0.00         0
-           6       0.74      0.78      0.76       245
-           7       0.00      0.00      0.00        42
-           8       0.84      0.24      0.38       127
-           9       0.00      0.00      0.00        12
-          10       0.91      0.24      0.38       127
-          11       1.00      0.14      0.25        14
-          12       1.00      0.25      0.39       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.48      0.59      1404
-   macro avg       0.58      0.23      0.27      1404
-weighted avg       0.78      0.48      0.52      1404
- samples avg       0.77      0.54      0.60      1404
--- a/games_march2025_cleaned_10k/SVC-RBF-i10000.txt
+++ b/games_march2025_cleaned_10k/SVC-RBF-i10000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.81      0.90      0.85       300
-           1       0.76      0.63      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.05      0.09        83
-           5       0.00      0.00      0.00         0
-           6       0.77      0.83      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.84      0.40      0.54       127
-           9       1.00      0.17      0.29        12
-          10       0.90      0.34      0.49       127
-          11       1.00      0.14      0.25        14
-          12       0.92      0.21      0.34       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.80      0.53      0.63      1404
-   macro avg       0.64      0.26      0.32      1404
-weighted avg       0.79      0.53      0.56      1404
- samples avg       0.79      0.59      0.63      1404
--- a/games_march2025_cleaned_2k/BernoulliNB.txt
+++ b/games_march2025_cleaned_2k/BernoulliNB.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.75      0.90      0.82       300
-           1       0.72      0.68      0.70       216
-           2       0.50      0.08      0.14        86
-           3       0.27      0.07      0.11        46
-           4       0.40      0.07      0.12        83
-           5       0.00      0.00      0.00         0
-           6       0.77      0.82      0.79       245
-           7       0.33      0.10      0.15        42
-           8       0.67      0.40      0.50       127
-           9       0.00      0.00      0.00        12
-          10       0.71      0.37      0.49       127
-          11       0.00      0.00      0.00        14
-          12       0.49      0.31      0.38       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.70      0.55      0.62      1404
-   macro avg       0.40      0.27      0.30      1404
-weighted avg       0.64      0.55      0.56      1404
- samples avg       0.73      0.59      0.61      1404
--- a/games_march2025_cleaned_2k/DecisionTreeClassifier.txt
+++ b/games_march2025_cleaned_2k/DecisionTreeClassifier.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.76      0.73      0.75       300
-           1       0.56      0.53      0.54       216
-           2       0.36      0.33      0.34        86
-           3       0.33      0.26      0.29        46
-           4       0.40      0.46      0.43        83
-           5       0.00      0.00      0.00         0
-           6       0.65      0.61      0.63       245
-           7       0.39      0.40      0.40        42
-           8       0.59      0.57      0.58       127
-           9       0.60      0.25      0.35        12
-          10       0.56      0.51      0.53       127
-          11       0.39      0.50      0.44        14
-          12       0.52      0.49      0.50       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.58      0.55      0.57      1404
-   macro avg       0.44      0.40      0.41      1404
-weighted avg       0.58      0.55      0.57      1404
- samples avg       0.59      0.59      0.55      1404
--- a/games_march2025_cleaned_2k/GaussianNB.txt
+++ b/games_march2025_cleaned_2k/GaussianNB.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.76      0.80      0.78       300
-           1       0.62      0.51      0.56       216
-           2       0.63      0.14      0.23        86
-           3       0.17      0.02      0.04        46
-           4       0.42      0.10      0.16        83
-           5       0.00      0.00      0.00         0
-           6       0.68      0.66      0.67       245
-           7       0.56      0.12      0.20        42
-           8       0.55      0.33      0.41       127
-           9       0.67      0.17      0.27        12
-          10       0.65      0.31      0.42       127
-          11       1.00      0.14      0.25        14
-          12       0.53      0.29      0.38       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.66      0.47      0.55      1404
-   macro avg       0.52      0.26      0.31      1404
-weighted avg       0.62      0.47      0.51      1404
- samples avg       0.67      0.53      0.55      1404
--- a/games_march2025_cleaned_2k/GradientBoostingClassifier.txt
+++ b/games_march2025_cleaned_2k/GradientBoostingClassifier.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.85      0.80      0.83       300
-           1       0.77      0.61      0.68       216
-           2       0.55      0.13      0.21        86
-           3       0.42      0.11      0.17        46
-           4       0.68      0.33      0.44        83
-           5       0.00      0.00      0.00         0
-           6       0.71      0.76      0.74       245
-           7       0.61      0.26      0.37        42
-           8       0.81      0.50      0.61       127
-           9       0.75      0.25      0.38        12
-          10       0.81      0.54      0.65       127
-          11       0.40      0.43      0.41        14
-          12       0.69      0.42      0.53       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.76      0.57      0.65      1404
-   macro avg       0.57      0.37      0.43      1404
-weighted avg       0.74      0.57      0.63      1404
- samples avg       0.76      0.63      0.65      1404
--- a/games_march2025_cleaned_2k/LinearSVC-i5000.txt
+++ b/games_march2025_cleaned_2k/LinearSVC-i5000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.85      0.87      0.86       300
-           1       0.76      0.66      0.70       216
-           2       0.77      0.20      0.31        86
-           3       0.00      0.00      0.00        46
-           4       0.76      0.27      0.39        83
-           5       0.00      0.00      0.00         0
-           6       0.78      0.81      0.79       245
-           7       0.89      0.19      0.31        42
-           8       0.77      0.60      0.67       127
-           9       1.00      0.58      0.74        12
-          10       0.85      0.54      0.66       127
-          11       1.00      0.29      0.44        14
-          12       0.82      0.42      0.56       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.80      0.61      0.69      1404
-   macro avg       0.66      0.39      0.46      1404
-weighted avg       0.78      0.61      0.66      1404
- samples avg       0.81      0.67      0.69      1404
--- a/games_march2025_cleaned_2k/LogisticRegression-i1000.txt
+++ b/games_march2025_cleaned_2k/LogisticRegression-i1000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.78      0.91      0.84       300
-           1       0.78      0.62      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.04      0.07        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.81      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.90      0.34      0.49       127
-           9       0.00      0.00      0.00        12
-          10       0.89      0.25      0.39       127
-          11       0.00      0.00      0.00        14
-          12       0.88      0.14      0.24       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.50      0.61      1404
-   macro avg       0.50      0.22      0.26      1404
-weighted avg       0.77      0.50      0.53      1404
- samples avg       0.77      0.56      0.60      1404
--- a/games_march2025_cleaned_2k/LogisticRegression-i10000.txt
+++ b/games_march2025_cleaned_2k/LogisticRegression-i10000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.78      0.91      0.84       300
-           1       0.78      0.62      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.04      0.07        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.81      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.90      0.34      0.49       127
-           9       0.00      0.00      0.00        12
-          10       0.89      0.25      0.39       127
-          11       0.00      0.00      0.00        14
-          12       0.88      0.14      0.24       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.50      0.61      1404
-   macro avg       0.50      0.22      0.26      1404
-weighted avg       0.77      0.50      0.53      1404
- samples avg       0.77      0.56      0.60      1404
--- a/games_march2025_cleaned_2k/MLPClassifier-i10000.txt
+++ b/games_march2025_cleaned_2k/MLPClassifier-i10000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.84      0.85      0.84       300
-           1       0.73      0.67      0.70       216
-           2       0.74      0.30      0.43        86
-           3       0.50      0.02      0.04        46
-           4       0.69      0.24      0.36        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.79      0.79       245
-           7       0.86      0.14      0.24        42
-           8       0.76      0.63      0.69       127
-           9       1.00      0.33      0.50        12
-          10       0.81      0.52      0.63       127
-          11       1.00      0.14      0.25        14
-          12       0.75      0.41      0.53       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.60      0.68      1404
-   macro avg       0.68      0.36      0.43      1404
-weighted avg       0.78      0.60      0.65      1404
- samples avg       0.80      0.66      0.68      1404
--- a/games_march2025_cleaned_2k/MultinomialNB.txt
+++ b/games_march2025_cleaned_2k/MultinomialNB.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.64      0.99      0.78       300
-           1       0.85      0.24      0.37       216
-           2       0.60      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       0.80      0.05      0.09        83
-           5       0.00      0.00      0.00         0
-           6       0.78      0.80      0.79       245
-           7       0.40      0.05      0.09        42
-           8       1.00      0.04      0.08       127
-           9       0.00      0.00      0.00        12
-          10       0.20      0.01      0.02       127
-          11       0.00      0.00      0.00        14
-          12       1.00      0.05      0.09       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.69      0.40      0.51      1404
-   macro avg       0.45      0.16      0.17      1404
-weighted avg       0.68      0.40      0.39      1404
- samples avg       0.70      0.44      0.50      1404
--- a/games_march2025_cleaned_2k/RandomForestClassifier.txt
+++ b/games_march2025_cleaned_2k/RandomForestClassifier.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.80      0.88      0.84       300
-           1       0.78      0.55      0.64       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.06      0.11        83
-           5       0.00      0.00      0.00         0
-           6       0.74      0.78      0.76       245
-           7       0.00      0.00      0.00        42
-           8       0.84      0.24      0.38       127
-           9       0.00      0.00      0.00        12
-          10       0.91      0.24      0.38       127
-          11       1.00      0.14      0.25        14
-          12       1.00      0.25      0.39       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.48      0.59      1404
-   macro avg       0.58      0.23      0.27      1404
-weighted avg       0.78      0.48      0.52      1404
- samples avg       0.77      0.54      0.60      1404
--- a/games_march2025_cleaned_2k/SVC-RBF-i10000.txt
+++ b/games_march2025_cleaned_2k/SVC-RBF-i10000.txt
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.81      0.90      0.85       300
-           1       0.76      0.63      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.05      0.09        83
-           5       0.00      0.00      0.00         0
-           6       0.77      0.83      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.84      0.40      0.54       127
-           9       1.00      0.17      0.29        12
-          10       0.90      0.34      0.49       127
-          11       1.00      0.14      0.25        14
-          12       0.92      0.21      0.34       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.80      0.53      0.63      1404
-   macro avg       0.64      0.26      0.32      1404
-weighted avg       0.79      0.53      0.56      1404
- samples avg       0.79      0.59      0.63      1404
--- a/generate_compare_dataset.py
+++ b/generate_compare_dataset.py
@@ -4,12 +4,8 @@ import pandas as pd
 from sklearn import set_config

 from sklearn.compose import ColumnTransformer
-from sklearn.preprocessing import FunctionTransformer
-
-from sklearn.preprocessing import MultiLabelBinarizer
+from sklearn.preprocessing import FunctionTransformer, MultiLabelBinarizer
 import ast
-
-
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.linear_model import LogisticRegression
 from sklearn.multioutput import MultiOutputClassifier
@@ -20,15 +16,19 @@ from sklearn.metrics import accuracy_score, classification_report
 from sklearn.svm import SVC, LinearSVC
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
-from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
-from sklearn.neighbors import KNeighborsClassifier
+from sklearn.linear_model import LogisticRegression, RidgeClassifier, PassiveAggressiveClassifier, Perceptron, SGDClassifier
+from sklearn.neighbors import KNeighborsClassifier, NearestCentroid, RadiusNeighborsClassifier
+from sklearn.svm import SVC
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier, VotingClassifier, StackingClassifier
+from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB, ComplementNB
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
+from sklearn.dummy import DummyClassifier
 from sklearn.neural_network import MLPClassifier

-
 set_config(transform_output="pandas") # dataframe supremacy
-
 def prepDataset(dataset): #returns X_train, X_test, y_train, y_test
-    dataset = pd.read_csv("./games_march2025_cleaned_2k.csv",sep=",")
+    dataset = pd.read_csv(dataset,sep=",")
    # desc, genres, tags
    column_transformer = ColumnTransformer([
            # merge all descriptions
@@ -39,9 +39,6 @@ def prepDataset(dataset): #returns X_train, X_test, y_train, y_test
        verbose_feature_names_out=False
    )
    dataset = column_transformer.fit_transform(dataset)
-
-
-
    #### SET MISSING VALUES
    print("SETMISS")
    # Setting missing numeric values to the mean
@@ -50,36 +47,26 @@ def prepDataset(dataset): #returns X_train, X_test, y_train, y_test
    dataset.fillna('', inplace=True)
    # Setting missing values in other columns to NaN
    dataset.dropna(inplace=True)
-
    ##### STRUCTURIZE GENRES to onehot
    #serialize array
    dataset['genres'] = dataset['genres'].map(lambda s: ast.literal_eval(s)) 
    #print(dataset['genres']) # in py but not yet onehotenc
-
    # MultiLabelBinarizer does onehotenc for arrays
    mlb_genres = MultiLabelBinarizer()
    genres_encoded = mlb_genres.fit_transform(dataset.pop('genres'))
    #genres_count = len(mlb_genres.classes_) # for multi-label classifiction later
-
    genres_df = pd.DataFrame(genres_encoded, columns=mlb_genres.classes_)
    #print(genres_df)
    #dataset = pd.concat([dataset, genres_df], axis=1)
    #print(dataset)
-
-
    #### convert text to bag of words
-
    ## Count vs Tfidf vectorizer
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(dataset['desc']) # matrix
    tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
    #print(tfidf_df)
-
-
    ##### MODEL
    print("MODEL")
-
-
    X = tfidf_df
    y = genres_df
    # cleanup datapoints that dont have a target value (all target columns are 0)
@@ -87,50 +74,63 @@ def prepDataset(dataset): #returns X_train, X_test, y_train, y_test
    #print((mask == False).sum()) #31 cases with all target columns 0
    X_clean = X[mask]
    y_clean = y[mask]
-
    # Split dataset
    return train_test_split(X_clean, y_clean, random_state=0)
-
 def comparison(X_train, X_test, y_train, y_test, estimator, jobs: int = 1): #returns class_report
    multi_target_clf = MultiOutputClassifier(estimator, n_jobs=jobs) # LogisticRegression(max_iter=1337, random_state=0)
-
    # model training
    multi_target_clf.fit(X_train, y_train)
-
    # predict against test data
    y_pred = multi_target_clf.predict(X_test)
    return classification_report(y_test, y_pred, zero_division=0.0)
-
 datasets = [
    'games_march2025_cleaned_2k.csv',
-    'games_march2025_cleaned_10k.csv',
-    'games_march2025_cleaned.csv'
+    #'games_march2025_cleaned_10k.csv',
+    #'games_march2025_cleaned.csv'
 ]

+max_iter = 3000  # <-- set your desired value here
+
 estimators = {
-    "LogisticRegression-i1000": LogisticRegression(max_iter=1000, random_state=0),
-    "LogisticRegression-i10000": LogisticRegression(max_iter=10000, random_state=0),
-    "LinearSVC-i5000": LinearSVC(max_iter=5000),
-    "SVC-RBF-i10000": SVC(kernel="rbf", max_iter=10000),
+    "LogisticRegression": LogisticRegression(random_state=0, max_iter=max_iter),
+    "RidgeClassifier": RidgeClassifier(random_state=0, max_iter=max_iter),
+    "PassiveAggressiveClassifier": PassiveAggressiveClassifier(random_state=0, max_iter=max_iter),
+    "Perceptron": Perceptron(random_state=0, max_iter=max_iter),
+    "SGDClassifier": SGDClassifier(random_state=0, max_iter=max_iter),
+    "KNeighborsClassifier": KNeighborsClassifier(),
+    "NearestCentroid": NearestCentroid(),
+    "RadiusNeighborsClassifier": RadiusNeighborsClassifier(),
+    "LinearSVC-i5000": LinearSVC(random_state=0, max_iter=max_iter),
+    "SVC": SVC(random_state=0, max_iter=max_iter),
    "DecisionTreeClassifier": DecisionTreeClassifier(random_state=0),
    "RandomForestClassifier": RandomForestClassifier(random_state=0),
+    "ExtraTreesClassifier": ExtraTreesClassifier(random_state=0),
+    "BaggingClassifier": BaggingClassifier(random_state=0),
+    "AdaBoostClassifier": AdaBoostClassifier(random_state=0),
    "GradientBoostingClassifier": GradientBoostingClassifier(random_state=0),
+    "HistGradientBoostingClassifier": HistGradientBoostingClassifier(random_state=0, max_iter=max_iter),
    "GaussianNB": GaussianNB(),
    "MultinomialNB": MultinomialNB(),
    "BernoulliNB": BernoulliNB(),
+    "ComplementNB": ComplementNB(),
+    "LinearDiscriminantAnalysis": LinearDiscriminantAnalysis(),
+    "QuadraticDiscriminantAnalysis": QuadraticDiscriminantAnalysis(),
    "MLPClassifier-i10000": MLPClassifier(max_iter=10000, random_state=0),
+    "DummyClassifier": DummyClassifier(random_state=0)
 }

+#"VotingClassifier": VotingClassifier(estimators=[('lr', LogisticRegression()), ('rf', RandomForestClassifier())]),
+#"StackingClassifier": StackingClassifier(estimators=[('lr', LogisticRegression()), ('rf', RandomForestClassifier())]),
 for dataset in datasets:
    print("-" * 60)
    print("dataset -> " + dataset)
-    print("-" * 60)
    print("mkdir")
    folder = dataset.split(".csv")[0]
    if not os.path.isdir(folder):
        os.mkdir(folder)
    X_train, X_test, y_train, y_test = prepDataset(dataset)
    for esti in estimators:
+        print("model: " + esti)
        compari = comparison(X_train, X_test, y_train, y_test, estimators[esti], 1) #TODO: change the job count if you can
        print("open")
        f = open(folder + "/" + esti +".txt", mode="w+", encoding="utf-8")
--- a/notebook.ipynb
+++ b/notebook.ipynb
@@ -23,36 +23,7 @@
     "is_executing": true
    }
   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "   appid              name release_date  required_age  price  dlc_count  \\\n",
-      "0    730  Counter-Strike 2   2012-08-21             0    0.0          1   \n",
-      "\n",
-      "                                detailed_description  \\\n",
-      "0  For over two decades, Counter-Strike has offer...   \n",
-      "\n",
-      "                                      about_the_game  \\\n",
-      "0  For over two decades, Counter-Strike has offer...   \n",
-      "\n",
-      "                                   short_description reviews  ...  \\\n",
-      "0  For over two decades, Counter-Strike has offer...     NaN  ...   \n",
-      "\n",
-      "  average_playtime_2weeks median_playtime_forever median_playtime_2weeks  \\\n",
-      "0                     879                    5174                    350   \n",
-      "\n",
-      "  discount  peak_ccu                                               tags  \\\n",
-      "0        0   1212356  {'FPS': 90857, 'Shooter': 65397, 'Multiplayer'...   \n",
-      "\n",
-      "   pct_pos_total  num_reviews_total pct_pos_recent  num_reviews_recent  \n",
-      "0             86            8632939             82               96473  \n",
-      "\n",
-      "[1 rows x 47 columns]\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
@@ -120,27 +91,7 @@
     "is_executing": true
    }
   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "                                                desc  \\\n",
-      "0  For over two decades, Counter-Strike has offer...   \n",
-      "1  LAND, LOOT, SURVIVE! Play PUBG: BATTLEGROUNDS ...   \n",
-      "2  The most-played game on Steam. Every day, mill...   \n",
-      "3  When a young street hustler, a retired bank ro...   \n",
-      "4  Edition Comparison Ultimate Edition The Tom Cl...   \n",
-      "\n",
-      "                                              genres  \n",
-      "0                         ['Action', 'Free To Play']  \n",
-      "1  ['Action', 'Adventure', 'Massively Multiplayer...  \n",
-      "2             ['Action', 'Strategy', 'Free To Play']  \n",
-      "3                            ['Action', 'Adventure']  \n",
-      "4                                         ['Action']  \n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from sklearn.compose import ColumnTransformer\n",
    "from sklearn.preprocessing import FunctionTransformer\n",
@@ -200,20 +151,7 @@
   "execution_count": null,
   "id": "ebc5a24e9bc87fdd",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0                               [Action, Free To Play]\n",
-      "1    [Action, Adventure, Massively Multiplayer, Fre...\n",
-      "2                     [Action, Strategy, Free To Play]\n",
-      "3                                  [Action, Adventure]\n",
-      "4                                             [Action]\n",
-      "Name: genres, dtype: object\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "import ast\n",
    "\n",
@@ -236,27 +174,7 @@
   "execution_count": null,
   "id": "d2c3527a5fc876bf",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "   Action  Adventure  Casual  Early Access  Free To Play  Gore  Indie  \\\n",
-      "0       1          0       0             0             1     0      0   \n",
-      "1       1          1       0             0             1     0      0   \n",
-      "2       1          0       0             0             1     0      0   \n",
-      "3       1          1       0             0             0     0      0   \n",
-      "4       1          0       0             0             0     0      0   \n",
-      "\n",
-      "   Massively Multiplayer  RPG  Racing  Simulation  Sports  Strategy  Violent  \n",
-      "0                      0    0       0           0       0         0        0  \n",
-      "1                      1    0       0           0       0         0        0  \n",
-      "2                      0    0       0           0       0         1        0  \n",
-      "3                      0    0       0           0       0         0        0  \n",
-      "4                      0    0       0           0       0         0        0  \n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from sklearn.preprocessing import MultiLabelBinarizer\n",
    "\n",
@@ -288,29 +206,7 @@
   "execution_count": null,
   "id": "4e8b407c",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "    00  000  000km    000th  00am  00f  00i  00p  00v   01  ...  이터널  이터널리턴  \\\n",
-      "0  0.0  0.0    0.0  0.00000   0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0    0.0   \n",
-      "1  0.0  0.0    0.0  0.00000   0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0    0.0   \n",
-      "2  0.0  0.0    0.0  0.14649   0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0    0.0   \n",
-      "3  0.0  0.0    0.0  0.00000   0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0    0.0   \n",
-      "4  0.0  0.0    0.0  0.00000   0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0    0.0   \n",
-      "\n",
-      "   이현준  정대찬  중입니다   철권  토탈워  페르소나  한국어  한글을  \n",
-      "0  0.0  0.0   0.0  0.0  0.0   0.0  0.0  0.0  \n",
-      "1  0.0  0.0   0.0  0.0  0.0   0.0  0.0  0.0  \n",
-      "2  0.0  0.0   0.0  0.0  0.0   0.0  0.0  0.0  \n",
-      "3  0.0  0.0   0.0  0.0  0.0   0.0  0.0  0.0  \n",
-      "4  0.0  0.0   0.0  0.0  0.0   0.0  0.0  0.0  \n",
-      "\n",
-      "[5 rows x 29351 columns]\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "\n",
@@ -356,15 +252,7 @@
   "execution_count": null,
   "id": "4919bf1b37d171a7",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "13\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "mask = y.sum(axis=1).map(lambda x: x > 0)\n",
    "print((mask == False).sum()) # count of unpredictable datapoints\n",
@@ -399,12 +287,38 @@
    "X_train, X_test, y_train, y_test = train_test_split(X_clean, y_clean, random_state=0)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "84f56229",
+   "metadata": {},
+   "source": [
+    "Now that all data is prepared, we need to choose a classification model that meets our standards."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "917ba82f",
+   "metadata": {},
+   "source": [
+    "# Excursion: Choosing a Classification Model\n",
+    "``sklearn`` has many different classification models to choose from, but we only have limited time and computing power.\n",
+    "As such, we tested many different models on the 2k dataset and chose the five best-performing ones for the big dataset.\n",
+    "\n",
+    "### The comparison\n",
+    "We won't put the comparison script in this notebook, but you can find it in the ``compare_models.py`` file and try it out yourself.\n",
+    "There were some rules as a baseline for comparison:\n",
+    "- All Hyperparameters are set to default\n",
+    "- All iteration limits are set to 3000\n",
+    "\n",
+    "![Comparison Image](./compare_models_2k.png)"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "12b5283d",
   "metadata": {},
   "source": [
-    "# Model Selection\n",
+    "## Model Selection\n",
    "**TODO Deciding which model to use for this task**\n",
    "\n",
    "As a game can have multiple genres, our Model(s) has to be capable of multi-label-classification. sklearn's ``MultiOutputClassifier`` can do this. As a backend for ``MultiOutputClassifier`` we use ``LogisticRegression``"
@@ -442,36 +356,7 @@
   "execution_count": null,
   "id": "e2ebea6945193e07",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "           0       0.78      0.91      0.84       300\n",
-      "           1       0.78      0.62      0.69       216\n",
-      "           2       1.00      0.03      0.07        86\n",
-      "           3       0.00      0.00      0.00        46\n",
-      "           4       1.00      0.04      0.07        83\n",
-      "           5       0.00      0.00      0.00         0\n",
-      "           6       0.79      0.81      0.80       245\n",
-      "           7       0.00      0.00      0.00        42\n",
-      "           8       0.90      0.34      0.49       127\n",
-      "           9       0.00      0.00      0.00        12\n",
-      "          10       0.89      0.25      0.39       127\n",
-      "          11       0.00      0.00      0.00        14\n",
-      "          12       0.88      0.14      0.24       106\n",
-      "          13       0.00      0.00      0.00         0\n",
-      "\n",
-      "   micro avg       0.79      0.50      0.61      1404\n",
-      "   macro avg       0.50      0.22      0.26      1404\n",
-      "weighted avg       0.77      0.50      0.53      1404\n",
-      " samples avg       0.77      0.56      0.60      1404\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "\n",