From ee6a31972b0aac6e3ce4ffda382f6751c3ce92cb Mon Sep 17 00:00:00 2001
From: Maximilian Kany <maximilian@kany-malyhina.de>
Date: Fri, 15 Aug 2025 11:38:46 +0200
Subject: [PATCH] first version of the plot

---
 README.md                                     |  18 -
 comparison.py                                 | 140 -----
 games_march2025_cleaned.csv                   |   3 -
 games_march2025_cleaned/BernoulliNB.txt       |  21 -
 .../DecisionTreeClassifier.txt                |  21 -
 games_march2025_cleaned/GaussianNB.txt        |  21 -
 .../GradientBoostingClassifier.txt            |  21 -
 games_march2025_cleaned/LinearSVC-i5000.txt   |  21 -
 .../LogisticRegression-i1000.txt              |  21 -
 .../LogisticRegression-i10000.txt             |  21 -
 .../MLPClassifier-i10000.txt                  |  21 -
 games_march2025_cleaned/MultinomialNB.txt     |  21 -
 .../RandomForestClassifier.txt                |  21 -
 games_march2025_cleaned/SVC-RBF-i10000.txt    |  21 -
 games_march2025_cleaned_10k.csv               |   3 -
 games_march2025_cleaned_10k/BernoulliNB.txt   |  21 -
 .../DecisionTreeClassifier.txt                |  21 -
 games_march2025_cleaned_10k/GaussianNB.txt    |  21 -
 .../GradientBoostingClassifier.txt            |  21 -
 .../LinearSVC-i5000.txt                       |  21 -
 .../LogisticRegression-i1000.txt              |  21 -
 .../LogisticRegression-i10000.txt             |  21 -
 .../MLPClassifier-i10000.txt                  |  21 -
 games_march2025_cleaned_10k/MultinomialNB.txt |  21 -
 .../RandomForestClassifier.txt                |  21 -
 .../SVC-RBF-i10000.txt                        |  21 -
 games_march2025_cleaned_2k.csv                |   3 -
 games_march2025_cleaned_2k/BernoulliNB.txt    |  21 -
 .../DecisionTreeClassifier.txt                |  21 -
 games_march2025_cleaned_2k/GaussianNB.txt     |  21 -
 .../GradientBoostingClassifier.txt            |  21 -
 .../LinearSVC-i5000.txt                       |  21 -
 .../LogisticRegression-i1000.txt              |  21 -
 .../LogisticRegression-i10000.txt             |  21 -
 .../MLPClassifier-i10000.txt                  |  21 -
 games_march2025_cleaned_2k/MultinomialNB.txt  |  21 -
 .../RandomForestClassifier.txt                |  21 -
 games_march2025_cleaned_2k/SVC-RBF-i10000.txt |  21 -
 notebook.ipynb                                | 530 ------------------
 plot_maker.py                                 |  38 ++
 test_script.py                                | 133 -----
 41 files changed, 38 insertions(+), 1523 deletions(-)
 delete mode 100644 README.md
 delete mode 100644 comparison.py
 delete mode 100644 games_march2025_cleaned.csv
 delete mode 100644 games_march2025_cleaned/BernoulliNB.txt
 delete mode 100644 games_march2025_cleaned/DecisionTreeClassifier.txt
 delete mode 100644 games_march2025_cleaned/GaussianNB.txt
 delete mode 100644 games_march2025_cleaned/GradientBoostingClassifier.txt
 delete mode 100644 games_march2025_cleaned/LinearSVC-i5000.txt
 delete mode 100644 games_march2025_cleaned/LogisticRegression-i1000.txt
 delete mode 100644 games_march2025_cleaned/LogisticRegression-i10000.txt
 delete mode 100644 games_march2025_cleaned/MLPClassifier-i10000.txt
 delete mode 100644 games_march2025_cleaned/MultinomialNB.txt
 delete mode 100644 games_march2025_cleaned/RandomForestClassifier.txt
 delete mode 100644 games_march2025_cleaned/SVC-RBF-i10000.txt
 delete mode 100644 games_march2025_cleaned_10k.csv
 delete mode 100644 games_march2025_cleaned_10k/BernoulliNB.txt
 delete mode 100644 games_march2025_cleaned_10k/DecisionTreeClassifier.txt
 delete mode 100644 games_march2025_cleaned_10k/GaussianNB.txt
 delete mode 100644 games_march2025_cleaned_10k/GradientBoostingClassifier.txt
 delete mode 100644 games_march2025_cleaned_10k/LinearSVC-i5000.txt
 delete mode 100644 games_march2025_cleaned_10k/LogisticRegression-i1000.txt
 delete mode 100644 games_march2025_cleaned_10k/LogisticRegression-i10000.txt
 delete mode 100644 games_march2025_cleaned_10k/MLPClassifier-i10000.txt
 delete mode 100644 games_march2025_cleaned_10k/MultinomialNB.txt
 delete mode 100644 games_march2025_cleaned_10k/RandomForestClassifier.txt
 delete mode 100644 games_march2025_cleaned_10k/SVC-RBF-i10000.txt
 delete mode 100644 games_march2025_cleaned_2k.csv
 delete mode 100644 games_march2025_cleaned_2k/BernoulliNB.txt
 delete mode 100644 games_march2025_cleaned_2k/DecisionTreeClassifier.txt
 delete mode 100644 games_march2025_cleaned_2k/GaussianNB.txt
 delete mode 100644 games_march2025_cleaned_2k/GradientBoostingClassifier.txt
 delete mode 100644 games_march2025_cleaned_2k/LinearSVC-i5000.txt
 delete mode 100644 games_march2025_cleaned_2k/LogisticRegression-i1000.txt
 delete mode 100644 games_march2025_cleaned_2k/LogisticRegression-i10000.txt
 delete mode 100644 games_march2025_cleaned_2k/MLPClassifier-i10000.txt
 delete mode 100644 games_march2025_cleaned_2k/MultinomialNB.txt
 delete mode 100644 games_march2025_cleaned_2k/RandomForestClassifier.txt
 delete mode 100644 games_march2025_cleaned_2k/SVC-RBF-i10000.txt
 delete mode 100644 notebook.ipynb
 create mode 100644 plot_maker.py
 delete mode 100644 test_script.py

diff --git a/README.md b/README.md
deleted file mode 100644
index 4417fc3..0000000
--- a/README.md
+++ /dev/null
@@ -1,18 +0,0 @@
-# Machine Learning Project – Summer Semester 2025
-
-This project was created as part of the "Machine Learning" course at HTW Saar in the Practical Computer Science study program.
-
-## Objective
-
-We are developing a Jupyter Notebook that automatically predicts the genre of Steam games based on their descriptions.  
-As a data basis, we use a publicly available Steam Games dataset that we found on Kaggle.
-
-## Dataset
-
-We use the [Steam Games Dataset from Kaggle](https://www.kaggle.com/datasets/artermiloff/steam-games-dataset/data).
-
-## Contributors
-
-- Maximilian Kany
-- Florian Speicher
-- Tim Wall
\ No newline at end of file
diff --git a/comparison.py b/comparison.py
deleted file mode 100644
index fcced39..0000000
--- a/comparison.py
+++ /dev/null
@@ -1,140 +0,0 @@
-import os
-import numpy as np
-import pandas as pd
-from sklearn import set_config
-
-from sklearn.compose import ColumnTransformer
-from sklearn.preprocessing import FunctionTransformer
-
-from sklearn.preprocessing import MultiLabelBinarizer
-import ast
-
-
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.linear_model import LogisticRegression
-from sklearn.multioutput import MultiOutputClassifier
-from sklearn.metrics import classification_report
-from sklearn.model_selection import train_test_split
-from sklearn.datasets import load_iris
-from sklearn.metrics import accuracy_score, classification_report
-from sklearn.svm import SVC, LinearSVC
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
-from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.neural_network import MLPClassifier
-
-
-set_config(transform_output="pandas") # dataframe supremacy
-
-def prepDataset(dataset): #returns X_train, X_test, y_train, y_test
-    dataset = pd.read_csv("./games_march2025_cleaned_2k.csv",sep=",")
-    # desc, genres, tags
-    column_transformer = ColumnTransformer([
-            # merge all descriptions
-            ('desc', FunctionTransformer(lambda X: X.fillna('').agg(' '.join, axis=1).to_frame(name="desc")),
-                ['detailed_description', 'about_the_game', 'short_description']),
-            ('pass', 'passthrough', ['genres']),#, 'tags'
-        ],
-        verbose_feature_names_out=False
-    )
-    dataset = column_transformer.fit_transform(dataset)
-
-
-
-    #### SET MISSING VALUES
-    print("SETMISS")
-    # Setting missing numeric values to the mean
-    dataset.fillna(dataset.mean(numeric_only=True), inplace=True)
-    # Setting missing text values to 'Unknown'
-    dataset.fillna('', inplace=True)
-    # Setting missing values in other columns to NaN
-    dataset.dropna(inplace=True)
-
-    ##### STRUCTURIZE GENRES to onehot
-    #serialize array
-    dataset['genres'] = dataset['genres'].map(lambda s: ast.literal_eval(s)) 
-    #print(dataset['genres']) # in py but not yet onehotenc
-
-    # MultiLabelBinarizer does onehotenc for arrays
-    mlb_genres = MultiLabelBinarizer()
-    genres_encoded = mlb_genres.fit_transform(dataset.pop('genres'))
-    #genres_count = len(mlb_genres.classes_) # for multi-label classifiction later
-
-    genres_df = pd.DataFrame(genres_encoded, columns=mlb_genres.classes_)
-    #print(genres_df)
-    #dataset = pd.concat([dataset, genres_df], axis=1)
-    #print(dataset)
-
-
-    #### convert text to bag of words
-
-    ## Count vs Tfidf vectorizer
-    vectorizer = TfidfVectorizer()
-    tfidf_matrix = vectorizer.fit_transform(dataset['desc']) # matrix
-    tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
-    #print(tfidf_df)
-
-
-    ##### MODEL
-    print("MODEL")
-
-
-    X = tfidf_df
-    y = genres_df
-    # cleanup datapoints that dont have a target value (all target columns are 0)
-    mask = y.sum(axis=1).map(lambda x: x > 0)
-    #print((mask == False).sum()) #31 cases with all target columns 0
-    X_clean = X[mask]
-    y_clean = y[mask]
-
-    # Split dataset
-    return train_test_split(X_clean, y_clean, random_state=0)
-
-def comparison(X_train, X_test, y_train, y_test, estimator, jobs: int = 1): #returns class_report
-    multi_target_clf = MultiOutputClassifier(estimator, n_jobs=jobs) # LogisticRegression(max_iter=1337, random_state=0)
-
-    # model training
-    multi_target_clf.fit(X_train, y_train)
-
-    # predict against test data
-    y_pred = multi_target_clf.predict(X_test)
-    return classification_report(y_test, y_pred, zero_division=0.0)
-
-datasets = [
-    'games_march2025_cleaned_2k.csv',
-    'games_march2025_cleaned_10k.csv',
-    'games_march2025_cleaned.csv'
-]
-
-estimators = {
-    "LogisticRegression-i1000": LogisticRegression(max_iter=1000, random_state=0),
-    "LogisticRegression-i10000": LogisticRegression(max_iter=10000, random_state=0),
-    "LinearSVC-i5000": LinearSVC(max_iter=5000),
-    "SVC-RBF-i10000": SVC(kernel="rbf", max_iter=10000),
-    "DecisionTreeClassifier": DecisionTreeClassifier(random_state=0),
-    "RandomForestClassifier": RandomForestClassifier(random_state=0),
-    "GradientBoostingClassifier": GradientBoostingClassifier(random_state=0),
-    "GaussianNB": GaussianNB(),
-    "MultinomialNB": MultinomialNB(),
-    "BernoulliNB": BernoulliNB(),
-    "MLPClassifier-i10000": MLPClassifier(max_iter=10000, random_state=0),
-}
-
-for dataset in datasets:
-    print("-" * 60)
-    print("dataset -> " + dataset)
-    print("-" * 60)
-    print("mkdir")
-    folder = dataset.split(".csv")[0]
-    if not os.path.isdir(folder):
-        os.mkdir(folder)
-    X_train, X_test, y_train, y_test = prepDataset(dataset)
-    for esti in estimators:
-        compari = comparison(X_train, X_test, y_train, y_test, estimators[esti], 1) #TODO: change the job count if you can
-        print("open")
-        f = open(folder + "/" + esti +".txt", mode="w+", encoding="utf-8")
-        f.write(compari)
-        print("write")
-        f.close()
-        print("close")
\ No newline at end of file
diff --git a/games_march2025_cleaned.csv b/games_march2025_cleaned.csv
deleted file mode 100644
index 4d046d1..0000000
--- a/games_march2025_cleaned.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:04d8df2778aaa8f8b575934b7072d55224d37a2ded23e8261583f0fcf668dfab
-size 468641107
diff --git a/games_march2025_cleaned/BernoulliNB.txt b/games_march2025_cleaned/BernoulliNB.txt
deleted file mode 100644
index f2237d4..0000000
--- a/games_march2025_cleaned/BernoulliNB.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.75      0.90      0.82       300
-           1       0.72      0.68      0.70       216
-           2       0.50      0.08      0.14        86
-           3       0.27      0.07      0.11        46
-           4       0.40      0.07      0.12        83
-           5       0.00      0.00      0.00         0
-           6       0.77      0.82      0.79       245
-           7       0.33      0.10      0.15        42
-           8       0.67      0.40      0.50       127
-           9       0.00      0.00      0.00        12
-          10       0.71      0.37      0.49       127
-          11       0.00      0.00      0.00        14
-          12       0.49      0.31      0.38       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.70      0.55      0.62      1404
-   macro avg       0.40      0.27      0.30      1404
-weighted avg       0.64      0.55      0.56      1404
- samples avg       0.73      0.59      0.61      1404
diff --git a/games_march2025_cleaned/DecisionTreeClassifier.txt b/games_march2025_cleaned/DecisionTreeClassifier.txt
deleted file mode 100644
index 900c256..0000000
--- a/games_march2025_cleaned/DecisionTreeClassifier.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.76      0.73      0.75       300
-           1       0.56      0.53      0.54       216
-           2       0.36      0.33      0.34        86
-           3       0.33      0.26      0.29        46
-           4       0.40      0.46      0.43        83
-           5       0.00      0.00      0.00         0
-           6       0.65      0.61      0.63       245
-           7       0.39      0.40      0.40        42
-           8       0.59      0.57      0.58       127
-           9       0.60      0.25      0.35        12
-          10       0.56      0.51      0.53       127
-          11       0.39      0.50      0.44        14
-          12       0.52      0.49      0.50       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.58      0.55      0.57      1404
-   macro avg       0.44      0.40      0.41      1404
-weighted avg       0.58      0.55      0.57      1404
- samples avg       0.59      0.59      0.55      1404
diff --git a/games_march2025_cleaned/GaussianNB.txt b/games_march2025_cleaned/GaussianNB.txt
deleted file mode 100644
index 83d7a2e..0000000
--- a/games_march2025_cleaned/GaussianNB.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.76      0.80      0.78       300
-           1       0.62      0.51      0.56       216
-           2       0.63      0.14      0.23        86
-           3       0.17      0.02      0.04        46
-           4       0.42      0.10      0.16        83
-           5       0.00      0.00      0.00         0
-           6       0.68      0.66      0.67       245
-           7       0.56      0.12      0.20        42
-           8       0.55      0.33      0.41       127
-           9       0.67      0.17      0.27        12
-          10       0.65      0.31      0.42       127
-          11       1.00      0.14      0.25        14
-          12       0.53      0.29      0.38       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.66      0.47      0.55      1404
-   macro avg       0.52      0.26      0.31      1404
-weighted avg       0.62      0.47      0.51      1404
- samples avg       0.67      0.53      0.55      1404
diff --git a/games_march2025_cleaned/GradientBoostingClassifier.txt b/games_march2025_cleaned/GradientBoostingClassifier.txt
deleted file mode 100644
index 7c8ce6e..0000000
--- a/games_march2025_cleaned/GradientBoostingClassifier.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.85      0.80      0.83       300
-           1       0.77      0.61      0.68       216
-           2       0.55      0.13      0.21        86
-           3       0.42      0.11      0.17        46
-           4       0.68      0.33      0.44        83
-           5       0.00      0.00      0.00         0
-           6       0.71      0.76      0.74       245
-           7       0.61      0.26      0.37        42
-           8       0.81      0.50      0.61       127
-           9       0.75      0.25      0.38        12
-          10       0.81      0.54      0.65       127
-          11       0.40      0.43      0.41        14
-          12       0.69      0.42      0.53       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.76      0.57      0.65      1404
-   macro avg       0.57      0.37      0.43      1404
-weighted avg       0.74      0.57      0.63      1404
- samples avg       0.76      0.63      0.65      1404
diff --git a/games_march2025_cleaned/LinearSVC-i5000.txt b/games_march2025_cleaned/LinearSVC-i5000.txt
deleted file mode 100644
index df82b40..0000000
--- a/games_march2025_cleaned/LinearSVC-i5000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.85      0.87      0.86       300
-           1       0.76      0.66      0.70       216
-           2       0.77      0.20      0.31        86
-           3       0.00      0.00      0.00        46
-           4       0.76      0.27      0.39        83
-           5       0.00      0.00      0.00         0
-           6       0.78      0.81      0.79       245
-           7       0.89      0.19      0.31        42
-           8       0.77      0.60      0.67       127
-           9       1.00      0.58      0.74        12
-          10       0.85      0.54      0.66       127
-          11       1.00      0.29      0.44        14
-          12       0.82      0.42      0.56       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.80      0.61      0.69      1404
-   macro avg       0.66      0.39      0.46      1404
-weighted avg       0.78      0.61      0.66      1404
- samples avg       0.81      0.67      0.69      1404
diff --git a/games_march2025_cleaned/LogisticRegression-i1000.txt b/games_march2025_cleaned/LogisticRegression-i1000.txt
deleted file mode 100644
index b7926d4..0000000
--- a/games_march2025_cleaned/LogisticRegression-i1000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.78      0.91      0.84       300
-           1       0.78      0.62      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.04      0.07        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.81      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.90      0.34      0.49       127
-           9       0.00      0.00      0.00        12
-          10       0.89      0.25      0.39       127
-          11       0.00      0.00      0.00        14
-          12       0.88      0.14      0.24       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.50      0.61      1404
-   macro avg       0.50      0.22      0.26      1404
-weighted avg       0.77      0.50      0.53      1404
- samples avg       0.77      0.56      0.60      1404
diff --git a/games_march2025_cleaned/LogisticRegression-i10000.txt b/games_march2025_cleaned/LogisticRegression-i10000.txt
deleted file mode 100644
index b7926d4..0000000
--- a/games_march2025_cleaned/LogisticRegression-i10000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.78      0.91      0.84       300
-           1       0.78      0.62      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.04      0.07        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.81      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.90      0.34      0.49       127
-           9       0.00      0.00      0.00        12
-          10       0.89      0.25      0.39       127
-          11       0.00      0.00      0.00        14
-          12       0.88      0.14      0.24       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.50      0.61      1404
-   macro avg       0.50      0.22      0.26      1404
-weighted avg       0.77      0.50      0.53      1404
- samples avg       0.77      0.56      0.60      1404
diff --git a/games_march2025_cleaned/MLPClassifier-i10000.txt b/games_march2025_cleaned/MLPClassifier-i10000.txt
deleted file mode 100644
index c4634dc..0000000
--- a/games_march2025_cleaned/MLPClassifier-i10000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.84      0.85      0.84       300
-           1       0.73      0.67      0.70       216
-           2       0.74      0.30      0.43        86
-           3       0.50      0.02      0.04        46
-           4       0.69      0.24      0.36        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.79      0.79       245
-           7       0.86      0.14      0.24        42
-           8       0.76      0.63      0.69       127
-           9       1.00      0.33      0.50        12
-          10       0.81      0.52      0.63       127
-          11       1.00      0.14      0.25        14
-          12       0.75      0.41      0.53       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.60      0.68      1404
-   macro avg       0.68      0.36      0.43      1404
-weighted avg       0.78      0.60      0.65      1404
- samples avg       0.80      0.66      0.68      1404
diff --git a/games_march2025_cleaned/MultinomialNB.txt b/games_march2025_cleaned/MultinomialNB.txt
deleted file mode 100644
index bc74cf3..0000000
--- a/games_march2025_cleaned/MultinomialNB.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.64      0.99      0.78       300
-           1       0.85      0.24      0.37       216
-           2       0.60      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       0.80      0.05      0.09        83
-           5       0.00      0.00      0.00         0
-           6       0.78      0.80      0.79       245
-           7       0.40      0.05      0.09        42
-           8       1.00      0.04      0.08       127
-           9       0.00      0.00      0.00        12
-          10       0.20      0.01      0.02       127
-          11       0.00      0.00      0.00        14
-          12       1.00      0.05      0.09       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.69      0.40      0.51      1404
-   macro avg       0.45      0.16      0.17      1404
-weighted avg       0.68      0.40      0.39      1404
- samples avg       0.70      0.44      0.50      1404
diff --git a/games_march2025_cleaned/RandomForestClassifier.txt b/games_march2025_cleaned/RandomForestClassifier.txt
deleted file mode 100644
index 6fbe546..0000000
--- a/games_march2025_cleaned/RandomForestClassifier.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.80      0.88      0.84       300
-           1       0.78      0.55      0.64       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.06      0.11        83
-           5       0.00      0.00      0.00         0
-           6       0.74      0.78      0.76       245
-           7       0.00      0.00      0.00        42
-           8       0.84      0.24      0.38       127
-           9       0.00      0.00      0.00        12
-          10       0.91      0.24      0.38       127
-          11       1.00      0.14      0.25        14
-          12       1.00      0.25      0.39       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.48      0.59      1404
-   macro avg       0.58      0.23      0.27      1404
-weighted avg       0.78      0.48      0.52      1404
- samples avg       0.77      0.54      0.60      1404
diff --git a/games_march2025_cleaned/SVC-RBF-i10000.txt b/games_march2025_cleaned/SVC-RBF-i10000.txt
deleted file mode 100644
index ff0c7b7..0000000
--- a/games_march2025_cleaned/SVC-RBF-i10000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.81      0.90      0.85       300
-           1       0.76      0.63      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.05      0.09        83
-           5       0.00      0.00      0.00         0
-           6       0.77      0.83      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.84      0.40      0.54       127
-           9       1.00      0.17      0.29        12
-          10       0.90      0.34      0.49       127
-          11       1.00      0.14      0.25        14
-          12       0.92      0.21      0.34       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.80      0.53      0.63      1404
-   macro avg       0.64      0.26      0.32      1404
-weighted avg       0.79      0.53      0.56      1404
- samples avg       0.79      0.59      0.63      1404
diff --git a/games_march2025_cleaned_10k.csv b/games_march2025_cleaned_10k.csv
deleted file mode 100644
index 2c3c073..0000000
--- a/games_march2025_cleaned_10k.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:12cf598a6e41d83cfa9c16e99d4d9578cb4ee7c3594fae9f9b921772887a08d7
-size 68658136
diff --git a/games_march2025_cleaned_10k/BernoulliNB.txt b/games_march2025_cleaned_10k/BernoulliNB.txt
deleted file mode 100644
index f2237d4..0000000
--- a/games_march2025_cleaned_10k/BernoulliNB.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.75      0.90      0.82       300
-           1       0.72      0.68      0.70       216
-           2       0.50      0.08      0.14        86
-           3       0.27      0.07      0.11        46
-           4       0.40      0.07      0.12        83
-           5       0.00      0.00      0.00         0
-           6       0.77      0.82      0.79       245
-           7       0.33      0.10      0.15        42
-           8       0.67      0.40      0.50       127
-           9       0.00      0.00      0.00        12
-          10       0.71      0.37      0.49       127
-          11       0.00      0.00      0.00        14
-          12       0.49      0.31      0.38       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.70      0.55      0.62      1404
-   macro avg       0.40      0.27      0.30      1404
-weighted avg       0.64      0.55      0.56      1404
- samples avg       0.73      0.59      0.61      1404
diff --git a/games_march2025_cleaned_10k/DecisionTreeClassifier.txt b/games_march2025_cleaned_10k/DecisionTreeClassifier.txt
deleted file mode 100644
index 900c256..0000000
--- a/games_march2025_cleaned_10k/DecisionTreeClassifier.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.76      0.73      0.75       300
-           1       0.56      0.53      0.54       216
-           2       0.36      0.33      0.34        86
-           3       0.33      0.26      0.29        46
-           4       0.40      0.46      0.43        83
-           5       0.00      0.00      0.00         0
-           6       0.65      0.61      0.63       245
-           7       0.39      0.40      0.40        42
-           8       0.59      0.57      0.58       127
-           9       0.60      0.25      0.35        12
-          10       0.56      0.51      0.53       127
-          11       0.39      0.50      0.44        14
-          12       0.52      0.49      0.50       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.58      0.55      0.57      1404
-   macro avg       0.44      0.40      0.41      1404
-weighted avg       0.58      0.55      0.57      1404
- samples avg       0.59      0.59      0.55      1404
diff --git a/games_march2025_cleaned_10k/GaussianNB.txt b/games_march2025_cleaned_10k/GaussianNB.txt
deleted file mode 100644
index 83d7a2e..0000000
--- a/games_march2025_cleaned_10k/GaussianNB.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.76      0.80      0.78       300
-           1       0.62      0.51      0.56       216
-           2       0.63      0.14      0.23        86
-           3       0.17      0.02      0.04        46
-           4       0.42      0.10      0.16        83
-           5       0.00      0.00      0.00         0
-           6       0.68      0.66      0.67       245
-           7       0.56      0.12      0.20        42
-           8       0.55      0.33      0.41       127
-           9       0.67      0.17      0.27        12
-          10       0.65      0.31      0.42       127
-          11       1.00      0.14      0.25        14
-          12       0.53      0.29      0.38       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.66      0.47      0.55      1404
-   macro avg       0.52      0.26      0.31      1404
-weighted avg       0.62      0.47      0.51      1404
- samples avg       0.67      0.53      0.55      1404
diff --git a/games_march2025_cleaned_10k/GradientBoostingClassifier.txt b/games_march2025_cleaned_10k/GradientBoostingClassifier.txt
deleted file mode 100644
index 7c8ce6e..0000000
--- a/games_march2025_cleaned_10k/GradientBoostingClassifier.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.85      0.80      0.83       300
-           1       0.77      0.61      0.68       216
-           2       0.55      0.13      0.21        86
-           3       0.42      0.11      0.17        46
-           4       0.68      0.33      0.44        83
-           5       0.00      0.00      0.00         0
-           6       0.71      0.76      0.74       245
-           7       0.61      0.26      0.37        42
-           8       0.81      0.50      0.61       127
-           9       0.75      0.25      0.38        12
-          10       0.81      0.54      0.65       127
-          11       0.40      0.43      0.41        14
-          12       0.69      0.42      0.53       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.76      0.57      0.65      1404
-   macro avg       0.57      0.37      0.43      1404
-weighted avg       0.74      0.57      0.63      1404
- samples avg       0.76      0.63      0.65      1404
diff --git a/games_march2025_cleaned_10k/LinearSVC-i5000.txt b/games_march2025_cleaned_10k/LinearSVC-i5000.txt
deleted file mode 100644
index df82b40..0000000
--- a/games_march2025_cleaned_10k/LinearSVC-i5000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.85      0.87      0.86       300
-           1       0.76      0.66      0.70       216
-           2       0.77      0.20      0.31        86
-           3       0.00      0.00      0.00        46
-           4       0.76      0.27      0.39        83
-           5       0.00      0.00      0.00         0
-           6       0.78      0.81      0.79       245
-           7       0.89      0.19      0.31        42
-           8       0.77      0.60      0.67       127
-           9       1.00      0.58      0.74        12
-          10       0.85      0.54      0.66       127
-          11       1.00      0.29      0.44        14
-          12       0.82      0.42      0.56       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.80      0.61      0.69      1404
-   macro avg       0.66      0.39      0.46      1404
-weighted avg       0.78      0.61      0.66      1404
- samples avg       0.81      0.67      0.69      1404
diff --git a/games_march2025_cleaned_10k/LogisticRegression-i1000.txt b/games_march2025_cleaned_10k/LogisticRegression-i1000.txt
deleted file mode 100644
index b7926d4..0000000
--- a/games_march2025_cleaned_10k/LogisticRegression-i1000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.78      0.91      0.84       300
-           1       0.78      0.62      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.04      0.07        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.81      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.90      0.34      0.49       127
-           9       0.00      0.00      0.00        12
-          10       0.89      0.25      0.39       127
-          11       0.00      0.00      0.00        14
-          12       0.88      0.14      0.24       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.50      0.61      1404
-   macro avg       0.50      0.22      0.26      1404
-weighted avg       0.77      0.50      0.53      1404
- samples avg       0.77      0.56      0.60      1404
diff --git a/games_march2025_cleaned_10k/LogisticRegression-i10000.txt b/games_march2025_cleaned_10k/LogisticRegression-i10000.txt
deleted file mode 100644
index b7926d4..0000000
--- a/games_march2025_cleaned_10k/LogisticRegression-i10000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.78      0.91      0.84       300
-           1       0.78      0.62      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.04      0.07        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.81      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.90      0.34      0.49       127
-           9       0.00      0.00      0.00        12
-          10       0.89      0.25      0.39       127
-          11       0.00      0.00      0.00        14
-          12       0.88      0.14      0.24       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.50      0.61      1404
-   macro avg       0.50      0.22      0.26      1404
-weighted avg       0.77      0.50      0.53      1404
- samples avg       0.77      0.56      0.60      1404
diff --git a/games_march2025_cleaned_10k/MLPClassifier-i10000.txt b/games_march2025_cleaned_10k/MLPClassifier-i10000.txt
deleted file mode 100644
index c4634dc..0000000
--- a/games_march2025_cleaned_10k/MLPClassifier-i10000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.84      0.85      0.84       300
-           1       0.73      0.67      0.70       216
-           2       0.74      0.30      0.43        86
-           3       0.50      0.02      0.04        46
-           4       0.69      0.24      0.36        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.79      0.79       245
-           7       0.86      0.14      0.24        42
-           8       0.76      0.63      0.69       127
-           9       1.00      0.33      0.50        12
-          10       0.81      0.52      0.63       127
-          11       1.00      0.14      0.25        14
-          12       0.75      0.41      0.53       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.60      0.68      1404
-   macro avg       0.68      0.36      0.43      1404
-weighted avg       0.78      0.60      0.65      1404
- samples avg       0.80      0.66      0.68      1404
diff --git a/games_march2025_cleaned_10k/MultinomialNB.txt b/games_march2025_cleaned_10k/MultinomialNB.txt
deleted file mode 100644
index bc74cf3..0000000
--- a/games_march2025_cleaned_10k/MultinomialNB.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.64      0.99      0.78       300
-           1       0.85      0.24      0.37       216
-           2       0.60      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       0.80      0.05      0.09        83
-           5       0.00      0.00      0.00         0
-           6       0.78      0.80      0.79       245
-           7       0.40      0.05      0.09        42
-           8       1.00      0.04      0.08       127
-           9       0.00      0.00      0.00        12
-          10       0.20      0.01      0.02       127
-          11       0.00      0.00      0.00        14
-          12       1.00      0.05      0.09       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.69      0.40      0.51      1404
-   macro avg       0.45      0.16      0.17      1404
-weighted avg       0.68      0.40      0.39      1404
- samples avg       0.70      0.44      0.50      1404
diff --git a/games_march2025_cleaned_10k/RandomForestClassifier.txt b/games_march2025_cleaned_10k/RandomForestClassifier.txt
deleted file mode 100644
index 6fbe546..0000000
--- a/games_march2025_cleaned_10k/RandomForestClassifier.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.80      0.88      0.84       300
-           1       0.78      0.55      0.64       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.06      0.11        83
-           5       0.00      0.00      0.00         0
-           6       0.74      0.78      0.76       245
-           7       0.00      0.00      0.00        42
-           8       0.84      0.24      0.38       127
-           9       0.00      0.00      0.00        12
-          10       0.91      0.24      0.38       127
-          11       1.00      0.14      0.25        14
-          12       1.00      0.25      0.39       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.48      0.59      1404
-   macro avg       0.58      0.23      0.27      1404
-weighted avg       0.78      0.48      0.52      1404
- samples avg       0.77      0.54      0.60      1404
diff --git a/games_march2025_cleaned_10k/SVC-RBF-i10000.txt b/games_march2025_cleaned_10k/SVC-RBF-i10000.txt
deleted file mode 100644
index ff0c7b7..0000000
--- a/games_march2025_cleaned_10k/SVC-RBF-i10000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.81      0.90      0.85       300
-           1       0.76      0.63      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.05      0.09        83
-           5       0.00      0.00      0.00         0
-           6       0.77      0.83      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.84      0.40      0.54       127
-           9       1.00      0.17      0.29        12
-          10       0.90      0.34      0.49       127
-          11       1.00      0.14      0.25        14
-          12       0.92      0.21      0.34       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.80      0.53      0.63      1404
-   macro avg       0.64      0.26      0.32      1404
-weighted avg       0.79      0.53      0.56      1404
- samples avg       0.79      0.59      0.63      1404
diff --git a/games_march2025_cleaned_2k.csv b/games_march2025_cleaned_2k.csv
deleted file mode 100644
index 806e982..0000000
--- a/games_march2025_cleaned_2k.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:75ba38404995149bcb8e5a321459f73b4adf58597f85bab396dd054cc78c145d
-size 15455174
diff --git a/games_march2025_cleaned_2k/BernoulliNB.txt b/games_march2025_cleaned_2k/BernoulliNB.txt
deleted file mode 100644
index f2237d4..0000000
--- a/games_march2025_cleaned_2k/BernoulliNB.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.75      0.90      0.82       300
-           1       0.72      0.68      0.70       216
-           2       0.50      0.08      0.14        86
-           3       0.27      0.07      0.11        46
-           4       0.40      0.07      0.12        83
-           5       0.00      0.00      0.00         0
-           6       0.77      0.82      0.79       245
-           7       0.33      0.10      0.15        42
-           8       0.67      0.40      0.50       127
-           9       0.00      0.00      0.00        12
-          10       0.71      0.37      0.49       127
-          11       0.00      0.00      0.00        14
-          12       0.49      0.31      0.38       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.70      0.55      0.62      1404
-   macro avg       0.40      0.27      0.30      1404
-weighted avg       0.64      0.55      0.56      1404
- samples avg       0.73      0.59      0.61      1404
diff --git a/games_march2025_cleaned_2k/DecisionTreeClassifier.txt b/games_march2025_cleaned_2k/DecisionTreeClassifier.txt
deleted file mode 100644
index 900c256..0000000
--- a/games_march2025_cleaned_2k/DecisionTreeClassifier.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.76      0.73      0.75       300
-           1       0.56      0.53      0.54       216
-           2       0.36      0.33      0.34        86
-           3       0.33      0.26      0.29        46
-           4       0.40      0.46      0.43        83
-           5       0.00      0.00      0.00         0
-           6       0.65      0.61      0.63       245
-           7       0.39      0.40      0.40        42
-           8       0.59      0.57      0.58       127
-           9       0.60      0.25      0.35        12
-          10       0.56      0.51      0.53       127
-          11       0.39      0.50      0.44        14
-          12       0.52      0.49      0.50       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.58      0.55      0.57      1404
-   macro avg       0.44      0.40      0.41      1404
-weighted avg       0.58      0.55      0.57      1404
- samples avg       0.59      0.59      0.55      1404
diff --git a/games_march2025_cleaned_2k/GaussianNB.txt b/games_march2025_cleaned_2k/GaussianNB.txt
deleted file mode 100644
index 83d7a2e..0000000
--- a/games_march2025_cleaned_2k/GaussianNB.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.76      0.80      0.78       300
-           1       0.62      0.51      0.56       216
-           2       0.63      0.14      0.23        86
-           3       0.17      0.02      0.04        46
-           4       0.42      0.10      0.16        83
-           5       0.00      0.00      0.00         0
-           6       0.68      0.66      0.67       245
-           7       0.56      0.12      0.20        42
-           8       0.55      0.33      0.41       127
-           9       0.67      0.17      0.27        12
-          10       0.65      0.31      0.42       127
-          11       1.00      0.14      0.25        14
-          12       0.53      0.29      0.38       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.66      0.47      0.55      1404
-   macro avg       0.52      0.26      0.31      1404
-weighted avg       0.62      0.47      0.51      1404
- samples avg       0.67      0.53      0.55      1404
diff --git a/games_march2025_cleaned_2k/GradientBoostingClassifier.txt b/games_march2025_cleaned_2k/GradientBoostingClassifier.txt
deleted file mode 100644
index 7c8ce6e..0000000
--- a/games_march2025_cleaned_2k/GradientBoostingClassifier.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.85      0.80      0.83       300
-           1       0.77      0.61      0.68       216
-           2       0.55      0.13      0.21        86
-           3       0.42      0.11      0.17        46
-           4       0.68      0.33      0.44        83
-           5       0.00      0.00      0.00         0
-           6       0.71      0.76      0.74       245
-           7       0.61      0.26      0.37        42
-           8       0.81      0.50      0.61       127
-           9       0.75      0.25      0.38        12
-          10       0.81      0.54      0.65       127
-          11       0.40      0.43      0.41        14
-          12       0.69      0.42      0.53       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.76      0.57      0.65      1404
-   macro avg       0.57      0.37      0.43      1404
-weighted avg       0.74      0.57      0.63      1404
- samples avg       0.76      0.63      0.65      1404
diff --git a/games_march2025_cleaned_2k/LinearSVC-i5000.txt b/games_march2025_cleaned_2k/LinearSVC-i5000.txt
deleted file mode 100644
index df82b40..0000000
--- a/games_march2025_cleaned_2k/LinearSVC-i5000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.85      0.87      0.86       300
-           1       0.76      0.66      0.70       216
-           2       0.77      0.20      0.31        86
-           3       0.00      0.00      0.00        46
-           4       0.76      0.27      0.39        83
-           5       0.00      0.00      0.00         0
-           6       0.78      0.81      0.79       245
-           7       0.89      0.19      0.31        42
-           8       0.77      0.60      0.67       127
-           9       1.00      0.58      0.74        12
-          10       0.85      0.54      0.66       127
-          11       1.00      0.29      0.44        14
-          12       0.82      0.42      0.56       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.80      0.61      0.69      1404
-   macro avg       0.66      0.39      0.46      1404
-weighted avg       0.78      0.61      0.66      1404
- samples avg       0.81      0.67      0.69      1404
diff --git a/games_march2025_cleaned_2k/LogisticRegression-i1000.txt b/games_march2025_cleaned_2k/LogisticRegression-i1000.txt
deleted file mode 100644
index b7926d4..0000000
--- a/games_march2025_cleaned_2k/LogisticRegression-i1000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.78      0.91      0.84       300
-           1       0.78      0.62      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.04      0.07        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.81      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.90      0.34      0.49       127
-           9       0.00      0.00      0.00        12
-          10       0.89      0.25      0.39       127
-          11       0.00      0.00      0.00        14
-          12       0.88      0.14      0.24       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.50      0.61      1404
-   macro avg       0.50      0.22      0.26      1404
-weighted avg       0.77      0.50      0.53      1404
- samples avg       0.77      0.56      0.60      1404
diff --git a/games_march2025_cleaned_2k/LogisticRegression-i10000.txt b/games_march2025_cleaned_2k/LogisticRegression-i10000.txt
deleted file mode 100644
index b7926d4..0000000
--- a/games_march2025_cleaned_2k/LogisticRegression-i10000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.78      0.91      0.84       300
-           1       0.78      0.62      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.04      0.07        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.81      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.90      0.34      0.49       127
-           9       0.00      0.00      0.00        12
-          10       0.89      0.25      0.39       127
-          11       0.00      0.00      0.00        14
-          12       0.88      0.14      0.24       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.50      0.61      1404
-   macro avg       0.50      0.22      0.26      1404
-weighted avg       0.77      0.50      0.53      1404
- samples avg       0.77      0.56      0.60      1404
diff --git a/games_march2025_cleaned_2k/MLPClassifier-i10000.txt b/games_march2025_cleaned_2k/MLPClassifier-i10000.txt
deleted file mode 100644
index c4634dc..0000000
--- a/games_march2025_cleaned_2k/MLPClassifier-i10000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.84      0.85      0.84       300
-           1       0.73      0.67      0.70       216
-           2       0.74      0.30      0.43        86
-           3       0.50      0.02      0.04        46
-           4       0.69      0.24      0.36        83
-           5       0.00      0.00      0.00         0
-           6       0.79      0.79      0.79       245
-           7       0.86      0.14      0.24        42
-           8       0.76      0.63      0.69       127
-           9       1.00      0.33      0.50        12
-          10       0.81      0.52      0.63       127
-          11       1.00      0.14      0.25        14
-          12       0.75      0.41      0.53       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.60      0.68      1404
-   macro avg       0.68      0.36      0.43      1404
-weighted avg       0.78      0.60      0.65      1404
- samples avg       0.80      0.66      0.68      1404
diff --git a/games_march2025_cleaned_2k/MultinomialNB.txt b/games_march2025_cleaned_2k/MultinomialNB.txt
deleted file mode 100644
index bc74cf3..0000000
--- a/games_march2025_cleaned_2k/MultinomialNB.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.64      0.99      0.78       300
-           1       0.85      0.24      0.37       216
-           2       0.60      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       0.80      0.05      0.09        83
-           5       0.00      0.00      0.00         0
-           6       0.78      0.80      0.79       245
-           7       0.40      0.05      0.09        42
-           8       1.00      0.04      0.08       127
-           9       0.00      0.00      0.00        12
-          10       0.20      0.01      0.02       127
-          11       0.00      0.00      0.00        14
-          12       1.00      0.05      0.09       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.69      0.40      0.51      1404
-   macro avg       0.45      0.16      0.17      1404
-weighted avg       0.68      0.40      0.39      1404
- samples avg       0.70      0.44      0.50      1404
diff --git a/games_march2025_cleaned_2k/RandomForestClassifier.txt b/games_march2025_cleaned_2k/RandomForestClassifier.txt
deleted file mode 100644
index 6fbe546..0000000
--- a/games_march2025_cleaned_2k/RandomForestClassifier.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.80      0.88      0.84       300
-           1       0.78      0.55      0.64       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.06      0.11        83
-           5       0.00      0.00      0.00         0
-           6       0.74      0.78      0.76       245
-           7       0.00      0.00      0.00        42
-           8       0.84      0.24      0.38       127
-           9       0.00      0.00      0.00        12
-          10       0.91      0.24      0.38       127
-          11       1.00      0.14      0.25        14
-          12       1.00      0.25      0.39       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.79      0.48      0.59      1404
-   macro avg       0.58      0.23      0.27      1404
-weighted avg       0.78      0.48      0.52      1404
- samples avg       0.77      0.54      0.60      1404
diff --git a/games_march2025_cleaned_2k/SVC-RBF-i10000.txt b/games_march2025_cleaned_2k/SVC-RBF-i10000.txt
deleted file mode 100644
index ff0c7b7..0000000
--- a/games_march2025_cleaned_2k/SVC-RBF-i10000.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-              precision    recall  f1-score   support
-
-           0       0.81      0.90      0.85       300
-           1       0.76      0.63      0.69       216
-           2       1.00      0.03      0.07        86
-           3       0.00      0.00      0.00        46
-           4       1.00      0.05      0.09        83
-           5       0.00      0.00      0.00         0
-           6       0.77      0.83      0.80       245
-           7       0.00      0.00      0.00        42
-           8       0.84      0.40      0.54       127
-           9       1.00      0.17      0.29        12
-          10       0.90      0.34      0.49       127
-          11       1.00      0.14      0.25        14
-          12       0.92      0.21      0.34       106
-          13       0.00      0.00      0.00         0
-
-   micro avg       0.80      0.53      0.63      1404
-   macro avg       0.64      0.26      0.32      1404
-weighted avg       0.79      0.53      0.56      1404
- samples avg       0.79      0.59      0.63      1404
diff --git a/notebook.ipynb b/notebook.ipynb
deleted file mode 100644
index 3307ceb..0000000
--- a/notebook.ipynb
+++ /dev/null
@@ -1,530 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "a3a7634f",
-   "metadata": {},
-   "source": [
-    "# Machine Learning project in SoSe 2025 at HTW Saar\n",
-    "## Idea\n",
-    "The goal of this project is predicting the genre(s) of a game/bundle through its given description(s)\n",
-    "\n",
-    "## Dataset\n",
-    "For our project we use a Steam Dataset provided on moodle, since it has all information we plan on using.\n",
-    "The Dataset has been cut to only 2000 data points to be runnable on weaker devices."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3116b75f",
-   "metadata": {
-    "jupyter": {
-     "is_executing": true
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "   appid              name release_date  required_age  price  dlc_count  \\\n",
-      "0    730  Counter-Strike 2   2012-08-21             0    0.0          1   \n",
-      "\n",
-      "                                detailed_description  \\\n",
-      "0  For over two decades, Counter-Strike has offer...   \n",
-      "\n",
-      "                                      about_the_game  \\\n",
-      "0  For over two decades, Counter-Strike has offer...   \n",
-      "\n",
-      "                                   short_description reviews  ...  \\\n",
-      "0  For over two decades, Counter-Strike has offer...     NaN  ...   \n",
-      "\n",
-      "  average_playtime_2weeks median_playtime_forever median_playtime_2weeks  \\\n",
-      "0                     879                    5174                    350   \n",
-      "\n",
-      "  discount  peak_ccu                                               tags  \\\n",
-      "0        0   1212356  {'FPS': 90857, 'Shooter': 65397, 'Multiplayer'...   \n",
-      "\n",
-      "   pct_pos_total  num_reviews_total pct_pos_recent  num_reviews_recent  \n",
-      "0             86            8632939             82               96473  \n",
-      "\n",
-      "[1 rows x 47 columns]\n"
-     ]
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "from sklearn import set_config\n",
-    "\n",
-    "set_config(transform_output=\"pandas\")\n",
-    "\n",
-    "dataset = pd.read_csv(\"./games_march2025_cleaned_2k.csv\",sep=\",\")\n",
-    "print(dataset.head(1))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cba9750a",
-   "metadata": {},
-   "source": [
-    "## Preparation of the Dataset\n",
-    "### Removing Uniques\n",
-    "We would remove the following features from the Training-Set as they can/could uniquely identify a datapoint, but we don't as they will be removed in the next step anyway\n",
-    "- AppId\n",
-    "- Name of the Game\n",
-    "- Realease Date\n",
-    "- Reviews\n",
-    "- Header Image\n",
-    "- Website\n",
-    "- Support URL\n",
-    "- Support Email\n",
-    "- MetaCritic URL\n",
-    "- Developer\n",
-    "- Publisher\n",
-    "- Screenshots\n",
-    "- Movies\n",
-    "- Estimated Owners"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d159117377f3633c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#dataset.drop(['appid', 'name', 'release_date', 'reviews', 'header_image', 'website', 'support_url', 'support_email', 'metacritic_url', 'notes', 'developers', 'publishers', 'screenshots', 'movies', 'estimated_owners'], axis=1, inplace=True)\n",
-    "#print(dataset.head())"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "e1b28ddd69f1e9a6",
-   "metadata": {},
-   "source": [
-    "## Hold onto necessary information\n",
-    "Our model should turn a textual description of a game into its genre. For that we need all the textual information a game has, as well as the genres of the game.\n",
-    "We use a ColumnTransformer to drop all unnecessary lines, merge all descriptions of a game into one big description and hold onto the genres\n",
-    "\n",
-    "It is important to use ``verbose_feature_names_out=False`` so the feature names don't get changed"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "986fbb31a7ae0d8b",
-   "metadata": {
-    "jupyter": {
-     "is_executing": true
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "                                                desc  \\\n",
-      "0  For over two decades, Counter-Strike has offer...   \n",
-      "1  LAND, LOOT, SURVIVE! Play PUBG: BATTLEGROUNDS ...   \n",
-      "2  The most-played game on Steam. Every day, mill...   \n",
-      "3  When a young street hustler, a retired bank ro...   \n",
-      "4  Edition Comparison Ultimate Edition The Tom Cl...   \n",
-      "\n",
-      "                                              genres  \n",
-      "0                         ['Action', 'Free To Play']  \n",
-      "1  ['Action', 'Adventure', 'Massively Multiplayer...  \n",
-      "2             ['Action', 'Strategy', 'Free To Play']  \n",
-      "3                            ['Action', 'Adventure']  \n",
-      "4                                         ['Action']  \n"
-     ]
-    }
-   ],
-   "source": [
-    "from sklearn.compose import ColumnTransformer\n",
-    "from sklearn.preprocessing import FunctionTransformer\n",
-    "\n",
-    "# desc, genres\n",
-    "column_transformer = ColumnTransformer([\n",
-    "        # merge all descriptions\n",
-    "        ('desc', FunctionTransformer(lambda X: X.fillna('').agg(' '.join, axis=1).to_frame(name=\"desc\")),\n",
-    "            ['detailed_description', 'about_the_game', 'short_description']),\n",
-    "        ('pass', 'passthrough', ['genres']),\n",
-    "    ],\n",
-    "    verbose_feature_names_out=False\n",
-    ")\n",
-    "dataset = column_transformer.fit_transform(dataset)\n",
-    "print(dataset.head())"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f9b89c0645811564",
-   "metadata": {},
-   "source": [
-    "### Adding missing Information\n",
-    "Some Games might not have any descriptions. For these we Input an Empty String\n",
-    "**TODO: check if dropna and fillna numeric_only is needed, as we dont have any numbers**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "44239f6b7fd23cde",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# missing numeric values => mean\n",
-    "dataset.fillna(dataset.mean(numeric_only=True), inplace=True)\n",
-    "# missing strings => empty string?\n",
-    "dataset.fillna('', inplace=True)\n",
-    "# drop all lines with missing values\n",
-    "dataset.dropna(inplace=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ca5b59b9fa8160a0",
-   "metadata": {},
-   "source": [
-    "## Transform Genres\n",
-    "The genre information currently is a string holding a python array of genres. While this is machine-readable, we need One-Hot-Encoding for our model to work.\n",
-    "\n",
-    "#### Serializing the String-Array\n",
-    "The \"ast\" library can interpret python strings as python code, and as such will be used for serializing the genres."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ebc5a24e9bc87fdd",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0                               [Action, Free To Play]\n",
-      "1    [Action, Adventure, Massively Multiplayer, Fre...\n",
-      "2                     [Action, Strategy, Free To Play]\n",
-      "3                                  [Action, Adventure]\n",
-      "4                                             [Action]\n",
-      "Name: genres, dtype: object\n"
-     ]
-    }
-   ],
-   "source": [
-    "import ast\n",
-    "\n",
-    "dataset['genres'] = dataset['genres'].map(lambda s: ast.literal_eval(s))\n",
-    "print(dataset['genres'].head())"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f90756f9ad9211f4",
-   "metadata": {},
-   "source": [
-    "#### One-Hot-Encoding an Python-Array\n",
-    "The sklearn ``OneHotEncoder()`` is only able to work with an 1D Array of different classes, such as ``['Politics', 'Sport', 'Culture']``. Every datapoint can only have one concurrent classification.\n",
-    "Steam allows an app/bundle to have multiple genres. As such, our dataset has an 2D Array of different classes, which sklearn's ``MultiLabelBinarizer()`` does support."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d2c3527a5fc876bf",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "   Action  Adventure  Casual  Early Access  Free To Play  Gore  Indie  \\\n",
-      "0       1          0       0             0             1     0      0   \n",
-      "1       1          1       0             0             1     0      0   \n",
-      "2       1          0       0             0             1     0      0   \n",
-      "3       1          1       0             0             0     0      0   \n",
-      "4       1          0       0             0             0     0      0   \n",
-      "\n",
-      "   Massively Multiplayer  RPG  Racing  Simulation  Sports  Strategy  Violent  \n",
-      "0                      0    0       0           0       0         0        0  \n",
-      "1                      1    0       0           0       0         0        0  \n",
-      "2                      0    0       0           0       0         1        0  \n",
-      "3                      0    0       0           0       0         0        0  \n",
-      "4                      0    0       0           0       0         0        0  \n"
-     ]
-    }
-   ],
-   "source": [
-    "from sklearn.preprocessing import MultiLabelBinarizer\n",
-    "\n",
-    "mlb_genres = MultiLabelBinarizer()\n",
-    "genres_encoded = mlb_genres.fit_transform(dataset.pop('genres'))\n",
-    "genres_df = pd.DataFrame(genres_encoded, columns=mlb_genres.classes_)\n",
-    "print(genres_df.head())"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "671c01f9f4ae66d9",
-   "metadata": {},
-   "source": [
-    "With this, our target matrix is completed."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f5436c87",
-   "metadata": {},
-   "source": [
-    "### Structurizing Text\n",
-    "If we want our Model to be able to use text as an input, we have to vectorize the text. TF-IDF (Inverse Document Frequency) is an easy way of transforming each word into a feature with a 0 to 1 value. **TODO: filter out stopwords**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4e8b407c",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "    00  000  000km    000th  00am  00f  00i  00p  00v   01  ...  이터널  이터널리턴  \\\n",
-      "0  0.0  0.0    0.0  0.00000   0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0    0.0   \n",
-      "1  0.0  0.0    0.0  0.00000   0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0    0.0   \n",
-      "2  0.0  0.0    0.0  0.14649   0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0    0.0   \n",
-      "3  0.0  0.0    0.0  0.00000   0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0    0.0   \n",
-      "4  0.0  0.0    0.0  0.00000   0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0    0.0   \n",
-      "\n",
-      "   이현준  정대찬  중입니다   철권  토탈워  페르소나  한국어  한글을  \n",
-      "0  0.0  0.0   0.0  0.0  0.0   0.0  0.0  0.0  \n",
-      "1  0.0  0.0   0.0  0.0  0.0   0.0  0.0  0.0  \n",
-      "2  0.0  0.0   0.0  0.0  0.0   0.0  0.0  0.0  \n",
-      "3  0.0  0.0   0.0  0.0  0.0   0.0  0.0  0.0  \n",
-      "4  0.0  0.0   0.0  0.0  0.0   0.0  0.0  0.0  \n",
-      "\n",
-      "[5 rows x 29351 columns]\n"
-     ]
-    }
-   ],
-   "source": [
-    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
-    "\n",
-    "vectorizer = TfidfVectorizer()\n",
-    "tfidf_matrix = vectorizer.fit_transform(dataset['desc']) # matrix, not pandas df\n",
-    "tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())\n",
-    "print(tfidf_df.head())"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ad84e777",
-   "metadata": {},
-   "source": [
-    "With this our feature matrix is completed"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "86d9da42f4df8e49",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X = tfidf_df\n",
-    "y = genres_df"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "aeb782668f311cd8",
-   "metadata": {},
-   "source": [
-    "## The Model\n",
-    "\n",
-    "####  Removing unpredicatble Datapoints\n",
-    "Some Datapoints don't have a genre assigned (all feature values in y are 0). The model we use can't handle such cases, thus they have to be removed.\n",
-    "We filter after all values that we can use with a mask, and apply that mask to our matrices."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4919bf1b37d171a7",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "13\n"
-     ]
-    }
-   ],
-   "source": [
-    "mask = y.sum(axis=1).map(lambda x: x > 0)\n",
-    "print((mask == False).sum()) # count of unpredictable datapoints\n",
-    "\n",
-    "X_clean = X[mask]\n",
-    "y_clean = y[mask]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "091d7e13",
-   "metadata": {},
-   "source": [
-    "# Splitting up data\n",
-    "We have to split up our data into training and testing data.\n",
-    "Using random_state=0 guarantees reproducability."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cfbf3787",
-   "metadata": {
-    "jupyter": {
-     "is_executing": true
-    }
-   },
-   "outputs": [],
-   "source": [
-    "from sklearn.model_selection import train_test_split\n",
-    "\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X_clean, y_clean, random_state=0)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "12b5283d",
-   "metadata": {},
-   "source": [
-    "# Model Selection\n",
-    "**TODO Deciding which model to use for this task**\n",
-    "\n",
-    "As a game can have multiple genres, our Model(s) has to be capable of multi-label-classification. sklearn's ``MultiOutputClassifier`` can do this. As a backend for ``MultiOutputClassifier`` we use ``LogisticRegression``"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8c1d72c4532bd509",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from sklearn.linear_model import LogisticRegression\n",
-    "from sklearn.multioutput import MultiOutputClassifier\n",
-    "\n",
-    "# n_jobs=1 since there seems to be some multithreading join issue in sklearn (or my pc is to bad)\n",
-    "multi_target_clf = MultiOutputClassifier(LogisticRegression(max_iter=1337, random_state=0), n_jobs=1)\n",
-    "\n",
-    "multi_target_clf.fit(X_train, y_train)\n",
-    "\n",
-    "y_pred = multi_target_clf.predict(X_test)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "0faa9856",
-   "metadata": {},
-   "source": [
-    "# Evaluation\n",
-    "**TODO Test the Model with the test data**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e2ebea6945193e07",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "           0       0.78      0.91      0.84       300\n",
-      "           1       0.78      0.62      0.69       216\n",
-      "           2       1.00      0.03      0.07        86\n",
-      "           3       0.00      0.00      0.00        46\n",
-      "           4       1.00      0.04      0.07        83\n",
-      "           5       0.00      0.00      0.00         0\n",
-      "           6       0.79      0.81      0.80       245\n",
-      "           7       0.00      0.00      0.00        42\n",
-      "           8       0.90      0.34      0.49       127\n",
-      "           9       0.00      0.00      0.00        12\n",
-      "          10       0.89      0.25      0.39       127\n",
-      "          11       0.00      0.00      0.00        14\n",
-      "          12       0.88      0.14      0.24       106\n",
-      "          13       0.00      0.00      0.00         0\n",
-      "\n",
-      "   micro avg       0.79      0.50      0.61      1404\n",
-      "   macro avg       0.50      0.22      0.26      1404\n",
-      "weighted avg       0.77      0.50      0.53      1404\n",
-      " samples avg       0.77      0.56      0.60      1404\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from sklearn.metrics import classification_report\n",
-    "\n",
-    "print(classification_report(y_test, y_pred, zero_division=0.0))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "2aeb6fc2",
-   "metadata": {},
-   "source": [
-    "# Optimization\n",
-    "**TODO optimize the model based on the test results**"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "79b20645",
-   "metadata": {},
-   "source": [
-    "# Validation\n",
-    "**TODO Predict actual values**"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "3b709fb7",
-   "metadata": {},
-   "source": [
-    "# Conclusion and outlook\n",
-    "**TODO Write a conclusion and outlook what can be done and where the issues were.**"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.13.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/plot_maker.py b/plot_maker.py
new file mode 100644
index 0000000..331ae94
--- /dev/null
+++ b/plot_maker.py
@@ -0,0 +1,47 @@
+"""Plot the weighted F1-score of every model on each dataset variant."""
+import os
+import matplotlib.pyplot as plt
+
+# Legend label -> folder whose *.txt files each hold one model's report.
+datasets = {
+    "cleaned": "games_march2025_cleaned",
+    "cleaned_2k": "games_march2025_cleaned_2k",
+    "cleaned_10k": "games_march2025_cleaned_10k",
+}
+# results[dataset_name][model_name] -> weighted-average F1-score
+results = {}
+
+for dataset_name, folder in datasets.items():
+    results[dataset_name] = {}
+    for filename in os.listdir(folder):
+        if not filename.endswith(".txt"):
+            continue
+        model_name = os.path.splitext(filename)[0]
+        with open(os.path.join(folder, filename), "r") as f:
+            for line in f:
+                parts = line.split()
+                # Tokens: weighted avg <precision> <recall> <f1-score> <support>.
+                # The label is two tokens, so the F1-score is parts[4]; parts[3]
+                # is the recall.
+                if parts[:2] == ["weighted", "avg"]:
+                    results[dataset_name][model_name] = float(parts[4])
+
+# Plot: the sorted union of model names gives every dataset the same bar order.
+models = sorted(set().union(*results.values()))
+x = range(len(models))
+width = 0.25  # bar width; with three datasets the bars sit at x - width, x, x + width
+
+plt.figure(figsize=(12, 6))
+for idx, dataset_name in enumerate(datasets):
+    # Centre the group of bars on the tick position of each model.
+    offset = (idx - (len(datasets) - 1) / 2) * width
+    # A report missing for this dataset yields a zero-height bar, not a KeyError.
+    scores = [results[dataset_name].get(m, 0.0) for m in models]
+    plt.bar([i + offset for i in x], scores, width=width, label=dataset_name)
+
+plt.xticks(x, models, rotation=45)
+plt.ylabel("Weighted F1-Score")
+plt.title("Model Performance across Datasets")
+plt.legend()
+plt.tight_layout()
+plt.show()
diff --git a/test_script.py b/test_script.py
deleted file mode 100644
index de7e833..0000000
--- a/test_script.py
+++ /dev/null
@@ -1,133 +0,0 @@
-
-
-#### INITIALIZE
-
-import numpy as np
-import pandas as pd
-from sklearn import set_config
-set_config(transform_output="pandas") # dataframe supremacy
-
-# load data
-# appid,name,release_date,required_age,price,dlc_count,detailed_description,about_the_game,short_description,reviews,header_image,website,support_url,support_email,windows,mac,linux,metacritic_score,metacritic_url,achievements,recommendations,notes,supported_languages,full_audio_languages,packages,developers,publishers,categories,genres,screenshots,movies,user_score,score_rank,positive,negative,estimated_owners,average_playtime_forever,average_playtime_2weeks,median_playtime_forever,median_playtime_2weeks,discount,peak_ccu,tags,pct_pos_total,num_reviews_total,pct_pos_recent,num_reviews_recent
-dataset = pd.read_csv("./games_march2025_cleaned_2k.csv",sep=",")
-print(dataset.head())
-
-
-
-
-#### DROP UNIQUES
-print("DROP")
-
-#TODO: wird eh unten beim transformer deleted
-
-# appid,name,release_date,required_age,price,dlc_count,detailed_description,about_the_game,short_description,reviews,header_image,website,support_url,support_email,windows,mac,linux,metacritic_score,metacritic_url,achievements,recommendations,notes,supported_languages,full_audio_languages,packages,developers,publishers,categories,genres,screenshots,movies,user_score,score_rank,positive,negative,estimated_owners,average_playtime_forever,average_playtime_2weeks,median_playtime_forever,median_playtime_2weeks,discount,peak_ccu,tags,pct_pos_total,num_reviews_total,pct_pos_recent,num_reviews_recent
-#dataset.drop(['appid', 'name', 'release_date', 'reviews', 'header_image', 'website', 'support_url', 'support_email',
-#              'metacritic_url', 'notes', 'developers', 'publishers', 'screenshots', 'movies', 'estimated_owners'],
-#              axis=1, inplace=True)
-#print(dataset.head())
-
-#### STRUCTURIZE AND STANDARDIZE
-print("STRUCTURE")
-
-from sklearn.compose import ColumnTransformer
-from sklearn.preprocessing import FunctionTransformer
-
-
-# desc, genres, tags
-column_transformer = ColumnTransformer([
-        # merge all descriptions
-        ('desc', FunctionTransformer(lambda X: X.fillna('').agg(' '.join, axis=1).to_frame(name="desc")),
-            ['detailed_description', 'about_the_game', 'short_description']),
-        # genre -> actual genre, but very coarse
-        # tags -> user defined tags; title num list
-        #TODO: decide whether we drop tags
-        ('pass', 'passthrough', ['genres']),#, 'tags'
-    ],
-    verbose_feature_names_out=False
-)
-dataset = column_transformer.fit_transform(dataset)
-print(dataset)
-
-
-
-#### SET MISSING VALUES
-print("SETMISS")
-
-
-# Setting missing numeric values to the mean
-dataset.fillna(dataset.mean(numeric_only=True), inplace=True)
-# Setting missing text values to 'Unknown'
-dataset.fillna('', inplace=True)
-# Setting missing values in other columns to NaN
-dataset.dropna(inplace=True)
-
-
-
-
-##### STRUCTURIZE GENRES to onehot
-from sklearn.preprocessing import MultiLabelBinarizer
-import ast
-#serialize array
-dataset['genres'] = dataset['genres'].map(lambda s: ast.literal_eval(s)) 
-print(dataset['genres']) # in py but not yet onehotenc
-
-# MultiLabelBinarizer does onehotenc for arrays
-mlb_genres = MultiLabelBinarizer()
-genres_encoded = mlb_genres.fit_transform(dataset.pop('genres'))
-genres_count = len(mlb_genres.classes_) # for multi-label classifiction later
-
-genres_df = pd.DataFrame(genres_encoded, columns=mlb_genres.classes_)
-print(genres_df)
-#dataset = pd.concat([dataset, genres_df], axis=1)
-#print(dataset)
-
-
-#### convert text to bag of words
-
-## Count vs Tfidf vectorizer
-from sklearn.feature_extraction.text import TfidfVectorizer
-vectorizer = TfidfVectorizer()
-tfidf_matrix = vectorizer.fit_transform(dataset['desc']) # matrix
-tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())
-print(tfidf_df)
-
-
-##### MODEL
-print("MODEL")
-
-from sklearn.linear_model import LogisticRegression
-from sklearn.multioutput import MultiOutputClassifier
-from sklearn.metrics import classification_report
-
-
-X = tfidf_df
-y = genres_df
-
-
-# cleanup datapoints that dont have a target value (all target columns are 0)
-mask = y.sum(axis=1).map(lambda x: x > 0)
-#print((mask == False).sum()) #31 cases with all target columns 0
-X_clean = X[mask]
-y_clean = y[mask]
-
-# Split dataset
-from sklearn.model_selection import train_test_split
-X_train, X_test, y_train, y_test = train_test_split(X_clean, y_clean, random_state=0)
-
-
-# we want to have multiple possible outputs (multi-label-classficiation) -> multioutputclassifier
-# logi regression is our base system
-# n_jobs=1 since there seems to be some multithreading join issue in sklearn (or my pc is too bad)
-multi_target_clf = MultiOutputClassifier(LogisticRegression(max_iter=1337, random_state=0), n_jobs=1)
-
-# model training
-multi_target_clf.fit(X_train, y_train)
-
-# predict against test data
-y_pred = multi_target_clf.predict(X_test)
-
-# print prec, recall, f1 etc
-print(classification_report(y_test, y_pred, zero_division=0.0))
-
-
-#print(f"Trainingsdaten: {X_train.shape}, Testdaten: {X_test.shape}")