changes
This commit is contained in:
Binary file not shown.
|
Before Width: | Height: | Size: 62 KiB |
@@ -22,17 +22,20 @@ for dataset_name, folder in datasets.items():
|
||||
results[dataset_name][model_name] = f1_score
|
||||
|
||||
# Plot
|
||||
models = sorted(results["cleaned"].keys()) # alphabetisch sortieren für gleiche Reihenfolge
|
||||
#models = sorted(results["cleaned_2k"].keys()) # alphabetisch sortieren für gleiche Reihenfolge
|
||||
models = dict(sorted(results["cleaned_2k"].items(), key=lambda i: i[1], reverse=True)) # nach values sortieren
|
||||
x = range(len(models))
|
||||
|
||||
plt.figure(figsize=(12,6))
|
||||
plt.bar([i - 0.25 for i in x], [results["cleaned"][m] for m in models], width=0.25, label="cleaned")
|
||||
plt.bar(x, [results["cleaned_2k"][m] for m in models], width=0.25, label="cleaned_2k")
|
||||
plt.bar([i + 0.25 for i in x], [results["cleaned_10k"][m] for m in models], width=0.25, label="cleaned_10k")
|
||||
#plt.bar([i - 0.25 for i in x], [results["cleaned"][m] for m in models], width=0.25, label="cleaned")
|
||||
plt.bar(x, [results["cleaned_2k"][m] for m in models], width=0.5)#, label="cleaned_2k")
|
||||
#plt.bar([i + 0.25 for i in x], [results["cleaned_10k"][m] for m in models], width=0.25, label="cleaned_10k")
|
||||
|
||||
plt.xticks(x, models, rotation=45)
|
||||
plt.ylabel("F1-Score")
|
||||
plt.xticks(x, models, rotation=90)
|
||||
plt.ylim(0, 1) # min max
|
||||
plt.ylabel("Weighted F1-Score")
|
||||
plt.title("Model Performance across Datasets")
|
||||
plt.legend()
|
||||
#plt.legend()
|
||||
plt.tight_layout()
|
||||
plt.savefig('compare_graph_latest.png')
|
||||
plt.show()
|
||||
59
compare_graph_maker_3.py
Normal file
59
compare_graph_maker_3.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import os
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
# Maps a short dataset label to the folder that holds one classification
# report (``<ModelName>.txt``) per evaluated model.
datasets = {
    # "cleaned": "games_march2025_cleaned",
    # "cleaned_10k": "games_march2025_cleaned_10k",
    "cleaned_2k": "games_march2025_cleaned_2k_i3k",
}

# Row prefixes of the averaged rows in a classification report, in the order
# in which their F1 scores are stored (keys 0, 1, 2) and plotted.
AVERAGES = ("micro avg", "macro avg", "weighted avg")
MICRO, MACRO, WEIGHTED = range(len(AVERAGES))


def read_f1_scores(folder):
    """Collect the averaged F1 scores from every ``*.txt`` report in *folder*.

    Each report is scanned for the rows starting with ``micro avg``,
    ``macro avg`` and ``weighted avg``; the fifth whitespace-separated field
    of such a row is read as the F1 score.

    Returns a dict ``{model_name: {0: micro_f1, 1: macro_f1, 2: weighted_f1}}``
    where ``model_name`` is the file name without its ``.txt`` suffix.
    Reports that lack any of the three rows are skipped with a message, so
    that the ranking by weighted F1 cannot fail on a missing key later on.
    """
    scores = {}
    # Sorted so that the result does not depend on directory listing order.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".txt"):
            continue
        # Strip only the suffix; a ".txt" inside the name is left alone.
        model_name = filename[:-len(".txt")]
        found = {}
        with open(os.path.join(folder, filename), "r", encoding="utf-8") as report:
            for line in report:
                stripped = line.strip()
                for index, prefix in enumerate(AVERAGES):
                    if stripped.startswith(prefix):
                        # Row layout: "<kind> avg <precision> <recall> <f1> <support>"
                        found[index] = float(stripped.split()[4])
        if len(found) != len(AVERAGES):
            print("skipping " + model_name + ": report lacks micro/macro/weighted avg rows")
            continue
        scores[model_name] = found
    return scores


def main():
    """Plot micro, macro and weighted F1 per model for the 2k dataset."""
    results = {name: read_f1_scores(folder) for name, folder in datasets.items()}
    scores = results["cleaned_2k"]

    # Rank the models by weighted F1, best first.
    models = sorted(scores, key=lambda m: scores[m][WEIGHTED], reverse=True)
    print({m: scores[m] for m in models})
    x = range(len(models))

    plt.figure()
    width = 0.25
    # Three bars per model, placed side by side around the tick position.
    for offset, index, label in (
        (-width, MICRO, "Micro"),
        (0, MACRO, "Macro"),
        (width, WEIGHTED, "Weighted"),
    ):
        plt.bar(
            [i + offset for i in x],
            [scores[m][index] for m in models],
            width=width,
            label=label,
        )

    plt.xticks(x, models, rotation=90)
    plt.ylabel("F1 Score")
    plt.ylim(0, 1)
    plt.title("Model Performance - 2k Dataset")
    plt.legend()
    plt.tight_layout()
    # Save before showing: the figure may be discarded once the window closes.
    plt.savefig('compare_graph_latest_3.png')
    plt.show()


if __name__ == "__main__":
    main()
|
||||
126
compare_models_10k.py
Normal file
126
compare_models_10k.py
Normal file
@@ -0,0 +1,126 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn import set_config
|
||||
|
||||
from sklearn.compose import ColumnTransformer
|
||||
from sklearn.preprocessing import FunctionTransformer, MultiLabelBinarizer
|
||||
import ast
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.multioutput import MultiOutputClassifier
|
||||
from sklearn.metrics import classification_report
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.datasets import load_iris
|
||||
from sklearn.metrics import accuracy_score, classification_report
|
||||
from sklearn.svm import SVC, LinearSVC
|
||||
from sklearn.tree import DecisionTreeClassifier
|
||||
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
|
||||
from sklearn.linear_model import LogisticRegression, RidgeClassifier, PassiveAggressiveClassifier, Perceptron, SGDClassifier
|
||||
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid, RadiusNeighborsClassifier
|
||||
from sklearn.svm import SVC
|
||||
from sklearn.tree import DecisionTreeClassifier
|
||||
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier, VotingClassifier, StackingClassifier
|
||||
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB, ComplementNB
|
||||
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
|
||||
from sklearn.dummy import DummyClassifier
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
|
||||
set_config(transform_output="pandas") # dataframe supremacy
|
||||
|
||||
jobs = 12
|
||||
max_iter = 3000
|
||||
|
||||
def _parse_genres(raw):
    """Parse a stringified genre list; a blank entry yields an empty list."""
    # Rows without genres arrive here as '' (NaN was filled with ''), and
    # ast.literal_eval('') would raise SyntaxError.
    return ast.literal_eval(raw) if raw.strip() else []


def prepDataset(dataset): #returns X_train, X_test, y_train, y_test
    """Load a games CSV and build TF-IDF features and one-hot genre targets.

    Parameters:
        dataset: path of a comma-separated file with the columns
            ``detailed_description``, ``about_the_game``, ``short_description``
            and ``genres`` (a stringified Python list per row).

    Returns:
        The result of ``train_test_split(X, y, random_state=0)``, i.e.
        ``X_train, X_test, y_train, y_test``. X holds one TF-IDF column per
        vocabulary term, y one 0/1 column per genre. Rows without any genre
        are removed before splitting.

    NOTE(review): relies on the module-level
    ``set_config(transform_output="pandas")`` so that the ColumnTransformer
    returns a DataFrame -- confirm before reusing this function elsewhere.
    """
    frame = pd.read_csv(dataset, sep=",")

    # Keep only what the model needs: the three description columns merged
    # into a single "desc" text column, plus the raw "genres" column.
    column_transformer = ColumnTransformer(
        [
            ('desc',
             FunctionTransformer(lambda X: X.fillna('').agg(' '.join, axis=1).to_frame(name="desc")),
             ['detailed_description', 'about_the_game', 'short_description']),
            ('pass', 'passthrough', ['genres']),
        ],
        verbose_feature_names_out=False,
    )
    frame = column_transformer.fit_transform(frame)

    #### MISSING VALUES
    print("SETMISS")
    # Only the two text columns are left, so an empty string is the only
    # fill that applies; afterwards no NaN remains.
    frame = frame.fillna('')

    #### GENRES -> one-hot
    # MultiLabelBinarizer creates one 0/1 column per distinct genre.
    mlb_genres = MultiLabelBinarizer()
    genres_encoded = mlb_genres.fit_transform(frame.pop('genres').map(_parse_genres))
    genres_df = pd.DataFrame(genres_encoded, columns=mlb_genres.classes_)

    #### TEXT -> bag of words (TF-IDF)
    # NOTE(review): the vectorizer is fitted on all rows before the split, so
    # vocabulary and IDF weights also reflect the test rows. Fitting on the
    # training split only would avoid that, but changes the reported scores.
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(frame['desc'])
    tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())

    ##### MODEL INPUT
    print("MODEL")
    # Drop datapoints without a target value (all genre columns are 0).
    has_genre = genres_df.sum(axis=1) > 0

    return train_test_split(tfidf_df[has_genre], genres_df[has_genre], random_state=0)
|
||||
def comparison(X_train, X_test, y_train, y_test, estimator,): #returns class_report
    """Train ``estimator`` on every target column and report on the test split.

    The estimator is wrapped in a ``MultiOutputClassifier`` (parallelism is
    taken from the module-level ``jobs``), fitted on the training data and
    used to predict the test data.

    Returns:
        The text produced by ``classification_report`` for the test targets
        versus the predictions, with undefined metrics reported as 0.0.
    """
    wrapped = MultiOutputClassifier(estimator, n_jobs=jobs)
    predictions = wrapped.fit(X_train, y_train).predict(X_test)
    return classification_report(y_test, predictions, zero_division=0.0)
|
||||
# CSV files to evaluate; each gets a result folder named after the file.
datasets = [
    #'games_march2025_cleaned_2k.csv',
    'games_march2025_cleaned_10k.csv',
    #'games_march2025_cleaned.csv'
]

# Candidate models, keyed by the name used for the report file. All share
# random_state=0 and, where the estimator has one, the common iteration limit.
estimators = {
    "RidgeClassifier": RidgeClassifier(random_state=0, max_iter=max_iter),
    "PassiveAggressiveClassifier": PassiveAggressiveClassifier(random_state=0, max_iter=max_iter),
    "Perceptron": Perceptron(random_state=0, max_iter=max_iter),
    "SGDClassifier": SGDClassifier(random_state=0, max_iter=max_iter),
    "NearestCentroid": NearestCentroid(),
    "LinearSVC": LinearSVC(random_state=0, max_iter=max_iter),
    "GradientBoostingClassifier": GradientBoostingClassifier(random_state=0),
    "HistGradientBoostingClassifier": HistGradientBoostingClassifier(random_state=0, max_iter=max_iter),
    "LinearDiscriminantAnalysis": LinearDiscriminantAnalysis(),
    # The MLP gets a twentieth of the common limit and stops early.
    "MLPClassifier": MLPClassifier(random_state=0, max_iter=int(max_iter/20), early_stopping=True),
}

# Not evaluated (kept for reference):
#"VotingClassifier": VotingClassifier(estimators=[('lr', LogisticRegression()), ('rf', RandomForestClassifier())]),
#"StackingClassifier": StackingClassifier(estimators=[('lr', LogisticRegression()), ('rf', RandomForestClassifier())]),

for dataset in datasets:
    print("-" * 60)
    print("dataset -> " + dataset)

    # Reports go to a folder named like the CSV file without its extension.
    print("mkdir")
    folder = os.path.splitext(dataset)[0]
    os.makedirs(folder, exist_ok=True)

    X_train, X_test, y_train, y_test = prepDataset(dataset)

    for esti, estimator in estimators.items():
        print("model: " + esti)
        compari = comparison(X_train, X_test, y_train, y_test, estimator)

        print("open")
        # The context manager closes the file even if writing fails.
        with open(os.path.join(folder, esti + ".txt"), mode="w", encoding="utf-8") as f:
            f.write(compari)
            print("write")
        print("close")
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 343 KiB After Width: | Height: | Size: 66 KiB |
@@ -27,6 +27,10 @@ from sklearn.dummy import DummyClassifier
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
|
||||
set_config(transform_output="pandas") # dataframe supremacy
|
||||
|
||||
jobs = 12
|
||||
max_iter = 3000
|
||||
|
||||
def prepDataset(dataset): #returns X_train, X_test, y_train, y_test
|
||||
dataset = pd.read_csv(dataset,sep=",")
|
||||
# desc, genres, tags
|
||||
@@ -76,7 +80,7 @@ def prepDataset(dataset): #returns X_train, X_test, y_train, y_test
|
||||
y_clean = y[mask]
|
||||
# Split dataset
|
||||
return train_test_split(X_clean, y_clean, random_state=0)
|
||||
def comparison(X_train, X_test, y_train, y_test, estimator, jobs: int = 1): #returns class_report
|
||||
def comparison(X_train, X_test, y_train, y_test, estimator,): #returns class_report
|
||||
multi_target_clf = MultiOutputClassifier(estimator, n_jobs=jobs) # LogisticRegression(max_iter=1337, random_state=0)
|
||||
# model training
|
||||
multi_target_clf.fit(X_train, y_train)
|
||||
@@ -88,9 +92,6 @@ datasets = [
|
||||
#'games_march2025_cleaned_10k.csv',
|
||||
#'games_march2025_cleaned.csv'
|
||||
]
|
||||
|
||||
max_iter = 3000 # <-- set your desired value here
|
||||
|
||||
estimators = {
|
||||
"LogisticRegression": LogisticRegression(random_state=0, max_iter=max_iter),
|
||||
"RidgeClassifier": RidgeClassifier(random_state=0, max_iter=max_iter),
|
||||
@@ -99,8 +100,8 @@ estimators = {
|
||||
"SGDClassifier": SGDClassifier(random_state=0, max_iter=max_iter),
|
||||
"KNeighborsClassifier": KNeighborsClassifier(),
|
||||
"NearestCentroid": NearestCentroid(),
|
||||
"RadiusNeighborsClassifier": RadiusNeighborsClassifier(),
|
||||
"LinearSVC-i5000": LinearSVC(random_state=0, max_iter=max_iter),
|
||||
# "RadiusNeighborsClassifier": RadiusNeighborsClassifier(), # failed bcs no neighbours in range :sob:
|
||||
"LinearSVC": LinearSVC(random_state=0, max_iter=max_iter),
|
||||
"SVC": SVC(random_state=0, max_iter=max_iter),
|
||||
"DecisionTreeClassifier": DecisionTreeClassifier(random_state=0),
|
||||
"RandomForestClassifier": RandomForestClassifier(random_state=0),
|
||||
@@ -114,8 +115,7 @@ estimators = {
|
||||
"BernoulliNB": BernoulliNB(),
|
||||
"ComplementNB": ComplementNB(),
|
||||
"LinearDiscriminantAnalysis": LinearDiscriminantAnalysis(),
|
||||
"QuadraticDiscriminantAnalysis": QuadraticDiscriminantAnalysis(),
|
||||
"MLPClassifier-i10000": MLPClassifier(max_iter=10000, random_state=0),
|
||||
"MLPClassifier": MLPClassifier(random_state=0, max_iter=int(max_iter/5), verbose=True),
|
||||
"DummyClassifier": DummyClassifier(random_state=0)
|
||||
}
|
||||
|
||||
@@ -131,7 +131,7 @@ for dataset in datasets:
|
||||
X_train, X_test, y_train, y_test = prepDataset(dataset)
|
||||
for esti in estimators:
|
||||
print("model: " + esti)
|
||||
compari = comparison(X_train, X_test, y_train, y_test, estimators[esti], 1) #TODO: change the job count if you can
|
||||
compari = comparison(X_train, X_test, y_train, y_test, estimators[esti])
|
||||
print("open")
|
||||
f = open(folder + "/" + esti +".txt", mode="w+", encoding="utf-8")
|
||||
f.write(compari)
|
||||
BIN
compare_models_2k_3.png
Normal file
BIN
compare_models_2k_3.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 60 KiB |
21
games_march2025_cleaned_2k_i3k/AdaBoostClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/AdaBoostClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.87 0.76 0.81 300
|
||||
1 0.70 0.59 0.64 216
|
||||
2 0.58 0.13 0.21 86
|
||||
3 0.56 0.11 0.18 46
|
||||
4 0.71 0.30 0.42 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.69 0.70 0.69 245
|
||||
7 0.62 0.31 0.41 42
|
||||
8 0.76 0.41 0.53 127
|
||||
9 1.00 0.50 0.67 12
|
||||
10 0.67 0.50 0.57 127
|
||||
11 0.40 0.29 0.33 14
|
||||
12 0.74 0.45 0.56 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.74 0.54 0.62 1404
|
||||
macro avg 0.59 0.36 0.43 1404
|
||||
weighted avg 0.73 0.54 0.60 1404
|
||||
samples avg 0.74 0.59 0.61 1404
|
||||
21
games_march2025_cleaned_2k_i3k/BaggingClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/BaggingClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.86 0.70 0.77 300
|
||||
1 0.72 0.50 0.59 216
|
||||
2 0.47 0.09 0.16 86
|
||||
3 0.50 0.04 0.08 46
|
||||
4 0.58 0.23 0.33 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.71 0.64 0.67 245
|
||||
7 0.80 0.29 0.42 42
|
||||
8 0.79 0.46 0.58 127
|
||||
9 1.00 0.25 0.40 12
|
||||
10 0.71 0.43 0.53 127
|
||||
11 0.40 0.29 0.33 14
|
||||
12 0.68 0.42 0.52 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.74 0.49 0.59 1404
|
||||
macro avg 0.59 0.31 0.39 1404
|
||||
weighted avg 0.72 0.49 0.56 1404
|
||||
samples avg 0.70 0.54 0.57 1404
|
||||
21
games_march2025_cleaned_2k_i3k/BernoulliNB.txt
Normal file
21
games_march2025_cleaned_2k_i3k/BernoulliNB.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.75 0.90 0.82 300
|
||||
1 0.72 0.68 0.70 216
|
||||
2 0.50 0.08 0.14 86
|
||||
3 0.27 0.07 0.11 46
|
||||
4 0.40 0.07 0.12 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.77 0.82 0.79 245
|
||||
7 0.33 0.10 0.15 42
|
||||
8 0.67 0.40 0.50 127
|
||||
9 0.00 0.00 0.00 12
|
||||
10 0.71 0.37 0.49 127
|
||||
11 0.00 0.00 0.00 14
|
||||
12 0.49 0.31 0.38 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.70 0.55 0.62 1404
|
||||
macro avg 0.40 0.27 0.30 1404
|
||||
weighted avg 0.64 0.55 0.56 1404
|
||||
samples avg 0.73 0.59 0.61 1404
|
||||
21
games_march2025_cleaned_2k_i3k/ComplementNB.txt
Normal file
21
games_march2025_cleaned_2k_i3k/ComplementNB.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.67 0.98 0.80 300
|
||||
1 0.81 0.36 0.50 216
|
||||
2 0.67 0.05 0.09 86
|
||||
3 0.00 0.00 0.00 46
|
||||
4 0.80 0.05 0.09 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.77 0.81 0.79 245
|
||||
7 0.40 0.05 0.09 42
|
||||
8 0.83 0.04 0.08 127
|
||||
9 0.00 0.00 0.00 12
|
||||
10 0.43 0.02 0.04 127
|
||||
11 0.00 0.00 0.00 14
|
||||
12 1.00 0.05 0.09 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.70 0.42 0.53 1404
|
||||
macro avg 0.46 0.17 0.18 1404
|
||||
weighted avg 0.69 0.42 0.42 1404
|
||||
samples avg 0.71 0.46 0.52 1404
|
||||
21
games_march2025_cleaned_2k_i3k/DecisionTreeClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/DecisionTreeClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.76 0.73 0.75 300
|
||||
1 0.56 0.53 0.54 216
|
||||
2 0.36 0.33 0.34 86
|
||||
3 0.33 0.26 0.29 46
|
||||
4 0.40 0.46 0.43 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.65 0.61 0.63 245
|
||||
7 0.39 0.40 0.40 42
|
||||
8 0.59 0.57 0.58 127
|
||||
9 0.60 0.25 0.35 12
|
||||
10 0.56 0.51 0.53 127
|
||||
11 0.39 0.50 0.44 14
|
||||
12 0.52 0.49 0.50 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.58 0.55 0.57 1404
|
||||
macro avg 0.44 0.40 0.41 1404
|
||||
weighted avg 0.58 0.55 0.57 1404
|
||||
samples avg 0.59 0.59 0.55 1404
|
||||
21
games_march2025_cleaned_2k_i3k/DummyClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/DummyClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.60 1.00 0.75 300
|
||||
1 0.00 0.00 0.00 216
|
||||
2 0.00 0.00 0.00 86
|
||||
3 0.00 0.00 0.00 46
|
||||
4 0.00 0.00 0.00 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.00 0.00 0.00 245
|
||||
7 0.00 0.00 0.00 42
|
||||
8 0.00 0.00 0.00 127
|
||||
9 0.00 0.00 0.00 12
|
||||
10 0.00 0.00 0.00 127
|
||||
11 0.00 0.00 0.00 14
|
||||
12 0.00 0.00 0.00 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.60 0.21 0.32 1404
|
||||
macro avg 0.04 0.07 0.05 1404
|
||||
weighted avg 0.13 0.21 0.16 1404
|
||||
samples avg 0.60 0.26 0.34 1404
|
||||
21
games_march2025_cleaned_2k_i3k/ExtraTreesClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/ExtraTreesClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.81 0.91 0.86 300
|
||||
1 0.78 0.62 0.69 216
|
||||
2 1.00 0.03 0.07 86
|
||||
3 0.00 0.00 0.00 46
|
||||
4 1.00 0.04 0.07 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.78 0.73 0.75 245
|
||||
7 0.00 0.00 0.00 42
|
||||
8 0.84 0.24 0.38 127
|
||||
9 1.00 0.17 0.29 12
|
||||
10 0.90 0.21 0.34 127
|
||||
11 1.00 0.14 0.25 14
|
||||
12 0.83 0.18 0.29 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.80 0.48 0.60 1404
|
||||
macro avg 0.64 0.23 0.29 1404
|
||||
weighted avg 0.79 0.48 0.52 1404
|
||||
samples avg 0.78 0.54 0.60 1404
|
||||
21
games_march2025_cleaned_2k_i3k/GaussianNB.txt
Normal file
21
games_march2025_cleaned_2k_i3k/GaussianNB.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.76 0.80 0.78 300
|
||||
1 0.62 0.51 0.56 216
|
||||
2 0.63 0.14 0.23 86
|
||||
3 0.17 0.02 0.04 46
|
||||
4 0.42 0.10 0.16 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.68 0.66 0.67 245
|
||||
7 0.56 0.12 0.20 42
|
||||
8 0.55 0.33 0.41 127
|
||||
9 0.67 0.17 0.27 12
|
||||
10 0.65 0.31 0.42 127
|
||||
11 1.00 0.14 0.25 14
|
||||
12 0.53 0.29 0.38 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.66 0.47 0.55 1404
|
||||
macro avg 0.52 0.26 0.31 1404
|
||||
weighted avg 0.62 0.47 0.51 1404
|
||||
samples avg 0.67 0.53 0.55 1404
|
||||
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.87 0.80 0.83 300
|
||||
1 0.77 0.61 0.68 216
|
||||
2 0.55 0.13 0.21 86
|
||||
3 0.42 0.11 0.17 46
|
||||
4 0.68 0.33 0.44 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.71 0.76 0.74 245
|
||||
7 0.61 0.26 0.37 42
|
||||
8 0.81 0.50 0.61 127
|
||||
9 0.75 0.25 0.38 12
|
||||
10 0.81 0.54 0.65 127
|
||||
11 0.40 0.43 0.41 14
|
||||
12 0.69 0.42 0.53 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.76 0.57 0.65 1404
|
||||
macro avg 0.58 0.37 0.43 1404
|
||||
weighted avg 0.74 0.57 0.63 1404
|
||||
samples avg 0.77 0.63 0.65 1404
|
||||
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.83 0.83 0.83 300
|
||||
1 0.74 0.69 0.72 216
|
||||
2 0.80 0.28 0.41 86
|
||||
3 1.00 0.04 0.08 46
|
||||
4 0.70 0.39 0.50 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.72 0.76 0.74 245
|
||||
7 0.73 0.19 0.30 42
|
||||
8 0.85 0.59 0.70 127
|
||||
9 1.00 0.33 0.50 12
|
||||
10 0.78 0.54 0.64 127
|
||||
11 0.43 0.21 0.29 14
|
||||
12 0.77 0.52 0.62 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.78 0.61 0.68 1404
|
||||
macro avg 0.67 0.38 0.45 1404
|
||||
weighted avg 0.78 0.61 0.66 1404
|
||||
samples avg 0.79 0.67 0.69 1404
|
||||
21
games_march2025_cleaned_2k_i3k/KNeighborsClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/KNeighborsClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.82 0.62 0.70 300
|
||||
1 0.69 0.46 0.55 216
|
||||
2 0.62 0.06 0.11 86
|
||||
3 0.20 0.02 0.04 46
|
||||
4 0.72 0.16 0.26 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.78 0.55 0.64 245
|
||||
7 0.38 0.12 0.18 42
|
||||
8 0.59 0.65 0.62 127
|
||||
9 1.00 0.67 0.80 12
|
||||
10 0.68 0.44 0.54 127
|
||||
11 1.00 0.29 0.44 14
|
||||
12 0.34 0.76 0.48 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.64 0.48 0.55 1404
|
||||
macro avg 0.56 0.34 0.38 1404
|
||||
weighted avg 0.68 0.48 0.53 1404
|
||||
samples avg 0.64 0.54 0.55 1404
|
||||
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.63 0.68 0.66 300
|
||||
1 0.47 0.56 0.51 216
|
||||
2 0.27 0.59 0.37 86
|
||||
3 0.06 0.28 0.10 46
|
||||
4 0.21 0.52 0.30 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.63 0.67 0.65 245
|
||||
7 0.06 0.29 0.10 42
|
||||
8 0.28 0.52 0.36 127
|
||||
9 0.03 0.42 0.06 12
|
||||
10 0.29 0.52 0.38 127
|
||||
11 0.04 0.43 0.07 14
|
||||
12 0.53 0.44 0.48 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.30 0.57 0.39 1404
|
||||
macro avg 0.25 0.42 0.29 1404
|
||||
weighted avg 0.44 0.57 0.48 1404
|
||||
samples avg 0.42 0.62 0.40 1404
|
||||
21
games_march2025_cleaned_2k_i3k/LinearSVC.txt
Normal file
21
games_march2025_cleaned_2k_i3k/LinearSVC.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.85 0.87 0.86 300
|
||||
1 0.76 0.66 0.70 216
|
||||
2 0.77 0.20 0.31 86
|
||||
3 0.00 0.00 0.00 46
|
||||
4 0.76 0.27 0.39 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.78 0.81 0.79 245
|
||||
7 0.89 0.19 0.31 42
|
||||
8 0.77 0.60 0.67 127
|
||||
9 1.00 0.58 0.74 12
|
||||
10 0.85 0.54 0.66 127
|
||||
11 1.00 0.29 0.44 14
|
||||
12 0.82 0.42 0.56 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.80 0.61 0.69 1404
|
||||
macro avg 0.66 0.39 0.46 1404
|
||||
weighted avg 0.78 0.61 0.66 1404
|
||||
samples avg 0.81 0.67 0.69 1404
|
||||
21
games_march2025_cleaned_2k_i3k/LogisticRegression.txt
Normal file
21
games_march2025_cleaned_2k_i3k/LogisticRegression.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.78 0.91 0.84 300
|
||||
1 0.78 0.62 0.69 216
|
||||
2 1.00 0.03 0.07 86
|
||||
3 0.00 0.00 0.00 46
|
||||
4 1.00 0.04 0.07 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.79 0.81 0.80 245
|
||||
7 0.00 0.00 0.00 42
|
||||
8 0.90 0.34 0.49 127
|
||||
9 0.00 0.00 0.00 12
|
||||
10 0.89 0.25 0.39 127
|
||||
11 0.00 0.00 0.00 14
|
||||
12 0.88 0.14 0.24 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.79 0.50 0.61 1404
|
||||
macro avg 0.50 0.22 0.26 1404
|
||||
weighted avg 0.77 0.50 0.53 1404
|
||||
samples avg 0.77 0.56 0.60 1404
|
||||
21
games_march2025_cleaned_2k_i3k/MLPClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/MLPClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.84 0.85 0.84 300
|
||||
1 0.73 0.67 0.70 216
|
||||
2 0.74 0.30 0.43 86
|
||||
3 0.50 0.02 0.04 46
|
||||
4 0.69 0.24 0.36 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.79 0.79 0.79 245
|
||||
7 0.86 0.14 0.24 42
|
||||
8 0.76 0.63 0.69 127
|
||||
9 1.00 0.33 0.50 12
|
||||
10 0.81 0.52 0.63 127
|
||||
11 1.00 0.14 0.25 14
|
||||
12 0.75 0.41 0.53 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.79 0.60 0.68 1404
|
||||
macro avg 0.68 0.36 0.43 1404
|
||||
weighted avg 0.78 0.60 0.65 1404
|
||||
samples avg 0.80 0.66 0.68 1404
|
||||
21
games_march2025_cleaned_2k_i3k/MultinomialNB.txt
Normal file
21
games_march2025_cleaned_2k_i3k/MultinomialNB.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.64 0.99 0.78 300
|
||||
1 0.85 0.24 0.37 216
|
||||
2 0.60 0.03 0.07 86
|
||||
3 0.00 0.00 0.00 46
|
||||
4 0.80 0.05 0.09 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.78 0.80 0.79 245
|
||||
7 0.40 0.05 0.09 42
|
||||
8 1.00 0.04 0.08 127
|
||||
9 0.00 0.00 0.00 12
|
||||
10 0.20 0.01 0.02 127
|
||||
11 0.00 0.00 0.00 14
|
||||
12 1.00 0.05 0.09 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.69 0.40 0.51 1404
|
||||
macro avg 0.45 0.16 0.17 1404
|
||||
weighted avg 0.68 0.40 0.39 1404
|
||||
samples avg 0.70 0.44 0.50 1404
|
||||
21
games_march2025_cleaned_2k_i3k/NearestCentroid.txt
Normal file
21
games_march2025_cleaned_2k_i3k/NearestCentroid.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.83 0.75 0.79 300
|
||||
1 0.65 0.75 0.70 216
|
||||
2 0.43 0.72 0.54 86
|
||||
3 0.18 0.33 0.23 46
|
||||
4 0.46 0.61 0.53 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.74 0.76 0.75 245
|
||||
7 0.31 0.62 0.41 42
|
||||
8 0.47 0.69 0.55 127
|
||||
9 1.00 0.67 0.80 12
|
||||
10 0.59 0.69 0.64 127
|
||||
11 0.60 0.64 0.62 14
|
||||
12 0.42 0.66 0.52 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.57 0.70 0.63 1404
|
||||
macro avg 0.48 0.56 0.50 1404
|
||||
weighted avg 0.62 0.70 0.65 1404
|
||||
samples avg 0.63 0.74 0.64 1404
|
||||
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.84 0.86 0.85 300
|
||||
1 0.74 0.63 0.68 216
|
||||
2 0.77 0.31 0.45 86
|
||||
3 0.50 0.04 0.08 46
|
||||
4 0.69 0.33 0.44 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.79 0.80 0.79 245
|
||||
7 0.69 0.26 0.38 42
|
||||
8 0.74 0.62 0.68 127
|
||||
9 1.00 0.67 0.80 12
|
||||
10 0.80 0.57 0.67 127
|
||||
11 1.00 0.50 0.67 14
|
||||
12 0.79 0.46 0.58 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.79 0.62 0.69 1404
|
||||
macro avg 0.67 0.43 0.50 1404
|
||||
weighted avg 0.77 0.62 0.67 1404
|
||||
samples avg 0.80 0.68 0.70 1404
|
||||
21
games_march2025_cleaned_2k_i3k/Perceptron.txt
Normal file
21
games_march2025_cleaned_2k_i3k/Perceptron.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.78 0.94 0.85 300
|
||||
1 0.60 0.88 0.71 216
|
||||
2 0.54 0.60 0.57 86
|
||||
3 0.33 0.04 0.08 46
|
||||
4 0.68 0.16 0.25 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.74 0.86 0.80 245
|
||||
7 0.63 0.29 0.39 42
|
||||
8 0.62 0.80 0.69 127
|
||||
9 1.00 0.67 0.80 12
|
||||
10 0.89 0.43 0.58 127
|
||||
11 0.70 0.50 0.58 14
|
||||
12 0.88 0.27 0.42 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.70 0.68 0.69 1404
|
||||
macro avg 0.60 0.46 0.48 1404
|
||||
weighted avg 0.71 0.68 0.66 1404
|
||||
samples avg 0.72 0.74 0.69 1404
|
||||
21
games_march2025_cleaned_2k_i3k/RandomForestClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/RandomForestClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.80 0.88 0.84 300
|
||||
1 0.78 0.55 0.64 216
|
||||
2 1.00 0.03 0.07 86
|
||||
3 0.00 0.00 0.00 46
|
||||
4 1.00 0.06 0.11 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.74 0.78 0.76 245
|
||||
7 0.00 0.00 0.00 42
|
||||
8 0.84 0.24 0.38 127
|
||||
9 0.00 0.00 0.00 12
|
||||
10 0.91 0.24 0.38 127
|
||||
11 1.00 0.14 0.25 14
|
||||
12 1.00 0.25 0.39 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.79 0.48 0.59 1404
|
||||
macro avg 0.58 0.23 0.27 1404
|
||||
weighted avg 0.78 0.48 0.52 1404
|
||||
samples avg 0.77 0.54 0.60 1404
|
||||
21
games_march2025_cleaned_2k_i3k/RidgeClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/RidgeClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.84 0.88 0.86 300
|
||||
1 0.76 0.66 0.70 216
|
||||
2 0.80 0.14 0.24 86
|
||||
3 0.00 0.00 0.00 46
|
||||
4 0.85 0.20 0.33 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.78 0.82 0.80 245
|
||||
7 0.86 0.14 0.24 42
|
||||
8 0.79 0.54 0.64 127
|
||||
9 1.00 0.42 0.59 12
|
||||
10 0.88 0.50 0.64 127
|
||||
11 1.00 0.14 0.25 14
|
||||
12 0.83 0.38 0.52 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.81 0.59 0.68 1404
|
||||
macro avg 0.67 0.34 0.42 1404
|
||||
weighted avg 0.79 0.59 0.63 1404
|
||||
samples avg 0.81 0.65 0.68 1404
|
||||
21
games_march2025_cleaned_2k_i3k/SGDClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/SGDClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.86 0.84 0.85 300
|
||||
1 0.80 0.52 0.63 216
|
||||
2 0.68 0.35 0.46 86
|
||||
3 0.44 0.09 0.15 46
|
||||
4 0.68 0.34 0.45 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.77 0.80 0.79 245
|
||||
7 0.71 0.24 0.36 42
|
||||
8 0.75 0.55 0.64 127
|
||||
9 1.00 0.58 0.74 12
|
||||
10 0.85 0.52 0.64 127
|
||||
11 0.89 0.57 0.70 14
|
||||
12 0.60 0.64 0.62 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.77 0.61 0.68 1404
|
||||
macro avg 0.65 0.43 0.50 1404
|
||||
weighted avg 0.77 0.61 0.66 1404
|
||||
samples avg 0.79 0.67 0.69 1404
|
||||
21
games_march2025_cleaned_2k_i3k/SVC.txt
Normal file
21
games_march2025_cleaned_2k_i3k/SVC.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
precision recall f1-score support
|
||||
|
||||
0 0.81 0.90 0.85 300
|
||||
1 0.76 0.63 0.69 216
|
||||
2 1.00 0.03 0.07 86
|
||||
3 0.00 0.00 0.00 46
|
||||
4 1.00 0.05 0.09 83
|
||||
5 0.00 0.00 0.00 0
|
||||
6 0.77 0.83 0.80 245
|
||||
7 0.00 0.00 0.00 42
|
||||
8 0.84 0.40 0.54 127
|
||||
9 1.00 0.17 0.29 12
|
||||
10 0.90 0.34 0.49 127
|
||||
11 1.00 0.14 0.25 14
|
||||
12 0.92 0.21 0.34 106
|
||||
13 0.00 0.00 0.00 0
|
||||
|
||||
micro avg 0.80 0.53 0.63 1404
|
||||
macro avg 0.64 0.26 0.32 1404
|
||||
weighted avg 0.79 0.53 0.56 1404
|
||||
samples avg 0.79 0.59 0.63 1404
|
||||
@@ -304,13 +304,47 @@
|
||||
"``sklearn`` has many different classification Models to choose from, but we only have limited time and computing power.\n",
|
||||
"As such, we tested many different models on the 2k Dataset and chose the 5 best performing ones for the big dataset.\n",
|
||||
"\n",
|
||||
"### The comparison\n",
|
||||
"We won't put the comparison script in this notebook, but you can find it in the ``compare_models.py`` file and try it out yourself.\n",
|
||||
"### Initial Comparison\n",
|
||||
"We won't put the comparison script in this notebook, but you can find it in the ``compare_models_2k.py`` file and try it out yourself.\n",
|
||||
"There were some rules as a baseline for comparison:\n",
|
||||
"- All Hyperparameters are set to default\n",
|
||||
"- All iteration limits are set to 3000\n",
|
||||
"- All iteration limits are set to 3000 (exception: MLPClassifier uses 300, because its limit counts epochs rather than iterations)\n",
|
||||
"- All ``random_state``s are set to 0\n",
|
||||
"\n",
|
||||
""
|
||||
"Running all models with that configuration yields the following weighted F1-Scores (results as seen in the ``games_march2025_cleaned_2k_i3k`` folder): \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"If we also compare Micro/Macro values, we see that all models have a much lower Macro-F1 than Micro/Weighted-F1. That is because the 2k Dataset does not contain enough datapoints for every class (two classes have no samples at all in the test split), so we should proceed to the 10k Dataset before making major choices.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"The 10 best performing models which will run on the 10k Dataset with the same rules as before:\n",
|
||||
"1. NearestCentroid\n",
|
||||
"2. Perceptron\n",
|
||||
"3. PassiveAggressiveClassifier\n",
|
||||
"4. LinearSVC\n",
|
||||
"5. SGDClassifier\n",
|
||||
"6. HistGradientBoostingClassifier\n",
|
||||
"7. MLPClassifier\n",
|
||||
"8. RidgeClassifier\n",
|
||||
"9. GradientBoostingClassifier\n",
|
||||
"10. LinearDiscriminantAnalysis\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"We can also compare these models between datasets, to see if a bigger dataset always improves the performance.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"The final contenders are:\n",
|
||||
"1.\n",
|
||||
"2.\n",
|
||||
"3.\n",
|
||||
"4.\n",
|
||||
"5.\n",
|
||||
"\n",
|
||||
"..."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user