changes
This commit is contained in:
Binary file not shown.
|
Before Width: | Height: | Size: 62 KiB |
@@ -22,17 +22,20 @@ for dataset_name, folder in datasets.items():
|
|||||||
results[dataset_name][model_name] = f1_score
|
results[dataset_name][model_name] = f1_score
|
||||||
|
|
||||||
# Plot
|
# Plot
|
||||||
models = sorted(results["cleaned"].keys()) # alphabetisch sortieren für gleiche Reihenfolge
|
#models = sorted(results["cleaned_2k"].keys()) # alphabetisch sortieren für gleiche Reihenfolge
|
||||||
|
models = dict(sorted(results["cleaned_2k"].items(), key=lambda i: i[1], reverse=True)) # nach values sortieren
|
||||||
x = range(len(models))
|
x = range(len(models))
|
||||||
|
|
||||||
plt.figure(figsize=(12,6))
|
plt.figure(figsize=(12,6))
|
||||||
plt.bar([i - 0.25 for i in x], [results["cleaned"][m] for m in models], width=0.25, label="cleaned")
|
#plt.bar([i - 0.25 for i in x], [results["cleaned"][m] for m in models], width=0.25, label="cleaned")
|
||||||
plt.bar(x, [results["cleaned_2k"][m] for m in models], width=0.25, label="cleaned_2k")
|
plt.bar(x, [results["cleaned_2k"][m] for m in models], width=0.5)#, label="cleaned_2k")
|
||||||
plt.bar([i + 0.25 for i in x], [results["cleaned_10k"][m] for m in models], width=0.25, label="cleaned_10k")
|
#plt.bar([i + 0.25 for i in x], [results["cleaned_10k"][m] for m in models], width=0.25, label="cleaned_10k")
|
||||||
|
|
||||||
plt.xticks(x, models, rotation=45)
|
plt.xticks(x, models, rotation=90)
|
||||||
plt.ylabel("F1-Score")
|
plt.ylim(0, 1) # min max
|
||||||
|
plt.ylabel("Weighted F1-Score")
|
||||||
plt.title("Model Performance across Datasets")
|
plt.title("Model Performance across Datasets")
|
||||||
plt.legend()
|
#plt.legend()
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
|
plt.savefig('compare_graph_latest.png')
|
||||||
plt.show()
|
plt.show()
|
||||||
59
compare_graph_maker_3.py
Normal file
59
compare_graph_maker_3.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
import os
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Maps the label used as key in the results dict to the folder that holds
# one classification-report .txt file per model.
datasets = {
    # Other report folders that can be enabled here:
    # "cleaned": "games_march2025_cleaned",
    # "cleaned_2k": "games_march2025_cleaned_2k",
    # "cleaned_10k": "games_march2025_cleaned_10k",
    "cleaned_2k": "games_march2025_cleaned_2k_i3k",
}

# Slot in a model's score dict for each averaged row of the report.
AVERAGE_SLOTS = {"micro avg": 0, "macro avg": 1, "weighted avg": 2}

# Only this entry of the results dict is plotted.
PLOTTED_DATASET = "cleaned_2k"


def read_f1_scores(path):
    """Return the averaged F1 scores found in one classification report.

    The result maps 0 -> micro, 1 -> macro, 2 -> weighted F1. A slot is
    absent when the report has no row starting with the matching prefix.
    """
    scores = {}
    with open(path, "r", encoding="utf-8") as report:
        for line in report:
            stripped = line.strip()
            for prefix, slot in AVERAGE_SLOTS.items():
                if stripped.startswith(prefix):
                    # Whitespace-split tokens: "<kind>", "avg", precision,
                    # recall, f1-score, support -> f1-score is token 4.
                    scores[slot] = float(stripped.split()[4])
                    break
    return scores


def collect_results(folders):
    """Read every ``.txt`` report in each folder.

    Returns ``{dataset_name: {model_name: {slot: f1}}}``. Files are visited
    in sorted order so that equal scores keep a reproducible order later.
    """
    results = {}
    for dataset_name, folder in folders.items():
        results[dataset_name] = {}
        for filename in sorted(os.listdir(folder)):
            if not filename.endswith(".txt"):
                continue
            model_name = filename.replace(".txt", "")
            print("model " + model_name)
            results[dataset_name][model_name] = read_f1_scores(
                os.path.join(folder, filename)
            )
    return results


def main():
    """Plot micro/macro/weighted F1 per model as grouped bars and save the figure."""
    results = collect_results(datasets)

    # A report without all three averaged rows cannot be ranked or drawn;
    # skip it with a message instead of failing with a KeyError.
    complete = {}
    for name, scores in results[PLOTTED_DATASET].items():
        if len(scores) == len(AVERAGE_SLOTS):
            complete[name] = scores
        else:
            print("skipping " + name + ": report lacks micro/macro/weighted avg rows")

    # Sort by weighted F1, best model first.
    models = dict(sorted(complete.items(), key=lambda item: item[1][2], reverse=True))
    print(models)
    x = range(len(models))

    plt.figure()

    # Three bars per model, centred on the tick: micro | macro | weighted.
    width = 0.25
    for slot, label in enumerate(("Micro", "Macro", "Weighted")):
        offset = (slot - 1) * width
        plt.bar(
            [i + offset for i in x],
            [models[m][slot] for m in models],
            width=width,
            label=label,
        )

    plt.xticks(x, list(models), rotation=90)
    plt.ylabel("F1 Score")
    plt.ylim(0, 1)
    plt.title("Model Performance - 2k Dataset")
    plt.legend()
    plt.tight_layout()
    plt.savefig('compare_graph_latest_3.png')
    plt.show()


if __name__ == "__main__":
    main()
|
||||||
126
compare_models_10k.py
Normal file
126
compare_models_10k.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn import set_config
|
||||||
|
|
||||||
|
from sklearn.compose import ColumnTransformer
|
||||||
|
from sklearn.preprocessing import FunctionTransformer, MultiLabelBinarizer
|
||||||
|
import ast
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
from sklearn.multioutput import MultiOutputClassifier
|
||||||
|
from sklearn.metrics import classification_report
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.datasets import load_iris
|
||||||
|
from sklearn.metrics import accuracy_score, classification_report
|
||||||
|
from sklearn.svm import SVC, LinearSVC
|
||||||
|
from sklearn.tree import DecisionTreeClassifier
|
||||||
|
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
|
||||||
|
from sklearn.linear_model import LogisticRegression, RidgeClassifier, PassiveAggressiveClassifier, Perceptron, SGDClassifier
|
||||||
|
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid, RadiusNeighborsClassifier
|
||||||
|
from sklearn.svm import SVC
|
||||||
|
from sklearn.tree import DecisionTreeClassifier
|
||||||
|
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier, VotingClassifier, StackingClassifier
|
||||||
|
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB, ComplementNB
|
||||||
|
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
|
||||||
|
from sklearn.dummy import DummyClassifier
|
||||||
|
from sklearn.neural_network import MLPClassifier
|
||||||
|
|
||||||
|
# Ask scikit-learn transformers to return pandas DataFrames; prepDataset
# calls DataFrame methods on the ColumnTransformer output.
set_config(transform_output="pandas") # dataframe supremacy

# Passed as n_jobs to the MultiOutputClassifier built in comparison().
jobs = 12
# Iteration cap handed to the estimators configured further down.
max_iter = 3000
|
||||||
|
|
||||||
|
def prepDataset(dataset):  # returns X_train, X_test, y_train, y_test
    """Load a games CSV and turn it into a multi-label train/test split.

    The three description columns are merged into one text column and
    TF-IDF encoded (features). The ``genres`` column, which holds a Python
    list literal per row, is one-hot encoded (targets). Rows without any
    genre are dropped before splitting.

    Args:
        dataset: Path of the CSV file to read.

    Returns:
        The ``(X_train, X_test, y_train, y_test)`` tuple produced by
        ``train_test_split`` with ``random_state=0``.
    """

    def parse_genres(raw):
        # Missing genres were filled with '' below; literal_eval('') raises
        # SyntaxError, so map blank cells to "no genres" instead.
        if isinstance(raw, str) and not raw.strip():
            return []
        return ast.literal_eval(raw)

    frame = pd.read_csv(dataset, sep=",")

    # Keep only: merged description text, genres (tags are not used yet).
    column_transformer = ColumnTransformer(
        [
            # Merge all description columns into a single "desc" column.
            ('desc',
             FunctionTransformer(lambda X: X.fillna('').agg(' '.join, axis=1).to_frame(name="desc")),
             ['detailed_description', 'about_the_game', 'short_description']),
            ('pass', 'passthrough', ['genres']),  # , 'tags'
        ],
        verbose_feature_names_out=False,
    )
    # NOTE(review): relies on set_config(transform_output="pandas") so that
    # the result is a DataFrame - confirm it is set before this is called.
    frame = column_transformer.fit_transform(frame)

    #### Handle missing values
    print("SETMISS")
    # Fill missing numeric values with the column mean.
    frame.fillna(frame.mean(numeric_only=True), inplace=True)
    # Fill missing text values with the empty string.
    frame.fillna('', inplace=True)
    # Drop any row that still contains a missing value.
    frame.dropna(inplace=True)

    #### One-hot encode the genres
    # Each cell is the string form of a list; parse it into a real list.
    frame['genres'] = frame['genres'].map(parse_genres)
    # MultiLabelBinarizer one-hot encodes list-valued cells.
    mlb_genres = MultiLabelBinarizer()
    genres_encoded = mlb_genres.fit_transform(frame.pop('genres'))
    genres_df = pd.DataFrame(genres_encoded, columns=mlb_genres.classes_)

    #### Convert the text to a bag-of-words (TF-IDF) matrix
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(frame['desc'])
    # Densified into a DataFrame; both frames get a fresh default index,
    # so features and targets line up row by row.
    tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())

    ##### Model input
    print("MODEL")
    X = tfidf_df
    y = genres_df

    # Remove data points without a target value (all target columns are 0).
    mask = y.sum(axis=1) > 0

    # Split dataset
    return train_test_split(X[mask], y[mask], random_state=0)
|
||||||
|
def comparison(X_train, X_test, y_train, y_test, estimator, n_jobs=None):  # returns class_report
    """Fit one estimator per target column and report its test performance.

    Args:
        X_train, y_train: Training features and multi-label targets.
        X_test, y_test: Held-out features and targets used for scoring.
        estimator: Unfitted scikit-learn classifier, wrapped in a
            ``MultiOutputClassifier``.
        n_jobs: Parallel jobs for the wrapper. ``None`` (default) uses the
            module-level ``jobs`` setting, as before.

    Returns:
        The text produced by ``classification_report``; undefined metrics
        are reported as 0.0.
    """
    if n_jobs is None:
        n_jobs = jobs
    multi_target_clf = MultiOutputClassifier(estimator, n_jobs=n_jobs)
    # Model training
    multi_target_clf.fit(X_train, y_train)
    # Predict against the test data
    y_pred = multi_target_clf.predict(X_test)
    return classification_report(y_test, y_pred, zero_division=0.0)
|
||||||
|
# CSV files to evaluate; each one gets a report folder named after the file.
datasets = [
    # Other inputs that can be enabled here:
    # 'games_march2025_cleaned_2k.csv',
    # 'games_march2025_cleaned.csv',
    'games_march2025_cleaned_10k.csv',
]

# Report file name -> estimator that comparison() wraps per target column.
estimators = {
    "RidgeClassifier": RidgeClassifier(random_state=0, max_iter=max_iter),
    "PassiveAggressiveClassifier": PassiveAggressiveClassifier(random_state=0, max_iter=max_iter),
    "Perceptron": Perceptron(random_state=0, max_iter=max_iter),
    "SGDClassifier": SGDClassifier(random_state=0, max_iter=max_iter),
    "NearestCentroid": NearestCentroid(),
    "LinearSVC": LinearSVC(random_state=0, max_iter=max_iter),
    "GradientBoostingClassifier": GradientBoostingClassifier(random_state=0),
    "HistGradientBoostingClassifier": HistGradientBoostingClassifier(random_state=0, max_iter=max_iter),
    "LinearDiscriminantAnalysis": LinearDiscriminantAnalysis(),
    "MLPClassifier": MLPClassifier(random_state=0, max_iter=int(max_iter/20), early_stopping=True),
    # Not enabled:
    # "VotingClassifier": VotingClassifier(estimators=[('lr', LogisticRegression()), ('rf', RandomForestClassifier())]),
    # "StackingClassifier": StackingClassifier(estimators=[('lr', LogisticRegression()), ('rf', RandomForestClassifier())]),
}

for dataset in datasets:
    print("-" * 60)
    print("dataset -> " + dataset)
    print("mkdir")
    # Reports go into a folder named like the CSV without its extension.
    folder = dataset.split(".csv")[0]
    os.makedirs(folder, exist_ok=True)
    X_train, X_test, y_train, y_test = prepDataset(dataset)
    for esti, estimator in estimators.items():
        print("model: " + esti)
        compari = comparison(X_train, X_test, y_train, y_test, estimator)
        print("open")
        # The context manager closes the file even if the write fails.
        with open(os.path.join(folder, esti + ".txt"), mode="w", encoding="utf-8") as f:
            f.write(compari)
            print("write")
        print("close")
|
||||||
Binary file not shown.
|
Before Width: | Height: | Size: 343 KiB After Width: | Height: | Size: 66 KiB |
@@ -27,6 +27,10 @@ from sklearn.dummy import DummyClassifier
|
|||||||
from sklearn.neural_network import MLPClassifier
|
from sklearn.neural_network import MLPClassifier
|
||||||
|
|
||||||
set_config(transform_output="pandas") # dataframe supremacy
|
set_config(transform_output="pandas") # dataframe supremacy
|
||||||
|
|
||||||
|
jobs = 12
|
||||||
|
max_iter = 3000
|
||||||
|
|
||||||
def prepDataset(dataset): #returns X_train, X_test, y_train, y_test
|
def prepDataset(dataset): #returns X_train, X_test, y_train, y_test
|
||||||
dataset = pd.read_csv(dataset,sep=",")
|
dataset = pd.read_csv(dataset,sep=",")
|
||||||
# desc, genres, tags
|
# desc, genres, tags
|
||||||
@@ -76,7 +80,7 @@ def prepDataset(dataset): #returns X_train, X_test, y_train, y_test
|
|||||||
y_clean = y[mask]
|
y_clean = y[mask]
|
||||||
# Split dataset
|
# Split dataset
|
||||||
return train_test_split(X_clean, y_clean, random_state=0)
|
return train_test_split(X_clean, y_clean, random_state=0)
|
||||||
def comparison(X_train, X_test, y_train, y_test, estimator, jobs: int = 1): #returns class_report
|
def comparison(X_train, X_test, y_train, y_test, estimator,): #returns class_report
|
||||||
multi_target_clf = MultiOutputClassifier(estimator, n_jobs=jobs) # LogisticRegression(max_iter=1337, random_state=0)
|
multi_target_clf = MultiOutputClassifier(estimator, n_jobs=jobs) # LogisticRegression(max_iter=1337, random_state=0)
|
||||||
# model training
|
# model training
|
||||||
multi_target_clf.fit(X_train, y_train)
|
multi_target_clf.fit(X_train, y_train)
|
||||||
@@ -88,9 +92,6 @@ datasets = [
|
|||||||
#'games_march2025_cleaned_10k.csv',
|
#'games_march2025_cleaned_10k.csv',
|
||||||
#'games_march2025_cleaned.csv'
|
#'games_march2025_cleaned.csv'
|
||||||
]
|
]
|
||||||
|
|
||||||
max_iter = 3000 # <-- set your desired value here
|
|
||||||
|
|
||||||
estimators = {
|
estimators = {
|
||||||
"LogisticRegression": LogisticRegression(random_state=0, max_iter=max_iter),
|
"LogisticRegression": LogisticRegression(random_state=0, max_iter=max_iter),
|
||||||
"RidgeClassifier": RidgeClassifier(random_state=0, max_iter=max_iter),
|
"RidgeClassifier": RidgeClassifier(random_state=0, max_iter=max_iter),
|
||||||
@@ -99,8 +100,8 @@ estimators = {
|
|||||||
"SGDClassifier": SGDClassifier(random_state=0, max_iter=max_iter),
|
"SGDClassifier": SGDClassifier(random_state=0, max_iter=max_iter),
|
||||||
"KNeighborsClassifier": KNeighborsClassifier(),
|
"KNeighborsClassifier": KNeighborsClassifier(),
|
||||||
"NearestCentroid": NearestCentroid(),
|
"NearestCentroid": NearestCentroid(),
|
||||||
"RadiusNeighborsClassifier": RadiusNeighborsClassifier(),
|
# "RadiusNeighborsClassifier": RadiusNeighborsClassifier(), # failed bcs no neighbours in range :sob:
|
||||||
"LinearSVC-i5000": LinearSVC(random_state=0, max_iter=max_iter),
|
"LinearSVC": LinearSVC(random_state=0, max_iter=max_iter),
|
||||||
"SVC": SVC(random_state=0, max_iter=max_iter),
|
"SVC": SVC(random_state=0, max_iter=max_iter),
|
||||||
"DecisionTreeClassifier": DecisionTreeClassifier(random_state=0),
|
"DecisionTreeClassifier": DecisionTreeClassifier(random_state=0),
|
||||||
"RandomForestClassifier": RandomForestClassifier(random_state=0),
|
"RandomForestClassifier": RandomForestClassifier(random_state=0),
|
||||||
@@ -114,8 +115,7 @@ estimators = {
|
|||||||
"BernoulliNB": BernoulliNB(),
|
"BernoulliNB": BernoulliNB(),
|
||||||
"ComplementNB": ComplementNB(),
|
"ComplementNB": ComplementNB(),
|
||||||
"LinearDiscriminantAnalysis": LinearDiscriminantAnalysis(),
|
"LinearDiscriminantAnalysis": LinearDiscriminantAnalysis(),
|
||||||
"QuadraticDiscriminantAnalysis": QuadraticDiscriminantAnalysis(),
|
"MLPClassifier": MLPClassifier(random_state=0, max_iter=int(max_iter/5), verbose=True),
|
||||||
"MLPClassifier-i10000": MLPClassifier(max_iter=10000, random_state=0),
|
|
||||||
"DummyClassifier": DummyClassifier(random_state=0)
|
"DummyClassifier": DummyClassifier(random_state=0)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -131,7 +131,7 @@ for dataset in datasets:
|
|||||||
X_train, X_test, y_train, y_test = prepDataset(dataset)
|
X_train, X_test, y_train, y_test = prepDataset(dataset)
|
||||||
for esti in estimators:
|
for esti in estimators:
|
||||||
print("model: " + esti)
|
print("model: " + esti)
|
||||||
compari = comparison(X_train, X_test, y_train, y_test, estimators[esti], 1) #TODO: change the job count if you can
|
compari = comparison(X_train, X_test, y_train, y_test, estimators[esti])
|
||||||
print("open")
|
print("open")
|
||||||
f = open(folder + "/" + esti +".txt", mode="w+", encoding="utf-8")
|
f = open(folder + "/" + esti +".txt", mode="w+", encoding="utf-8")
|
||||||
f.write(compari)
|
f.write(compari)
|
||||||
BIN
compare_models_2k_3.png
Normal file
BIN
compare_models_2k_3.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 60 KiB |
21
games_march2025_cleaned_2k_i3k/AdaBoostClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/AdaBoostClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.87 0.76 0.81 300
|
||||||
|
1 0.70 0.59 0.64 216
|
||||||
|
2 0.58 0.13 0.21 86
|
||||||
|
3 0.56 0.11 0.18 46
|
||||||
|
4 0.71 0.30 0.42 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.69 0.70 0.69 245
|
||||||
|
7 0.62 0.31 0.41 42
|
||||||
|
8 0.76 0.41 0.53 127
|
||||||
|
9 1.00 0.50 0.67 12
|
||||||
|
10 0.67 0.50 0.57 127
|
||||||
|
11 0.40 0.29 0.33 14
|
||||||
|
12 0.74 0.45 0.56 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.74 0.54 0.62 1404
|
||||||
|
macro avg 0.59 0.36 0.43 1404
|
||||||
|
weighted avg 0.73 0.54 0.60 1404
|
||||||
|
samples avg 0.74 0.59 0.61 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/BaggingClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/BaggingClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.86 0.70 0.77 300
|
||||||
|
1 0.72 0.50 0.59 216
|
||||||
|
2 0.47 0.09 0.16 86
|
||||||
|
3 0.50 0.04 0.08 46
|
||||||
|
4 0.58 0.23 0.33 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.71 0.64 0.67 245
|
||||||
|
7 0.80 0.29 0.42 42
|
||||||
|
8 0.79 0.46 0.58 127
|
||||||
|
9 1.00 0.25 0.40 12
|
||||||
|
10 0.71 0.43 0.53 127
|
||||||
|
11 0.40 0.29 0.33 14
|
||||||
|
12 0.68 0.42 0.52 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.74 0.49 0.59 1404
|
||||||
|
macro avg 0.59 0.31 0.39 1404
|
||||||
|
weighted avg 0.72 0.49 0.56 1404
|
||||||
|
samples avg 0.70 0.54 0.57 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/BernoulliNB.txt
Normal file
21
games_march2025_cleaned_2k_i3k/BernoulliNB.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.75 0.90 0.82 300
|
||||||
|
1 0.72 0.68 0.70 216
|
||||||
|
2 0.50 0.08 0.14 86
|
||||||
|
3 0.27 0.07 0.11 46
|
||||||
|
4 0.40 0.07 0.12 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.77 0.82 0.79 245
|
||||||
|
7 0.33 0.10 0.15 42
|
||||||
|
8 0.67 0.40 0.50 127
|
||||||
|
9 0.00 0.00 0.00 12
|
||||||
|
10 0.71 0.37 0.49 127
|
||||||
|
11 0.00 0.00 0.00 14
|
||||||
|
12 0.49 0.31 0.38 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.70 0.55 0.62 1404
|
||||||
|
macro avg 0.40 0.27 0.30 1404
|
||||||
|
weighted avg 0.64 0.55 0.56 1404
|
||||||
|
samples avg 0.73 0.59 0.61 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/ComplementNB.txt
Normal file
21
games_march2025_cleaned_2k_i3k/ComplementNB.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.67 0.98 0.80 300
|
||||||
|
1 0.81 0.36 0.50 216
|
||||||
|
2 0.67 0.05 0.09 86
|
||||||
|
3 0.00 0.00 0.00 46
|
||||||
|
4 0.80 0.05 0.09 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.77 0.81 0.79 245
|
||||||
|
7 0.40 0.05 0.09 42
|
||||||
|
8 0.83 0.04 0.08 127
|
||||||
|
9 0.00 0.00 0.00 12
|
||||||
|
10 0.43 0.02 0.04 127
|
||||||
|
11 0.00 0.00 0.00 14
|
||||||
|
12 1.00 0.05 0.09 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.70 0.42 0.53 1404
|
||||||
|
macro avg 0.46 0.17 0.18 1404
|
||||||
|
weighted avg 0.69 0.42 0.42 1404
|
||||||
|
samples avg 0.71 0.46 0.52 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/DecisionTreeClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/DecisionTreeClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.76 0.73 0.75 300
|
||||||
|
1 0.56 0.53 0.54 216
|
||||||
|
2 0.36 0.33 0.34 86
|
||||||
|
3 0.33 0.26 0.29 46
|
||||||
|
4 0.40 0.46 0.43 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.65 0.61 0.63 245
|
||||||
|
7 0.39 0.40 0.40 42
|
||||||
|
8 0.59 0.57 0.58 127
|
||||||
|
9 0.60 0.25 0.35 12
|
||||||
|
10 0.56 0.51 0.53 127
|
||||||
|
11 0.39 0.50 0.44 14
|
||||||
|
12 0.52 0.49 0.50 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.58 0.55 0.57 1404
|
||||||
|
macro avg 0.44 0.40 0.41 1404
|
||||||
|
weighted avg 0.58 0.55 0.57 1404
|
||||||
|
samples avg 0.59 0.59 0.55 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/DummyClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/DummyClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.60 1.00 0.75 300
|
||||||
|
1 0.00 0.00 0.00 216
|
||||||
|
2 0.00 0.00 0.00 86
|
||||||
|
3 0.00 0.00 0.00 46
|
||||||
|
4 0.00 0.00 0.00 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.00 0.00 0.00 245
|
||||||
|
7 0.00 0.00 0.00 42
|
||||||
|
8 0.00 0.00 0.00 127
|
||||||
|
9 0.00 0.00 0.00 12
|
||||||
|
10 0.00 0.00 0.00 127
|
||||||
|
11 0.00 0.00 0.00 14
|
||||||
|
12 0.00 0.00 0.00 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.60 0.21 0.32 1404
|
||||||
|
macro avg 0.04 0.07 0.05 1404
|
||||||
|
weighted avg 0.13 0.21 0.16 1404
|
||||||
|
samples avg 0.60 0.26 0.34 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/ExtraTreesClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/ExtraTreesClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.81 0.91 0.86 300
|
||||||
|
1 0.78 0.62 0.69 216
|
||||||
|
2 1.00 0.03 0.07 86
|
||||||
|
3 0.00 0.00 0.00 46
|
||||||
|
4 1.00 0.04 0.07 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.78 0.73 0.75 245
|
||||||
|
7 0.00 0.00 0.00 42
|
||||||
|
8 0.84 0.24 0.38 127
|
||||||
|
9 1.00 0.17 0.29 12
|
||||||
|
10 0.90 0.21 0.34 127
|
||||||
|
11 1.00 0.14 0.25 14
|
||||||
|
12 0.83 0.18 0.29 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.80 0.48 0.60 1404
|
||||||
|
macro avg 0.64 0.23 0.29 1404
|
||||||
|
weighted avg 0.79 0.48 0.52 1404
|
||||||
|
samples avg 0.78 0.54 0.60 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/GaussianNB.txt
Normal file
21
games_march2025_cleaned_2k_i3k/GaussianNB.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.76 0.80 0.78 300
|
||||||
|
1 0.62 0.51 0.56 216
|
||||||
|
2 0.63 0.14 0.23 86
|
||||||
|
3 0.17 0.02 0.04 46
|
||||||
|
4 0.42 0.10 0.16 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.68 0.66 0.67 245
|
||||||
|
7 0.56 0.12 0.20 42
|
||||||
|
8 0.55 0.33 0.41 127
|
||||||
|
9 0.67 0.17 0.27 12
|
||||||
|
10 0.65 0.31 0.42 127
|
||||||
|
11 1.00 0.14 0.25 14
|
||||||
|
12 0.53 0.29 0.38 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.66 0.47 0.55 1404
|
||||||
|
macro avg 0.52 0.26 0.31 1404
|
||||||
|
weighted avg 0.62 0.47 0.51 1404
|
||||||
|
samples avg 0.67 0.53 0.55 1404
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.87 0.80 0.83 300
|
||||||
|
1 0.77 0.61 0.68 216
|
||||||
|
2 0.55 0.13 0.21 86
|
||||||
|
3 0.42 0.11 0.17 46
|
||||||
|
4 0.68 0.33 0.44 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.71 0.76 0.74 245
|
||||||
|
7 0.61 0.26 0.37 42
|
||||||
|
8 0.81 0.50 0.61 127
|
||||||
|
9 0.75 0.25 0.38 12
|
||||||
|
10 0.81 0.54 0.65 127
|
||||||
|
11 0.40 0.43 0.41 14
|
||||||
|
12 0.69 0.42 0.53 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.76 0.57 0.65 1404
|
||||||
|
macro avg 0.58 0.37 0.43 1404
|
||||||
|
weighted avg 0.74 0.57 0.63 1404
|
||||||
|
samples avg 0.77 0.63 0.65 1404
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.83 0.83 0.83 300
|
||||||
|
1 0.74 0.69 0.72 216
|
||||||
|
2 0.80 0.28 0.41 86
|
||||||
|
3 1.00 0.04 0.08 46
|
||||||
|
4 0.70 0.39 0.50 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.72 0.76 0.74 245
|
||||||
|
7 0.73 0.19 0.30 42
|
||||||
|
8 0.85 0.59 0.70 127
|
||||||
|
9 1.00 0.33 0.50 12
|
||||||
|
10 0.78 0.54 0.64 127
|
||||||
|
11 0.43 0.21 0.29 14
|
||||||
|
12 0.77 0.52 0.62 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.78 0.61 0.68 1404
|
||||||
|
macro avg 0.67 0.38 0.45 1404
|
||||||
|
weighted avg 0.78 0.61 0.66 1404
|
||||||
|
samples avg 0.79 0.67 0.69 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/KNeighborsClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/KNeighborsClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.82 0.62 0.70 300
|
||||||
|
1 0.69 0.46 0.55 216
|
||||||
|
2 0.62 0.06 0.11 86
|
||||||
|
3 0.20 0.02 0.04 46
|
||||||
|
4 0.72 0.16 0.26 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.78 0.55 0.64 245
|
||||||
|
7 0.38 0.12 0.18 42
|
||||||
|
8 0.59 0.65 0.62 127
|
||||||
|
9 1.00 0.67 0.80 12
|
||||||
|
10 0.68 0.44 0.54 127
|
||||||
|
11 1.00 0.29 0.44 14
|
||||||
|
12 0.34 0.76 0.48 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.64 0.48 0.55 1404
|
||||||
|
macro avg 0.56 0.34 0.38 1404
|
||||||
|
weighted avg 0.68 0.48 0.53 1404
|
||||||
|
samples avg 0.64 0.54 0.55 1404
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.63 0.68 0.66 300
|
||||||
|
1 0.47 0.56 0.51 216
|
||||||
|
2 0.27 0.59 0.37 86
|
||||||
|
3 0.06 0.28 0.10 46
|
||||||
|
4 0.21 0.52 0.30 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.63 0.67 0.65 245
|
||||||
|
7 0.06 0.29 0.10 42
|
||||||
|
8 0.28 0.52 0.36 127
|
||||||
|
9 0.03 0.42 0.06 12
|
||||||
|
10 0.29 0.52 0.38 127
|
||||||
|
11 0.04 0.43 0.07 14
|
||||||
|
12 0.53 0.44 0.48 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.30 0.57 0.39 1404
|
||||||
|
macro avg 0.25 0.42 0.29 1404
|
||||||
|
weighted avg 0.44 0.57 0.48 1404
|
||||||
|
samples avg 0.42 0.62 0.40 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/LinearSVC.txt
Normal file
21
games_march2025_cleaned_2k_i3k/LinearSVC.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.85 0.87 0.86 300
|
||||||
|
1 0.76 0.66 0.70 216
|
||||||
|
2 0.77 0.20 0.31 86
|
||||||
|
3 0.00 0.00 0.00 46
|
||||||
|
4 0.76 0.27 0.39 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.78 0.81 0.79 245
|
||||||
|
7 0.89 0.19 0.31 42
|
||||||
|
8 0.77 0.60 0.67 127
|
||||||
|
9 1.00 0.58 0.74 12
|
||||||
|
10 0.85 0.54 0.66 127
|
||||||
|
11 1.00 0.29 0.44 14
|
||||||
|
12 0.82 0.42 0.56 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.80 0.61 0.69 1404
|
||||||
|
macro avg 0.66 0.39 0.46 1404
|
||||||
|
weighted avg 0.78 0.61 0.66 1404
|
||||||
|
samples avg 0.81 0.67 0.69 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/LogisticRegression.txt
Normal file
21
games_march2025_cleaned_2k_i3k/LogisticRegression.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.78 0.91 0.84 300
|
||||||
|
1 0.78 0.62 0.69 216
|
||||||
|
2 1.00 0.03 0.07 86
|
||||||
|
3 0.00 0.00 0.00 46
|
||||||
|
4 1.00 0.04 0.07 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.79 0.81 0.80 245
|
||||||
|
7 0.00 0.00 0.00 42
|
||||||
|
8 0.90 0.34 0.49 127
|
||||||
|
9 0.00 0.00 0.00 12
|
||||||
|
10 0.89 0.25 0.39 127
|
||||||
|
11 0.00 0.00 0.00 14
|
||||||
|
12 0.88 0.14 0.24 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.79 0.50 0.61 1404
|
||||||
|
macro avg 0.50 0.22 0.26 1404
|
||||||
|
weighted avg 0.77 0.50 0.53 1404
|
||||||
|
samples avg 0.77 0.56 0.60 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/MLPClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/MLPClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.84 0.85 0.84 300
|
||||||
|
1 0.73 0.67 0.70 216
|
||||||
|
2 0.74 0.30 0.43 86
|
||||||
|
3 0.50 0.02 0.04 46
|
||||||
|
4 0.69 0.24 0.36 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.79 0.79 0.79 245
|
||||||
|
7 0.86 0.14 0.24 42
|
||||||
|
8 0.76 0.63 0.69 127
|
||||||
|
9 1.00 0.33 0.50 12
|
||||||
|
10 0.81 0.52 0.63 127
|
||||||
|
11 1.00 0.14 0.25 14
|
||||||
|
12 0.75 0.41 0.53 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.79 0.60 0.68 1404
|
||||||
|
macro avg 0.68 0.36 0.43 1404
|
||||||
|
weighted avg 0.78 0.60 0.65 1404
|
||||||
|
samples avg 0.80 0.66 0.68 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/MultinomialNB.txt
Normal file
21
games_march2025_cleaned_2k_i3k/MultinomialNB.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.64 0.99 0.78 300
|
||||||
|
1 0.85 0.24 0.37 216
|
||||||
|
2 0.60 0.03 0.07 86
|
||||||
|
3 0.00 0.00 0.00 46
|
||||||
|
4 0.80 0.05 0.09 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.78 0.80 0.79 245
|
||||||
|
7 0.40 0.05 0.09 42
|
||||||
|
8 1.00 0.04 0.08 127
|
||||||
|
9 0.00 0.00 0.00 12
|
||||||
|
10 0.20 0.01 0.02 127
|
||||||
|
11 0.00 0.00 0.00 14
|
||||||
|
12 1.00 0.05 0.09 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.69 0.40 0.51 1404
|
||||||
|
macro avg 0.45 0.16 0.17 1404
|
||||||
|
weighted avg 0.68 0.40 0.39 1404
|
||||||
|
samples avg 0.70 0.44 0.50 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/NearestCentroid.txt
Normal file
21
games_march2025_cleaned_2k_i3k/NearestCentroid.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.83 0.75 0.79 300
|
||||||
|
1 0.65 0.75 0.70 216
|
||||||
|
2 0.43 0.72 0.54 86
|
||||||
|
3 0.18 0.33 0.23 46
|
||||||
|
4 0.46 0.61 0.53 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.74 0.76 0.75 245
|
||||||
|
7 0.31 0.62 0.41 42
|
||||||
|
8 0.47 0.69 0.55 127
|
||||||
|
9 1.00 0.67 0.80 12
|
||||||
|
10 0.59 0.69 0.64 127
|
||||||
|
11 0.60 0.64 0.62 14
|
||||||
|
12 0.42 0.66 0.52 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.57 0.70 0.63 1404
|
||||||
|
macro avg 0.48 0.56 0.50 1404
|
||||||
|
weighted avg 0.62 0.70 0.65 1404
|
||||||
|
samples avg 0.63 0.74 0.64 1404
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.84 0.86 0.85 300
|
||||||
|
1 0.74 0.63 0.68 216
|
||||||
|
2 0.77 0.31 0.45 86
|
||||||
|
3 0.50 0.04 0.08 46
|
||||||
|
4 0.69 0.33 0.44 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.79 0.80 0.79 245
|
||||||
|
7 0.69 0.26 0.38 42
|
||||||
|
8 0.74 0.62 0.68 127
|
||||||
|
9 1.00 0.67 0.80 12
|
||||||
|
10 0.80 0.57 0.67 127
|
||||||
|
11 1.00 0.50 0.67 14
|
||||||
|
12 0.79 0.46 0.58 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.79 0.62 0.69 1404
|
||||||
|
macro avg 0.67 0.43 0.50 1404
|
||||||
|
weighted avg 0.77 0.62 0.67 1404
|
||||||
|
samples avg 0.80 0.68 0.70 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/Perceptron.txt
Normal file
21
games_march2025_cleaned_2k_i3k/Perceptron.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.78 0.94 0.85 300
|
||||||
|
1 0.60 0.88 0.71 216
|
||||||
|
2 0.54 0.60 0.57 86
|
||||||
|
3 0.33 0.04 0.08 46
|
||||||
|
4 0.68 0.16 0.25 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.74 0.86 0.80 245
|
||||||
|
7 0.63 0.29 0.39 42
|
||||||
|
8 0.62 0.80 0.69 127
|
||||||
|
9 1.00 0.67 0.80 12
|
||||||
|
10 0.89 0.43 0.58 127
|
||||||
|
11 0.70 0.50 0.58 14
|
||||||
|
12 0.88 0.27 0.42 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.70 0.68 0.69 1404
|
||||||
|
macro avg 0.60 0.46 0.48 1404
|
||||||
|
weighted avg 0.71 0.68 0.66 1404
|
||||||
|
samples avg 0.72 0.74 0.69 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/RandomForestClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/RandomForestClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.80 0.88 0.84 300
|
||||||
|
1 0.78 0.55 0.64 216
|
||||||
|
2 1.00 0.03 0.07 86
|
||||||
|
3 0.00 0.00 0.00 46
|
||||||
|
4 1.00 0.06 0.11 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.74 0.78 0.76 245
|
||||||
|
7 0.00 0.00 0.00 42
|
||||||
|
8 0.84 0.24 0.38 127
|
||||||
|
9 0.00 0.00 0.00 12
|
||||||
|
10 0.91 0.24 0.38 127
|
||||||
|
11 1.00 0.14 0.25 14
|
||||||
|
12 1.00 0.25 0.39 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.79 0.48 0.59 1404
|
||||||
|
macro avg 0.58 0.23 0.27 1404
|
||||||
|
weighted avg 0.78 0.48 0.52 1404
|
||||||
|
samples avg 0.77 0.54 0.60 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/RidgeClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/RidgeClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.84 0.88 0.86 300
|
||||||
|
1 0.76 0.66 0.70 216
|
||||||
|
2 0.80 0.14 0.24 86
|
||||||
|
3 0.00 0.00 0.00 46
|
||||||
|
4 0.85 0.20 0.33 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.78 0.82 0.80 245
|
||||||
|
7 0.86 0.14 0.24 42
|
||||||
|
8 0.79 0.54 0.64 127
|
||||||
|
9 1.00 0.42 0.59 12
|
||||||
|
10 0.88 0.50 0.64 127
|
||||||
|
11 1.00 0.14 0.25 14
|
||||||
|
12 0.83 0.38 0.52 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.81 0.59 0.68 1404
|
||||||
|
macro avg 0.67 0.34 0.42 1404
|
||||||
|
weighted avg 0.79 0.59 0.63 1404
|
||||||
|
samples avg 0.81 0.65 0.68 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/SGDClassifier.txt
Normal file
21
games_march2025_cleaned_2k_i3k/SGDClassifier.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.86 0.84 0.85 300
|
||||||
|
1 0.80 0.52 0.63 216
|
||||||
|
2 0.68 0.35 0.46 86
|
||||||
|
3 0.44 0.09 0.15 46
|
||||||
|
4 0.68 0.34 0.45 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.77 0.80 0.79 245
|
||||||
|
7 0.71 0.24 0.36 42
|
||||||
|
8 0.75 0.55 0.64 127
|
||||||
|
9 1.00 0.58 0.74 12
|
||||||
|
10 0.85 0.52 0.64 127
|
||||||
|
11 0.89 0.57 0.70 14
|
||||||
|
12 0.60 0.64 0.62 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.77 0.61 0.68 1404
|
||||||
|
macro avg 0.65 0.43 0.50 1404
|
||||||
|
weighted avg 0.77 0.61 0.66 1404
|
||||||
|
samples avg 0.79 0.67 0.69 1404
|
||||||
21
games_march2025_cleaned_2k_i3k/SVC.txt
Normal file
21
games_march2025_cleaned_2k_i3k/SVC.txt
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
precision recall f1-score support
|
||||||
|
|
||||||
|
0 0.81 0.90 0.85 300
|
||||||
|
1 0.76 0.63 0.69 216
|
||||||
|
2 1.00 0.03 0.07 86
|
||||||
|
3 0.00 0.00 0.00 46
|
||||||
|
4 1.00 0.05 0.09 83
|
||||||
|
5 0.00 0.00 0.00 0
|
||||||
|
6 0.77 0.83 0.80 245
|
||||||
|
7 0.00 0.00 0.00 42
|
||||||
|
8 0.84 0.40 0.54 127
|
||||||
|
9 1.00 0.17 0.29 12
|
||||||
|
10 0.90 0.34 0.49 127
|
||||||
|
11 1.00 0.14 0.25 14
|
||||||
|
12 0.92 0.21 0.34 106
|
||||||
|
13 0.00 0.00 0.00 0
|
||||||
|
|
||||||
|
micro avg 0.80 0.53 0.63 1404
|
||||||
|
macro avg 0.64 0.26 0.32 1404
|
||||||
|
weighted avg 0.79 0.53 0.56 1404
|
||||||
|
samples avg 0.79 0.59 0.63 1404
|
||||||
@@ -304,13 +304,47 @@
|
|||||||
"``sklearn`` has many different classification Models to choose from, but we only have limited time and computing power.\n",
|
"``sklearn`` has many different classification Models to choose from, but we only have limited time and computing power.\n",
|
||||||
"As such, we tested many different models on the 2k Dataset and chose the 5 best performing ones for the big dataset.\n",
|
"As such, we tested many different models on the 2k Dataset and chose the 5 best performing ones for the big dataset.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"### The comparison\n",
|
"### Initial Comparison\n",
|
||||||
"We won't put the comparison script in this notebook, but you can find it in the ``compare_models.py`` file and try it out yourself.\n",
|
"We won't put the comparison script in this notebook, but you can find it in the ``compare_models_2k.py`` file and try it out yourself.\n",
|
||||||
"There were some rules as a baseline for comparison:\n",
|
"There were some rules as a baseline for comparison:\n",
|
||||||
"- All Hyperparameters are set to default\n",
|
"- All Hyperparameters are set to default\n",
|
||||||
"- All iteration limits are set to 3000\n",
|
"- All iteration limits are set to 3000 (exception: MLPClassifier uses 300, because its limit counts epochs rather than iterations)\n",
|
||||||
|
"- All ``random_state``s are set to 0\n",
|
||||||
"\n",
|
"\n",
|
||||||
""
|
"Running all models with that configuration yields the following weighted F1-Scores (results as seen in the ``games_march2025_cleaned_2k_i3k`` folder): \n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"If we also compare the Micro/Macro values, we see that all models have a much lower Macro-F1 than Micro/Weighted-F1. That is because the 2k Dataset does not contain enough datapoints for every class (two classes have no samples at all in the test data), so we should proceed to the 10k Dataset before making major choices.\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"The 10 best performing models which will run on the 10k Dataset with the same rules as before:\n",
|
||||||
|
"1. NearestCentroid\n",
|
||||||
|
"2. Perceptron\n",
|
||||||
|
"3. PassiveAggressiveClassifier\n",
|
||||||
|
"4. LinearSVC\n",
|
||||||
|
"5. SGDClassifier\n",
|
||||||
|
"6. HistGradientBoostingClassifier\n",
|
||||||
|
"7. MLPClassifier\n",
|
||||||
|
"8. RidgeClassifier\n",
|
||||||
|
"9. GradientBoostingClassifier\n",
|
||||||
|
"10. LinearDiscriminantAnalysis\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"We can also compare these models between datasets, to see if a bigger dataset always improves the performance.\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"The final contenders are:\n",
|
||||||
|
"1.\n",
|
||||||
|
"2.\n",
|
||||||
|
"3.\n",
|
||||||
|
"4.\n",
|
||||||
|
"5.\n",
|
||||||
|
"\n",
|
||||||
|
"..."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user