diff --git a/notebook.ipynb b/notebook.ipynb index 72f371a..7515a4e 100644 --- a/notebook.ipynb +++ b/notebook.ipynb @@ -6,18 +6,16 @@ "metadata": {}, "source": [ "# Machine Learning project in SoSe 2025 at HTW Saar\n", - "### Contributors\n", - " - Maximilian Kany\n", - " - Florian Speicher\n", - " - Tim Wall\n", + "## Idea\n", + "The goal of this project is to predict the genre(s) of a game through its given metadata.\n", "\n", - "#### Dataset\n", - "For our project we use a steam DataSet from kaggle. You can find it under the following url: [Kaggle.com](https://www.kaggle.com/datasets/trolukovich/steam-games-complete-dataset)" + "## Dataset\n", + "For our project we use a Steam dataset from Kaggle. You can find it at the following URL: [Kaggle.com](https://www.kaggle.com/datasets/trolukovich/steam-games-complete-dataset)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 19, "id": "3116b75f", "metadata": {}, "outputs": [ @@ -25,133 +23,300 @@ "name": "stdout", "output_type": "stream", "text": [ - " 0 1 \\\n", - "0 url types \n", - "1 https://store.steampowered.com/app/379720/DOOM/ app \n", - "2 https://store.steampowered.com/app/578080/PLAY... app \n", - "3 https://store.steampowered.com/app/637090/BATT... app \n", - "4 https://store.steampowered.com/app/221100/DayZ/ app \n", + " url types \\\n", + "0 https://store.steampowered.com/app/379720/DOOM/ app \n", + "1 https://store.steampowered.com/app/578080/PLAY... app \n", + "2 https://store.steampowered.com/app/637090/BATT... app \n", + "3 https://store.steampowered.com/app/221100/DayZ/ app \n", + "4 https://store.steampowered.com/app/8500/EVE_On... 
app \n", "\n", - " 2 \\\n", - "0 name \n", - "1 DOOM \n", - "2 PLAYERUNKNOWN'S BATTLEGROUNDS \n", - "3 BATTLETECH \n", - "4 DayZ \n", + " name \\\n", + "0 DOOM \n", + "1 PLAYERUNKNOWN'S BATTLEGROUNDS \n", + "2 BATTLETECH \n", + "3 DayZ \n", + "4 EVE Online \n", "\n", - " 3 \\\n", - "0 desc_snippet \n", - "1 Now includes all three premium DLC packs (Unto... \n", - "2 PLAYERUNKNOWN'S BATTLEGROUNDS is a battle roya... \n", - "3 Take command of your own mercenary outfit of '... \n", - "4 The post-soviet country of Chernarus is struck... \n", + " desc_snippet \\\n", + "0 Now includes all three premium DLC packs (Unto... \n", + "1 PLAYERUNKNOWN'S BATTLEGROUNDS is a battle roya... \n", + "2 Take command of your own mercenary outfit of '... \n", + "3 The post-soviet country of Chernarus is struck... \n", + "4 EVE Online is a community-driven spaceship MMO... \n", "\n", - " 4 \\\n", - "0 recent_reviews \n", - "1 Very Positive,(554),- 89% of the 554 user revi... \n", - "2 Mixed,(6,214),- 49% of the 6,214 user reviews ... \n", - "3 Mixed,(166),- 54% of the 166 user reviews in t... \n", - "4 Mixed,(932),- 57% of the 932 user reviews in t... \n", + " recent_reviews \\\n", + "0 Very Positive,(554),- 89% of the 554 user revi... \n", + "1 Mixed,(6,214),- 49% of the 6,214 user reviews ... \n", + "2 Mixed,(166),- 54% of the 166 user reviews in t... \n", + "3 Mixed,(932),- 57% of the 932 user reviews in t... \n", + "4 Mixed,(287),- 54% of the 287 user reviews in t... \n", "\n", - " 5 6 \\\n", - "0 all_reviews release_date \n", - "1 Very Positive,(42,550),- 92% of the 42,550 use... May 12, 2016 \n", - "2 Mixed,(836,608),- 49% of the 836,608 user revi... Dec 21, 2017 \n", - "3 Mostly Positive,(7,030),- 71% of the 7,030 use... Apr 24, 2018 \n", - "4 Mixed,(167,115),- 61% of the 167,115 user revi... Dec 13, 2018 \n", + " all_reviews release_date \\\n", + "0 Very Positive,(42,550),- 92% of the 42,550 use... May 12, 2016 \n", + "1 Mixed,(836,608),- 49% of the 836,608 user revi... 
Dec 21, 2017 \n", + "2 Mostly Positive,(7,030),- 71% of the 7,030 use... Apr 24, 2018 \n", + "3 Mixed,(167,115),- 61% of the 167,115 user revi... Dec 13, 2018 \n", + "4 Mostly Positive,(11,481),- 74% of the 11,481 u... May 6, 2003 \n", "\n", - " 7 8 \\\n", - "0 developer publisher \n", - "1 id Software Bethesda Softworks,Bethesda Softworks \n", - "2 PUBG Corporation PUBG Corporation,PUBG Corporation \n", - "3 Harebrained Schemes Paradox Interactive,Paradox Interactive \n", - "4 Bohemia Interactive Bohemia Interactive,Bohemia Interactive \n", + " developer publisher \\\n", + "0 id Software Bethesda Softworks,Bethesda Softworks \n", + "1 PUBG Corporation PUBG Corporation,PUBG Corporation \n", + "2 Harebrained Schemes Paradox Interactive,Paradox Interactive \n", + "3 Bohemia Interactive Bohemia Interactive,Bohemia Interactive \n", + "4 CCP CCP,CCP \n", "\n", - " 9 \\\n", - "0 popular_tags \n", - "1 FPS,Gore,Action,Demons,Shooter,First-Person,Gr... \n", - "2 Survival,Shooter,Multiplayer,Battle Royale,PvP... \n", - "3 Mechs,Strategy,Turn-Based,Turn-Based Tactics,S... \n", - "4 Survival,Zombies,Open World,Multiplayer,PvP,Ma... \n", + " popular_tags \\\n", + "0 FPS,Gore,Action,Demons,Shooter,First-Person,Gr... \n", + "1 Survival,Shooter,Multiplayer,Battle Royale,PvP... \n", + "2 Mechs,Strategy,Turn-Based,Turn-Based Tactics,S... \n", + "3 Survival,Zombies,Open World,Multiplayer,PvP,Ma... \n", + "4 Space,Massively Multiplayer,Sci-fi,Sandbox,MMO... \n", "\n", - " 10 \\\n", - "0 game_details \n", - "1 Single-player,Multi-player,Co-op,Steam Achieve... \n", - "2 Multi-player,Online Multi-Player,Stats \n", - "3 Single-player,Multi-player,Online Multi-Player... \n", - "4 Multi-player,Online Multi-Player,Steam Worksho... \n", + " game_details \\\n", + "0 Single-player,Multi-player,Co-op,Steam Achieve... \n", + "1 Multi-player,Online Multi-Player,Stats \n", + "2 Single-player,Multi-player,Online Multi-Player... \n", + "3 Multi-player,Online Multi-Player,Steam Worksho... 
\n", + "4 Multi-player,Online Multi-Player,MMO,Co-op,Onl... \n", "\n", - " 11 12 \\\n", - "0 languages achievements \n", - "1 English,French,Italian,German,Spanish - Spain,... 54 \n", - "2 English,Korean,Simplified Chinese,French,Germa... 37 \n", - "3 English,French,German,Russian 128 \n", - "4 English,French,Italian,German,Spanish - Spain,... NaN \n", + " languages achievements \\\n", + "0 English,French,Italian,German,Spanish - Spain,... 54.0 \n", + "1 English,Korean,Simplified Chinese,French,Germa... 37.0 \n", + "2 English,French,German,Russian 128.0 \n", + "3 English,French,Italian,German,Spanish - Spain,... NaN \n", + "4 English,German,Russian,French NaN \n", "\n", - " 13 \\\n", - "0 genre \n", - "1 Action \n", - "2 Action,Adventure,Massively Multiplayer \n", - "3 Action,Adventure,Strategy \n", - "4 Action,Adventure,Massively Multiplayer \n", + " genre \\\n", + "0 Action \n", + "1 Action,Adventure,Massively Multiplayer \n", + "2 Action,Adventure,Strategy \n", + "3 Action,Adventure,Massively Multiplayer \n", + "4 Action,Free to Play,Massively Multiplayer,RPG,... \n", "\n", - " 14 \\\n", - "0 game_description \n", - "1 About This Game Developed by id software, the... \n", - "2 About This Game PLAYERUNKNOWN'S BATTLEGROUND... \n", - "3 About This Game From original BATTLETECH/Mec... \n", - "4 About This Game The post-soviet country of Ch... \n", + " game_description \\\n", + "0 About This Game Developed by id software, the... \n", + "1 About This Game PLAYERUNKNOWN'S BATTLEGROUND... \n", + "2 About This Game From original BATTLETECH/Mec... \n", + "3 About This Game The post-soviet country of Ch... \n", + "4 About This Game \n", "\n", - " 15 \\\n", - "0 mature_content \n", - "1 NaN \n", - "2 Mature Content Description The developers de... \n", + " mature_content \\\n", + "0 NaN \n", + "1 Mature Content Description The developers de... 
\n", + "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", - " 16 \\\n", - "0 minimum_requirements \n", - "1 Minimum:,OS:,Windows 7/8.1/10 (64-bit versions... \n", + " minimum_requirements \\\n", + "0 Minimum:,OS:,Windows 7/8.1/10 (64-bit versions... \n", + "1 Minimum:,Requires a 64-bit processor and opera... \n", "2 Minimum:,Requires a 64-bit processor and opera... \n", - "3 Minimum:,Requires a 64-bit processor and opera... \n", - "4 Minimum:,OS:,Windows 7/8.1 64-bit,Processor:,I... \n", + "3 Minimum:,OS:,Windows 7/8.1 64-bit,Processor:,I... \n", + "4 Minimum:,OS:,Windows 7,Processor:,Intel Dual C... \n", "\n", - " 17 18 \\\n", - "0 recommended_requirements original_price \n", - "1 Recommended:,OS:,Windows 7/8.1/10 (64-bit vers... $19.99 \n", - "2 Recommended:,Requires a 64-bit processor and o... $29.99 \n", - "3 Recommended:,Requires a 64-bit processor and o... $39.99 \n", - "4 Recommended:,OS:,Windows 10 64-bit,Processor:,... $44.99 \n", + " recommended_requirements original_price \\\n", + "0 Recommended:,OS:,Windows 7/8.1/10 (64-bit vers... $19.99 \n", + "1 Recommended:,Requires a 64-bit processor and o... $29.99 \n", + "2 Recommended:,Requires a 64-bit processor and o... $39.99 \n", + "3 Recommended:,OS:,Windows 10 64-bit,Processor:,... $44.99 \n", + "4 Recommended:,OS:,Windows 10,Processor:,Intel i... Free \n", "\n", - " 19 \n", - "0 discount_price \n", - "1 $14.99 \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\FlorianSpeicher\\AppData\\Local\\Temp\\ipykernel_38708\\931669033.py:4: DtypeWarning: Columns (12) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", - " cars = pd.read_csv(\"./steam_games.csv\",sep=\",\",header=None)\n" + " discount_price \n", + "0 $14.99 \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n" ] } ], "source": [ "import numpy as np\n", "import pandas as pd\n", + "\n", "# load data\n", - "cars = pd.read_csv(\"./steam_games.csv\",sep=\",\",header=None)\n", - "print(cars.head())\n" + "# Columns, named by the CSV header row: url,types,name,desc_snippet,recent_reviews,all_reviews,release_date,developer,publisher,popular_tags,game_details,languages,achievements,genre,game_description,mature_content,minimum_requirements,recommended_requirements,original_price,discount_price\n", + "dataset = pd.read_csv(\"./steam_games.csv\")  # ',' is the default separator\n", + "print(dataset.head())" + ] + }, + { + "cell_type": "markdown", + "id": "cba9750a", + "metadata": {}, + "source": [ + "## Preparation of the Training-Set\n", + "### Removing Uniques\n", + "We remove the following features from the training set because they (nearly) uniquely identify a data point:\n", + "- URL\n", + "- Name of the Game\n", + "- Developer\n", + "- Publisher" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06dedcdf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " types desc_snippet \\\n", + "0 app Now includes all three premium DLC packs (Unto... \n", + "1 app PLAYERUNKNOWN'S BATTLEGROUNDS is a battle roya... \n", + "2 app Take command of your own mercenary outfit of '... \n", + "3 app The post-soviet country of Chernarus is struck... \n", + "4 app EVE Online is a community-driven spaceship MMO... \n", + "\n", + " recent_reviews \\\n", + "0 Very Positive,(554),- 89% of the 554 user revi... \n", + "1 Mixed,(6,214),- 49% of the 6,214 user reviews ... \n", + "2 Mixed,(166),- 54% of the 166 user reviews in t... \n", + "3 Mixed,(932),- 57% of the 932 user reviews in t... \n", + "4 Mixed,(287),- 54% of the 287 user reviews in t... 
\n", + "\n", + " all_reviews release_date \\\n", + "0 Very Positive,(42,550),- 92% of the 42,550 use... May 12, 2016 \n", + "1 Mixed,(836,608),- 49% of the 836,608 user revi... Dec 21, 2017 \n", + "2 Mostly Positive,(7,030),- 71% of the 7,030 use... Apr 24, 2018 \n", + "3 Mixed,(167,115),- 61% of the 167,115 user revi... Dec 13, 2018 \n", + "4 Mostly Positive,(11,481),- 74% of the 11,481 u... May 6, 2003 \n", + "\n", + " popular_tags \\\n", + "0 FPS,Gore,Action,Demons,Shooter,First-Person,Gr... \n", + "1 Survival,Shooter,Multiplayer,Battle Royale,PvP... \n", + "2 Mechs,Strategy,Turn-Based,Turn-Based Tactics,S... \n", + "3 Survival,Zombies,Open World,Multiplayer,PvP,Ma... \n", + "4 Space,Massively Multiplayer,Sci-fi,Sandbox,MMO... \n", + "\n", + " game_details \\\n", + "0 Single-player,Multi-player,Co-op,Steam Achieve... \n", + "1 Multi-player,Online Multi-Player,Stats \n", + "2 Single-player,Multi-player,Online Multi-Player... \n", + "3 Multi-player,Online Multi-Player,Steam Worksho... \n", + "4 Multi-player,Online Multi-Player,MMO,Co-op,Onl... \n", + "\n", + " languages achievements \\\n", + "0 English,French,Italian,German,Spanish - Spain,... 54.0 \n", + "1 English,Korean,Simplified Chinese,French,Germa... 37.0 \n", + "2 English,French,German,Russian 128.0 \n", + "3 English,French,Italian,German,Spanish - Spain,... NaN \n", + "4 English,German,Russian,French NaN \n", + "\n", + " genre \\\n", + "0 Action \n", + "1 Action,Adventure,Massively Multiplayer \n", + "2 Action,Adventure,Strategy \n", + "3 Action,Adventure,Massively Multiplayer \n", + "4 Action,Free to Play,Massively Multiplayer,RPG,... \n", + "\n", + " game_description \\\n", + "0 About This Game Developed by id software, the... \n", + "1 About This Game PLAYERUNKNOWN'S BATTLEGROUND... \n", + "2 About This Game From original BATTLETECH/Mec... \n", + "3 About This Game The post-soviet country of Ch... 
\n", + "4 About This Game \n", + "\n", + " mature_content \\\n", + "0 NaN \n", + "1 Mature Content Description The developers de... \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "\n", + " minimum_requirements \\\n", + "0 Minimum:,OS:,Windows 7/8.1/10 (64-bit versions... \n", + "1 Minimum:,Requires a 64-bit processor and opera... \n", + "2 Minimum:,Requires a 64-bit processor and opera... \n", + "3 Minimum:,OS:,Windows 7/8.1 64-bit,Processor:,I... \n", + "4 Minimum:,OS:,Windows 7,Processor:,Intel Dual C... \n", + "\n", + " recommended_requirements original_price \\\n", + "0 Recommended:,OS:,Windows 7/8.1/10 (64-bit vers... $19.99 \n", + "1 Recommended:,Requires a 64-bit processor and o... $29.99 \n", + "2 Recommended:,Requires a 64-bit processor and o... $39.99 \n", + "3 Recommended:,OS:,Windows 10 64-bit,Processor:,... $44.99 \n", + "4 Recommended:,OS:,Windows 10,Processor:,Intel i... Free \n", + "\n", + " discount_price \n", + "0 $14.99 \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n" + ] + } + ], + "source": [ + "# Remaining columns: types,desc_snippet,recent_reviews,all_reviews,release_date,popular_tags,game_details,languages,achievements,genre,game_description,mature_content,minimum_requirements,recommended_requirements,original_price,discount_price\n", + "# Reassign instead of inplace=True; errors='ignore' keeps the cell re-runnable once the columns are already gone.\n", + "dataset = dataset.drop(columns=['url', 'name', 'developer', 'publisher'], errors='ignore')\n", + "print(dataset.head())" + ] + }, + { + "cell_type": "markdown", + "id": "f5436c87", + "metadata": {}, + "source": [ + "### Structurize Text\n", + "**TODO: check if makes sense**\n", + "The dataset holds a lot of unstructured data; we use Term Frequency-Inverse Document Frequency (TF-IDF) to structure most text features.\n", + "It is important to use a new vectorizer instance for each feature so that their vocabularies don't overlap with each other.
\n", + "\n", + "### Standardize Numbers\n", + "We standardize the prices (zero mean, unit variance) so that they are on a comparable scale for the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e8b407c", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.compose import make_column_transformer\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "# types,desc_snippet,recent_reviews,all_reviews,release_date,popular_tags,game_details,languages,achievements,genre,game_description,mature_content,minimum_requirements,recommended_requirements,original_price,discount_price\n", + "TEXT_COLUMNS = ['desc_snippet', 'mature_content', 'game_description']\n", + "PRICE_COLUMNS = ['original_price', 'discount_price']\n", + "\n", + "\n", + "def parse_price(prices):\n", + "    \"\"\"Turn a column of price strings into floats.\n", + "\n", + "    '$19.99' becomes 19.99, values starting with 'Free' become 0.0 and\n", + "    missing or unparsable values become NaN.\n", + "    \"\"\"\n", + "    text = prices.astype(str).str.strip()\n", + "    amounts = pd.to_numeric(text.str.replace(r'[$,]', '', regex=True), errors='coerce')\n", + "    return amounts.mask(text.str.lower().str.startswith('free'), 0.0)\n", + "\n", + "\n", + "# Work on a copy so `dataset` stays untouched and this cell can be re-run.\n", + "features = dataset.copy()\n", + "# TfidfVectorizer rejects NaN documents, so missing texts become empty strings.\n", + "features[TEXT_COLUMNS] = features[TEXT_COLUMNS].fillna('')\n", + "# StandardScaler needs numbers, but the CSV stores prices as text ('$19.99', 'Free').\n", + "features[PRICE_COLUMNS] = features[PRICE_COLUMNS].apply(parse_price)\n", + "\n", + "column_transformer = make_column_transformer(\n", + "    # A plain column name (not a list) hands the vectorizer the 1-D series of\n", + "    # documents it expects. With ['col'] it receives a DataFrame and only sees\n", + "    # the column name, which is what produced the [[1. 1. 1.]] result.\n", + "    (TfidfVectorizer(stop_words='english'), 'desc_snippet'),\n", + "    (TfidfVectorizer(stop_words='english'), 'mature_content'),\n", + "    (TfidfVectorizer(stop_words='english'), 'game_description'),\n", + "    # StandardScaler scales each column independently. NaN prices are ignored\n", + "    # while fitting and stay NaN afterwards, so impute them before training.\n", + "    (StandardScaler(), PRICE_COLUMNS),\n", + "    # NOTE: the dataset has no 'price' column, so nothing is passed through.\n", + "    #TODO: add transformer for every feature @flo @max\n", + ")\n", + "dataset2 = column_transformer.fit_transform(features)\n", + "print(dataset2)" + ] + }, + { + "cell_type": "markdown", + "id": "ad84e777", + "metadata": {}, + "source": [ + "\n", + "### Removing Bundles\n", + "**(TODO: decide whether yes or no), not as important as I thought**\n", + "Bundles don't have clearly defined genre(s) (e.g. publisher bundles), so we may remove them from the training set." ] } ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -165,7 +330,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.13.3" } }, "nbformat": 4,