diff --git a/CHANGES.rst b/CHANGES.rst index 656718c..614198b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,12 +4,18 @@ Features -------- +- Includes initial support for ``retool`` compilations - Added Game Boy Advance - ROMPatcher now supports RomPatcher.js Fixes ----- +ROMChooser +~~~~~~~~~~ + +- Fixed bug where versions weren't parsed correctly + ROMCleaner ~~~~~~~~~~ diff --git a/docs/1g1r.rst b/docs/1g1r.rst index f14bd62..99d1dd1 100644 --- a/docs/1g1r.rst +++ b/docs/1g1r.rst @@ -38,6 +38,9 @@ There are also some demotions that go on. The priority is (from most to least de * Alternate versions * Demoted versions (e.g. arcade versions) +Because some ROMs are compilations, if we end up with two equal scores given everything above, the single game will +be preferred over the compilation. + End result ---------- diff --git a/romsearch/configs/regex.yml b/romsearch/configs/regex.yml index 175e35b..7f751e4 100644 --- a/romsearch/configs/regex.yml +++ b/romsearch/configs/regex.yml @@ -675,6 +675,10 @@ kaiser: pattern: "\\(Kaiser\\)" group: "demoted_version" +ka_sheng: + pattern: "\\(Ka Sheng\\)" + group: "demoted_version" + kickstarter: pattern: "\\(Kickstarter\\)" group: "demoted_version" diff --git a/romsearch/modules/dupeparser.py b/romsearch/modules/dupeparser.py index 1fbc257..77b9500 100644 --- a/romsearch/modules/dupeparser.py +++ b/romsearch/modules/dupeparser.py @@ -226,16 +226,17 @@ def get_dat_dupes(self, dupe_dict=None): if parent_game_name == clone_short_name: continue - found_parent_name = get_parent_name( + found_parent_names = get_parent_name( game_name=parent_game_name, dupe_dict=dupe_dict, ) - if found_parent_name not in dupe_dict: - dupe_dict[found_parent_name] = {} + for found_parent_name in found_parent_names: + if found_parent_name not in dupe_dict: + dupe_dict[found_parent_name] = {} - # Don't overwrite priority if it's already set - if clone_short_name not in dupe_dict[found_parent_name]: - dupe_dict[found_parent_name][clone_short_name] = {"priority": 1} + # Don't overwrite priority if it's already set + if clone_short_name not in dupe_dict[found_parent_name]: + dupe_dict[found_parent_name][clone_short_name] = {"priority": 1} return dupe_dict @@ -269,15 +270,30 @@ def get_retool_dupes(self, dupe_dict=None): regex_config=self.regex_config, ) - found_parent_name = get_parent_name( + found_parent_names = get_parent_name( game_name=group_parsed, dupe_dict=dupe_dict, ) - if found_parent_name not in dupe_dict: - dupe_dict[found_parent_name] = {} - for i, g in enumerate(group_titles): - dupe_dict[found_parent_name][g] = {"priority": priorities[i]} + for found_parent_name in found_parent_names: + if found_parent_name not in dupe_dict: + dupe_dict[found_parent_name] = {} + + for i, g in enumerate(group_titles): + dupe_dict[found_parent_name][g] = {"priority": priorities[i]} + + # Next, check for compilations. If we have them, pull them out and optionally the title position + if "compilations" in retool_dupe: + for compilation in retool_dupe["compilations"]: + comp_g = compilation["searchTerm"] + title_pos = compilation.get("titlePosition", None) + priority = compilation.get("priority", 1) + + dupe_dict[found_parent_name][comp_g] = { + "is_compilation": True, + "priority": priority, + "title_pos": title_pos, + } return dupe_dict, retool_dupes diff --git a/romsearch/modules/gamefinder.py b/romsearch/modules/gamefinder.py index 7edd21f..483157f 100644 --- a/romsearch/modules/gamefinder.py +++ b/romsearch/modules/gamefinder.py @@ -15,6 +15,12 @@ load_json, ) +DUPE_DEFAULT = { + "is_compilation": False, + "priority": 1, + "title_pos": None +} + def get_all_games( files, @@ -35,12 +41,22 @@ def get_all_games( return games -def get_priority(dupe_dict, parent_name, game_name): - """Get priority from a dupe dictionary""" +def get_dupe_entry( + dupe_dict, + parent_name, + game_name, +): + """Get dupe entry from a dupe dictionary + + Args: + dupe_dict (dict): dupe dictionary + parent_name (str): parent game name + game_name (str): game name + """ # First case: parent name doesn't exist in the dupe dict if parent_name not in dupe_dict: - return 1 + return DUPE_DEFAULT # Second case: it does (potentially can be lowercase) dupes = [dupe.lower() for dupe in dupe_dict[parent_name]] @@ -48,12 +64,13 @@ def get_priority(dupe_dict, parent_name, game_name): if game_name.lower() in dupes: found_parent_idx = dupes.index(game_name.lower()) - priority = dupe_dict[parent_name][reg_dupes[found_parent_idx]]["priority"] - return priority + dupe_entry = dupe_dict[parent_name][reg_dupes[found_parent_idx]] + + return dupe_entry - # Otherwise, just return 1 - return 1 + # Otherwise, return defaults + return DUPE_DEFAULT class GameFinder: @@ -63,6 +80,7 @@ def __init__( platform, config_file=None, config=None, + dupe_dict=None, default_config=None, regex_config=None, logger=None, @@ -78,6 +96,7 @@ def __init__( platform (str): Platform name config_file (str, optional): Path to config file. Defaults to None. config (dict, optional): Configuration dictionary. Defaults to None. + dupe_dict (dict, optional): Dupe dictionary. Defaults to None. default_config (dict, optional): Default configuration dictionary. Defaults to None. regex_config (dict, optional): Dictionary of regex config. Defaults to None. logger (logging.Logger, optional): Logger instance. Defaults to None. @@ -126,6 +145,7 @@ def __init__( self.regex_config = regex_config # Info for dupes + self.dupe_dict = dupe_dict self.dupe_dir = config.get("dirs", {}).get("dupe_dir", None) self.filter_dupes = config.get("gamefinder", {}).get("filter_dupes", True) @@ -289,45 +309,64 @@ def get_game_matches( def get_filter_dupes(self, games): """Parse down a list of files based on an input dupe list""" - if self.dupe_dir is None: - raise ValueError("dupe_dir must be specified if filtering dupes") - - dupe_file = os.path.join(self.dupe_dir, f"{self.platform} (dupes).json") - if not os.path.exists(dupe_file): - self.logger.warning(f"{self.log_line_sep * self.log_line_length}") - self.logger.warning( - centred_string("No dupe files found", total_length=self.log_line_length) + if self.dupe_dict is None and self.dupe_dir is None: + raise ValueError( + "dupe_dict or dupe_dir must be specified if filtering dupes" ) - self.logger.warning(f"{self.log_line_sep * self.log_line_length}") - return None - game_dict = {} + if self.dupe_dict is None: + dupe_file = os.path.join(self.dupe_dir, f"{self.platform} (dupes).json") + if not os.path.exists(dupe_file): + self.logger.warning(f"{self.log_line_sep * self.log_line_length}") + self.logger.warning( + centred_string( + "No dupe files found", total_length=self.log_line_length + ) + ) + self.logger.warning(f"{self.log_line_sep * self.log_line_length}") + return None + self.dupe_dict = load_json(dupe_file) - dupes = load_json(dupe_file) + game_dict = {} - # Loop over games, and the dupes dictionary. Also pull out priority + # Loop over games, and the dupes dictionary. Also pull out various other important info for g in games: - found_parent_name = get_parent_name( + # Because we have compilations, these can be lists + found_parent_names = get_parent_name( game_name=g, - dupe_dict=dupes, + dupe_dict=self.dupe_dict, ) - found_parent_name_lower = found_parent_name.lower() - game_dict_keys = [key for key in game_dict.keys()] - game_dict_keys_lower = [key.lower() for key in game_dict.keys()] + for found_parent_name in found_parent_names: - if found_parent_name_lower not in game_dict_keys_lower: - game_dict[found_parent_name] = {} - final_parent_name = copy.deepcopy(found_parent_name) - else: - final_parent_idx = game_dict_keys_lower.index(found_parent_name_lower) - final_parent_name = game_dict_keys[final_parent_idx] + found_parent_name_lower = found_parent_name.lower() + game_dict_keys = [key for key in game_dict.keys()] + game_dict_keys_lower = [key.lower() for key in game_dict.keys()] - priority = get_priority( - dupe_dict=dupes, parent_name=found_parent_name, game_name=g - ) + if found_parent_name_lower not in game_dict_keys_lower: + game_dict[found_parent_name] = {} + final_parent_name = copy.deepcopy(found_parent_name) + else: + final_parent_idx = game_dict_keys_lower.index(found_parent_name_lower) + final_parent_name = game_dict_keys[final_parent_idx] + + dupe_entry = get_dupe_entry( + dupe_dict=self.dupe_dict, + parent_name=found_parent_name, + game_name=g, + ) + + # We want to make sure we also don't duplicate on the names being upper/lowercase + g_names = [g_dict for g_dict in game_dict[final_parent_name]] + g_names_lower = [g_name.lower() for g_name in g_names] + if g.lower() in g_names_lower: + g_idx = g_names_lower.index(g.lower()) + g = g_names[g_idx] + + if g not in game_dict[final_parent_name]: + game_dict[final_parent_name][g] = {} - game_dict[final_parent_name][g] = {"priority": priority} + game_dict[final_parent_name][g].update(dupe_entry) return game_dict diff --git a/romsearch/modules/romchooser.py b/romsearch/modules/romchooser.py index ffdae4e..d668d8b 100644 --- a/romsearch/modules/romchooser.py +++ b/romsearch/modules/romchooser.py @@ -131,7 +131,7 @@ def add_versioned_score(files, rom_dict, key): rom_dict[f][key] = get_sanitized_version(rom_dict[f][key]) versions = [version.parse(rom_dict[f][key]) for f in files] - versions_sorted = sorted(versions) + versions_sorted = np.unique(sorted(versions)) file_scores_version = np.zeros(len(files)) for i, v in enumerate(versions_sorted): @@ -537,19 +537,20 @@ def get_best_roms( """Get the best ROM(s) from a list, using a scoring system""" # Positive scores - improved_version_score = 1 - version_score = 1e2 - revision_score = 1e4 - budget_edition_score = 1e6 - language_score = 1e8 - region_score = 1e10 - cheevo_score = 1e12 + improved_version_score = 1e2 + version_score = 1e4 + revision_score = 1e6 + budget_edition_score = 1e8 + language_score = 1e10 + region_score = 1e12 + cheevo_score = 1e14 # Negative scores - demoted_version_score = -1 - alternate_version_score = -1 - modern_version_score = -1e2 - priority_score = -1e4 + compilation_score = -1 + demoted_version_score = -1e2 + alternate_version_score = -1e2 + modern_version_score = -1e4 + priority_score = -1e6 file_scores = np.zeros(len(files)) @@ -597,6 +598,11 @@ def get_best_roms( # Negative scores + # Compilation score + file_scores += compilation_score * np.array( + [rom_dict[f].get("is_compilation", False) for f in files] + ) + # Demoted version file_scores += demoted_version_score * np.array( [int(rom_dict[f]["demoted_version"]) for f in files] @@ -612,7 +618,7 @@ def get_best_roms( [int(rom_dict[f]["modern_version"]) for f in files] ) - # Priority scoring. We subtract 1 so that the highest priority has no changed + # Priority scoring. We subtract 1 so that the highest priority has no change file_scores += priority_score * ( np.array([int(rom_dict[f]["priority"]) for f in files]) - 1 ) diff --git a/romsearch/modules/romcleaner.py b/romsearch/modules/romcleaner.py index cdba82b..27d0f21 100644 --- a/romsearch/modules/romcleaner.py +++ b/romsearch/modules/romcleaner.py @@ -9,7 +9,8 @@ load_yml, setup_logger, load_json, - save_json, ) + save_json, +) class ROMCleaner: @@ -205,7 +206,8 @@ def clean_roms( os.remove(rom_on_disk) self.logger.info( centred_string( - f"Removed {rom_short} from disk", total_length=self.log_line_length + f"Removed {rom_short} from disk", + total_length=self.log_line_length, ) ) @@ -251,7 +253,8 @@ def clean_roms( self.logger.info( centred_string( - f"Removed {d_i_to_remove} from cache", total_length=self.log_line_length + f"Removed {d_i_to_remove} from cache", + total_length=self.log_line_length, ) ) diff --git a/romsearch/modules/romparser.py b/romsearch/modules/romparser.py index 5d8a46f..789e356 100644 --- a/romsearch/modules/romparser.py +++ b/romsearch/modules/romparser.py @@ -15,6 +15,9 @@ ) DICT_DEFAULT_VALS = {"bool": False, "str": "", "list": []} +USE_TITLE_POS = [ + "languages", +] def find_pattern(regex, search_str, group_number=0): @@ -30,14 +33,34 @@ def find_pattern(regex, search_str, group_number=0): return regex_search_str -def get_pattern_val(regex, tag, regex_type, pattern_mappings=None): - """Get values out from a regex pattern, optionally mapping back to something more readable for lists""" +def get_pattern_val( + regex, + tag, + regex_type, + pattern_mappings=None, + title_pos=None, + use_title_pos=False, +): + """Get values out from a regex pattern, optionally mapping back to something more readable for lists + + Args: + regex: Regex pattern + tag: Found tag + regex_type: Regex pattern type. Can be str, bool, list + pattern_mappings: Mapping from regex pattern to more readable values + title_pos: Position of title for compilations. Defaults to None + use_title_pos: Use title_pos? Defaults to False + """ pattern_string = find_pattern(regex, tag) if pattern_string is not None: pattern_string = pattern_string.strip("()") + # Split out to the specific languages, but only if they're marked correctly + if title_pos is not None and use_title_pos and "+" in pattern_string: + pattern_string = pattern_string.split("+")[title_pos - 1] + if regex_type == "bool": pattern_val = True elif regex_type == "str": @@ -63,6 +86,7 @@ def get_pattern_val(regex, tag, regex_type, pattern_mappings=None): return pattern_val + def is_ra_subset(name): """Check if a name is a RetroAchievements subset @@ -79,6 +103,7 @@ def is_ra_subset(name): return is_subset + def check_match(i, j, checks_passed=None): """Check if two bools/strings/lists match @@ -261,7 +286,7 @@ def run( ): """Run the ROM parser""" - game_dict = {} + game_dict = copy.deepcopy(files) self.logger.debug(f"{self.log_line_sep * self.log_line_length}") self.logger.debug( @@ -272,23 +297,38 @@ def run( self.logger.debug(f"{self.log_line_sep * self.log_line_length}") for f in files: - game_dict[f] = self.parse_file(f) - # Include the priority - game_dict[f]["priority"] = files[f]["priority"] + # # Get the potential title position out for compilations + title_pos = files[f].get("title_pos", None) + + f_parsed = self.parse_file( + f, + title_pos=title_pos, + ) + game_dict[f].update(f_parsed) return game_dict def parse_file( self, f, + title_pos=None, ): - """Parse useful info out of a specific file""" + """Parse useful info out of a specific file + + Args: + f (str): file name + title_pos (int, optional): Title position for compilations. Defaults to None. + """ file_dict = {} if self.use_filename: - file_dict = self.parse_filename(f, file_dict) + file_dict = self.parse_filename( + f, + title_pos=title_pos, + file_dict=file_dict, + ) if self.use_retool: file_dict = self.parse_retool(f, file_dict) @@ -767,10 +807,11 @@ def get_parsed_match( if not ra_checks_passed: continue - ra_checks_passed = check_match(m_parsed[check], - r_parsed[check], - checks_passed=ra_checks_passed, - ) + ra_checks_passed = check_match( + m_parsed[check], + r_parsed[check], + checks_passed=ra_checks_passed, + ) # After this first pass, also see if any of the regex checks are grouped, # and double-check the sublevel below. This is because we could have e.g. @@ -783,12 +824,15 @@ def get_parsed_match( if not ra_checks_passed: continue - r_c_group = self.regex_config[r_c].get("group", None) + r_c_group = self.regex_config[r_c].get( + "group", None + ) if r_c_group == check: - ra_checks_passed = check_match(m_parsed[r_c], - r_parsed[r_c], - checks_passed=ra_checks_passed, - ) + ra_checks_passed = check_match( + m_parsed[r_c], + r_parsed[r_c], + checks_passed=ra_checks_passed, + ) if ra_checks_passed: @@ -865,8 +909,20 @@ def set_implicit_languages( return file_dict - def parse_filename(self, f, file_dict=None): - """Parse info out of filename""" + def parse_filename( + self, + f, + title_pos=None, + file_dict=None, + ): + """Parse info out of filename + + Args: + f (str): filename + title_pos (int): Title position for compilations. Defaults to None + file_dict (dict): Existing file dictionary. Defaults to None, which + will create an empty one + """ if file_dict is None: file_dict = {} @@ -876,6 +932,11 @@ def parse_filename(self, f, file_dict=None): for regex_key in self.regex_config: + # Are we potentially using the title position? + use_title_pos = False + if regex_key in USE_TITLE_POS: + use_title_pos = True + regex_type = self.regex_config[regex_key].get("type", "bool") search_tags = self.regex_config[regex_key].get("search_tags", True) group = self.regex_config[regex_key].get("group", None) @@ -933,6 +994,8 @@ def parse_filename(self, f, file_dict=None): tag, regex_type, pattern_mappings=pattern_mappings, + title_pos=title_pos, + use_title_pos=use_title_pos, ) if pattern_string is not None: @@ -945,7 +1008,12 @@ def parse_filename(self, f, file_dict=None): found_tag = True else: pattern_string = get_pattern_val( - regex, f, regex_type, pattern_mappings=pattern_mappings + regex, + f, + regex_type, + pattern_mappings=pattern_mappings, + title_pos=title_pos, + use_title_pos=use_title_pos, ) if pattern_string is not None: file_dict[regex_key] = pattern_string diff --git a/romsearch/modules/rompatcher.py b/romsearch/modules/rompatcher.py index dd7d352..bcd46d8 100644 --- a/romsearch/modules/rompatcher.py +++ b/romsearch/modules/rompatcher.py @@ -229,11 +229,12 @@ def download_patch_file( return patch_file - def patch_rom(self, - unpatched_file, - patch_file, - patch_dir, - ): + def patch_rom( + self, + unpatched_file, + patch_file, + patch_dir, + ): """Patch a ROM Args: @@ -263,7 +264,9 @@ def patch_rom(self, ) elif patch_method == "rompatcher.js": - rompatcher_js_file = f"{unpatch_file_split[0]} (patched){unpatch_file_split[1]}" + rompatcher_js_file = ( + f"{unpatch_file_split[0]} (patched){unpatch_file_split[1]}" + ) self.rompatcher_js_patch( unpatched_file=unpatched_file, @@ -357,7 +360,9 @@ def rompatcher_js_patch( patch_dir (str): Patch directory """ - rompatcher_js_path = self.config.get("rompatcher", {}).get("rompatcher_js_path", None) + rompatcher_js_path = self.config.get("rompatcher", {}).get( + "rompatcher_js_path", None + ) if rompatcher_js_path is None: raise ValueError("Path to RomPatcher.js needs to be defined in user config") @@ -398,7 +403,7 @@ def rompatcher_js_patch( os.chdir(patch_dir) with subprocess.Popen( - cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT ) as process: for line in process.stdout: diff --git a/romsearch/modules/romsearch.py b/romsearch/modules/romsearch.py index 51a5577..37b6392 100644 --- a/romsearch/modules/romsearch.py +++ b/romsearch/modules/romsearch.py @@ -189,6 +189,7 @@ def run( dat_dict = dat_parser.run() # Get dupes here, if we're doing that + dupe_dict = None retool_dict = None if self.run_dupeparser: dupe_parser = DupeParser( @@ -199,7 +200,7 @@ def run( logger=self.logger, log_line_length=log_line_length, ) - _, retool_dict = dupe_parser.run() + dupe_dict, retool_dict = dupe_parser.run() if self.romsearch_method == "download_then_filter": # Run the rclone sync @@ -262,6 +263,7 @@ def run( finder = GameFinder( platform=platform, config=self.config, + dupe_dict=dupe_dict, default_config=self.default_config, regex_config=self.regex_config, logger=self.logger, @@ -287,25 +289,33 @@ def run( for i, game in enumerate(all_games): - # Parse by the short name, include the priority in there as well rom_files = {} - games_lower = [g.lower() for g in all_games[game]] - priorities = [all_games[game][g]["priority"] for g in all_games[game]] + # We check by a lowercase version of the short name for f in all_file_dict: - file_short_name_lower = all_file_dict[f]["short_name"].lower() - if file_short_name_lower in games_lower: - games_idx = games_lower.index(file_short_name_lower) - rom_files[f] = { - "priority": priorities[games_idx], - } - - if all_file_dict[f]["matched"]: - raise ValueError( - f"{f} has already been matched! This should not happen" - ) - - all_file_dict[f]["matched"] = True + f_lower = all_file_dict[f]["short_name"].lower() + for g in all_games[game]: + + g_lower = g.lower() + + if f_lower == g_lower: + + # Update the dictionary as appropriate + if f not in rom_files: + rom_files[f] = {} + rom_files[f].update(all_games[game][g]) + + # If we're duplicating a match, and it's not part of a compilation, freak out + is_compilation = all_games[game][g].get("is_compilation", False) + if all_file_dict[f]["matched"] and not is_compilation: + self.logger.warning( + centred_string(f"{f} has already been matched! " + f"This should not generally happen", + total_length=log_line_length, + ) + ) + + all_file_dict[f]["matched"] = True parse = ROMParser( platform=platform, diff --git a/romsearch/util/general.py b/romsearch/util/general.py index 3082356..04d8d3e 100644 --- a/romsearch/util/general.py +++ b/romsearch/util/general.py @@ -1,4 +1,5 @@ import copy +import numpy as np import os import time from datetime import datetime @@ -20,7 +21,14 @@ def get_parent_name( game_name, dupe_dict, ): - """Get the parent name recursively searching through a dupe dict""" + """Get the parent name(s) recursively searching through a dupe dict + + Because we can have compilations, find all cases where things match up + + Args: + game_name (str): game name to find parents for + dupe_dict (dict): dupe dict to search through + """ # We do this by lowercase checking reg_dupes = [g for g in dupe_dict] @@ -31,33 +39,34 @@ def get_parent_name( found_dupe = False - found_parent_name = None + found_parent_names = [] # First, just check the dupes if game_name.lower() in all_dupes: - found_idx = all_dupes.index(game_name.lower()) - found_parent_name = reg_dupes[found_idx] + found_idx = np.where(np.asarray(all_dupes) == game_name.lower())[0] + found_parent_names = [reg_dupes[i] for i in found_idx] found_dupe = True # Check all the clones within the dupes else: for i, clone in enumerate(all_clones): - if found_dupe: - continue clone = [c.lower() for c in clone] if game_name.lower() in clone: - found_parent_name = reg_dupes[i] + found_parent_names.append(reg_dupes[i]) found_dupe = True if not found_dupe: - found_parent_name = copy.deepcopy(game_name) + found_parent_names = copy.deepcopy(game_name) - if found_parent_name is None: + if found_parent_names is None: raise ValueError("Could not find a parent name!") - return found_parent_name + if not isinstance(found_parent_names, list): + found_parent_names = [found_parent_names] + + return found_parent_names def get_file_time( diff --git a/romsearch/util/logger.py b/romsearch/util/logger.py index 2916b58..5fef092 100644 --- a/romsearch/util/logger.py +++ b/romsearch/util/logger.py @@ -38,7 +38,7 @@ def setup_logger( else: log_dir = os.path.join(log_dir, script_name, *additional_dir) - if log_level not in ["debug", "info", "critical"]: + if log_level not in ["debug", "info", "warning", "critical"]: log_level = "info" print(f"Invalid log level '{log_level}', defaulting to 'info'") @@ -74,6 +74,8 @@ def setup_logger( logger.setLevel(logging.DEBUG) elif log_level == "INFO": logger.setLevel(logging.INFO) + elif log_level == "WARNING": + logger.setLevel(logging.WARNING) elif log_level == "CRITICAL": logger.setLevel(logging.CRITICAL) else: