Backport fc36155 to 34a3d38: Refactor translation feature and add new features description to README.
CensoredUsername committed Aug 17, 2024
1 parent ac8a8b2 commit 2d2b9c7
Showing 2 changed files with 168 additions and 100 deletions.
28 changes: 27 additions & 1 deletion README.md
@@ -5,6 +5,9 @@ script files. It will not extract files from .rpa archives. For that, use
[rpatool](https://github.com/Shizmob/rpatool) or
[UnRPA](https://github.com/Lattyware/unrpa).

You are currently reading the README of the `legacy` version of this tool, which requires python 2
and targets Ren'py `6` and `7`.

## Status

master (python 3):[![Build Status](https://github.com/CensoredUsername/unrpyc/actions/workflows/python-app.yaml/badge.svg?branch=master)](https://github.com/CensoredUsername/unrpyc/actions/workflows/python-app.yaml)
@@ -17,7 +20,30 @@ legacy-dev (python 2):[![Build Status](https://github.com/CensoredUsername/unrpy

## Usage

This tool can either be run as a command line tool, as a library, or injected into the game itself. For files from Ren'py 6 and 7, python 2.7 is required to run it as a command line tool.
This tool can either be run as a command line tool, as a library, or injected into the game itself.
To use it as a command line tool, a local python 2 installation is required. To use it for its
default function (decompiling), you can simply pass it the files you want to decompile as arguments,
or pass it the folder containing them. For example, `python unrpyc.py file1.rpyc file2.rpyc` or
`python unrpyc.py folder/`.
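
Unrpyc can also be driven from python code. The library API is not documented in this README, so
the following is only a rough sketch (hypothetical usage, based on the `Context` and
`decompile_rpyc` names visible in this version's `unrpyc.py`; not a stable interface):

```python
# Hypothetical sketch, not a documented API: using unrpyc as a library.
import unrpyc

context = unrpyc.Context()  # collects log lines and the per-file result state
# the third positional argument is `overwrite`; True replaces an existing .rpy file
unrpyc.decompile_rpyc('game/script.rpyc', context, True)
for line in context.log_contents:
    print(line)
```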

### Additional features

#### Translation:
For easier reading of decompiled script files, unrpyc can use translation data contained in a game
to automatically convert the emitted script files to another language. You can find the supported
languages for a game by looking in the `game/tl` folder of said game (`None` being the default).

To use this feature, simply pass the name of the target language (which has to match the name found
in the tl folder) with the `-t`/`--translate` option. For example, if a game has a folder
`path/to/renpyapp/game/tl/french`, then you can run the command:
`python unrpyc.py /path/to/renpyapp/ -t french`

#### Raw ast view:
Instead of decompiling, the tool can simply show the contents of an rpyc file. This is mainly useful
for bug reports and the development of unrpyc. You can pass the `-d`/`--dump` flag to activate this
feature, for example: `python unrpyc.py --dump file1.rpyc`.

Note: this generates a _lot_ of output.

## Compatibility

240 changes: 141 additions & 99 deletions unrpyc.py
@@ -46,8 +46,7 @@ def cpu_count():
import decompiler
import deobfuscate
from decompiler import astdump, translate
from decompiler.renpycompat import (pickle_safe_loads, pickle_safe_dumps, pickle_safe_dump,
pickle_loads)
from decompiler.renpycompat import (pickle_safe_loads, pickle_safe_dumps, pickle_loads)


class Context:
@@ -144,6 +143,20 @@ def read_ast_from_file(in_file, context):
return stmts


def get_ast(in_file, try_harder, context):
"""
Opens the rpyc file at path in_file to load the contained AST.
If try_harder is True, an attempt will be made to work around obfuscation techniques.
Otherwise, it is loaded as a normal rpyc file.
"""
with open(in_file, 'rb') as in_fh:
if try_harder:
ast = deobfuscate.read_ast(in_fh, context)
else:
ast = read_ast_from_file(in_fh, context)
return ast


def decompile_rpyc(input_filename, context, overwrite=False, try_harder=False, dump=False,
decompile_python=False, comparable=False, no_pyexpr=False, translator=None,
tag_outside_block=False, init_offset=False, sl_custom_names=None):
@@ -163,12 +176,7 @@ def decompile_rpyc(input_filename, context, overwrite=False, try_harder=False, d
return

context.log('Decompiling %s to %s ...' % (input_filename, out_filename))

with open(input_filename, 'rb') as in_file:
if try_harder:
ast = deobfuscate.read_ast(in_file, context)
else:
ast = read_ast_from_file(in_file, context)
ast = get_ast(input_filename, try_harder, context)

with codecs.open(out_filename, 'w', encoding='utf-8') as out_file:
if dump:
@@ -182,42 +190,56 @@

context.set_state('ok')

def extract_translations(input_filename, language, context):
context.log("Extracting translations from %s..." % input_filename)

with open(input_filename, 'rb') as in_file:
ast = read_ast_from_file(in_file, context)
def worker_tl(arg_tup):
"""
This function implements the first pass of the translation feature. It gathers TL-data from the
given rpyc file, to be used by the common worker to translate while decompiling.
arg_tup is (args, filename). Returns the gathered TL data in the context.
"""
args, filename = arg_tup
context = Context()

try:
context.log('Extracting translations from %s...' % filename)
ast = get_ast(filename, args.try_harder, context)

tl_inst = translate.Translator(args.translate, True)
tl_inst.translate_dialogue(ast)

# this object has to be sent back to the main process, for which it needs to be pickled.
# the default pickler cannot pickle fake classes correctly, so manually handle that here.
context.set_result(pickle_safe_dumps((tl_inst.dialogue, tl_inst.strings)))
context.set_state("ok")

except Exception as e:
context.set_error(e)
context.log('Error while extracting translations from %s' % filename)
context.log(traceback.format_exc())

return context

translator = translate.Translator(language, True)
translator.translate_dialogue(ast)
# we pickle and unpickle this manually because the regular unpickler will choke on it
return pickle_safe_dumps(translator.dialogue), translator.strings

def worker_common(arg_tup):
"""
The core of unrpyc. arg_tup is (args, filename). This worker will unpack the file at filename,
decompile it, and write the output to its corresponding rpy file.
"""

def worker(arg_tup):
(args, filename) = arg_tup
context = Context()

try:
if args.write_translation_file:
result = extract_translations(filename, args.language, context)
context.set_result(result)
if args.translator:
args.translator = pickle_loads(args.translator)

else:
if args.translation_file is not None:
translator = translate.Translator(None)
translator.language, translator.dialogue, translator.strings = (
pickle_loads(args.translations))
else:
translator = None

decompile_rpyc(
filename, context, args.clobber, try_harder=args.try_harder, dump=args.dump,
decompile_python=args.decompile_python, no_pyexpr=args.no_pyexpr,
comparable=args.comparable, translator=translator,
tag_outside_block=args.tag_outside_block, init_offset=args.init_offset,
sl_custom_names=args.sl_custom_names
)
try:
decompile_rpyc(
filename, context, args.clobber, try_harder=args.try_harder, dump=args.dump,
decompile_python=args.decompile_python, no_pyexpr=args.no_pyexpr,
comparable=args.comparable, translator=args.translator,
tag_outside_block=args.tag_outside_block, init_offset=args.init_offset,
sl_custom_names=args.sl_custom_names
)

except Exception, e:
context.set_error(e)
@@ -226,6 +248,39 @@ def worker(arg_tup):

return context


def run_workers(worker, common_args, private_args, parallelism):
"""
Runs worker in parallel using multiprocessing, with a max of `parallelism` processes.
Workers are called as worker((common_args, private_args[i])).
Each worker should return an instance of `Context`.
"""

worker_args = ((common_args, x) for x in private_args)

results = []
if parallelism > 1:
with Pool(parallelism) as pool:
for result in pool.imap(worker, worker_args, 1):
results.append(result)

for line in result.log_contents:
print(line)

print("")

else:
for result in map(worker, worker_args):
results.append(result)

for line in result.log_contents:
print(line)

print("")

return results


def parse_sl_custom_names(unparsed_arguments):
# parse a list of strings in the format
# classname=name-nchildren into {classname: (name, nchildren)}
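# e.g. (hypothetical input): ["mybox=box-2"] -> {"mybox": ("box", 2)}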
@@ -283,15 +338,6 @@ def main():
help="use the specified number or processes to decompile."
"Defaults to the amount of hw threads available minus one, disabled when muliprocessing is unavailable.")

parser.add_argument('-t', '--translation-file', dest='translation_file', action='store', default=None,
help="use the specified file to translate during decompilation")

parser.add_argument('-T', '--write-translation-file', dest='write_translation_file', action='store', default=None,
help="store translations in the specified file instead of decompiling")

parser.add_argument('-l', '--language', dest='language', action='store', default=None,
help="if writing a translation file, the language of the translations to write")

parser.add_argument('--sl1-as-python', dest='decompile_python', action='store_true',
help="Only dumping and for decompiling screen language 1 screens. "
"Convert SL1 Python AST to Python code instead of dumping it or converting it to screenlang.")
@@ -300,6 +346,15 @@
help="Only for dumping, remove several false differences when comparing dumps. "
"This suppresses attributes that are different even when the code is identical, such as file modification times. ")

parser.add_argument(
'-t',
'--translate',
dest='translate',
type=str,
action='store',
help="Changes the dialogue language in the decompiled script files, using a translation "
"already present in the tl dir.")

parser.add_argument('--no-pyexpr', dest='no_pyexpr', action='store_true',
help="Only for dumping, disable special handling of PyExpr objects, instead printing them as strings. "
"This is useful when comparing dumps from different versions of Ren'Py. "
@@ -339,24 +394,10 @@ def main():
# Catch impossible arg combinations so they don't produce strange errors or fail silently
if (args.no_pyexpr or args.comparable) and not args.dump:
ap.error(
"Arguments 'comparable' and 'no_pyexpr' are not usable without 'dump'.")

if ((args.try_harder or args.dump)
and (args.write_translation_file or args.translation_file or args.language)):
ap.error(
"Arguments 'try_harder' and/or 'dump' are not usable with the translation "
"feature.")

# Fail early to avoid wasting time going through the files
if (args.write_translation_file
and not args.clobber
and path.exists(args.write_translation_file)):
ap.error(
"Output translation file already exists. Pass --clobber to overwrite.")
"Options '--comparable' and '--no_pyexpr' require '--dump'.")

if args.translation_file:
with open(args.translation_file, 'rb') as in_file:
args.translations = in_file.read()
if args.dump and args.translate:
ap.error("Options '--translate' and '--dump' cannot be used together.")

if args.sl_custom_names is not None:
try:
@@ -399,40 +440,41 @@ def glob_or_complain(s):
# If a big file starts near the end, there could be a long time with only one thread running,
# which is inefficient. Avoid this by starting big files first.
worklist.sort(key=lambda x: path.getsize(x), reverse=True)
worklist = [(args, x) for x in worklist]

results = []

if args.processes > 1:
with Pool(args.processes) as pool:
for result in pool.imap(worker, worklist, 1):
results.append(result)

for line in result.log_contents:
print(line)

print("")

else:
for result in itertools.imap(worker, worklist):
results.append(result)

for line in result.log_contents:
print(line)

print("")

if args.write_translation_file:
print("Writing translations to %s..." % args.write_translation_file)
translated_dialogue = {}
translated_strings = {}
for result in results:
if not result.value:
continue
translated_dialogue.update(pickle_loads(result.value[0]))
translated_strings.update(result.value[1])
with open(args.write_translation_file, 'wb') as out_file:
pickle_safe_dump((args.language, translated_dialogue, translated_strings), out_file)
translation_errors = 0
args.translator = None
if args.translate:
# For translation, we first need to analyse all files for translation data.
# We then collect all of these back into the main process, and build a
# data structure of all of them. This data structure is then passed to
# all decompiling processes.
# Note: because this data contains some FakeClasses, multiprocessing cannot
# pass it between processes (it pickles them, and pickle will complain about
# these). Therefore, we need to manually pickle and unpickle it.
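# As an illustration of this round-trip (names from this file): main() below stores
# args.translator = pickle_safe_dumps(translator), and worker_common() later restores
# it with pickle_loads(args.translator) before decompiling.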

print("Step 1: analysing files for translations.")
results = run_workers(worker_tl, args, worklist, args.processes)

print('Compiling extracted translations.')
tl_dialogue = {}
tl_strings = {}
for entry in results:
if entry.state != "ok":
translation_errors += 1

if entry.value:
new_dialogue, new_strings = pickle_loads(entry.value)
tl_dialogue.update(new_dialogue)
tl_strings.update(new_strings)

translator = translate.Translator(None)
translator.dialogue = tl_dialogue
translator.strings = tl_strings
args.translator = pickle_safe_dumps(translator)

print("Step 2: decompiling.")

results = run_workers(worker_common, args, worklist, args.processes)

success = sum(result.state == "ok" for result in results)
skipped = sum(result.state == "skip" for result in results)
@@ -446,10 +488,7 @@ def glob_or_complain(s):
print(55 * '-')
print("Processed %s" % plural_s(len(results), 'file'))

if args.write_translation_file:
print("> %s were successfully analyzed." % plural_s(success, 'file'))
else:
print("> %s were successfully decompiled." % plural_s(success, 'file'))
print("> %s were successfully decompiled." % plural_s(success, 'file'))

if broken:
print("> %s did not have the correct header, "
@@ -461,6 +500,9 @@ def glob_or_complain(s):
if skipped:
print("> %s were skipped as the output file already existed." % plural_s(skipped, 'file'))

if translation_errors:
print("> %s failed translation extraction." % plural_s(translation_errors, 'file'))


if skipped:
print("")
