Skip to content

Commit

Permalink
message
Browse files (browse the repository at this point in the history)
  • Loading branch information
bhaddow committed Feb 19, 2024
1 parent f69293e commit 27c66a6
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions opuscleaner/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,20 +400,23 @@ def main():
raise RuntimeError(f"Unable to find corpus with basename: {corpus_id}")
#TODO:
# - Use downloader for multithreaded downloading (but need to set target dir)
-# - Do not download if file existing files
+# - Do not download if file exists

if hasattr(entry, "paths"):
for source_path in entry.paths:
LOG.debug(f"Copying from {source_path}")
shutil.copy(source_path,target_dir)
else:
-LOG.debug(f"Queueing corpus {corpus_id}")
+LOG.debug(f"Downloading corpus {corpus_id}")
get_bilingual_dataset(entry, target_dir)
#downloader.download(entry) # Currently downloads to DOWNLOAD_PATH
# # This does not work, because workers do not exit

#for thread in downloader.threads:
# thread.join()
#import time
#time.sleep(10)
#print(downloader.queue.get())



Expand Down

0 comments on commit 27c66a6

Please sign in to comment.