Skip to content

Commit

Permalink
fixes: progress bar for merge_datasets (#445)
Browse files (browse the repository at this point in the history)
  • Loading branch information
bhimrazy authored Jan 9, 2025
1 parent 04e77c0 commit 6bdafce
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions src/litdata/processing/functions.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -378,8 +378,7 @@ def optimize(

if not _IS_IN_STUDIO and (machine is not None or num_nodes is not None):
raise ValueError(
- "Only https://lightning.ai/ supports multiple nodes or selecting a machine."
- "Create an account to try it out."
+ "Only https://lightning.ai/ supports multiple nodes or selecting a machine.Create an account to try it out."
)

if not _IS_IN_STUDIO:
Expand Down Expand Up @@ -590,10 +589,13 @@ def merge_datasets(input_dirs: List[str], output_dir: str, max_workers: Optional

with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
futures: List[concurrent.futures.Future] = []
- for copy_info in _tqdm(copy_infos):
+ for copy_info in copy_infos:
future = executor.submit(_apply_copy, copy_info, resolved_output_dir)
futures.append(future)

+ for future in _tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
+ future.result()

_save_index(index_json, resolved_output_dir)


Expand Down

0 comments on commit 6bdafce

Please sign in to comment.