Skip to content

Commit

Permalink
Fix reviews
Browse files (browse the repository at this point in the history)
  • Loading branch information
wwxxzz committed Jan 20, 2025
1 parent 518ed62 commit fac878c
Showing 1 changed file with 20 additions and 24 deletions.
44 changes: 20 additions & 24 deletions src/pai_rag/tools/data_process/ops/parser_op.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -72,7 +72,7 @@ def __init__(
else:
self.should_replace = False
logger.warning(
"Environment variables for paths are not fully set. Path replacement will be skipped."
"File path won't be replaced to data source URI since either INPUT_MOUNT_PATH or OSS_SOURCE_PATH is not provided."
)
logger.info(
f"""ParserActor [PaiDataReader] init finished with following parameters:
Expand All @@ -87,30 +87,25 @@ def __init__(
)

def replace_mount_with_real_path(self, document):
if not self.should_replace:
return document

try:
if self.should_replace:
file_path = document.metadata["file_path"]
file_path_obj = Path(file_path).resolve()
relative_path_str = (
file_path_obj.relative_to(self.mount_path).as_posix().strip("/")
)
document.metadata["file_path"] = f"{self.real_path}/{relative_path_str}"
document.metadata["mount_path"] = file_path
logger.debug(
f"Replacing path: {file_path} --> {document.metadata['file_path']}"
)
return document
except ValueError:
# file_path 不以 mount_path 开头
logger.debug(
f"Path {document.metadata['file_path']} does not start with mount path {self.mount_path}. No replacement done."
)
return document
except Exception as e:
logger.error(f"Error replacing path {document.metadata['file_path']}: {e}")
return document
try:
relative_path_str = (
file_path_obj.relative_to(self.mount_path).as_posix().strip("/")
)
document.metadata["file_path"] = f"{self.real_path}/{relative_path_str}"
document.metadata["mount_path"] = file_path
logger.debug(
f"Replacing original file_path: {file_path} --> {document.metadata['file_path']}"
)
except ValueError:
# file_path 不以 mount_path 开头
logger.debug(
f"Path {file_path} does not start with mount path {self.mount_path}. No replacement done."
)
except Exception as e:
logger.error(f"Error replacing path {file_path}: {e}")

def process(self, input_file):
current_thread = threading.current_thread()
Expand All @@ -119,4 +114,5 @@ def process(self, input_file):
if len(documents) == 0:
logger.info(f"No data found in the input file: {input_file}")
return None
return convert_document_to_dict(self.replace_mount_with_real_path(documents[0]))
self.replace_mount_with_real_path(documents[0])
return convert_document_to_dict(documents[0])

0 comments on commit fac878c

Please sign in to comment.