From a71a201f05dfe9e14afb8369a198ccb2e0582bbe Mon Sep 17 00:00:00 2001 From: weiyu-huang Date: Fri, 27 Dec 2024 17:32:25 -0800 Subject: [PATCH 1/3] Refactor metadata handling using setdefault --- steps/etl/crawl_links.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/steps/etl/crawl_links.py b/steps/etl/crawl_links.py index b1d71b8..4929acb 100644 --- a/steps/etl/crawl_links.py +++ b/steps/etl/crawl_links.py @@ -46,9 +46,8 @@ def _crawl_link(dispatcher: CrawlerDispatcher, link: str, user: UserDocument) -> def _add_to_metadata(metadata: dict, domain: str, successfull_crawl: bool) -> dict: - if domain not in metadata: - metadata[domain] = {} - metadata[domain]["successful"] = metadata.get(domain, {}).get("successful", 0) + successfull_crawl - metadata[domain]["total"] = metadata.get(domain, {}).get("total", 0) + 1 + metadata.setdefault(domain, {"successful": 0, "total": 0}) + metadata[domain]["successful"] += successfull_crawl + metadata[domain]["total"] += 1 return metadata From 524d4a9269d21302b236777d8b44b12cf314f86e Mon Sep 17 00:00:00 2001 From: weiyu-huang Date: Fri, 27 Dec 2024 23:41:58 -0800 Subject: [PATCH 2/3] refactor: remove redundant UUID conversion in to_mongo Drop the UUID-to-string loop from to_mongo and leave that conversion to model_dump to avoid duplicate logic. UUID conversion now happens once during initial dump instead of repeating in to_mongo. 
--- llm_engineering/domain/base/nosql.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llm_engineering/domain/base/nosql.py b/llm_engineering/domain/base/nosql.py index d24819d..2237aa1 100644 --- a/llm_engineering/domain/base/nosql.py +++ b/llm_engineering/domain/base/nosql.py @@ -49,10 +49,6 @@ def to_mongo(self: T, **kwargs) -> dict: if "_id" not in parsed and "id" in parsed: parsed["_id"] = str(parsed.pop("id")) - for key, value in parsed.items(): - if isinstance(value, uuid.UUID): - parsed[key] = str(value) - return parsed def model_dump(self: T, **kwargs) -> dict: From a8e6fc5cfeb9cdfc0b8da115297ea46f7f07de4b Mon Sep 17 00:00:00 2001 From: weiyu-huang Date: Fri, 27 Dec 2024 23:44:06 -0800 Subject: [PATCH 3/3] refactor: update type hints to use union syntax Change Optional[str] to str | None for improved readability --- llm_engineering/domain/documents.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llm_engineering/domain/documents.py b/llm_engineering/domain/documents.py index 8084f93..01aa3d7 100644 --- a/llm_engineering/domain/documents.py +++ b/llm_engineering/domain/documents.py @@ -1,5 +1,4 @@ from abc import ABC -from typing import Optional from pydantic import UUID4, Field @@ -35,7 +34,7 @@ class Settings: class PostDocument(Document): - image: Optional[str] = None + image: str | None = None link: str | None = None class Settings: