From eea04e4effc6ecfbe6372583dbda83bb106f18e5 Mon Sep 17 00:00:00 2001
From: AlexCXC <1223408988@qq.com>
Date: Mon, 16 Oct 2023 15:16:06 +0800
Subject: [PATCH 1/4] fix import/sync CDS error log

---
 .../common_dataset_sync_utils.py              | 32 ++++++++---------
 .../common_dataset/common_dataset_syncer.py   |  7 +++-
 dtable_events/utils/dtable_server_api.py      | 36 +++++++++++++++----
 3 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/dtable_events/common_dataset/common_dataset_sync_utils.py b/dtable_events/common_dataset/common_dataset_sync_utils.py
index 4c31da13..5f09af3b 100644
--- a/dtable_events/common_dataset/common_dataset_sync_utils.py
+++ b/dtable_events/common_dataset/common_dataset_sync_utils.py
@@ -10,7 +10,7 @@
 from dtable_events.common_dataset.dtable_db_cell_validators import validate_table_db_cell_value
 from dtable_events.utils import get_inner_dtable_server_url
 from dtable_events.utils.constants import ColumnTypes
-from dtable_events.utils.dtable_server_api import BaseExceedsLimitException, DTableServerAPI
+from dtable_events.utils.dtable_server_api import BaseExceedsException, DTableServerAPI
 from dtable_events.utils.dtable_db_api import DTableDBAPI
 
 logger = logging.getLogger(__name__)
@@ -562,11 +562,11 @@ def create_dst_table_or_update_columns(dst_dtable_uuid, dst_table_id, dst_table_
     try:
         resp_json = dst_dtable_server_api.add_table(dst_table_name, lang, columns=columns)
         dst_table_id = resp_json.get('_id')
-    except BaseExceedsLimitException:
+    except BaseExceedsException as e:
         return None, {
             'dst_table_id': None,
-            'error_msg': 'base exceeds limit',
-            'error_type': 'base_exceeds_limit',
+            'error_msg': e.error_msg,
+            'error_type': e.error_type,
             'task_status_code': 400
         }
     except Exception as e:
@@ -587,11 +587,11 @@ def create_dst_table_or_update_columns(dst_dtable_uuid, dst_table_id, dst_table_
         } for col in to_be_appended_columns]
         try:
             dst_dtable_server_api.batch_append_columns_by_table_id(dst_table_id, columns)
-        except BaseExceedsLimitException:
+        except BaseExceedsException as e:
             return None, {
                 'dst_table_id': None,
-                'error_msg': 'base exceeds limit',
-                'error_type': 'base_exceeds_limit',
+                'error_msg': e.error_msg,
+                'error_type': e.error_type,
                 'task_status_code': 400
             }
         except Exception as e:
@@ -610,11 +610,11 @@ def create_dst_table_or_update_columns(dst_dtable_uuid, dst_table_id, dst_table_
         } for col in to_be_updated_columns]
         try:
             dst_dtable_server_api.batch_update_columns_by_table_id(dst_table_id, columns)
-        except BaseExceedsLimitException:
+        except BaseExceedsException as e:
             return None, {
                 'dst_table_id': None,
-                'error_msg': 'base exceeds limit',
-                'error_type': 'base_exceeds_limit',
+                'error_msg': e.error_msg,
+                'error_type': e.error_type,
                 'task_status_code': 400
             }
         except Exception as e:
@@ -632,11 +632,11 @@ def append_dst_rows(dst_dtable_uuid, dst_table_name, to_be_appended_rows, dst_dt
     for i in range(0, len(to_be_appended_rows), step):
         try:
             dst_dtable_server_api.batch_append_rows(dst_table_name, to_be_appended_rows[i: i+step], need_convert_back=False)
-        except BaseExceedsLimitException:
+        except BaseExceedsException as e:
             return {
                 'dst_table_id': None,
-                'error_msg': 'base exceeds limit',
-                'error_type': 'base_exceeds_limit',
+                'error_msg': e.error_msg,
+                'error_type': e.error_type,
                 'task_status_code': 400
             }
         except Exception as e:
@@ -660,11 +660,11 @@ def update_dst_rows(dst_dtable_uuid, dst_table_name, to_be_updated_rows, dst_dta
         })
     try:
         dst_dtable_server_api.batch_update_rows(dst_table_name, updates, need_convert_back=False)
-    except BaseExceedsLimitException:
+    except BaseExceedsException as e:
         return {
             'dst_table_id': None,
-            'error_msg': 'base exceeds limit',
-            'error_type': 'base_exceeds_limit',
+            'error_msg': e.error_msg,
+            'error_type': e.error_type,
             'task_status_code': 400
         }
     except Exception as e:
diff --git a/dtable_events/common_dataset/common_dataset_syncer.py b/dtable_events/common_dataset/common_dataset_syncer.py
index 88a4cee7..97fe3433 100644
--- a/dtable_events/common_dataset/common_dataset_syncer.py
+++ b/dtable_events/common_dataset/common_dataset_syncer.py
@@ -178,7 +178,12 @@ def check_common_dataset(session_class):
                 continue
             else:
                 if result.get('error_msg'):
-                    if result.get('error_type') in ('generate_synced_columns_error', 'base_exceeds_limit'):
+                    if result.get('error_type') in (
+                        'generate_synced_columns_error',
+                        'base_exceeds_limit',
+                        'exceed_columns_limit',
+                        'exceed_rows_limit'
+                    ):
                         logging.warning('src_dtable_uuid: %s src_table_id: %s src_view_id: %s dst_dtable_uuid: %s dst_table_id: %s client error: %s',
                                         src_dtable_uuid, src_table_id, src_view_id, dst_dtable_uuid, dst_table_id, result)
                         with session_class() as db_session:
diff --git a/dtable_events/utils/dtable_server_api.py b/dtable_events/utils/dtable_server_api.py
index c2c8d851..ac577651 100644
--- a/dtable_events/utils/dtable_server_api.py
+++ b/dtable_events/utils/dtable_server_api.py
@@ -17,7 +17,22 @@ class WrongFilterException(Exception):
     pass
 
 
-class BaseExceedsLimitException(Exception):
+class BaseExceedsException(Exception):
+
+    def __init__(self, error_type, error_msg):
+        self.error_type = error_type
+        self.error_msg = error_msg
+
+
+class BaseExceedsLimitException(BaseExceedsException):
+    pass
+
+
+class BaseExceedsRowsLimitException(BaseExceedsException):
+    pass
+
+
+class BaseExceedsColumnsLimitException(BaseExceedsException):
     pass
 
 
@@ -29,15 +44,24 @@
 def parse_response(response):
     if response.status_code >= 400:
         if response.status_code == 404:
             raise NotFoundException()
+        error_type, error_msg = '', ''
         try:
             response_json = response.json()
         except:
-            pass
+            error_msg = response.text
         else:
-            if response_json.get('error_type') == 'wrong_filter_in_filters':
-                raise WrongFilterException()
-            if response_json.get('error_msg') == 'base_exceeds_limit':
-                raise BaseExceedsLimitException()
+            error_type = response_json.get('error_type')
+            error_msg = response_json.get('error_msg')
+
+        if error_type == 'wrong_filter_in_filters':
+            raise WrongFilterException()
+        if error_type == 'exceed_rows_limit' or error_msg == 'Exceed the rows limit':
+            raise BaseExceedsRowsLimitException('exceed_rows_limit', 'Exceed the rows limit')
+        if error_type == 'exceed_columns_limit' or error_msg == 'Exceed the columns limit':
+            raise BaseExceedsColumnsLimitException('exceed_columns_limit', 'Exceed the columns limit')
+        if error_type == 'base_exceeds_limit' or error_msg == 'The base size exceeds the limit of 200MB, the operation cannot be performed.':
+            raise BaseExceedsLimitException('base_exceeds_limit', 'The base size exceeds the limit of 200MB, the operation cannot be performed.')
+        raise ConnectionError(response.status_code, response.text)
     else:
         try:
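Note: after this patch the sync helpers return the server-reported error_type/error_msg instead of the hard-coded 'base exceeds limit' pair. A minimal sketch of a caller using the new exception (the helper function itself is illustrative, not code from this series; batch_append_rows and need_convert_back are used exactly as in the patched sync code):

    from dtable_events.utils.dtable_server_api import BaseExceedsException, DTableServerAPI

    def append_rows_or_error(api: DTableServerAPI, table_name, rows):
        try:
            api.batch_append_rows(table_name, rows, need_convert_back=False)
        except BaseExceedsException as e:
            # e.error_type is 'exceed_rows_limit', 'exceed_columns_limit' or
            # 'base_exceeds_limit'; e.error_msg is the server's readable text.
            return {'error_msg': e.error_msg, 'error_type': e.error_type, 'task_status_code': 400}
        return None
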
From 03a9544c84a7626aa6a608f05021220cb7fc5336 Mon Sep 17 00:00:00 2001
From: AlexCXC <1223408988@qq.com>
Date: Mon, 16 Oct 2023 18:27:52 +0800
Subject: [PATCH 2/4] fix some error logs to warning

---
 dtable_events/automations/actions.py            |  6 +++++-
 .../common_dataset/common_dataset_sync_utils.py | 14 +++++++-------
 dtable_events/dtable_io/excel.py                |  2 +-
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/dtable_events/automations/actions.py b/dtable_events/automations/actions.py
index 35a4c1e2..88c749e1 100644
--- a/dtable_events/automations/actions.py
+++ b/dtable_events/automations/actions.py
@@ -1626,7 +1626,11 @@ def get_linked_table_rows(self):
 
         table_name = self.get_table_name(self.linked_table_id)
         columns = self.get_columns(self.linked_table_id)
-        sql = filter2sql(table_name, columns, filter_conditions, by_group=True)
+        try:
+            sql = filter2sql(table_name, columns, filter_conditions, by_group=True)
+        except Exception as e:
+            logger.error('rule: %s dtable: %s table: %s condition: %s to sql error: %s', self.auto_rule.rule_id, self.auto_rule.dtable_uuid, table_name, filter_conditions, e)
+            return []
         query_clause = "*"
         if column_names:
             if "_id" not in column_names:
diff --git a/dtable_events/common_dataset/common_dataset_sync_utils.py b/dtable_events/common_dataset/common_dataset_sync_utils.py
index 5f09af3b..40a37179 100644
--- a/dtable_events/common_dataset/common_dataset_sync_utils.py
+++ b/dtable_events/common_dataset/common_dataset_sync_utils.py
@@ -770,11 +770,11 @@ def import_sync_CDS(context):
         if server_only and (start + step) > SRC_ROWS_LIMIT:
             step = SRC_ROWS_LIMIT - start
         sql = sql_template + (" LIMIT {offset}, {limit}".format(offset=start, limit=step))
-        logger.debug('fetch src rows-id sql: %s', sql)
+        logger.debug('fetch src rows-id sql: %s', sql[:200])
         try:
             rows, _ = src_dtable_db_api.query(sql, convert=False, server_only=server_only)
         except Exception as e:
-            logger.error('fetch src rows id filter_conditions: %s sql: %s src columns: %s error: %s', filter_conditions, sql, src_table['columns'], e)
+            logger.error('fetch src rows id filter_conditions: %s sql: %s src columns: %s error: %s', filter_conditions, sql[:200], src_table['columns'], e)
             return {
                 'dst_table_id': None,
                 'error_msg': 'fetch src rows id error: %s' % e,
@@ -795,11 +795,11 @@ def import_sync_CDS(context):
     start, step = 0, 10000
     while is_sync and True:
         sql = f"SELECT `_id` FROM `{dst_table_name}` LIMIT {start}, {step}"
-        logger.debug('fetch dst rows-id sql: %s', sql)
+        logger.debug('fetch dst rows-id sql: %s', sql[:200])
         try:
             rows, _ = dst_dtable_db_api.query(sql, convert=False, server_only=True)
         except Exception as e:
-            logger.error('fetch dst rows id sql: %s error: %s', sql, e)
+            logger.error('fetch dst rows id sql: %s error: %s', sql[:200], e)
             return {
                 'dst_table_id': None,
                 'error_msg': 'fetch dst rows id error: %s' % e,
@@ -834,7 +834,7 @@ def import_sync_CDS(context):
         try:
             src_rows, _ = src_dtable_db_api.query(sql, convert=False, server_only=server_only)
         except Exception as e:
-            logger.error('fetch src to-be-updated-rows sql: %s error: %s', sql, e)
+            logger.error('fetch src to-be-updated-rows sql: %s error: %s', sql[:200], e)
             return {
                 'dst_table_id': None,
                 'error_msg': 'fetch src to-be-updated-rows error: %s' % e,
@@ -846,7 +846,7 @@ def import_sync_CDS(context):
         try:
             dst_rows, _ = dst_dtable_db_api.query(sql, convert=False, server_only=True)
         except Exception as e:
-            logger.error('fetch dst to-be-updated-rows sql: %s error: %s', sql, e)
+            logger.error('fetch dst to-be-updated-rows sql: %s error: %s', sql[:200], e)
             return {
                 'dst_table_id': None,
                 'error_msg': 'fetch dst to-be-updated-rows error: %s' % e,
@@ -882,7 +882,7 @@ def import_sync_CDS(context):
         try:
             src_rows, _ = src_dtable_db_api.query(sql, convert=False, server_only=server_only)
         except Exception as e:
-            logger.error('fetch to-be-appended-rows sql: %s error: %s', sql, e)
+            logger.error('fetch to-be-appended-rows sql: %s error: %s', sql[:200], e)
             return {
                 'dst_table_id': None,
                 'error_msg': 'fetch to-be-appended-rows error: %s' % e,
diff --git a/dtable_events/dtable_io/excel.py b/dtable_events/dtable_io/excel.py
index 266de203..0c9564b1 100644
--- a/dtable_events/dtable_io/excel.py
+++ b/dtable_events/dtable_io/excel.py
@@ -246,7 +246,7 @@ def parse_excel_columns(sheet_rows, head_index, max_column):
 
     for index in range(max_column):
         name = get_excel_cell_value(head_row, index)
-        column_name = str(name.replace('\ufeff', '').strip()) if name else 'Field' + str(index + 1)
+        column_name = str(name).replace('\ufeff', '').strip() if name else 'Field' + str(index + 1)
 
         if column_name in column_name_set:
             raise Exception('Duplicated column names are not supported')
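Note: the sql[:200] slices only bound the size of log lines, since the generated SQL can embed thousands of row ids. The excel.py change fixes a real crash: spreadsheet header cells can come back as non-strings (numbers, dates), and the old str(name.replace(...)) called .replace() on the raw value. A short illustration with made-up cell values:

    # old: str(name.replace('\ufeff', '').strip())  -> AttributeError for 42
    # new: str(name).replace('\ufeff', '').strip()  -> works for any cell value
    for index, name in enumerate(['\ufeffTitle ', 42, None]):
        column_name = str(name).replace('\ufeff', '').strip() if name else 'Field' + str(index + 1)
        print(column_name)  # Title, 42, Field3
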
From 6cc29990db5e5e2fbc83b71a9e06a57cb3d033de Mon Sep 17 00:00:00 2001
From: AlexHappy <1223408988@qq.com>
Date: Mon, 16 Oct 2023 22:50:24 +0800
Subject: [PATCH 3/4] use BaseExceedsException

---
 dtable_events/utils/dtable_server_api.py | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/dtable_events/utils/dtable_server_api.py b/dtable_events/utils/dtable_server_api.py
index ac577651..a528f7af 100644
--- a/dtable_events/utils/dtable_server_api.py
+++ b/dtable_events/utils/dtable_server_api.py
@@ -23,19 +23,6 @@ def __init__(self, error_type, error_msg):
         self.error_type = error_type
         self.error_msg = error_msg
 
-
-class BaseExceedsLimitException(BaseExceedsException):
-    pass
-
-
-class BaseExceedsRowsLimitException(BaseExceedsException):
-    pass
-
-
-class BaseExceedsColumnsLimitException(BaseExceedsException):
-    pass
-
-
 class NotFoundException(Exception):
     pass
 
@@ -56,11 +43,11 @@ def parse_response(response):
 
         if error_type == 'wrong_filter_in_filters':
             raise WrongFilterException()
         if error_type == 'exceed_rows_limit' or error_msg == 'Exceed the rows limit':
-            raise BaseExceedsRowsLimitException('exceed_rows_limit', 'Exceed the rows limit')
+            raise BaseExceedsException('exceed_rows_limit', 'Exceed the rows limit')
         if error_type == 'exceed_columns_limit' or error_msg == 'Exceed the columns limit':
-            raise BaseExceedsColumnsLimitException('exceed_columns_limit', 'Exceed the columns limit')
+            raise BaseExceedsException('exceed_columns_limit', 'Exceed the columns limit')
         if error_type == 'base_exceeds_limit' or error_msg == 'The base size exceeds the limit of 200MB, the operation cannot be performed.':
-            raise BaseExceedsLimitException('base_exceeds_limit', 'The base size exceeds the limit of 200MB, the operation cannot be performed.')
+            raise BaseExceedsException('base_exceeds_limit', 'The base size exceeds the limit of 200MB, the operation cannot be performed.')
         raise ConnectionError(response.status_code, response.text)
     else:
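Note: since error_type already distinguishes the three limit cases, the subclasses added in PATCH 1 are collapsed back into the single BaseExceedsException, and one except clause covers rows, columns and base-size limits. An illustrative handler branching on error_type (the function and its messages are examples only, not part of this series):

    from dtable_events.utils.dtable_server_api import BaseExceedsException

    def describe_limit_error(e):
        # error_type values raised by parse_response in this series
        if e.error_type == 'exceed_rows_limit':
            return 'rows limit reached, stop appending'
        if e.error_type == 'exceed_columns_limit':
            return 'columns limit reached, skip new columns'
        return e.error_msg  # 'base_exceeds_limit' carries the 200MB message

    print(describe_limit_error(BaseExceedsException('exceed_rows_limit', 'Exceed the rows limit')))
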
From 145cf7d5f873a252a05fe24fd956c97bf1ba7f7c Mon Sep 17 00:00:00 2001
From: AlexCXC <1223408988@qq.com>
Date: Tue, 17 Oct 2023 10:36:35 +0800
Subject: [PATCH 4/4] opt code

---
 dtable_events/automations/actions.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/dtable_events/automations/actions.py b/dtable_events/automations/actions.py
index 88c749e1..35a4c1e2 100644
--- a/dtable_events/automations/actions.py
+++ b/dtable_events/automations/actions.py
@@ -1626,11 +1626,7 @@ def get_linked_table_rows(self):
 
         table_name = self.get_table_name(self.linked_table_id)
         columns = self.get_columns(self.linked_table_id)
-        try:
-            sql = filter2sql(table_name, columns, filter_conditions, by_group=True)
-        except Exception as e:
-            logger.error('rule: %s dtable: %s table: %s condition: %s to sql error: %s', self.auto_rule.rule_id, self.auto_rule.dtable_uuid, table_name, filter_conditions, e)
-            return []
+        sql = filter2sql(table_name, columns, filter_conditions, by_group=True)
         query_clause = "*"
         if column_names:
             if "_id" not in column_names:
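Note: this reverts the guard added in PATCH 2, so a broken filter in filter2sql again raises out of get_linked_table_rows() and is handled where the rule is executed, instead of being swallowed as an empty row list. A sketch of such a caller, assuming the runner catches per-action errors (the function below is illustrative, not code from this series):

    import logging

    logger = logging.getLogger(__name__)

    def run_rule_action(action):
        # 'action' stands in for the real automation action object
        try:
            return action.get_linked_table_rows()
        except Exception as e:
            logger.warning('rule: %s get linked rows error: %s', action.auto_rule.rule_id, e)
            return []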