From 89abe78abf2f23cf087044570887c55fd3dcc028 Mon Sep 17 00:00:00 2001 From: Dery Rahman Ahaddienata Date: Wed, 8 Jan 2025 17:02:34 +0700 Subject: [PATCH] fix(mc2mc): comment on headers (#64) fix: comment on headers --- mc2mc/internal/query/helper.go | 21 ++++++++------------- mc2mc/internal/query/helper_test.go | 26 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/mc2mc/internal/query/helper.go b/mc2mc/internal/query/helper.go index acf73fc..d3c16c0 100644 --- a/mc2mc/internal/query/helper.go +++ b/mc2mc/internal/query/helper.go @@ -10,27 +10,22 @@ const ( ) var ( - headerPattern = regexp.MustCompile(`(?i)^\s*set\s+[^;]+;`) // regex to match header statements + headerPattern = regexp.MustCompile(`(?im)^\s*set\s+[^;]+;\s*`) // regex to match header statements ) func SeparateHeadersAndQuery(query string) (string, string) { query = strings.TrimSpace(query) - headers := []string{} - remainingQuery := query - - // keep matching header statements until there are no more - for { - match := headerPattern.FindString(remainingQuery) - if match == "" { - break - } - headers = append(headers, strings.TrimSpace(match)) - remainingQuery = strings.TrimSpace(remainingQuery[len(match):]) - } + // extract all SET header statements; comment lines are not matched and stay in the remaining query + headers := headerPattern.FindAllString(query, -1) + // Remove all headers from the original query to get the remaining query + remainingQuery := strings.TrimSpace(headerPattern.ReplaceAllString(query, "")) headerStr := "" if len(headers) > 0 { + for i, header := range headers { + headers[i] = strings.TrimSpace(header) + } headerStr = strings.Join(headers, "\n") } diff --git a/mc2mc/internal/query/helper_test.go b/mc2mc/internal/query/helper_test.go index 43c5b8f..d21d6c0 100644 --- a/mc2mc/internal/query/helper_test.go +++ b/mc2mc/internal/query/helper_test.go @@ -64,4 +64,30 @@ select CONCAT_WS('; ', COLLECT_LIST(dates)) AS dates from presentation.main.impo expectedQuery := `select 
CONCAT_WS('; ', COLLECT_LIST(dates)) AS dates from presentation.main.important_date` assert.Equal(t, expectedQuery, query) }) + t.Run("works with query with comment on header", func(t *testing.T) { + q1 := `set odps.sql.allow.fullscan=true; +-- comment here +set odps.sql.python.version=cp37; + +select distinct event_timestamp, + client_id, + country_code, +from presentation.main.important_date +where CAST(event_timestamp as DATE) = '{{ .DSTART | Date }}' + and client_id in ('123') +` + headers, query := query.SeparateHeadersAndQuery(q1) + expectedHeader := `set odps.sql.allow.fullscan=true; +set odps.sql.python.version=cp37;` + assert.Equal(t, expectedHeader, headers) + + expectedQuery := `-- comment here +select distinct event_timestamp, + client_id, + country_code, +from presentation.main.important_date +where CAST(event_timestamp as DATE) = '{{ .DSTART | Date }}' + and client_id in ('123')` + assert.Contains(t, expectedQuery, query) + }) }