Skip to content

Commit

Permalink
Support both new lines and Query Parameters with text format auto_exp…
Browse files Browse the repository at this point in the history
…lain better
  • Loading branch information
keiko713 committed Aug 20, 2024
1 parent 49d4d0e commit f0a65c6
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 35 deletions.
62 changes: 62 additions & 0 deletions logs/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4193,6 +4193,68 @@ var tests = []testpair{
},
}},
},
// new lines
{
[]state.LogLine{{
Content: "duration: 1681.452 ms plan:\n" +
"\tQuery Text: PREPARE pga_keys (text) AS\n" +
"\t SELECT * FROM api_keys WHERE access_scope_type = $1\n" +
"\t AND organization_id = '13f82cb8-a0cf-424e-903a-6f8d7db83191';\n" +
"\tSeq Scan on api_keys (cost=0.00..11.93 rows=1 width=132)\n" +
"\t Filter: ((access_scope_type = 'organization'::text) AND (organization_id = '13f82cb8-a0cf-424e-903a-6f8d7db83191'::uuid))",
}},
[]state.LogLine{{
Query: "PREPARE pga_keys (text) AS\n\t SELECT * FROM api_keys WHERE access_scope_type = $1\n\t AND organization_id = '13f82cb8-a0cf-424e-903a-6f8d7db83191';",
Classification: pganalyze_collector.LogLineInformation_STATEMENT_AUTO_EXPLAIN,
ReviewedForSecrets: true,
SecretMarkers: []state.LogSecretMarker{{
ByteStart: 30,
ByteEnd: 376,
Kind: state.StatementTextLogSecret,
}},
}},
[]state.PostgresQuerySample{{
Query: "PREPARE pga_keys (text) AS\n\t SELECT * FROM api_keys WHERE access_scope_type = $1\n\t AND organization_id = '13f82cb8-a0cf-424e-903a-6f8d7db83191';",
RuntimeMs: 1681.452,
HasExplain: true,
ExplainSource: pganalyze_collector.QuerySample_AUTO_EXPLAIN_EXPLAIN_SOURCE,
ExplainFormat: pganalyze_collector.QuerySample_TEXT_EXPLAIN_FORMAT,
ExplainOutputText: "Seq Scan on api_keys (cost=0.00..11.93 rows=1 width=132)\n" +
"\t Filter: ((access_scope_type = 'organization'::text) AND (organization_id = '13f82cb8-a0cf-424e-903a-6f8d7db83191'::uuid))",
}},
},
// new lines + Query Parameters
{
[]state.LogLine{{
Content: "duration: 1681.452 ms plan:\n" +
"\tQuery Text: PREPARE pga_keys (text) AS\n" +
"\t SELECT * FROM api_keys WHERE access_scope_type = $1\n" +
"\t AND organization_id = '13f82cb8-a0cf-424e-903a-6f8d7db83191';\n" +
"\tQuery Parameters: $1 = 'organizati...'\n" +
"\tSeq Scan on api_keys (cost=0.00..11.93 rows=1 width=132)\n" +
"\t Filter: ((access_scope_type = 'organization'::text) AND (organization_id = '13f82cb8-a0cf-424e-903a-6f8d7db83191'::uuid))",
}},
[]state.LogLine{{
Query: "PREPARE pga_keys (text) AS\n\t SELECT * FROM api_keys WHERE access_scope_type = $1\n\t AND organization_id = '13f82cb8-a0cf-424e-903a-6f8d7db83191';",
Classification: pganalyze_collector.LogLineInformation_STATEMENT_AUTO_EXPLAIN,
ReviewedForSecrets: true,
SecretMarkers: []state.LogSecretMarker{{
ByteStart: 30,
ByteEnd: 416,
Kind: state.StatementTextLogSecret,
}},
}},
[]state.PostgresQuerySample{{
Query: "PREPARE pga_keys (text) AS\n\t SELECT * FROM api_keys WHERE access_scope_type = $1\n\t AND organization_id = '13f82cb8-a0cf-424e-903a-6f8d7db83191';",
RuntimeMs: 1681.452,
HasExplain: true,
ExplainSource: pganalyze_collector.QuerySample_AUTO_EXPLAIN_EXPLAIN_SOURCE,
ExplainFormat: pganalyze_collector.QuerySample_TEXT_EXPLAIN_FORMAT,
ExplainOutputText: "Seq Scan on api_keys (cost=0.00..11.93 rows=1 width=132)\n" +
"\t Filter: ((access_scope_type = 'organization'::text) AND (organization_id = '13f82cb8-a0cf-424e-903a-6f8d7db83191'::uuid))",
Parameters: []null.String{null.StringFrom("organizati...")},
}},
},
// pganalyze-collector-identify
{
[]state.LogLine{{
Expand Down
90 changes: 55 additions & 35 deletions logs/querysample/querysample.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,6 @@ func transformExplainJSONToQuerySample(logLine state.LogLine, explainText string
// Remove query text from EXPLAIN itself, to avoid duplication and match EXPLAIN (FORMAT JSON)
sampleQueryText := strings.TrimSpace(explainJSONOutput.QueryText)
explainJSONOutput.QueryText = ""
// Handle Query Parameters (available from Postgres 16+)
var parameters []null.String
// Regular expression to find all values in single quotes or NULL
// Query Parameters example: $1 = 'foo', $2 = '123', $3 = NULL, $4 = 'bo''o'
re := regexp.MustCompile(`'((?:[^']|'')*)'|NULL`)
for _, part := range re.FindAllString(explainJSONOutput.QueryParameters, -1) {
if part == "NULL" {
parameters = append(parameters, null.NewString("", false))
} else {
parameters = append(parameters, null.StringFrom(strings.Trim(part, "'")))
}
}

return state.PostgresQuerySample{
Query: sampleQueryText,
Expand All @@ -63,39 +51,55 @@ func transformExplainJSONToQuerySample(logLine state.LogLine, explainText string
ExplainSource: pganalyze_collector.QuerySample_AUTO_EXPLAIN_EXPLAIN_SOURCE,
ExplainFormat: pganalyze_collector.QuerySample_JSON_EXPLAIN_FORMAT,
ExplainOutputJSON: &explainJSONOutput,
Parameters: parameters,
Parameters: findQueryParameters(explainJSONOutput.QueryParameters),
}, nil
}

var autoExplainTextPlanDetailsRegexp = regexp.MustCompile(`^Query Text: (.+)\s+([\s\S]+)`)
var herokuAutoExplainWithTabRegexp = regexp.MustCompile(`^Query Text: ([^\t]+)\t([\s\S]+)`)
var autoExplainTextWithQueryParametersRegexp = regexp.MustCompile(`^Query Text: ([\s\S]+)\r?\n\s*Query Parameters: (.+)\r?\n\s*([\s\S]+)`)
var autoExplainTextWithCostsRegexp = regexp.MustCompile(`^Query Text: ([\s\S]+?)\r?\n\s*([\S ]+ \(cost=\d+\.\d{2}\.\.\d+\.\d{2} rows=\d+ width=\d+\)[\s\S]+)`)

func transformExplainTextToQuerySample(logLine state.LogLine, explainText string, queryRuntimeMs float64) (state.PostgresQuerySample, error) {
explainParts := autoExplainTextPlanDetailsRegexp.FindStringSubmatch(explainText)
if len(explainParts) != 3 {
return state.PostgresQuerySample{}, fmt.Errorf("auto_explain output doesn't match expected format")
querySample := state.PostgresQuerySample{
RuntimeMs: queryRuntimeMs,
OccurredAt: logLine.OccurredAt,
Username: logLine.Username,
Database: logLine.Database,
LogLineUUID: logLine.UUID,
HasExplain: true,
ExplainSource: pganalyze_collector.QuerySample_AUTO_EXPLAIN_EXPLAIN_SOURCE,
ExplainFormat: pganalyze_collector.QuerySample_TEXT_EXPLAIN_FORMAT,
}
// If EXPLAIN output's first char is not a capital letter (e.g. not something like "Update on" or "Index Scan"),
// likely it's hitting the Heroku's newline break in "Query Text:" chunk
// Handle the separation of the query and the explain output text with the tab for these cases
explainOutputFirstChar := explainParts[2][0]
if cUtil.IsHeroku() && !(explainOutputFirstChar >= 'A' && explainOutputFirstChar <= 'Z') {
if parts := herokuAutoExplainWithTabRegexp.FindStringSubmatch(explainText); len(parts) == 3 {
explainParts = parts
withParametersParts := autoExplainTextWithQueryParametersRegexp.FindStringSubmatch(explainText)
if len(withParametersParts) == 4 {
querySample.Parameters = findQueryParameters(withParametersParts[2])
querySample.Query = withParametersParts[1]
querySample.ExplainOutputText = withParametersParts[3]
} else {
explainParts := autoExplainTextWithCostsRegexp.FindStringSubmatch(explainText)
if len(explainParts) != 3 {
// Fallback to the old way (not supporting new lines in Query Text, but does support EXPLAIN without costs)
explainParts = autoExplainTextPlanDetailsRegexp.FindStringSubmatch(explainText)
if len(explainParts) != 3 {
return state.PostgresQuerySample{}, fmt.Errorf("auto_explain output doesn't match expected format")
}
}

// If EXPLAIN output's first char is not a capital letter (e.g. not something like "Update on" or "Index Scan"),
// likely it's hitting the Heroku's newline break in "Query Text:" chunk
// Handle the separation of the query and the explain output text with the tab for these cases
// (this can be retired with the autoExplainTextWithCostsRegexp, but leaving here for the EXPLAIN without costs case)
explainOutputFirstChar := explainParts[2][0]
if cUtil.IsHeroku() && !(explainOutputFirstChar >= 'A' && explainOutputFirstChar <= 'Z') {
if parts := herokuAutoExplainWithTabRegexp.FindStringSubmatch(explainText); len(parts) == 3 {
explainParts = parts
}
}
querySample.Query = explainParts[1]
querySample.ExplainOutputText = explainParts[2]
}
return state.PostgresQuerySample{
Query: strings.TrimSpace(explainParts[1]),
RuntimeMs: queryRuntimeMs,
OccurredAt: logLine.OccurredAt,
Username: logLine.Username,
Database: logLine.Database,
LogLineUUID: logLine.UUID,
HasExplain: true,
ExplainSource: pganalyze_collector.QuerySample_AUTO_EXPLAIN_EXPLAIN_SOURCE,
ExplainFormat: pganalyze_collector.QuerySample_TEXT_EXPLAIN_FORMAT,
ExplainOutputText: explainParts[2],
}, nil
return querySample, nil
}

func TransformLogMinDurationStatementToQuerySample(logLine state.LogLine, queryText string, queryRuntime string, queryProtocolStep string, parameterParts [][]string) (s state.PostgresQuerySample, ok bool) {
Expand Down Expand Up @@ -129,3 +133,19 @@ func TransformLogMinDurationStatementToQuerySample(logLine state.LogLine, queryT
}
return sample, true
}

func findQueryParameters(paramText string) []null.String {
// Handle Query Parameters (available from Postgres 16+)
var parameters []null.String
// Regular expression to find all values in single quotes or NULL
// Query Parameters example: $1 = 'foo', $2 = '123', $3 = NULL, $4 = 'bo''o'
re := regexp.MustCompile(`'((?:[^']|'')*)'|NULL`)
for _, part := range re.FindAllString(paramText, -1) {
if part == "NULL" {
parameters = append(parameters, null.NewString("", false))
} else {
parameters = append(parameters, null.StringFrom(strings.Trim(part, "'")))
}
}
return parameters
}

0 comments on commit f0a65c6

Please sign in to comment.