diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index 16e87a00dd9..807ccbeeb3c 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -258,13 +258,22 @@ jobs:
           key: go-${{ runner.os }}${{ runner.arch }}-build-${{ env.COMMIT }}
 
       - name: Run unit tests
+        id: test_run
         timeout-minutes: 15
-        run: make unit-test-coverage
+        run: |
+          # Record this step's exit status so the next step can detect a timeout (124).
+          trap 'echo "exit_code=$?" >> "$GITHUB_OUTPUT"' EXIT
+          make unit-test-coverage
         env:
           UNIT_TEST_DIRS: ${{ inputs.unit_test_directory }}
           TEST_ARGS: ${{ needs.set-up-single-test.outputs.single_test_args }}
           TEST_TIMEOUT: ${{ needs.set-up-single-test.outputs.test_timeout }}
 
+      - name: Highlight test timeout
+        if: ${{ steps.test_run.outcome == 'failure' && steps.test_run.outputs.exit_code == 124 }}
+        run: |
+          echo "::error::Test timed out"
+
       - name: Generate test summary
         uses: mikepenz/action-junit-report@v5.0.0-rc01
         if: failure()
@@ -341,8 +350,17 @@ jobs:
           key: go-${{ runner.os }}${{ runner.arch }}-build-${{ env.COMMIT }}
 
       - name: Run integration test
+        id: test_run
         timeout-minutes: 15
-        run: make integration-test-coverage
+        run: |
+          # Record this step's exit status so the next step can detect a timeout (124).
+          trap 'echo "exit_code=$?" >> "$GITHUB_OUTPUT"' EXIT
+          make integration-test-coverage
+
+      - name: Highlight test timeout
+        if: ${{ steps.test_run.outcome == 'failure' && steps.test_run.outputs.exit_code == 124 }}
+        run: |
+          echo "::error::Test timed out"
 
       - name: Generate test summary
         uses: mikepenz/action-junit-report@v5.0.0-rc01
@@ -488,12 +506,21 @@ jobs:
           echo "JOB_ID=${job_id:-unknown}" >> "$GITHUB_OUTPUT"
 
       - name: Run functional test
+        id: test_run
         if: ${{ inputs.run_single_functional_test != true || (inputs.run_single_functional_test == true && contains(fromJSON(needs.set-up-single-test.outputs.dbs), env.PERSISTENCE_DRIVER)) }}
         timeout-minutes: ${{ fromJSON(needs.set-up-single-test.outputs.github_timeout) }} # make sure this is larger than the test timeout in the Makefile
-        run: make functional-test-coverage
+        run: |
+          # Record this step's exit status so the next step can detect a timeout (124).
+          trap 'echo "exit_code=$?" >> "$GITHUB_OUTPUT"' EXIT
+          make functional-test-coverage
         env:
           TEST_ARGS: ${{ needs.set-up-single-test.outputs.single_test_args }}
 
+      - name: Highlight test timeout
+        if: ${{ steps.test_run.outcome == 'failure' && steps.test_run.outputs.exit_code == 124 }}
+        run: |
+          echo "::error::Test timed out"
+
       - name: Generate test summary
         uses: mikepenz/action-junit-report@v5.0.0-rc01
         if: failure()
@@ -615,18 +642,17 @@ jobs:
           echo "JOB_ID=${job_id-unknown}" >> "$GITHUB_OUTPUT"
 
       - name: Run functional test xdc
+        id: test_run
         timeout-minutes: 25 # update this to TEST_TIMEOUT+5 if you update the Makefile
-        run: make functional-test-xdc-coverage
+        run: |
+          # Record this step's exit status so the next step can detect a timeout (124).
+          trap 'echo "exit_code=$?" >> "$GITHUB_OUTPUT"' EXIT
+          make functional-test-xdc-coverage
 
-      - name: Generate test summary
-        uses: mikepenz/action-junit-report@v5.0.0-rc01
-        if: failure()
-        with:
-          report_paths: ./.testoutput/*.junit.xml
-          detailed_summary: true
-          check_annotations: false
-          annotate_only: true
-          skip_annotations: true
+      - name: Highlight test timeout
+        if: ${{ steps.test_run.outcome == 'failure' && steps.test_run.outputs.exit_code == 124 }}
+        run: |
+          echo "::error::Test timed out"
 
       - name: Upload test results
         # Can't pin to major because the action linter doesn't recognize the include-hidden-files flag.
@@ -740,8 +766,17 @@ jobs:
           echo "JOB_ID=${job_id:-unknown}" >> "$GITHUB_OUTPUT"
 
       - name: Run functional test ndc
+        id: test_run
         timeout-minutes: 15
-        run: make functional-test-ndc-coverage
+        run: |
+          # Record this step's exit status so the next step can detect a timeout (124).
+          trap 'echo "exit_code=$?" >> "$GITHUB_OUTPUT"' EXIT
+          make functional-test-ndc-coverage
+
+      - name: Highlight test timeout
+        if: ${{ steps.test_run.outcome == 'failure' && steps.test_run.outputs.exit_code == 124 }}
+        run: |
+          echo "::error::Test timed out"
 
       - name: Upload test results
         # Can't pin to major because the action linter doesn't recognize the include-hidden-files flag.
diff --git a/go.mod b/go.mod index 4fa2ea1944b..3960be593c3 100644 --- a/go.mod +++ b/go.mod @@ -117,6 +117,7 @@ require ( github.com/klauspost/compress v1.17.9 // indirect github.com/konsorten/go-windows-terminal-sequences v1.0.1 // indirect github.com/mailru/easyjson v0.7.7 // indirect + github.com/maruel/panicparse/v2 v2.4.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.13 // indirect diff --git a/go.sum b/go.sum index 29b8cbc29c9..a0ac78b5526 100644 --- a/go.sum +++ b/go.sum @@ -194,6 +194,8 @@ github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/maruel/panicparse/v2 v2.4.0 h1:yQKMIbQ0DKfinzVkTkcUzQyQ60UCiNnYfR7PWwTs2VI= +github.com/maruel/panicparse/v2 v2.4.0/go.mod h1:nOY2OKe8csO3F3SA5+hsxot05JLgukrF54B9x88fVp4= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= diff --git a/tests/testcore/functional_test_base.go b/tests/testcore/functional_test_base.go index 971d5a81ce5..2e5b9ac3f25 100644 --- a/tests/testcore/functional_test_base.go +++ b/tests/testcore/functional_test_base.go @@ -80,7 +80,8 @@ type ( historyrequire.HistoryRequire updateutils.UpdateUtils - Logger log.Logger + testTimeout *testTimeout + Logger log.Logger // Test cluster configuration. 
testClusterFactory TestClusterFactory
@@ -244,12 +245,31 @@ func (s *FunctionalTestBase) SetupSuiteWithCluster(clusterConfigFile string, opt
 func (s *FunctionalTestBase) SetupTest() {
 	s.checkTestShard()
 	s.initAssertions()
+	// Arm the per-test watchdog; newTestTimeout also registers cancel via t.Cleanup.
+	s.testTimeout = newTestTimeout(s.T(), 2*time.Minute)
 }
 
 func (s *FunctionalTestBase) SetupSubTest() {
 	s.initAssertions()
 }
 
+// TeardownTest stops the current test's watchdog. Calling it is optional and
+// idempotent: newTestTimeout already registers cancel through t.Cleanup.
+//
+// NOTE(review): testify's suite runner only calls a hook spelled TearDownTest,
+// so this method is never invoked automatically under its current name.
+func (s *FunctionalTestBase) TeardownTest() {
+	if s.testTimeout != nil {
+		s.testTimeout.cancel()
+	}
+}
+
+// SetTestTimeout replaces the running test's time budget with d, measured from
+// the moment the test was set up (not from the time of this call).
+func (s *FunctionalTestBase) SetTestTimeout(d time.Duration) {
+	s.testTimeout.set(d)
+}
+
 func (s *FunctionalTestBase) initAssertions() {
 	// `s.Assertions` (as well as other test helpers which depends on `s.T()`) must be initialized on
 	// both test and subtest levels (but not suite level, where `s.T()` is `nil`).
diff --git a/tests/testcore/timeout.go b/tests/testcore/timeout.go
new file mode 100644
index 00000000000..a94e1bd3935
--- /dev/null
+++ b/tests/testcore/timeout.go
@@ -0,0 +1,107 @@
+package testcore
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"os"
+	"runtime"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/maruel/panicparse/v2/stack"
+)
+
+// timeoutExitCode is the status the test process exits with when a test
+// overruns its budget; the `timeout` utility uses the same code.
+const timeoutExitCode = 124
+
+// testTimeout is a per-test watchdog: a background goroutine terminates the
+// whole test process once the test has been running longer than its budget.
+type testTimeout struct {
+	updateCh  chan time.Duration // delivers a new budget to the watchdog goroutine
+	closeCh   chan struct{}      // closed by cancel to stop the watchdog goroutine
+	closeOnce sync.Once          // makes cancel idempotent
+}
+
+// newTestTimeout starts a watchdog that exits the process with timeoutExitCode
+// if it is still armed once `after` has elapsed since this call. It is disarmed
+// automatically via t.Cleanup; cancel may additionally be called by hand.
+func newTestTimeout(t *testing.T, after time.Duration) *testTimeout {
+	res := &testTimeout{
+		// updateCh must be allocated: a send on a nil channel blocks forever.
+		updateCh: make(chan time.Duration),
+		closeCh:  make(chan struct{}),
+	}
+	// Disarm when the test finishes, without relying on a suite teardown hook.
+	t.Cleanup(res.cancel)
+
+	go func() {
+		startedAt := time.Now()
+		tick := time.NewTicker(time.Second)
+		defer tick.Stop()
+		for {
+			select {
+			case after = <-res.updateCh:
+			case <-res.closeCh:
+				return
+			case <-tick.C:
+				if time.Since(startedAt) <= after {
+					continue
+				}
+				// Cannot use t.Fatalf since it will wait until the test completes.
+				t.Logf("test took more than %v to complete, exiting now", after)
+				dumpGoroutines(t)
+
+				// `timeout` exits with code 124 to indicate a timeout - might as well do that here.
+				// GitHub Actions will mark this test when it sees this exit code.
+				//revive:disable-next-line:deep-exit
+				os.Exit(timeoutExitCode)
+			}
+		}
+	}()
+	return res
+}
+
+// dumpGoroutines writes the stacks of all goroutines to stdout to help debug a
+// stuck test. runtime/debug.Stack would only cover the calling goroutine, i.e.
+// the watchdog itself.
+func dumpGoroutines(t *testing.T) {
+	buf := make([]byte, 1<<20)
+	for {
+		n := runtime.Stack(buf, true)
+		if n < len(buf) {
+			buf = buf[:n]
+			break
+		}
+		// The dump was truncated; retry with a larger buffer.
+		buf = make([]byte, 2*len(buf))
+	}
+	rawStack := append(buf, '\n', '\n')
+	// Best effort: the process is about to exit, so a write error is not actionable.
+	_, _ = os.Stdout.Write(rawStack)
+
+	// NOTE(review): ScanSnapshot appears to forward only non-trace text to the
+	// writer and to return the parsed snapshot, which is discarded here; confirm
+	// whether the snapshot should be rendered instead of the raw dump above.
+	_, _, err := stack.ScanSnapshot(bytes.NewReader(rawStack), os.Stdout, stack.DefaultOpts())
+	// NOTE(review): io.EOF is treated as "whole input consumed", not as a failure — confirm against panicparse docs.
+	if err != nil && !errors.Is(err, io.EOF) {
+		t.Logf("failed to parse stack trace: %v", err)
+	}
+}
+
+// set replaces the budget with d. The deadline stays relative to the watchdog's
+// start time. It returns without effect once the watchdog has been cancelled.
+func (b *testTimeout) set(d time.Duration) {
+	select {
+	case b.updateCh <- d:
+	case <-b.closeCh:
+	}
+}
+
+// cancel stops the watchdog. It never blocks and may be called more than once.
+func (b *testTimeout) cancel() {
+	b.closeOnce.Do(func() { close(b.closeCh) })
+}
diff --git a/tests/update_workflow_test.go b/tests/update_workflow_test.go
index 460c98f0518..bc4492acf39 100644
--- a/tests/update_workflow_test.go
+++ b/tests/update_workflow_test.go
@@ -4922,8 +4922,6 @@ func (s *UpdateWorkflowSuite) TestUpdateWithStart() {
 
 				// make sure there's no lock contention
 				s.Empty(capture.Snapshot()[metrics.TaskWorkflowBusyCounter.Name()])
-
-				retCh <- multiopsResponseErr{resp, err}
 			}()
 			return retCh
 		}
@@ -5352,24 +5350,25 @@ func (s *UpdateWorkflowSuite) TestUpdateWithStart() {
 			startReq.WorkflowIdConflictPolicy = enumspb.WORKFLOW_ID_CONFLICT_POLICY_USE_EXISTING
 
 			// allows 1st
-			updateReq := s.updateWorkflowRequest(tv.WithUpdateIDNumber(0),
-				&updatepb.WaitPolicy{LifecycleStage: enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED})
-			uwsCh := sendUpdateWithStart(ctx, startReq, updateReq)
-			_, err := s.TaskPoller.PollAndHandleWorkflowTask(tv,
-				func(task *workflowservice.PollWorkflowTaskQueueResponse) (*workflowservice.RespondWorkflowTaskCompletedRequest, error) {
-					return &workflowservice.RespondWorkflowTaskCompletedRequest{
-						Messages: s.UpdateAcceptCompleteMessages(tv, task.Messages[0]),
-					}, nil
-				})
-			s.NoError(err)
-			uwsRes := <-uwsCh
-			s.NoError(uwsRes.err)
+			//updateReq := s.updateWorkflowRequest(tv.WithUpdateIDNumber(0),
+			//	&updatepb.WaitPolicy{LifecycleStage: enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED})
+			//uwsCh := sendUpdateWithStart(ctx, startReq, updateReq)
+			//_, err := s.TaskPoller.PollAndHandleWorkflowTask(tv,
+			//	func(task *workflowservice.PollWorkflowTaskQueueResponse) (*workflowservice.RespondWorkflowTaskCompletedRequest, error) {
+			//		return &workflowservice.RespondWorkflowTaskCompletedRequest{
+			//			Messages: 
s.UpdateAcceptCompleteMessages(tv, task.Messages[0]), + // }, nil + // }) + //s.NoError(err) + //uwsRes := <-uwsCh + //s.NoError(uwsRes.err) // denies 2nd - updateReq = s.updateWorkflowRequest(tv.WithUpdateIDNumber(1), updateReq.WaitPolicy) + updateReq := s.updateWorkflowRequest(tv.WithUpdateIDNumber(1), + &updatepb.WaitPolicy{LifecycleStage: enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED}) select { case <-sendUpdateWithStart(ctx, startReq, updateReq): - err = (<-sendUpdateWithStart(ctx, startReq, updateReq)).err + err := (<-sendUpdateWithStart(ctx, startReq, updateReq)).err s.Error(err) errs := err.(*serviceerror.MultiOperationExecution).OperationErrors() s.Len(errs, 2)