From 3fb1fb42a6a36094f294ce2760fd9bb4e622d065 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bartosz=20R=C3=B3=C5=BCa=C5=84ski?= <bartek.roza@gmail.com>
Date: Thu, 11 Jul 2024 10:36:05 +0000
Subject: [PATCH] dump goroutines' stacktraces on timeout in hare tests (#6101)

## Motivation
---
 hare3/hare_test.go | 64 +++++++++++++++++++++-------------------------
 1 file changed, 29 insertions(+), 35 deletions(-)

diff --git a/hare3/hare_test.go b/hare3/hare_test.go
index 71de7f71c3..dd8139ab12 100644
--- a/hare3/hare_test.go
+++ b/hare3/hare_test.go
@@ -5,6 +5,8 @@ import (
 	"fmt"
 	"math/rand"
 	"os"
+	"runtime/pprof"
+	"strings"
 	"sync"
 	"testing"
 	"time"
@@ -13,6 +15,7 @@ import (
 	"github.com/stretchr/testify/require"
 	"go.uber.org/mock/gomock"
 	"go.uber.org/zap/zapcore"
+	"go.uber.org/zap/zaptest"
 
 	"github.com/spacemeshos/go-spacemesh/atxsdata"
 	"github.com/spacemeshos/go-spacemesh/codec"
@@ -505,37 +508,38 @@ type testTracer struct {
 	sent        chan *Message
 }
 
-func (t *testTracer) waitStopped() types.LayerID {
-	wait := 10 * time.Second
+func waitForChan[T any](t testing.TB, ch <-chan T, timeout time.Duration, failureMsg string) T {
+	var value T
 	select {
-	case <-time.After(wait):
-		require.FailNow(t, "didn't stop", "wait %v", wait)
-	case lid := <-t.stopped:
-		return lid
+	case <-time.After(timeout):
+		builder := strings.Builder{}
+		pprof.Lookup("goroutine").WriteTo(&builder, 2)
+		t.Fatalf(failureMsg+", waited: %v, stacktraces:\n%s", timeout, builder.String())
+	case value = <-ch:
 	}
-	return 0
+	return value
 }
 
-func (t *testTracer) waitEligibility() []*types.HareEligibility {
-	wait := 10 * time.Second
+func sendWithTimeout[T any](t testing.TB, value T, ch chan<- T, timeout time.Duration, failureMsg string) {
 	select {
-	case <-time.After(wait):
-		require.FailNow(t, "no eligibility", "wait %v", wait)
-	case el := <-t.eligibility:
-		return el
+	case <-time.After(timeout):
+		builder := strings.Builder{}
+		pprof.Lookup("goroutine").WriteTo(&builder, 2)
+		t.Fatalf(failureMsg+", waited: %v, stacktraces:\n%s", timeout, builder.String())
+	case ch <- value:
 	}
-	return nil
+}
+
+func (t *testTracer) waitStopped() types.LayerID {
+	return waitForChan(t.TB, t.stopped, 10*time.Second, "didn't stop")
+}
+
+func (t *testTracer) waitEligibility() []*types.HareEligibility {
+	return waitForChan(t.TB, t.eligibility, 10*time.Second, "no eligibility")
 }
 
 func (t *testTracer) waitSent() *Message {
-	wait := 10 * time.Second
-	select {
-	case <-time.After(wait):
-		require.FailNow(t, "no message", "wait %v", wait)
-	case m := <-t.sent:
-		return m
-	}
-	return nil
+	return waitForChan(t.TB, t.sent, 10*time.Second, "no message")
 }
 
 func (*testTracer) OnStart(types.LayerID) {}
@@ -548,21 +552,11 @@ func (t *testTracer) OnStop(lid types.LayerID) {
 }
 
 func (t *testTracer) OnActive(el []*types.HareEligibility) {
-	wait := 10 * time.Second
-	select {
-	case <-time.After(wait):
-		require.FailNow(t, "eligibility can't be sent", "wait %v", wait)
-	case t.eligibility <- el:
-	}
+	sendWithTimeout(t.TB, el, t.eligibility, 10*time.Second, "eligibility can't be sent")
 }
 
 func (t *testTracer) OnMessageSent(m *Message) {
-	wait := 10 * time.Second
-	select {
-	case <-time.After(wait):
-		require.FailNow(t, "message can't be sent", "wait %v", wait)
-	case t.sent <- m:
-	}
+	sendWithTimeout(t.TB, m, t.sent, 10*time.Second, "message can't be sent")
 }
 
 func (*testTracer) OnMessageReceived(*Message) {}
@@ -911,7 +905,7 @@ func TestProposals(t *testing.T) {
 				nil,
 				nil,
 				layerpatrol.New(),
-				WithLogger(logtest.New(t).Zap()),
+				WithLogger(zaptest.NewLogger(t)),
 			)
 			for _, atx := range tc.atxs {
 				require.NoError(t, atxs.Add(db, &atx, types.AtxBlob{}))