Skip to content

Commit

Permalink
Make grpc server recover from panics caused by requests (#453)
Browse files Browse the repository at this point in the history
If a request causes a panic, the server currently just exits, which is an unpleasant way to find out about the bugs behind those panics.

Now we just return an error to the client and log the panic so it can be investigated.
  • Loading branch information
JamesMurkin authored Nov 11, 2020
1 parent 05db67f commit 0cd4e26
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions internal/common/grpc/grpc.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
package grpc

import (
"runtime/debug"
"time"

grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware"
grpc_auth "github.com/grpc-ecosystem/go-grpc-middleware/auth"
grpc_recovery "github.com/grpc-ecosystem/go-grpc-middleware/recovery"
grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
log "github.com/sirupsen/logrus"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/keepalive"
"google.golang.org/grpc/status"

"github.com/G-Research/armada/internal/armada/authorization"
)
Expand All @@ -24,10 +29,19 @@ func CreateGrpcServer(authServices []authorization.AuthService) *grpc.Server {
unaryInterceptors = append(unaryInterceptors, grpc_prometheus.UnaryServerInterceptor)
streamInterceptors = append(streamInterceptors, grpc_prometheus.StreamServerInterceptor)

recovery := grpc_recovery.WithRecoveryHandler(panicRecoveryHandler)
unaryInterceptors = append(unaryInterceptors, grpc_recovery.UnaryServerInterceptor(recovery))
streamInterceptors = append(streamInterceptors, grpc_recovery.StreamServerInterceptor(recovery))

return grpc.NewServer(
grpc.KeepaliveParams(keepalive.ServerParameters{
MaxConnectionIdle: 5 * time.Minute,
}),
grpc.StreamInterceptor(grpc_middleware.ChainStreamServer(streamInterceptors...)),
grpc.UnaryInterceptor(grpc_middleware.ChainUnaryServer(unaryInterceptors...)))
}

// panicRecoveryHandler turns a panic value recovered while serving a request
// into an error that can be returned to the client.
//
// It is handed to grpc_recovery.WithRecoveryHandler in CreateGrpcServer. The
// panic value p and the current goroutine's stack trace are written to the
// error log so the failure can be investigated, and the caller receives a
// gRPC status error with code Internal whose message includes p.
func panicRecoveryHandler(p interface{}) (err error) {
	// Grab the stack first so the log entry shows where the handler was invoked from.
	stackTrace := debug.Stack()
	log.Errorf("Request triggered panic with cause %v \n%s", p, string(stackTrace))

	err = status.Errorf(codes.Internal, "Internal server error caused by %v", p)
	return err
}

0 comments on commit 0cd4e26

Please sign in to comment.