From 021b3c949c7c462a21249107e2ffa65ba7546680 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 3 Feb 2023 11:48:31 +0100 Subject: [PATCH] rapide: fix: make unexpected blocks a retriable error In 719b5e64af256916c4b5a275e93e63ca455a34c0 I changed `download.expand` to release the node lock before returning. This allows other workers to access the just-expanded node while we walk up the chain ancestry. This means that when we relock the node later to add it to our list of tasks, someone else may have downloaded some of the blocks already and thus they are not in the childrens slice and thus we won't add them to our list. This also solves the case where you give a Traversal that is not supported by the underlying protocol; in that case we want to kill the current download and retry deeper, while previously we would hard error on this and kill the worker. --- rapide/serverdriven.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rapide/serverdriven.go b/rapide/serverdriven.go index 7186017a6..c5152190e 100644 --- a/rapide/serverdriven.go +++ b/rapide/serverdriven.go @@ -53,7 +53,7 @@ func (w *serverDrivenWorker) work(ctx context.Context) { err := w.doOneDownload(ctx, workCid, traversal) switch err { - case nil, io.EOF, errGotDoneBlock: + case nil, io.EOF, errGotDoneBlock, errGotUnexpectedBlock: w.resetCurrentChildsNodeWorkState() continue default: @@ -67,7 +67,7 @@ func (w *serverDrivenWorker) work(ctx context.Context) { } } -var errUnexpectedBlock = errors.New("got an unexpected block") +var errGotUnexpectedBlock = errors.New("got an unexpected block") var errGotDoneBlock = errors.New("downloaded an already done node") // doOneDownload will return nil when it does not find work @@ -107,7 +107,7 @@ func (w *serverDrivenWorker) doOneDownload(ctx context.Context, workCid cid.Cid, task, ok := w.tasks[c] if !ok { // received unexpected block - return errUnexpectedBlock + return errGotUnexpectedBlock } delete(w.tasks, c)