Skip to content

Commit

Permalink
Update the snapshot cache before closing (#15477)
Browse files Browse the repository at this point in the history
[AB#4261](https://dev.azure.com/fluidframework/235294da-091d-4c29-84fc-cdfc3d90890b/_workitems/edit/4261)

Follow up to closing the summarizer client instead of refreshing it when
it knows it's in a bad state. We determined that the parent client that
was re-elected usually the same main client that spawned it.

The telemetry indicated summarizer was loading from the same cache on
average 4-5 times. Closing and loading new summarizers from the same
cache from the same main container has a negative performance impact on
the user.

This PR attempts to provide a solution to that problem by updating the
cache with the latest snapshot before closing the container.

Other options considered were explicitly telling the main client's
summary manager to load the next summarizer with the latest snapshot.
This was deemed harder to implement.
  • Loading branch information
tyler-cai-microsoft authored May 5, 2023
1 parent 72a470a commit af82d51
Showing 1 changed file with 28 additions and 23 deletions.
51 changes: 28 additions & 23 deletions packages/runtime/container-runtime/src/containerRuntime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3377,29 +3377,7 @@ export class ContainerRuntime
readAndParseBlob: ReadAndParseBlob,
versionId: string | null,
): Promise<{ snapshotTree: ISnapshotTree; versionId: string; latestSnapshotRefSeq: number }> {
if (this.summaryStateUpdateMethod === "restart") {
const error = new GenericError("Restarting summarizer instead of refreshing");

this.mc.logger.sendTelemetryEvent(
{
...event,
eventName: "ClosingSummarizerOnSummaryStale",
codePath: event.eventName,
message: "Stopping fetch from storage",
versionId: versionId != null ? versionId : undefined,
closeSummarizerDelayMs: this.closeSummarizerDelayMs,
},
error,
);

// Delay 10 seconds before restarting summarizer to prevent the summarizer from restarting too frequently.
await delay(this.closeSummarizerDelayMs);
this._summarizer?.stop("latestSummaryStateStale");
this.closeFn();
throw error;
}

return PerformanceEvent.timedExecAsync(
const snapshotResults = await PerformanceEvent.timedExecAsync(
logger,
event,
async (perfEvent: {
Expand Down Expand Up @@ -3445,6 +3423,33 @@ export class ContainerRuntime
};
},
);

// We choose to close the summarizer after the snapshot cache is updated to avoid
// situations which the main client (which is likely to be re-elected as the leader again)
// loads the summarizer from cache.
if (this.summaryStateUpdateMethod === "restart") {
const error = new GenericError("Restarting summarizer instead of refreshing");

this.mc.logger.sendTelemetryEvent(
{
...event,
eventName: "ClosingSummarizerOnSummaryStale",
codePath: event.eventName,
message: "Stopping fetch from storage",
versionId: versionId != null ? versionId : undefined,
closeSummarizerDelayMs: this.closeSummarizerDelayMs,
},
error,
);

// Delay 10 seconds before restarting summarizer to prevent the summarizer from restarting too frequently.
await delay(this.closeSummarizerDelayMs);
this._summarizer?.stop("latestSummaryStateStale");
this.closeFn();
throw error;
}

return snapshotResults;
}

public notifyAttaching() {} // do nothing (deprecated method)
Expand Down

0 comments on commit af82d51

Please sign in to comment.