From 4064c074a2a51ee83079346cd7105c118740f74b Mon Sep 17 00:00:00 2001
From: Sylvain Lebresne <sylvain@apollographql.com>
Date: Tue, 13 Sep 2022 17:53:30 +0200
Subject: [PATCH] Fix bug in patch for #2137 (#2140)

The code from #2137 is optimizing some use of __typename
when other fields are queried alongside it. But when
__typename had only inline fragments alongside it,
the logic was flawed and was discarding any potential
instance of the optimization done somewhere more
nested. This fixes that problem.

Additionally, this patch adds a test for the optimization
of #2137, but to do that, this patch adds a new behaviour
to the query planner whereby along the generation of the
plan, it also exposes some statistics on the plan generation.
As of this commit, the only thing exposed is the number
of plan that were evaluated under the hood, as that is
what we care to check here (but it is also one of the main
contributor to time spent query planning, so arguably an
important statistic).
---
 query-planner-js/CHANGELOG.md                 |   5 +-
 .../src/__tests__/buildPlan.test.ts           | 153 ++++++++++++++++++
 query-planner-js/src/buildPlan.ts             |  74 +++++++--
 query-planner-js/src/index.ts                 |  11 +-
 4 files changed, 223 insertions(+), 20 deletions(-)

diff --git a/query-planner-js/CHANGELOG.md b/query-planner-js/CHANGELOG.md
index c65f712de..7b14ed878 100644
--- a/query-planner-js/CHANGELOG.md
+++ b/query-planner-js/CHANGELOG.md
@@ -4,8 +4,11 @@ This CHANGELOG pertains only to Apollo Federation packages in the 2.x range. The
 
 ## vNext
 
+- Fix issue with path #2137 (optimization for `__typename`) [PR #2140](https://github.com/apollographql/federation/pull/2140).
+
 ## 2.1.2-alpha.1
-- Fix potential inefficient planning due to __typename [PR #2137](https://github.com/apollographql/federation/pull/2137).
+
+- Fix potential inefficient planning due to `__typename` [PR #2137](https://github.com/apollographql/federation/pull/2137).
 - Fix potential assertion during query planning [PR #2133](https://github.com/apollographql/federation/pull/2133).
 
 ## 2.1.2-alpha.0
diff --git a/query-planner-js/src/__tests__/buildPlan.test.ts b/query-planner-js/src/__tests__/buildPlan.test.ts
index 9f3de5658..6b631242a 100644
--- a/query-planner-js/src/__tests__/buildPlan.test.ts
+++ b/query-planner-js/src/__tests__/buildPlan.test.ts
@@ -4055,4 +4055,157 @@ describe('__typename handling', () => {
       }
     `);
   });
+
+  it('does not needlessly consider options for __typename', () => {
+    const subgraph1 = {
+      name: 'Subgraph1',
+      typeDefs: gql`
+        type Query {
+          s: S
+        }
+
+        type S @key(fields: "id") {
+          id: ID
+        }
+      `
+    }
+
+    const subgraph2 = {
+      name: 'Subgraph2',
+      typeDefs: gql`
+        type S @key(fields: "id") {
+          id: ID
+          t: T @shareable
+        }
+
+        type T @key(fields: "id") {
+          id: ID!
+          x: Int
+        }
+      `
+    }
+
+    const subgraph3 = {
+      name: 'Subgraph3',
+      typeDefs: gql`
+        type S @key(fields: "id") {
+          id: ID
+          t: T @shareable
+        }
+
+        type T @key(fields: "id") {
+          id: ID!
+          y: Int
+        }
+      `
+    }
+
+    const [api, queryPlanner] = composeAndCreatePlanner(subgraph1, subgraph2, subgraph3);
+    // This tests the patch from https://github.com/apollographql/federation/pull/2137.
+    // Namely, the schema is such that `x` can only be fetched from one subgraph, but
+    // technically __typename can be fetched from 2 subgraphs. However, the optimization
+    // we test for is that we actually don't consider both choices for __typename and
+    // instead only evaluate a single query plan (the assertion on `evaluatePlanCount`)
+    let operation = operationFromDocument(api, gql`
+      query {
+        s {
+          t {
+            __typename
+            x
+          }
+        }
+      }
+    `);
+
+    let plan = queryPlanner.buildQueryPlan(operation);
+    expect(queryPlanner.lastGeneratedPlanStatistics()?.evaluatedPlanCount).toBe(1);
+    expect(plan).toMatchInlineSnapshot(`
+      QueryPlan {
+        Sequence {
+          Fetch(service: "Subgraph1") {
+            {
+              s {
+                __typename
+                id
+              }
+            }
+          },
+          Flatten(path: "s") {
+            Fetch(service: "Subgraph2") {
+              {
+                ... on S {
+                  __typename
+                  id
+                }
+              } =>
+              {
+                ... on S {
+                  t {
+                    __typename
+                    x
+                  }
+                }
+              }
+            },
+          },
+        },
+      }
+    `);
+
+    // Almost the same test, but we artificially create a case where the result set
+    // for `s` has a __typename alongside just an inline fragments. This should
+    // change nothing to the example (the __typename on `s` is trivially fetched
+    // from the 1st subgraph and does not create new choices), but an early bug
+    // in the implementation made this example forgo the optimization of the
+    // __typename within `t`. We make sure this is not case (that we still only
+    // consider a single choice of plan).
+    operation = operationFromDocument(api, gql`
+      query {
+        s {
+          __typename
+          ... on S {
+            t {
+              __typename
+              x
+            }
+          }
+        }
+      }
+    `);
+
+    plan = queryPlanner.buildQueryPlan(operation);
+    expect(queryPlanner.lastGeneratedPlanStatistics()?.evaluatedPlanCount).toBe(1);
+    expect(plan).toMatchInlineSnapshot(`
+      QueryPlan {
+        Sequence {
+          Fetch(service: "Subgraph1") {
+            {
+              s {
+                __typename
+                id
+              }
+            }
+          },
+          Flatten(path: "s") {
+            Fetch(service: "Subgraph2") {
+              {
+                ... on S {
+                  __typename
+                  id
+                }
+              } =>
+              {
+                ... on S {
+                  t {
+                    __typename
+                    x
+                  }
+                }
+              }
+            },
+          },
+        },
+      }
+    `);
+  });
 });
diff --git a/query-planner-js/src/buildPlan.ts b/query-planner-js/src/buildPlan.ts
index 36df699bd..b5dce4abe 100644
--- a/query-planner-js/src/buildPlan.ts
+++ b/query-planner-js/src/buildPlan.ts
@@ -230,6 +230,7 @@ class QueryPlanningTaversal<RV extends Vertex> {
     readonly startFetchIdGen: number,
     readonly hasDefers: boolean,
     readonly variableDefinitions: VariableDefinitions,
+    private readonly statistics: PlanningStatistics | undefined,
     private readonly startVertex: RV,
     private readonly rootKind: SchemaRootKind,
     readonly costFunction: CostFunction,
@@ -445,6 +446,10 @@ class QueryPlanningTaversal<RV extends Vertex> {
       debug.log(() => `Reduced plans to consider to ${planCount} plans`);
     }
 
+    if (this.statistics) {
+      this.statistics.evaluatedPlanCount += planCount;
+    }
+
     debug.log(() => `All branches:${this.closedBranches.map((opts, i) => `\n${i}:${opts.map((opt => `\n - ${simultaneousPathsToString(opt)}`))}`)}`);
 
     // Note that usually, we'll have a majority of branches with just one option. We can group them in
@@ -528,6 +533,7 @@ class QueryPlanningTaversal<RV extends Vertex> {
       0,
       false,
       this.variableDefinitions,
+      undefined,
       edge.head,
       'query',
       this.costFunction,
@@ -1974,19 +1980,24 @@ function optimizeSiblingTypenames(selectionSet: SelectionSet): SelectionSet {
     }
   }
 
-  if (!updatedSelections || (typenameSelection && !firstFieldSelection)) {
-    // This means that either no selection was modified at all, or there is no other field selection than __typename one.
-    // In both case, we want to return the current selectionSet unmodified.
+  if (!updatedSelections || updatedSelections.length === 0) {
+    // No selection was modified at all, or there is no other field selection than __typename one.
+    // In both case, we just return the current selectionSet unmodified.
     return selectionSet;
   }
 
-  // If we have some __typename selection that was removed but need to be "remembered" for later, "tag" whichever first
-  // field selection is still part of the operation (what we tag doesn't matter since again, __typename can be queried from
-  // anywhere and that has no performance impact).
+  // If we have some __typename selection that was removed but need to be "remembered" for later,
+  // "tag" whichever first field selection is still part of the operation.
   if (typenameSelection) {
-    assert(firstFieldSelection, 'Should not have got here');
-    // Note that as we tag the element, we also record the alias used if any since that needs to be preserved.
-    firstFieldSelection.element().addAttachement(SIBLING_TYPENAME_KEY, typenameSelection.field.alias ? typenameSelection.field.alias : '');
+    if (firstFieldSelection) {
+      // Note that as we tag the element, we also record the alias used if any since that needs to be preserved.
+      firstFieldSelection.element().addAttachement(SIBLING_TYPENAME_KEY, typenameSelection.field.alias ? typenameSelection.field.alias : '');
+    } else {
+      // If we have no other field selection, then we can't optimize __typename and we need to add
+      // it back to the updated subselections (we add it first because that's usually where we
+      // put __typename by convention).
+      updatedSelections = [typenameSelection as Selection].concat(updatedSelections);
+    }
   }
   return new SelectionSet(selectionSet.parentType, selectionSet.fragments).addAll(updatedSelections)
 }
@@ -2007,6 +2018,10 @@ function withSiblingTypenameOptimizedAway(operation: Operation): Operation {
   );
 }
 
+export type PlanningStatistics = {
+  evaluatedPlanCount: number,
+}
+
 export function computeQueryPlan({
   config,
   supergraphSchema,
@@ -2017,7 +2032,10 @@ export function computeQueryPlan({
   supergraphSchema: Schema,
   federatedQueryGraph: QueryGraph,
   operation: Operation,
-}): QueryPlan {
+}): {
+  plan: QueryPlan,
+  statistics: PlanningStatistics,
+} {
   if (operation.rootKind === 'subscription') {
     throw ERRORS.UNSUPPORTED_FEATURE.err(
       'Query planning does not currently support subscriptions.',
@@ -2025,6 +2043,10 @@ export function computeQueryPlan({
     );
   }
 
+  const statistics: PlanningStatistics = {
+    evaluatedPlanCount: 0,
+  };
+
   const reuseQueryFragments = config.reuseQueryFragments ?? true;
   let fragments = operation.selectionSet.fragments
   if (fragments && reuseQueryFragments) {
@@ -2057,7 +2079,10 @@ export function computeQueryPlan({
   debug.group(() => `Computing plan for\n${operation}`);
   if (operation.selectionSet.isEmpty()) {
     debug.groupEnd('Empty plan');
-    return { kind: 'QueryPlan' };
+    return {
+      plan: { kind: 'QueryPlan' },
+      statistics,
+    };
   }
 
   const root = federatedQueryGraph.root(operation.rootKind);
@@ -2081,6 +2106,7 @@ export function computeQueryPlan({
       processor,
       root,
       deferConditions,
+      statistics,
     })
   } else {
     rootNode = computePlanInternal({
@@ -2090,11 +2116,15 @@ export function computeQueryPlan({
       processor,
       root,
       hasDefers,
+      statistics,
     });
   }
 
   debug.groupEnd('Query plan computed');
-  return { kind: 'QueryPlan', node: rootNode };
+  return {
+    plan: { kind: 'QueryPlan', node: rootNode },
+    statistics,
+  };
 }
 
 function computePlanInternal({
@@ -2104,6 +2134,7 @@ function computePlanInternal({
   processor,
   root,
   hasDefers,
+  statistics,
 }: {
   supergraphSchema: Schema,
   federatedQueryGraph: QueryGraph,
@@ -2111,9 +2142,10 @@ function computePlanInternal({
   processor: FetchGroupProcessor<PlanNode | undefined, DeferredNode>
   root: RootVertex,
   hasDefers: boolean,
+  statistics: PlanningStatistics,
 }): PlanNode | undefined {
   if (operation.rootKind === 'mutation') {
-    const dependencyGraphs = computeRootSerialDependencyGraph(supergraphSchema, operation, federatedQueryGraph, root, hasDefers);
+    const dependencyGraphs = computeRootSerialDependencyGraph(supergraphSchema, operation, federatedQueryGraph, root, hasDefers, statistics);
     let allMain: (PlanNode | undefined)[] = [];
     let allDeferred: DeferredNode[] = [];
     let primarySelection: SelectionSet | undefined = undefined;
@@ -2138,7 +2170,7 @@ function computePlanInternal({
       deferred: allDeferred,
     });
   } else {
-    const dependencyGraph =  computeRootParallelDependencyGraph(supergraphSchema, operation, federatedQueryGraph, root, 0, hasDefers);
+    const dependencyGraph =  computeRootParallelDependencyGraph(supergraphSchema, operation, federatedQueryGraph, root, 0, hasDefers, statistics);
     const { main, deferred } = dependencyGraph.process(processor);
     return processRootNodes({
       processor,
@@ -2157,6 +2189,7 @@ function computePlanForDeferConditionals({
   processor,
   root,
   deferConditions,
+  statistics,
 }: {
   supergraphSchema: Schema,
   federatedQueryGraph: QueryGraph,
@@ -2164,6 +2197,7 @@ function computePlanForDeferConditionals({
   processor: FetchGroupProcessor<PlanNode | undefined, DeferredNode>
   root: RootVertex,
   deferConditions: SetMultiMap<string, string>,
+  statistics: PlanningStatistics,
 }): PlanNode | undefined {
   return generateConditionNodes(
     operation,
@@ -2176,6 +2210,7 @@ function computePlanForDeferConditionals({
       processor,
       root,
       hasDefers: true,
+      statistics,
     }),
   );
 }
@@ -2291,6 +2326,7 @@ function computeRootParallelDependencyGraph(
   root: RootVertex,
   startFetchIdGen: number,
   hasDefer: boolean,
+  statistics: PlanningStatistics,
 ): FetchDependencyGraph {
   return computeRootParallelBestPlan(
     supergraphSchema,
@@ -2300,6 +2336,7 @@ function computeRootParallelDependencyGraph(
     root,
     startFetchIdGen,
     hasDefer,
+    statistics,
   )[0];
 }
 
@@ -2311,6 +2348,7 @@ function computeRootParallelBestPlan(
   root: RootVertex,
   startFetchIdGen: number,
   hasDefers: boolean,
+  statistics: PlanningStatistics,
 ): [FetchDependencyGraph, OpPathTree<RootVertex>, number] {
   const planningTraversal = new QueryPlanningTaversal(
     supergraphSchema,
@@ -2319,10 +2357,11 @@ function computeRootParallelBestPlan(
     startFetchIdGen,
     hasDefers,
     variables,
+    statistics,
     root,
     root.rootKind,
     defaultCostFunction,
-    emptyContext
+    emptyContext,
   );
   const plan = planningTraversal.findBestPlan();
   // Getting no plan means the query is essentially unsatisfiable (it's a valid query, but we can prove it will never return a result),
@@ -2353,16 +2392,17 @@ function computeRootSerialDependencyGraph(
   federatedQueryGraph: QueryGraph,
   root: RootVertex,
   hasDefers: boolean,
+  statistics: PlanningStatistics,
 ): FetchDependencyGraph[] {
   const rootType = hasDefers ? supergraphSchema.schemaDefinition.rootType(root.rootKind) : undefined;
   // We have to serially compute a plan for each top-level selection.
   const splittedRoots = splitTopLevelFields(operation.selectionSet);
   const graphs: FetchDependencyGraph[] = [];
   let startingFetchId: number = 0;
-  let [prevDepGraph, prevPaths] = computeRootParallelBestPlan(supergraphSchema, splittedRoots[0], operation.variableDefinitions, federatedQueryGraph, root, startingFetchId, hasDefers);
+  let [prevDepGraph, prevPaths] = computeRootParallelBestPlan(supergraphSchema, splittedRoots[0], operation.variableDefinitions, federatedQueryGraph, root, startingFetchId, hasDefers, statistics);
   let prevSubgraph = onlyRootSubgraph(prevDepGraph);
   for (let i = 1; i < splittedRoots.length; i++) {
-    const [newDepGraph, newPaths] = computeRootParallelBestPlan(supergraphSchema, splittedRoots[i], operation.variableDefinitions, federatedQueryGraph, root, prevDepGraph.nextFetchId(), hasDefers);
+    const [newDepGraph, newPaths] = computeRootParallelBestPlan(supergraphSchema, splittedRoots[i], operation.variableDefinitions, federatedQueryGraph, root, prevDepGraph.nextFetchId(), hasDefers, statistics);
     const newSubgraph = onlyRootSubgraph(newDepGraph);
     if (prevSubgraph === newSubgraph) {
       // The new operation (think 'mutation' operation) is on the same subgraph than the previous one, so we can concat them in a single fetch
diff --git a/query-planner-js/src/index.ts b/query-planner-js/src/index.ts
index e47d01023..9362024a4 100644
--- a/query-planner-js/src/index.ts
+++ b/query-planner-js/src/index.ts
@@ -6,7 +6,7 @@ import { QueryPlan } from './QueryPlan';
 
 import { Schema, Operation, Concrete } from '@apollo/federation-internals';
 import { buildFederatedQueryGraph, QueryGraph } from "@apollo/query-graphs";
-import { computeQueryPlan } from './buildPlan';
+import { computeQueryPlan, PlanningStatistics } from './buildPlan';
 import { enforceQueryPlannerConfigDefaults, QueryPlannerConfig } from './config';
 
 export { QueryPlannerConfig } from './config';
@@ -14,6 +14,7 @@ export { QueryPlannerConfig } from './config';
 export class QueryPlanner {
   private readonly config: Concrete<QueryPlannerConfig>;
   private readonly federatedQueryGraph: QueryGraph;
+  private _lastGeneratedPlanStatistics: PlanningStatistics | undefined;
 
   constructor(
     public readonly supergraphSchema: Schema,
@@ -28,11 +29,17 @@ export class QueryPlanner {
       return { kind: 'QueryPlan' };
     }
 
-    return computeQueryPlan({
+    const {plan, statistics} = computeQueryPlan({
       config: this.config,
       supergraphSchema: this.supergraphSchema,
       federatedQueryGraph: this.federatedQueryGraph,
       operation,
     });
+    this._lastGeneratedPlanStatistics = statistics;
+    return plan;
+  }
+
+  lastGeneratedPlanStatistics(): PlanningStatistics | undefined {
+    return this._lastGeneratedPlanStatistics;
   }
 }