Docs for Realtime gRPC (#2018)

(cherry picked from commit b8ac916)
cortexlabs · Mar 30, 2021 · 1a8167c · 1a8167c
1 parent a90f19d
commit 1a8167c
Show file tree

Hide file tree

Showing 7 changed files with 474 additions and 43 deletions.
diff --git a/docs/start.md b/docs/start.md
@@ -21,7 +21,7 @@ cortex cluster up cluster.yaml
 cortex deploy apis.yaml
 ```
 
-* [RealtimeAPI](workloads/realtime/example.md) - create APIs that respond to prediction requests in real-time.
+* [RealtimeAPI](workloads/realtime/example.md) - create HTTP/gRPC APIs that respond to prediction requests in real-time.
 * [AsyncAPI](workloads/async/example.md) - create APIs that respond to prediction requests asynchronously.
 * [BatchAPI](workloads/batch/example.md) - create APIs that run distributed batch inference jobs.
 * [TaskAPI](workloads/task/example.md) - create APIs that run training or fine-tuning jobs.
diff --git a/docs/summary.md b/docs/summary.md
@@ -45,7 +45,7 @@
     * [Example](workloads/realtime/traffic-splitter/example.md)
     * [Configuration](workloads/realtime/traffic-splitter/configuration.md)
   * [Troubleshooting](workloads/realtime/troubleshooting.md)
-* [Async APIs](workloads/async/async.md)
+* [Async APIs](workloads/async/async-apis.md)
   * [Example](workloads/async/example.md)
   * [Predictor](workloads/async/predictors.md)
   * [Configuration](workloads/async/configuration.md)

diff --git a/docs/workloads/async/async.md b/docs/workloads/async/async-apis.md
diff --git a/docs/workloads/realtime/configuration.md b/docs/workloads/realtime/configuration.md
@@ -19,7 +19,8 @@
 predictor:
   type: python
   path: <string>  # path to a python file with a PythonPredictor class definition, relative to the Cortex root (required)
-  dependencies: # (optional)
+  protobuf_path: <string>  # path to a protobuf file (required if using gRPC)
+  dependencies:  # (optional)
     pip: <string>  # relative path to requirements.txt (default: requirements.txt)
     conda: <string>  # relative path to conda-packages.txt (default: conda-packages.txt)
     shell: <string>  # relative path to a shell script for system package installation (default: dependencies.sh)
@@ -52,7 +53,8 @@ predictor:
 predictor:
   type: tensorflow
   path: <string>  # path to a python file with a TensorFlowPredictor class definition, relative to the Cortex root (required)
-  dependencies: # (optional)
+  protobuf_path: <string>  # path to a protobuf file (required if using gRPC)
+  dependencies:  # (optional)
     pip: <string>  # relative path to requirements.txt (default: requirements.txt)
     conda: <string>  # relative path to conda-packages.txt (default: conda-packages.txt)
     shell: <string>  # relative path to a shell script for system package installation (default: dependencies.sh)
@@ -88,7 +90,8 @@ predictor:
 predictor:
   type: onnx
   path: <string>  # path to a python file with an ONNXPredictor class definition, relative to the Cortex root (required)
-  dependencies: # (optional)
+  protobuf_path: <string>  # path to a protobuf file (required if using gRPC)
+  dependencies:  # (optional)
     pip: <string>  # relative path to requirements.txt (default: requirements.txt)
     conda: <string>  # relative path to conda-packages.txt (default: conda-packages.txt)
     shell: <string>  # relative path to a shell script for system package installation (default: dependencies.sh)

diff --git a/docs/workloads/realtime/example.md b/docs/workloads/realtime/example.md
@@ -1,8 +1,10 @@
 # RealtimeAPI
 
-Create APIs that respond to prediction requests in real-time.
+## HTTP
 
-## Implement
+Create HTTP APIs that respond to prediction requests in real-time.
+
+### Implement
 
 ```bash
 mkdir text-generator && cd text-generator
@@ -41,32 +43,76 @@ torch
     gpu: 1
 ```
 
-## Deploy
+### Deploy
 
 ```bash
 cortex deploy text_generator.yaml
 ```
 
-## Monitor
+### Monitor
 
 ```bash
 cortex get text-generator --watch
 ```
 
-## Stream logs
+### Stream logs
 
 ```bash
 cortex logs text-generator
 ```
 
-## Make a request
+### Make a request
 
 ```bash
 curl http://***.elb.us-west-2.amazonaws.com/text-generator -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}'
 ```
 
-## Delete
+### Delete
 
 ```bash
 cortex delete text-generator
 ```
+
+## gRPC
+
+To make the above API use gRPC as its protocol, make the following changes (the rest of the steps are the same):
+
+### Add protobuf file
+
+Create a `predictor.proto` file in your project's directory:
+
+```protobuf
+// predictor.proto
+
+syntax = "proto3";
+package text_generator;
+
+service Predictor {
+    rpc Predict (Message) returns (Message);
+}
+
+message Message {
+    string text = 1;
+}
+```
+
+Set the `predictor.protobuf_path` field in the API spec to point to the `predictor.proto` file:
+
+```yaml
+# text_generator.yaml
+
+- name: text-generator
+  kind: RealtimeAPI
+  predictor:
+    type: python
+    path: predictor.py
+    protobuf_path: predictor.proto
+  compute:
+    gpu: 1
+```
+
+### Make a gRPC request
+
+```bash
+grpcurl -plaintext -proto predictor.proto -d '{"text": "hello world"}' ***.elb.us-west-2.amazonaws.com:80 text_generator.Predictor/Predict
+```