diff --git a/models/Google--GcpGPT b/models/Google--GcpGPT
new file mode 100644
index 00000000..c130340c
--- /dev/null
+++ b/models/Google--GcpGPT
@@ -0,0 +1,41 @@
+deployment_config:
+  autoscaling_config:
+    min_replicas: 1
+    initial_replicas: 1
+    max_replicas: 8
+    target_num_ongoing_requests_per_replica: 24
+    metrics_interval_s: 10.0
+    look_back_period_s: 30.0
+    smoothing_factor: 0.5
+    downscale_delay_s: 300.0
+    upscale_delay_s: 15.0
+  max_concurrent_queries: 64
+  ray_actor_options:
+    resources:
+      accelerator_type_a10: 0.01
+engine_config:
+  model_id: google/flan-t5-base
+  hf_model_id: google/flan-t5-base
+  type: VLLMEngine
+  engine_kwargs:
+    trust_remote_code: true
+    max_num_batched_tokens: 4096
+    max_num_seqs: 64
+    gpu_memory_utilization: 0.95
+  max_total_tokens: 2048
+  generation:
+    prompt_format:
+      system: "{instruction}\n"
+      assistant: "### Response:\n{instruction}\n"
+      trailing_assistant: "### Response:\n"
+      user: "### Instruction:\n{instruction}\n"
+      default_system_message: "Below is an instruction that describes a task. Write a response that appropriately completes the request."
+      default_system_message: ""
+    stopping_sequences: ["### Response:", "### End"]
+scaling_config:
+  num_workers: 1
+  num_gpus_per_worker: 1
+  num_cpus_per_worker: 8
+  placement_strategy: "STRICT_PACK"
+  resources_per_worker:
+    accelerator_type_a10: 0.01