forked from speechmatics/ctranslate2_triton_backend
-
Notifications
You must be signed in to change notification settings - Fork 1
/
config.pbtxt
51 lines (50 loc) · 749 Bytes
/
config.pbtxt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Triton model configuration for the model named "facebook_m2m100_1.2B",
# served by the "ctranslate2" backend.
name: "facebook_m2m100_1.2B"
backend: "ctranslate2"

# A non-zero max_batch_size means the dims below exclude the batch dimension.
max_batch_size: 64

# Two variable-length (dims: [ -1 ]) INT32 inputs. Both opt in to ragged
# batching, so requests in one batch need not share the same length.
# NOTE(review): presumably these carry source token IDs and a target-side
# prefix (e.g. a language token) - confirm against the backend.
input [
  {
    name: "INPUT_IDS"
    data_type: TYPE_INT32
    dims: [ -1 ]
    allow_ragged_batch: true
  },
  {
    name: "TARGET_PREFIX"
    data_type: TYPE_INT32
    dims: [ -1 ]
    allow_ragged_batch: true
  }
]

# Single variable-length INT32 output.
output [
  {
    name: "OUTPUT_IDS"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]

# Backend parameters. Every value is supplied as a string; how each one is
# interpreted is up to the backend.
# NOTE(review): "compute_type" looks like the inference precision - confirm.
parameters {
  key: "compute_type"
  value {
    string_value: "float16"
  }
}
# NOTE(review): presumably caps decoding length at this multiple of the input
# length - verify against the backend's documentation.
parameters {
  key: "max_decoding_length_multiple"
  value {
    string_value: "2"
  }
}
# NOTE(review): presumably the beam-search width - confirm.
parameters {
  key: "beam_size"
  value {
    string_value: "5"
  }
}

# One model instance, placed on GPU.
instance_group [
  {
    count: 1
    kind: KIND_GPU
  }
]

# Dynamic batching with a queue delay limit of 5000 microseconds (5 ms).
dynamic_batching {
  max_queue_delay_microseconds: 5000
}