adding source of truth
udaij12 committed Sep 23, 2024
1 parent 9a00295 commit 970db0e
Showing 2 changed files with 15 additions and 21 deletions.
13 changes: 9 additions & 4 deletions ModelConfig.java
@@ -11,18 +11,23 @@
 public class ModelConfig {
     private static final Logger logger = LoggerFactory.getLogger(ModelConfig.class);

+    public static final int defaultMinWorkers = 1;
+    public static final int defaultBatchSize = 1;
+    public static final int defaultStartupTimeout = 120; // unit: sec
+    public static final int defaultResponseTimeout = 120; // unit: sec
+
     /** the minimum number of workers of a model */
-    private int minWorkers;
+    private int minWorkers = defaultMinWorkers;
     /** the maximum number of workers of a model */
     private int maxWorkers;
     /** the batch size of a model */
-    private int batchSize;
+    private int batchSize = defaultBatchSize;
     /** the maximum delay in msec of a batch of a model */
     private int maxBatchDelay;
     /** the timeout in sec of a specific model's response. */
-    private int responseTimeout = 120; // unit: sec
+    private int responseTimeout = defaultResponseTimeout;
     /** the timeout in sec of a specific model's startup. */
-    private int startupTimeout = 120; // unit: sec
+    private int startupTimeout = defaultStartupTimeout;
     /**
      * the device type where the model is loaded. It can be gpu, cpu. The model is loaded on CPU if
      * deviceType: "cpu" is set on a GPU host.
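The hunk above turns the hard-coded literals into public constants, making ModelConfig the single source of truth for these defaults, and the fields now initialize from those constants. Below is a minimal, self-contained sketch of that pattern; the class name DefaultsSketch and its main method are illustrative stand-ins, not part of the TorchServe sources.

// A minimal sketch (not TorchServe code): defaults are declared once as public
// constants and reused both as field initializers and by any other component.
public class DefaultsSketch {

    public static final int defaultMinWorkers = 1;
    public static final int defaultBatchSize = 1;

    // Fields start from the shared constants instead of repeating the literals.
    private int minWorkers = defaultMinWorkers;
    private int batchSize = defaultBatchSize;

    public static void main(String[] args) {
        DefaultsSketch cfg = new DefaultsSketch();
        // Other code can reference the same constants, e.g. as a fallback value.
        System.out.println("minWorkers=" + cfg.minWorkers
                + ", default=" + DefaultsSketch.defaultMinWorkers);
    }
}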
23 changes: 6 additions & 17 deletions frontend/server/src/main/java/org/pytorch/serve/wlm/Model.java
@@ -193,32 +193,21 @@ public void setModelState(JsonObject modelInfo) {
         minWorkers =
                 modelInfo.has(MIN_WORKERS) && !modelInfo.get(MIN_WORKERS).isJsonNull()
                         ? modelInfo.get(MIN_WORKERS).getAsInt()
-                        : 1; // default value for minWorkers
-
-        maxWorkers =
-                modelInfo.has(MAX_WORKERS) && !modelInfo.get(MAX_WORKERS).isJsonNull()
-                        ? modelInfo.get(MAX_WORKERS).getAsInt()
-                        : 5; // default value for maxWorkers
-
-        maxBatchDelay =
-                modelInfo.has(MAX_BATCH_DELAY) && !modelInfo.get(MAX_BATCH_DELAY).isJsonNull()
-                        ? modelInfo.get(MAX_BATCH_DELAY).getAsInt()
-                        : 100; // default value for maxBatchDelay
-
+                        : modelArchive.getModelConfig().defaultMinWorkers; // default value for minWorkers
+        maxWorkers = modelInfo.get(MAX_WORKERS).getAsInt();
+        maxBatchDelay = modelInfo.get(MAX_BATCH_DELAY).getAsInt();
         responseTimeout =
                 modelInfo.has(RESPONSE_TIMEOUT) && !modelInfo.get(RESPONSE_TIMEOUT).isJsonNull()
                         ? modelInfo.get(RESPONSE_TIMEOUT).getAsInt()
-                        : 120; // default value for responseTimeout
-
+                        : modelArchive.getModelConfig().defaultResponseTimeout; // default value for responseTimeout
         startupTimeout =
                 modelInfo.has(STARTUP_TIMEOUT) && !modelInfo.get(STARTUP_TIMEOUT).isJsonNull()
                         ? modelInfo.get(STARTUP_TIMEOUT).getAsInt()
-                        : 120; // default value for startupTimeout
-
+                        : modelArchive.getModelConfig().defaultStartupTimeout; // default value for startupTimeout
         batchSize =
                 modelInfo.has(BATCH_SIZE) && !modelInfo.get(BATCH_SIZE).isJsonNull()
                         ? modelInfo.get(BATCH_SIZE).getAsInt()
-                        : 1; // default value for batchSize
+                        : modelArchive.getModelConfig().defaultBatchSize; // default value for batchSize

         JsonElement runtime = modelInfo.get(RUNTIME_TYPE);
         String runtime_str = Manifest.RuntimeType.PYTHON.getValue();
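The Model.java side keeps the same has()/isJsonNull() guard but now falls back to the constants defined in ModelConfig instead of repeating the literals. The snippet below is an illustrative, stand-alone sketch of that fallback pattern using Gson; the readOrDefault helper and the DEFAULT_MIN_WORKERS constant are assumptions of the sketch, not TorchServe APIs.

import com.google.gson.JsonObject;

public class FallbackSketch {

    // Stand-in for the shared default (assumed value, for illustration only).
    static final int DEFAULT_MIN_WORKERS = 1;

    // Read an int from the snapshot JSON if the key is present and non-null,
    // otherwise fall back to the shared default -- the same shape as setModelState.
    static int readOrDefault(JsonObject modelInfo, String key, int fallback) {
        return modelInfo.has(key) && !modelInfo.get(key).isJsonNull()
                ? modelInfo.get(key).getAsInt()
                : fallback;
    }

    public static void main(String[] args) {
        JsonObject modelInfo = new JsonObject();          // snapshot without "minWorkers"
        System.out.println(readOrDefault(modelInfo, "minWorkers", DEFAULT_MIN_WORKERS)); // 1

        modelInfo.addProperty("minWorkers", 4);           // snapshot that supplies a value
        System.out.println(readOrDefault(modelInfo, "minWorkers", DEFAULT_MIN_WORKERS)); // 4
    }
}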
