Skip to content

Commit

Permalink
DAOS-13380 engine: test tgt_nr
Browse files Browse the repository at this point in the history
Test-tag: daily_regression full_regression

Required-githooks: true

Signed-off-by: Xuezhao Liu <xuezhao.liu@intel.com>
  • Loading branch information
liuxuezhao committed Jul 5, 2023
1 parent 008a9ba commit 8ffd40b
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 62 deletions.
6 changes: 4 additions & 2 deletions src/control/server/ctl_storage_rpc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1569,7 +1569,10 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) {
var engineCfgs []*engine.Config
for i, sc := range tc.storageCfgs {
log.Debugf("storage cfg contains bdevs %v for engine %d", sc.Bdevs(), i)
engineCfgs = append(engineCfgs, engine.MockConfig().WithStorage(sc...))
engineCfgs = append(engineCfgs,
engine.MockConfig().
WithStorage(sc...).
WithTargetCount(tc.engineTargetCount[i]))
}
sCfg := config.DefaultServer().WithEngines(engineCfgs...)
cs := mockControlService(t, log, sCfg, csbmbc, tc.smbc, tc.smsc)
Expand Down Expand Up @@ -1625,7 +1628,6 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) {
}
te.setDrpcClient(newMockDrpcClient(dcc))
te._superblock.Rank = ranklist.NewRankPtr(uint32(idx + 1))
te.setTargetCount(tc.engineTargetCount[idx])
for _, tc := range te.storage.GetBdevConfigs() {
tc.Bdev.DeviceRoles.OptionBits = storage.OptionBits(storage.BdevRoleAll)
}
Expand Down
8 changes: 0 additions & 8 deletions src/control/server/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,14 +338,6 @@ func (ei *EngineInstance) setHugepageSz(hpSizeMb int) {
ei.runner.GetConfig().HugepageSz = hpSizeMb
}

// setTargetCount records numTargets as the TargetCount of the engine config
// returned by the instance's runner. The instance lock is held for the
// duration of the write and released on return.
func (ei *EngineInstance) setTargetCount(numTargets int) {
	ei.Lock()
	defer ei.Unlock()

	// Fetch the config once, then update the field through that handle.
	cfg := ei.runner.GetConfig()
	cfg.TargetCount = numTargets
}

// GetTargetCount returns the target count set for this instance.
func (ei *EngineInstance) GetTargetCount() int {
ei.RLock()
Expand Down
6 changes: 0 additions & 6 deletions src/control/server/instance_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,6 @@ func (ei *EngineInstance) finishStartup(ctx context.Context, ready *srvpb.Notify
if err := ei.handleReady(ctx, ready); err != nil {
return err
}
// update engine target count to reflect allocated number of targets, not number requested
// when starting
// NOTE: Engine mem_size passed on engine invocation is based on the number of targets
// requested in config so if number of targets allocated doesn't match the number of
// targets requested the mem_size value may be inappropriate.
ei.setTargetCount(int(ready.GetNtgts()))

ei.ready.SetTrue()

Expand Down
91 changes: 46 additions & 45 deletions src/engine/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@
static char modules[MAX_MODULE_OPTIONS + 1];

/**
* Number of target threads the user would like to start
* 0 means default value, see dss_tgt_nr_get();
* Number of target threads the user would like to start.
*/
static unsigned int nr_threads;

Expand Down Expand Up @@ -250,56 +249,61 @@ modules_load(void)
return rc;
}

/**
 * Number of cores needed to run \a tgt_nr targets together with
 * \a nr_helpers helper xstreams.
 *
 * The result is DAOS_TGT0_OFFSET plus both counts.
 * NOTE(review): DAOS_TGT0_OFFSET presumably accounts for the cores used
 * ahead of target 0 -- confirm against its definition.
 *
 * \param[in] tgt_nr		number of targets
 * \param[in] nr_helpers	number of helper xstreams
 *
 * \return			DAOS_TGT0_OFFSET + tgt_nr + nr_helpers
 */
static unsigned int
ncores_needed(unsigned int tgt_nr, unsigned int nr_helpers)
{
	unsigned int	nr_xs = tgt_nr + nr_helpers;

	return DAOS_TGT0_OFFSET + nr_xs;
}

/**
* Get the appropriate number of main XS based on the number of cores and
* passed in preferred number of threads.
* Check whether #targets and #nr_xs_helpers are valid for starting the server;
* #nr_xs_helpers may be reduced if necessary.
*/
static int
dss_tgt_nr_get(unsigned int ncores, unsigned int nr, bool oversubscribe)
dss_tgt_nr_check(unsigned int ncores, unsigned int tgt_nr, bool oversubscribe)
{
int tgt_nr;

D_ASSERT(ncores >= 1);

/* at most 2 helper XS per target */
if (dss_tgt_offload_xs_nr > 2 * nr)
dss_tgt_offload_xs_nr = 2 * nr;
else if (dss_tgt_offload_xs_nr == 0)
if (dss_tgt_offload_xs_nr > 2 * tgt_nr) {
D_PRINT("#nr_xs_helpers(%d) cannot exceed 2 times #targets (2 x %d = %d).\n",
dss_tgt_offload_xs_nr, tgt_nr, 2 * tgt_nr);
dss_tgt_offload_xs_nr = 2 * tgt_nr;
} else if (dss_tgt_offload_xs_nr == 0) {
D_WARN("Suggest to config at least 1 helper XS per DAOS engine\n");
}

/* Each system XS uses one core, and with dss_tgt_offload_xs_nr
* offload XS. Calculate the tgt_nr as the number of main XS based
* on number of cores.
*/
retry:
tgt_nr = ncores - DAOS_TGT0_OFFSET - dss_tgt_offload_xs_nr;
if (tgt_nr <= 0)
tgt_nr = 1;

/* If user requires less target threads then set it as dss_tgt_nr,
* if user oversubscribes, then:
* . if oversubscribe is enabled, use the required number
* . if oversubscribe is disabled(default),
* use the number calculated above
* Note: oversubscribing may hurt performance.
*/
if (nr >= 1 && ((nr < tgt_nr) || oversubscribe)) {
tgt_nr = nr;
if (dss_tgt_offload_xs_nr > 2 * tgt_nr)
dss_tgt_offload_xs_nr = 2 * tgt_nr;
} else if (dss_tgt_offload_xs_nr > 2 * tgt_nr) {
dss_tgt_offload_xs_nr--;
goto retry;
if (oversubscribe) {
if (ncores_needed(tgt_nr, dss_tgt_offload_xs_nr) > ncores) {
if (ncores > DAOS_TGT0_OFFSET + tgt_nr)
dss_tgt_offload_xs_nr = ncores - DAOS_TGT0_OFFSET - tgt_nr;
else
dss_tgt_offload_xs_nr = 0;

D_PRINT("Force to start engine with %d targets on %d cores, #nr_xs_helpers "
"set as %d.\n",
tgt_nr, ncores, dss_tgt_offload_xs_nr);
}
goto out;
}

if (tgt_nr != nr)
D_PRINT("%d target XS(xstream) requested (#cores %d); "
"use (%d) target XS\n", nr, ncores, tgt_nr);
if (ncores_needed(tgt_nr, dss_tgt_offload_xs_nr) > ncores) {
if (ncores < DAOS_TGT0_OFFSET + tgt_nr) {
D_ERROR("cannot start engine with %d targets on %d cores, may try with "
"DAOS_TARGET_OVERSUBSCRIBE=1\n",
tgt_nr, ncores);
return -DER_INVAL;
}
dss_tgt_offload_xs_nr = ncores - DAOS_TGT0_OFFSET - tgt_nr;
D_PRINT("Start engine with %d targets on %d cores, #nr_xs_helpers set as %d.\n",
tgt_nr, ncores, dss_tgt_offload_xs_nr);
}

out:
if (dss_tgt_offload_xs_nr % tgt_nr != 0)
dss_helper_pool = true;

return tgt_nr;
return 0;
}

static int
Expand All @@ -321,22 +325,21 @@ dss_topo_init()
depth = hwloc_get_type_depth(dss_topo, HWLOC_OBJ_NUMANODE);
numa_node_nr = hwloc_get_nbobjs_by_depth(dss_topo, depth);
d_getenv_bool("DAOS_TARGET_OVERSUBSCRIBE", &tgt_oversub);
dss_tgt_nr = nr_threads;

/* if no NUMA node was specified, or NUMA data unavailable */
/* fall back to the legacy core allocation algorithm */
if (dss_numa_node == -1 || numa_node_nr <= 0) {
D_PRINT("Using legacy core allocation algorithm\n");
dss_tgt_nr = dss_tgt_nr_get(dss_core_nr, nr_threads,
tgt_oversub);

if (dss_core_offset >= dss_core_nr) {
D_ERROR("invalid dss_core_offset %u "
"(set by \"-f\" option),"
" should within range [0, %u]",
dss_core_offset, dss_core_nr - 1);
return -DER_INVAL;
}
return 0;

return dss_tgt_nr_check(dss_core_nr, dss_tgt_nr, tgt_oversub);
}

if (dss_numa_node > numa_node_nr) {
Expand Down Expand Up @@ -381,17 +384,15 @@ dss_topo_init()
hwloc_bitmap_asprintf(&cpuset, core_allocation_bitmap);
free(cpuset);

dss_tgt_nr = dss_tgt_nr_get(dss_num_cores_numa_node, nr_threads,
tgt_oversub);
if (dss_core_offset >= dss_num_cores_numa_node) {
D_ERROR("invalid dss_core_offset %d (set by \"-f\" option), "
"should within range [0, %d]", dss_core_offset,
dss_num_cores_numa_node - 1);
return -DER_INVAL;
}

D_PRINT("Using NUMA core allocation algorithm\n");
return 0;

return dss_tgt_nr_check(dss_num_cores_numa_node, dss_tgt_nr, tgt_oversub);
}

static ABT_mutex server_init_state_mutex;
Expand Down
1 change: 1 addition & 0 deletions src/tests/ftest/control/dmg_server_set_logmasks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ server_config:
engines_per_host: 1
engines:
0:
targets: 4
storage:
0:
class: ram
Expand Down
1 change: 1 addition & 0 deletions src/tests/ftest/harness/core_files.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ server_config:
engines_per_host: 1
engines:
0:
targets: 4
storage:
0:
class: ram
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/pool/create_all_vm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ server_config:
engines_per_host: 1
engines:
0:
targets: 5
targets: 4
nr_xs_helpers: 0
storage:
0:
Expand Down
1 change: 1 addition & 0 deletions utils/nlt_server.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ engines:
- DAOS_MD_CAP=1024
- DAOS_STRICT_SHUTDOWN=1
- CRT_CTX_SHARE_ADDR=0
- DAOS_TARGET_OVERSUBSCRIBE=1
- ABT_STACK_OVERFLOW_CHECK=mprotect
storage:
-
Expand Down

0 comments on commit 8ffd40b

Please sign in to comment.