Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove stages from Pipeline API #5244

Merged
merged 15 commits into from
Jan 29, 2024
20 changes: 7 additions & 13 deletions dali/benchmark/caffe2_alexnet_bench.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -105,20 +105,17 @@ BENCHMARK_DEFINE_F(C2Alexnet, Caffe2Pipe)(benchmark::State& st) { // NOLINT

// Run once to allocate the memory
Workspace ws;
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

while (st.KeepRunning()) {
if (st.iterations() == 1 && pipelined) {
// We will start the processing for the next batch
// immediately after issuing work to the gpu to
// pipeline the cpu/copy/gpu work
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
}
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

if (st.iterations() == st.max_iterations && pipelined) {
Expand Down Expand Up @@ -236,20 +233,17 @@ BENCHMARK_DEFINE_F(C2Alexnet, HybridPipe)(benchmark::State& st) { // NOLINT

// Run once to allocate the memory
Workspace ws;
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

while (st.KeepRunning()) {
if (st.iterations() == 1 && pipelined) {
// We will start the processing for the next batch
// immediately after issuing work to the gpu to
// pipeline the cpu/copy/gpu work
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
}
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

if (st.iterations() == st.max_iterations && pipelined) {
Expand Down
18 changes: 6 additions & 12 deletions dali/benchmark/caffe_alexnet_bench.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,20 +107,17 @@ BENCHMARK_DEFINE_F(Alexnet, CaffePipe)(benchmark::State& st) { // NOLINT

// Run once to allocate the memory
Workspace ws;
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

while (st.KeepRunning()) {
if (st.iterations() == 1 && pipelined) {
// We will start the processing for the next batch
// immediately after issuing work to the gpu to
// pipeline the cpu/copy/gpu work
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
}
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

if (st.iterations() == st.max_iterations && pipelined) {
Expand Down Expand Up @@ -238,20 +235,17 @@ BENCHMARK_DEFINE_F(Alexnet, HybridPipe)(benchmark::State& st) { // NOLINT

// Run once to allocate the memory
Workspace ws;
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

while (st.KeepRunning()) {
if (st.iterations() == 1 && pipelined) {
// We will start the processing for the next batch
// immediately after issuing work to the gpu to
// pipeline the cpu/copy/gpu work
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
}
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

if (st.iterations() == st.max_iterations && pipelined) {
Expand Down
6 changes: 2 additions & 4 deletions dali/benchmark/checkpointing_bench.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,11 @@ class CheckpointingOverhead : public DALIBenchmark {
Workspace ws;

// Warmup
pipe->RunCPU();
pipe->RunGPU();
pipe->Run();
pipe->Outputs(&ws);

while (st.KeepRunning()) {
pipe->RunCPU();
pipe->RunGPU();
pipe->Run();
pipe->Outputs(&ws);
if (policy == CheckpointingPolicy::SaveEveryIter) {
volatile auto cpt = pipe->GetCheckpoint();
Expand Down
11 changes: 4 additions & 7 deletions dali/benchmark/decoder_bench.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -56,8 +56,7 @@ class DecoderBench : public DALIBenchmark {
// Run once to allocate the memory
Workspace ws;
pipe.SetExternalInput("raw_jpegs", data);
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

while (st.KeepRunning()) {
Expand All @@ -66,13 +65,11 @@ class DecoderBench : public DALIBenchmark {
// immediately after issuing work to the gpu to
// pipeline the cpu/copy/gpu work
pipe.SetExternalInput("raw_jpegs", data);
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
}

pipe.SetExternalInput("raw_jpegs", data);
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

if (st.iterations() == st.max_iterations) {
Expand Down
11 changes: 4 additions & 7 deletions dali/benchmark/file_reader_alexnet_bench.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -106,20 +106,17 @@ BENCHMARK_DEFINE_F(FileReaderAlexnet, CaffePipe)(benchmark::State& st) { // NOLI

// Run once to allocate the memory
Workspace ws;
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

while (st.KeepRunning()) {
if (st.iterations() == 1 && pipelined) {
// We will start the processing for the next batch
// immediately after issuing work to the gpu to
// pipeline the cpu/copy/gpu work
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
}
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

if (st.iterations() == st.max_iterations && pipelined) {
Expand Down
6 changes: 2 additions & 4 deletions dali/benchmark/file_reader_fast_forward_bench.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ BENCHMARK_DEFINE_F(FileReaderFastForward, FastForward)(benchmark::State& st) { /

Workspace ws;
for (int i = 0; i < snapshot_at; i++) {
pipe->RunCPU();
pipe->RunGPU();
pipe->Run();
pipe->Outputs(&ws);
}

Expand All @@ -85,8 +84,7 @@ BENCHMARK_DEFINE_F(FileReaderFastForward, FastForward)(benchmark::State& st) { /
pipe2->RestoreFromCheckpoint(cpt);

st.PauseTiming();
pipe2->RunCPU();
pipe2->RunGPU();
pipe2->Run();
pipe2->Outputs(&ws);
st.ResumeTiming();
}
Expand Down
29 changes: 10 additions & 19 deletions dali/benchmark/resnet50_bench.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2017-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -107,8 +107,7 @@ BENCHMARK_DEFINE_F(RN50, C2Pipe)(benchmark::State& st) { // NOLINT

// Run once to allocate the memory
Workspace ws;
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

while (st.KeepRunning()) {
Expand All @@ -118,11 +117,9 @@ BENCHMARK_DEFINE_F(RN50, C2Pipe)(benchmark::State& st) { // NOLINT
// immediately after issuing work to the gpu to
// pipeline the cpu/copy/gpu work
pipe.SetExternalInput("raw_jpegs", data);
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
}
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

if (st.iterations() == st.max_iterations && pipelined) {
Expand Down Expand Up @@ -242,8 +239,7 @@ BENCHMARK_DEFINE_F(RN50, HybridPipe)(benchmark::State& st) { // NOLINT

// Run once to allocate the memory
Workspace ws;
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

while (st.KeepRunning()) {
Expand All @@ -253,11 +249,9 @@ BENCHMARK_DEFINE_F(RN50, HybridPipe)(benchmark::State& st) { // NOLINT
// immediately after issuing work to the gpu to
// pipeline the cpu/copy/gpu work
pipe.SetExternalInput("raw_jpegs", data);
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
}
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

if (st.iterations() == st.max_iterations && pipelined) {
Expand Down Expand Up @@ -355,8 +349,7 @@ BENCHMARK_DEFINE_F(RN50, nvJPEGPipe)(benchmark::State& st) { // NOLINT

// Run once to allocate the memory
Workspace ws;
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

while (st.KeepRunning()) {
Expand All @@ -366,11 +359,9 @@ BENCHMARK_DEFINE_F(RN50, nvJPEGPipe)(benchmark::State& st) { // NOLINT
// immediately after issuing work to the gpu to
// pipeline the cpu/copy/gpu work
pipe.SetExternalInput("raw_jpegs", data);
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
}
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

if (st.iterations() == st.max_iterations && pipelined) {
Expand Down
11 changes: 4 additions & 7 deletions dali/benchmark/resnet50_nvjpeg_bench.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -89,20 +89,17 @@ BENCHMARK_DEFINE_F(RealRN50, nvjpegPipe)(benchmark::State& st) { // NOLINT

// Run once to allocate the memory
Workspace ws;
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

while (st.KeepRunning()) {
if (st.iterations() == 1 && pipelined) {
// We will start the processing for the next batch
// immediately after issuing work to the gpu to
// pipeline the cpu/copy/gpu work
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
}
pipe.RunCPU();
pipe.RunGPU();
pipe.Run();
pipe.Outputs(&ws);

if (st.iterations() == st.max_iterations && pipelined) {
Expand Down
34 changes: 22 additions & 12 deletions dali/c_api/c_api.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2017-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -251,6 +251,7 @@ daliCreatePipeline2(daliPipelineHandle *pipe_handle, const char *serialized_pipe
bool se = separated_execution != 0;
bool pe = pipelined_execution != 0;
bool ae = async_execution != 0;

auto pipeline =
std::make_unique<dali::Pipeline>(std::string(serialized_pipeline, length), max_batch_size,
num_threads, device_id, pe, prefetch_queue_depth, ae);
Expand Down Expand Up @@ -283,26 +284,36 @@ int daliGetMaxBatchSize(daliPipelineHandle_t pipe_handle) {
return (*pipe_handle)->pipeline->max_batch_size();
}

// Forwards to Pipeline::InputFeedCount for the input called `input_name`
// on the pipeline wrapped by `pipe_handle`, and returns its result.
int daliInputFeedCount(daliPipelineHandle_t pipe_handle, const char *input_name) {
  return (*pipe_handle)->pipeline->InputFeedCount(input_name);
}

// Forwards to Pipeline::Prefetch on the pipeline wrapped by `pipe_handle`.
void daliPrefetch(daliPipelineHandle_t pipe_handle) {
  dali::Pipeline *wrapped = (*pipe_handle)->pipeline.get();
  wrapped->Prefetch();
}

void daliPrefetchUniform(daliPipelineHandle_t pipe_handle, int queue_depth) {
auto &pipeline = (*pipe_handle)->pipeline;
for (int i = 0; i < queue_depth; ++i) {
pipeline->RunCPU();
pipeline->RunGPU();
auto sz = pipeline->GetQueueSizes();
if (queue_depth != sz.cpu_size || queue_depth != sz.gpu_size) {
DALI_WARN("daliPrefetchUniform is deprecated and setting queue_length different than"
" the one set in the pipeline has no effect. Use daliPrefetch instead.");
}
pipeline->Prefetch();
}


void daliPrefetchSeparate(daliPipelineHandle_t pipe_handle,
int cpu_queue_depth, int gpu_queue_depth) {
auto &pipeline = (*pipe_handle)->pipeline;
for (int i = 0; i < gpu_queue_depth; ++i) {
pipeline->RunCPU();
pipeline->RunGPU();
}
for (int i = 0; i < cpu_queue_depth; ++i) {
pipeline->RunCPU();
auto sz = pipeline->GetQueueSizes();
if (cpu_queue_depth != sz.cpu_size || gpu_queue_depth != sz.gpu_size) {
DALI_WARN("daliPrefetchSeparate is deprecated and setting queue_length different than"
" the one set in the pipeline has no effect. Use daliPrefetch instead.");
}
pipeline->Prefetch();
}


Expand Down Expand Up @@ -402,8 +413,7 @@ dali_data_type_t daliGetExternalInputType(daliPipelineHandle_t pipe_handle, cons

void daliRun(daliPipelineHandle_t pipe_handle) {
dali::Pipeline *pipeline = (*pipe_handle)->pipeline.get();
pipeline->RunCPU();
pipeline->RunGPU();
pipeline->Run();
}


Expand Down
Loading
Loading