Skip to content

Commit

Permalink
Build CL by default and include it in the release package.
Browse files Browse the repository at this point in the history
Stub out unimplemented interfaces.
  • Loading branch information
sakridge committed Dec 25, 2019
1 parent 539a67a commit 943f51a
Show file tree
Hide file tree
Showing 13 changed files with 139 additions and 46 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ install:
mkdir -p $(DESTDIR)
ifneq ($(OS),Darwin)
cp -f src/$(V)/libcuda-crypt.so $(DESTDIR)
cp -f src/$(V)/libcl-crypt.so $(DESTDIR)
endif
ls -lh $(DESTDIR)

Expand Down
56 changes: 38 additions & 18 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,33 @@ LIB=cuda-crypt
CL_ECC_TEST_BIN=cl_ed25519_verify
CL_LIB=cl-crypt

CHACHA_IF_DIR:=crypt-if

CL_HEADER_DIR:=opencl-platform

CUDA_HEADER_DIR:=cuda-headers
CUDA_SHA256_DIR:=cuda-sha256

CUDA_DIR ?= /usr/local/cuda

CXX ?= g++
CFLAGS+=-DENDIAN_NEUTRAL -DLTC_NO_ASM -I$(CUDA_HEADER_DIR) -I$(CUDA_SHA256_DIR)
#use -DUSE_RDTSC for Windows compilation
CL_CFLAGS=-fPIC -std=c++11 -DENDIAN_NEUTRAL -DOPENCL_VARIANT -DLTC_NO_ASM -I$(CUDA_HEADER_DIR) -I$(CUDA_SHA256_DIR) -I$(CL_HEADER_DIR) -Icommon/

CUDA_PATH ?= /usr/local/cuda-9.1
CFLAGS_COMMON:=-DENDIAN_NEUTRAL -DLTC_NO_ASM -I$(CHACHA_IF_DIR)
CFLAGS+=$(CFLAGS_COMMON) -I$(CUDA_HEADER_DIR) -I$(CUDA_SHA256_DIR)

all: $V/$(CHACHA_TEST_BIN) $V/$(ECC_TEST_BIN) $(V)/lib$(LIB).so
#use -DUSE_RDTSC for Windows compilation
CL_CFLAGS_common:=-fPIC -std=c++11 $(CFLAGS_COMMON) -DOPENCL_VARIANT \
-I$(CL_HEADER_DIR) -Icommon/ \
-I$(CUDA_DIR)/targets/x86_64-linux/include $(HOST_CFLAGS)
CL_CFLAGS_release:=$(CL_CFLAGS_common) -O3
CL_CFLAGS_debug:=$(CL_CFLAGS_common) -O0 -g
CL_CFLAGS:=$(CL_CFLAGS_$V)

all: $(V)/$(CHACHA_TEST_BIN) \
$(V)/$(ECC_TEST_BIN) \
$(V)/$(CL_ECC_TEST_BIN) \
$(V)/lib$(LIB).so \
$(V)/lib$(CL_LIB).so

ECC_DIR:=cuda-ecc-ed25519

Expand All @@ -45,7 +59,7 @@ $V/seed.o: $(SEED_SRCS)
@mkdir -p $(@D)
$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@

SIGN_SRCS:=$(addprefix $(ECC_DIR)/,sign.cu sha512.h ge.h sc.h fe.cu ../$(CUDA_HEADER_DIR)/gpu_common.h ed25519.h)
SIGN_SRCS:=$(addprefix $(ECC_DIR)/,sign.cu sha512.h ge.h sc.h fe.cu ../common/gpu_common.h ed25519.h)
$V/sign.o: $(SIGN_SRCS)
@mkdir -p $(@D)
$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
Expand All @@ -60,6 +74,18 @@ $V/gpu_ctx.o: $(addprefix $(ECC_DIR)/,gpu_ctx.cu gpu_ctx.h)
$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@

CL_ECC_DIR:=opencl-ecc-ed25519
CL_CRYPT_DIR:=opencl-crypt
CL_POH_VERIFY_DIR:=opencl-poh-verify

CL_POH_VERIFY_SRCS:=$(CL_POH_VERIFY_DIR)/cl_poh_verify.cpp
$V/cl_poh_verify.o: $(CL_POH_VERIFY_SRCS)
@mkdir -p $(@D)
$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@

CL_CHACHA_SRCS:=$(CL_CRYPT_DIR)/cl_chacha.cpp
$V/cl_chacha.o: $(CL_CHACHA_SRCS)
@mkdir -p $(@D)
$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@

CL_SIGN_SRCS:=$(CL_ECC_DIR)/sign.cpp $(ECC_DIR)/fe.cu $(ECC_DIR)/ed25519.h
$V/cl_sign.o: $(CL_SIGN_SRCS)
Expand All @@ -76,7 +102,7 @@ $V/cl_gpu_ctx.o: $(addprefix $(CL_ECC_DIR)/,gpu_ctx.cpp gpu_ctx.h)
$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@

CHACHA_DIR:=cuda-crypt
CHACHA_SRCS:=$(addprefix $(CHACHA_DIR)/,chacha_cbc.cu chacha.h common.cu)
CHACHA_SRCS:=$(addprefix $(CHACHA_DIR)/,chacha_cbc.cu common.cu) $(CHACHA_IF_DIR)/chacha.h

$V/chacha_cbc.o: $(CHACHA_SRCS)
@mkdir -p $(@D)
Expand All @@ -99,23 +125,17 @@ $V/poh_verify.o: $(POH_SRCS)
@mkdir -p $(@D)
$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@

CL_CPU_GPU_OBJS=$(addprefix $V/,cl_init_platform.o cl_verify.o cl_gpu_ctx.o cl_sign.o)
CL_CPU_GPU_OBJS=$(addprefix $V/,cl_init_platform.o cl_verify.o cl_gpu_ctx.o cl_sign.o cl_chacha.o cl_poh_verify.o)

$V/cl_crypt-dlink.o: $(CL_CPU_GPU_OBJS)
ar rvs $@ $^

$V/lib$(CL_LIB).a: $V/cl_crypt-dlink.o $(CL_CPU_GPU_OBJS)
ar rcs $@ $^

$V/lib$(CL_LIB).so: $V/cl_crypt-dlink.o $(CL_CPU_GPU_OBJS)
$(CXX) -shared --shared $^ -o $@
$V/lib$(CL_LIB).so: $(CL_CPU_GPU_OBJS)
$(CXX) -shared $^ -lOpenCL -o $@

$V/cl_ecc_main.o: $(CL_ECC_DIR)/main.cpp $(ECC_DIR)/ed25519.h
@mkdir -p $(@D)
$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@
$(CXX) $(CL_CFLAGS) -pthread -I$(ECC_DIR) -c $< -o $@

$V/$(CL_ECC_TEST_BIN): $V/cl_ecc_main.o $V/lib$(CL_LIB).so
$(CXX) $(CL_CFLAGS) -Wl,-v -L$(CUDA_PATH)/lib64 -L$V -lpthread $^ -lOpenCL -o $@
$(CXX) $(CL_CFLAGS) -L$(CUDA_DIR)/lib64 -L$V -pthread $< -l$(CL_LIB) -lOpenCL -o $@

CPU_GPU_OBJS=$(addprefix $V/,chacha_cbc.o aes_cbc.o verify.o poh_verify.o gpu_ctx.o sign.o seed.o keypair.o)

Expand Down
File renamed without changes.
7 changes: 4 additions & 3 deletions src/cuda-crypt/chacha.h → src/crypt-if/chacha.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ extern "C" {
#define CHACHA_ROUNDS 500
#define SAMPLE_SIZE 32

void __host__ __device__ chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, size_t in_len,
const uint8_t key[CHACHA_KEY_SIZE], const uint8_t nonce[CHACHA_NONCE_SIZE],
uint32_t counter);
void chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, size_t in_len,
const uint8_t key[CHACHA_KEY_SIZE],
const uint8_t nonce[CHACHA_NONCE_SIZE],
uint32_t counter);

void cuda_chacha20_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t in_len,
const uint8_t key[CHACHA_KEY_SIZE], uint8_t* ivec);
Expand Down
17 changes: 12 additions & 5 deletions src/cuda-crypt/chacha20_core.cu
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,11 @@ static void __host__ __device__ chacha20_encrypt(const u32 input[16],
}
}

void __host__ __device__ chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, size_t in_len,
const uint8_t key[CHACHA_KEY_SIZE],
const uint8_t nonce[CHACHA_NONCE_SIZE],
uint32_t counter)
void __host__ __device__
chacha20_ctr_encrypt_device(const uint8_t *in, uint8_t *out, size_t in_len,
const uint8_t key[CHACHA_KEY_SIZE],
const uint8_t nonce[CHACHA_NONCE_SIZE],
uint32_t counter)
{
uint32_t input[16];
uint8_t buf[64];
Expand Down Expand Up @@ -105,4 +106,10 @@ void __host__ __device__ chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, s
}
}


void chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, size_t in_len,
const uint8_t key[CHACHA_KEY_SIZE],
const uint8_t nonce[CHACHA_NONCE_SIZE],
uint32_t counter)
{
chacha20_ctr_encrypt_device(in, out, in_len, key, nonce, counter);
}
2 changes: 1 addition & 1 deletion src/cuda-crypt/chacha_cbc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ __global__ void chacha_ctr_encrypt_kernel(const unsigned char* input, unsigned c
size_t i = (size_t)(blockIdx.x * blockDim.x + threadIdx.x);

if (i < num_keys) {
chacha20_ctr_encrypt(input, &output[i * length], length, &keys[i * CHACHA_KEY_SIZE], &nonces[i * CHACHA_NONCE_SIZE], 0);
chacha20_ctr_encrypt_device(input, &output[i * length], length, &keys[i * CHACHA_KEY_SIZE], &nonces[i * CHACHA_NONCE_SIZE], 0);
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/gpu-common.mk
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
NVCC:=nvcc
GPU_PTX_ARCH:=compute_35
GPU_ARCHS?=sm_37,sm_50,sm_61,sm_70
HOST_CFLAGS:=-Wall -Werror -fPIC -Wno-strict-aliasing
GPU_CFLAGS:=--gpu-code=$(GPU_ARCHS),$(GPU_PTX_ARCH) --gpu-architecture=$(GPU_PTX_ARCH)
CFLAGS_release:=-Icommon --ptxas-options=-v $(GPU_CFLAGS) -O3 -Xcompiler "-Wall -Werror -fPIC -Wno-strict-aliasing"
CFLAGS_release:=-Icommon --ptxas-options=-v $(GPU_CFLAGS) -O3 -Xcompiler "$(HOST_CFLAGS)"
CFLAGS_debug:=$(CFLAGS_release) -g
CFLAGS:=$(CFLAGS_$V)
34 changes: 34 additions & 0 deletions src/opencl-crypt/cl_chacha.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#include "chacha.h"

void chacha_cbc_encrypt_many_sample(const uint8_t* in,
void* sha_state_arg,
size_t length,
const uint8_t* keys,
uint8_t* ivecs,
uint32_t num_keys,
const uint64_t* samples,
uint32_t num_samples,
uint64_t starting_block_offset,
float* time_us)
{
fprintf(stderr, "chacha_cbc_encrypt_many_sample not implemented\n");
exit(1);
}

void chacha_end_sha_state(const void* sha_state, uint8_t* out, uint32_t num_keys)
{
fprintf(stderr, "chacha_end_sha_state not implemented\n");
exit(1);
}

void chacha_init_sha_state(void* sha_state, uint32_t num_keys)
{
fprintf(stderr, "chacha_init_sha_state not implemented\n");
exit(1);
}


2 changes: 2 additions & 0 deletions src/opencl-ecc-ed25519/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

extern void ed25519_free_gpu_mem();

bool g_verbose = false;

typedef struct {
size_t size;
uint64_t num_retransmits;
Expand Down
2 changes: 1 addition & 1 deletion src/opencl-ecc-ed25519/sign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ void ed25519_sign_many(const gpu_Elems* elems,

size_t num_threads_per_block = 64;
size_t num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block) * num_threads_per_block;
LOG("signing blocks: %d threads_per_block: %d\n", num_blocks, num_threads_per_block);
LOG("signing blocks: %zu threads_per_block: %zu\n", num_blocks, num_threads_per_block);

/*
__kernel void ed25519_sign_kernel(__global unsigned char* packets,
Expand Down
14 changes: 12 additions & 2 deletions src/opencl-ecc-ed25519/verify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ void ed25519_verify_many(const gpu_Elems* elems,
num_elems, total_signatures, total_packets, message_size);

size_t out_size = total_signatures * sizeof(uint8_t);
size_t offsets_size = total_signatures * sizeof(uint32_t);

uint32_t total_packets_size = total_packets * message_size;

Expand Down Expand Up @@ -157,7 +156,7 @@ void ed25519_verify_many(const gpu_Elems* elems,

size_t num_threads_per_block = 64;
size_t num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block) * num_threads_per_block;
LOG("num_blocks: %d threads_per_block: %d keys: %d out: %p\n",
LOG("num_blocks: %zu threads_per_block: %zu keys: %d out: %p\n",
num_blocks, num_threads_per_block, (int)total_packets, out);

CL_ERR( clSetKernelArg(ed25519_verify_kernel, 0, sizeof(cl_mem), (void *)&cur_ctx->packets) );
Expand Down Expand Up @@ -192,3 +191,14 @@ const char* ed25519_license() {
"Licensed under the Apache License, Version 2.0 "
"<http://www.apache.org/licenses/LICENSE-2.0>";
}

// Supported by the cuda lib, so stub them here.
int cuda_host_register(void* ptr, size_t size, unsigned int flags)
{
return 0;
}

int cuda_host_unregister(void* ptr)
{
return 0;
}
30 changes: 15 additions & 15 deletions src/opencl-platform/cl_init_platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ bool cl_check_init(cl_uint sel_device_type) {
*/
bool cl_check_init(void) {

if(cl_is_init == true) {
if (cl_is_init == true) {
return true;
} else {
cout << "OpenCL platform query & init..." << endl;
Expand All @@ -212,11 +212,10 @@ bool cl_check_init(void) {
string kernel_src;

cl_device_id device;
cl_platform_id platform;
cl_uint platform_num = 0;
cl_platform_id* platform_list = NULL;

cl_uint device_num = 0;
cl_uint num_devices = 0;
cl_device_id* device_list = NULL;

size_t attr_size = 0;
Expand All @@ -232,8 +231,7 @@ bool cl_check_init(void) {
cout << "Platforms found: " << platform_num << endl;

/* list all platforms and VENDOR/VERSION properties */
for(int platf=0; platf<platform_num; platf++)
{
for (cl_uint platf = 0; platf < platform_num; platf++) {
/* get attribute CL_PLATFORM_VENDOR */
CL_ERR( clGetPlatformInfo(platform_list[platf],
CL_PLATFORM_VENDOR, 0, NULL, &attr_size));
Expand All @@ -248,33 +246,36 @@ bool cl_check_init(void) {

/* get attribute size CL_PLATFORM_VERSION */
CL_ERR( clGetPlatformInfo(platform_list[platf],
CL_PLATFORM_VERSION, 0, NULL, &attr_size));
CL_PLATFORM_VERSION,
0, NULL, &attr_size));
attr_data = new char[attr_size];
DIE(attr_data == NULL, "alloc attr_data");

/* get data size CL_PLATFORM_VERSION */
CL_ERR( clGetPlatformInfo(platform_list[platf],
CL_PLATFORM_VERSION, attr_size, attr_data, NULL));
CL_PLATFORM_VERSION,
attr_size, attr_data, NULL));
cout << attr_data << endl;
delete[] attr_data;

/* get num of available OpenCL devices type ALL on the selected platform */
if(clGetDeviceIDs(platform_list[platf],
query_device_type, 0, NULL, &device_num) != CL_SUCCESS) {
device_num = 0;
if (clGetDeviceIDs(platform_list[platf],
query_device_type, 0,
NULL, &num_devices) != CL_SUCCESS) {
num_devices = 0;
continue;
}

device_list = new cl_device_id[device_num];
device_list = new cl_device_id[num_devices];
DIE(device_list == NULL, "alloc devices");

/* get all available OpenCL devices type ALL on the selected platform */
CL_ERR( clGetDeviceIDs(platform_list[platf], query_device_type,
device_num, device_list, NULL));
cout << "\tDevices found " << device_num << endl;
num_devices, device_list, NULL));
cout << "\tDevices found " << num_devices << endl;

/* list all devices and TYPE/VERSION properties */
for(int dev=0; dev<device_num; dev++)
for(cl_uint dev=0; dev < num_devices; dev++)
{
/* get attribute size */
CL_ERR( clGetDeviceInfo(device_list[dev], CL_DEVICE_NAME,
Expand All @@ -291,7 +292,6 @@ bool cl_check_init(void) {
string tmpAttrData = attr_data;

// always select last device of type GPU
platform = platform_list[platf];
device = device_list[dev];

delete[] attr_data;
Expand Down
17 changes: 17 additions & 0 deletions src/opencl-poh-verify/cl_poh_verify.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>

extern "C" {
int poh_verify_many(uint8_t* hashes,
const uint64_t* num_hashes_arr,
size_t num_elems,
uint8_t use_non_default_stream)
{
fprintf(stderr, "poh_verify_many not implemented.");
exit(1);
return 0;
}
}

0 comments on commit 943f51a

Please sign in to comment.