Skip to content

Commit

Permalink
demo
Browse files Browse the repository at this point in the history
  • Loading branch information
meenchen committed May 23, 2023
1 parent db54093 commit 0c9d325
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 11 deletions.
9 changes: 6 additions & 3 deletions experimental/transformer/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ CXX = g++
CXXFLAGS = -std=c++17 -mavx2 -pthread -O3

# Executable and source files
TARGET = test_ops test_Int8OPTAttention test_Int8OPTDecoderLayer test_Int8OPTDecoder test_OPTForCausalLM profile_OPTForCausalLM test_OPTTokenizer test_OPTGenerate
TARGET = test_ops test_Int8OPTAttention test_Int8OPTDecoderLayer test_Int8OPTDecoder test_OPTForCausalLM profile_OPTForCausalLM test_OPTTokenizer test_OPTGenerate demo

LIB_DIR = ../matmul_optimization/src
LIB_SRC = $(wildcard $(LIB_DIR)/lib/*.cc)
Expand Down Expand Up @@ -40,10 +40,13 @@ profile_OPTForCausalLM:
$(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -D PROFILER -o profile_OPTForCausalLM tests/test_OPTForCausalLM.cc $(SRC)

test_OPTTokenizer:
$(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -D PROFILER -o test_OPTTokenizer tests/test_OPTTokenizer.cc $(SRC)
$(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -o test_OPTTokenizer tests/test_OPTTokenizer.cc $(SRC)

test_OPTGenerate:
$(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -D PROFILER -o test_OPTGenerate tests/test_OPTGenerate.cc $(SRC)
$(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -o test_OPTGenerate tests/test_OPTGenerate.cc $(SRC)

demo:
$(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -o demo application/demo.cc $(SRC)

# Clean up
clean:
Expand Down
63 changes: 63 additions & 0 deletions experimental/transformer/application/demo.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#include <iostream>
#include <map>

#include "OPTGenerate.h"

std::map<std::string, int> model_config = {
{"OPT125M", OPT_125M},
{"OPT1.3B", OPT_1_3B},
{"OPT6.7B", OPT_6_7B},
};

std::map<int, std::string> model_path = {
{OPT_125M, "models/OPT_125m"},
{OPT_1_3B, "models/OPT_1.3B"},
{OPT_6_7B, "models/OPT_6.7B"},
};

/**
 * Entry point of the interactive OPT text-generation demo.
 *
 * Usage: demo [MODEL]
 *   MODEL  optional key of model_config; when omitted, "OPT1.3B" is used.
 *
 * Loads the selected model and the tokenizer, reads one line of text from
 * stdin, echoes its tokenizer round-trip, and runs OPTGenerate in interactive
 * mode so generated tokens are printed as they are produced.
 *
 * @return 0 on success, 1 when the requested model name is not supported.
 */
int main(int argc, char* argv[]) {
    std::string target_model = "OPT1.3B";

    if (argc > 1) {
        const std::string target_str = argv[1];
        // Validate the name the user actually supplied. Checking the default
        // name here would always succeed and let unsupported names through.
        if (model_config.find(target_str) == model_config.end()) {
            std::cerr << "Model config:" << target_str << " unsupported" << std::endl;
            std::cerr << "Please select one of the following:";
            for (const auto& k : model_config) {
                std::cerr << k.first << ", ";
            }
            std::cerr << std::endl;
            // Exit with a failure status instead of throwing a string literal
            // that nothing catches (which would abort the process).
            return 1;
        }
        std::cout << "Model: " << target_str << " selected" << std::endl;
        target_model = target_str;
    } else {
        std::cout << "Using default model: " + target_model << std::endl;
    }

    // Load model
    std::cout << "Loading model... " << std::flush;
    // at() instead of operator[]: a missing key must fail loudly rather than
    // silently insert a zero id / empty path into the tables.
    const int model_id = model_config.at(target_model);
    const std::string m_path = model_path.at(model_id);
    OPTForCausalLM model = OPTForCausalLM(m_path, get_opt_model_config(model_id));
    std::cout << "Finished!" << std::endl;

    // Load encoder
    // NOTE(review): the tokenizer files are always read from the OPT_125m
    // directory regardless of the selected model - presumably the vocabulary
    // is shared across model sizes; confirm.
    std::string vocab_file = "./models/OPT_125m/vocab.json";
    std::string bpe_file = "./models/OPT_125m/merges.txt";
    Encoder encoder = get_encoder(vocab_file, bpe_file);

    // Get input from the user
    std::cout << "Please enter a line of text: ";
    std::string input;
    std::getline(std::cin, input);
    std::vector<int> input_ids = encoder.encode(input);
    // Echo the prompt after an encode/decode round-trip.
    std::cout << "input:" << encoder.decode(input_ids) << std::endl;

    struct opt_params generation_config;
    generation_config.n_predict = 256;
    // In interactive mode OPTGenerate prints each token as it is generated,
    // so the returned ids do not need to be decoded again here.
    OPTGenerate(model, input_ids, generation_config, &encoder, true);

    return 0;
}
6 changes: 2 additions & 4 deletions experimental/transformer/include/OPTGenerate.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,5 @@ void OPT_sample_typical(OPT_token_data_array* candidates, float p, size_t min_ke
void OPT_sample_top_p(OPT_token_data_array* candidates, float p, size_t min_keep);

std::vector<int> OPTGenerate(OPTForCausalLM model, std::vector<int> input_ids,
const struct opt_params generation_config);

void OPTGenerate_interactive(OPTForCausalLM model, std::vector<int> input_ids,
const struct opt_params generation_config, Encoder encoder);
const struct opt_params generation_config, Encoder* encoder = NULL,
bool interactive = false);
10 changes: 7 additions & 3 deletions experimental/transformer/include/profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,27 @@ class Profiler {
counts[section]++;
}

void report() const {
#ifdef PROFILER
void report_internal() const {
std::cout << "Section, Total time(us), Average time(us), Count, GOPs" << std::endl;
for (const auto& entry : durations) {
std::string row;
row += entry.first + ", ";
row += std::to_string(entry.second) + ", ";
row += std::to_string(entry.second / counts.at(entry.first)) + ", ";
if (flops.count(entry.first) == 0)
row += std::to_string(counts.at(entry.first));
row += std::to_string(counts.at(entry.first)) + ", N/A, N/A";
else {
row += std::to_string(counts.at(entry.first)) + ", ";
// ops and microsecond
row += std::to_string((((float)flops.at(entry.first)) / (float)(entry.second)) / 1000.0);
}
std::cout << row << std::endl;
}
}

// Prints the profiling table by delegating to report_internal(), but only in
// builds compiled with PROFILER defined; without it the body is empty and the
// call is a no-op. Code that must always print calls report_internal() directly.
void report() const {
#ifdef PROFILER
report_internal();
#endif
}

Expand Down
4 changes: 4 additions & 0 deletions experimental/transformer/include/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@

#include "profiler.h"

// Timing macros that are defined unconditionally, i.e. they forward to the
// Profiler singleton whether or not PROFILER is defined (unlike the PROFILE_*
// macros declared under the #ifdef below).
//   STATS_START(x)    - calls Profiler::start(x) for section x
//   STATS_FLOPS(x, y) - calls the two-argument Profiler::start(x, y);
//                       y is presumably the section's operation count - confirm
//   STATS_END(x)      - calls Profiler::stop(x) for section x
#define STATS_START(x) Profiler::getInstance().start(x)
#define STATS_FLOPS(x, y) Profiler::getInstance().start(x, y)
#define STATS_END(x) Profiler::getInstance().stop(x)

#ifdef PROFILER
#define PROFILE_START(x) Profiler::getInstance().start(x)
#define PROFILE_START_FLOPS(x, y) Profiler::getInstance().start(x, y)
Expand Down
15 changes: 14 additions & 1 deletion experimental/transformer/src/OPTGenerate.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "OPTGenerate.h"

#include "common.h"
#include "util.h"

void OPT_sample_repetition_penalty(OPT_token_data_array* candidates, const int* last_tokens, size_t last_tokens_size,
float penalty) {
Expand Down Expand Up @@ -320,7 +321,7 @@ void OPT_sample_top_p(OPT_token_data_array* candidates, float p, size_t min_keep

// OPTGenerate function
std::vector<int> OPTGenerate(OPTForCausalLM model, std::vector<int> input_ids,
const struct opt_params generation_config) {
const struct opt_params generation_config, Encoder* encoder, bool interactive) {
std::vector<int> last_n_tokens(generation_config.n_ctx);
std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0);
std::vector<int> embd;
Expand All @@ -338,10 +339,14 @@ std::vector<int> OPTGenerate(OPTForCausalLM model, std::vector<int> input_ids,
}
}

if (encoder == NULL) interactive = false;
if (interactive) std::cout << "Generated: " << std::endl;

bool has_past_kv = false;
std::vector<Matrix3D<int8_t>> past_keys, past_values;
int n_remain = generation_config.n_predict;
while (n_remain != 0) {
STATS_START("Token generation");
std::vector<float> logits(generation_config.n_vocab);

int sqlen = 1;
Expand Down Expand Up @@ -433,8 +438,16 @@ std::vector<int> OPTGenerate(OPTForCausalLM model, std::vector<int> input_ids,
generate_ids.push_back(id);
input_ids = std::vector<int>{id};

if (interactive) std::cout << encoder->decode(input_ids) << std::flush;

--n_remain;
STATS_END("Token generation");
}

if (interactive) std::cout << std::endl;

Profiler::getInstance().report_internal();
Profiler::getInstance().reset();

return generate_ids;
}

0 comments on commit 0c9d325

Please sign in to comment.