fix tests on macos
meenchen committed May 23, 2023
1 parent e2a7d2c · commit c92a22d
Showing 3 changed files with 50 additions and 160 deletions.
62 changes: 32 additions & 30 deletions experimental/transformer/src/OPTTokenizer.cc
@@ -2,7 +2,7 @@
 
 /*std::vector<int> OPT_tokenize(const OPT_vocab & vocab, const std::string & text, bool add_bos) {
     std::vector<int> res(text.size() + (int) add_bos);
-    return res;
+    return res;
 }*/
 
 /*
@@ -44,21 +44,20 @@ std::map<std::string, int> PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
 };
 */
 
-
-/*
- * Tokenizer
+/*
+ * Tokenizer
  */
 Encoder::Encoder(std::map<std::string, int> encoder, std::vector<std::pair<std::string, std::string>> bpe_merges) {
     this->encoder = encoder;
-    for(auto &it: encoder) {
+    for (auto &it : encoder) {
         this->decoder[it.second] = it.first;
     }
     this->byte_encoder = bytes_to_unicode();
-    for(auto &it: byte_encoder) {
+    for (auto &it : byte_encoder) {
         this->byte_decoder[it.second] = it.first;
     }
-    for(int i = 0; i < bpe_merges.size(); ++i) {
-        this->bpe_ranks.insert(std::make_pair(bpe_merges[i], i));
+    for (int i = 0; i < bpe_merges.size(); ++i) {
+        this->bpe_ranks.insert(std::make_pair(bpe_merges[i], i));
     }
 }

@@ -93,7 +92,7 @@ Encoder::std::vector<std::pair<int, unsigned char>> bytes_to_unicode() {
 
 std::unordered_map<int, std::string> Encoder::bytes_to_unicode() {
     std::unordered_map<int, std::string> byte_to_unicode;
-
+
     // Range from '!' to '~'
     for (int b = '!'; b <= '~'; ++b) {
         byte_to_unicode[b] = std::string(1, static_cast<char>(b));
Expand Down Expand Up @@ -143,7 +142,7 @@ std::unordered_map<int, std::string> Encoder::bytes_to_unicode() {
byte_to_unicode[0xA3] = u8"\u0156"; // Ŗ
byte_to_unicode[0xA4] = u8"\u00A4"; // Currency symbol
byte_to_unicode[0xA5] = u8"\u0128"; // Ĩ

return byte_to_unicode;
}

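The byte_encoder built by bytes_to_unicode() is the GPT-2-style trick of mapping every raw byte to a printable UTF-8 string so that arbitrary bytes survive BPE; the constructor inverts it into byte_decoder. A roundtrip sketch, assuming the two maps are reachable from test code (in the real class they may well be private):

    // Sketch only, not part of the commit: every byte value must map
    // through byte_encoder and back through byte_decoder unchanged.
    void check_byte_roundtrip(Encoder &enc) {
        for (auto &kv : enc.byte_encoder) {  // kv: {byte value, UTF-8 string}
            assert(enc.byte_decoder[kv.second] == kv.first);
        }
    }
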
@@ -159,43 +158,42 @@ std::set<std::pair<std::string, std::string>> Encoder::get_pairs(std::vector<std
 }
 
 std::string Encoder::bpe(std::string token) {
-    auto it = this->cache.find(token); // Find the token in the cache
-    if (it != this->cache.end()) { // If the token is in the cache
+    auto it = this->cache.find(token);  // Find the token in the cache
+    if (it != this->cache.end()) {      // If the token is in the cache
         return it->second;
     }
 
-    std::vector<std::string> word; // word = tuple(token)
+    std::vector<std::string> word;  // word = tuple(token)
     for (char c : token) {
         word.push_back(std::string(1, c));
    }
 
     std::set<std::pair<std::string, std::string>> pairs = get_pairs(word);
 
-    if(pairs.empty())
-        return token;
+    if (pairs.empty()) return token;
 
-    while(true) {
+    while (true) {
         std::pair<std::string, std::string> bigram;
-        int min_index = std::numeric_limits<int>::max(); // Start with the highest possible int value
+        int min_index = std::numeric_limits<int>::max();  // Start with the highest possible int value
 
-        for (const auto &pair: pairs) {
-            auto it = this->bpe_ranks.find(pair); // Find the pair in the map
-            if (it != this->bpe_ranks.end()) { // If the pair is in the map
-                if (it->second < min_index) { // If the current pair's value is less than the min_index
+        for (const auto &pair : pairs) {
+            auto it = this->bpe_ranks.find(pair);  // Find the pair in the map
+            if (it != this->bpe_ranks.end()) {     // If the pair is in the map
+                if (it->second < min_index) {      // If the current pair's value is less than the min_index
                     min_index = it->second;
                     bigram = pair;
                 }
             }
         }
 
-        if (min_index == std::numeric_limits<int>::max()) // No pair was found in bpe_ranks
+        if (min_index == std::numeric_limits<int>::max())  // No pair was found in bpe_ranks
             break;
 
         std::string first = bigram.first;
         std::string second = bigram.second;
         std::vector<std::string> new_word;
         int i = 0;
-        while(i < word.size()) {
+        while (i < word.size()) {
            auto it = std::find(word.begin() + i, word.end(), first);
            if (it == word.end()) {
                new_word.insert(new_word.end(), word.begin() + i, word.end());
@@ -208,8 +206,7 @@ std::string Encoder::bpe(std::string token) {
             if (word[i] == first && i < word.size() - 1 && word[i + 1] == second) {
                 new_word.push_back(first + second);
                 i += 2;
-            }
-            else {
+            } else {
                 new_word.push_back(word[i]);
                 i += 1;
             }
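To make the merge loop concrete, a short trace with illustrative ranks (not values from the repo's merges file):

    // token = "low", bpe_ranks = { ("l","o"): 0, ("lo","w"): 1 }
    // start:       word = { "l", "o", "w" },  pairs = { ("l","o"), ("o","w") }
    // iteration 1: lowest-ranked bigram is ("l","o")  -> word = { "lo", "w" }
    // iteration 2: lowest-ranked bigram is ("lo","w") -> word = { "low" }
    // no pair of the one-element word is in bpe_ranks -> break, result "low"
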
@@ -232,7 +229,11 @@ std::vector<int> Encoder::encode(std::string text) {
     std::vector<int> bpe_tokens;
 
     // Using Regex to tokenize
-    std::regex pat = std::regex("'s|'t|'re|'ve|'m|'ll|'d| ?[a-zA-Z]+| ?[0-9]+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+");
+    // MACOS does not support p{L}\\p{N}, we may need different regex lib
+    // std::regex pat = std::regex("'s|'t|'re|'ve|'m|'ll|'d| ?[a-zA-Z]+| ?[0-9]+|
+    // ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+");
+    std::regex pat = std::regex("'s|'t|'re|'ve|'m|'ll|'d| ?[a-zA-Z]+| ?[0-9]+| ?[^\\s]+|\\s+(?!\\S)|\\s+");
+
     std::sregex_iterator iter(text.begin(), text.end(), pat);
     std::sregex_iterator end;
 
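The change above is the heart of the commit: std::regex uses the ECMAScript grammar, which has no \p{L}/\p{N} Unicode property classes, so the original pattern either throws std::regex_error at construction or quietly treats \p as a literal escape, depending on the standard library. The [^\\s]+ fallback keeps the tokenizer running on macOS, but it lumps letters, digits, and punctuation into one run where the GPT-2 pattern would split them. A minimal, self-contained probe of the failure mode (a sketch, not part of the commit):

    #include <iostream>
    #include <regex>

    int main() {
        try {
            std::regex pat("[^\\s\\p{L}\\p{N}]+");
            // If construction succeeds, \p was parsed as a literal escape,
            // not as a Unicode property class, so the match is still wrong.
            std::cout << "pattern accepted, but \\p{L}/\\p{N} are not honored\n";
        } catch (const std::regex_error &e) {
            std::cout << "pattern rejected: " << e.what() << '\n';
        }
        return 0;
    }

A fuller fix would be a Unicode-aware engine such as RE2 or ICU, which is what the in-diff comment about a different regex lib is pointing at.
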
@@ -241,7 +242,7 @@ std::vector<int> Encoder::encode(std::string text) {
         std::string encoded_token;
 
         for (char b : token) {
-            for (auto &it: this->byte_encoder) {
+            for (auto &it : this->byte_encoder) {
                 if (it.first == int(static_cast<int>(b))) {
                     encoded_token += it.second;
                     break;
@@ -265,8 +266,7 @@ std::string Encoder::decode(std::vector<int> tokens) {
         if (int(this->decoder[token][0]) < '!' || int(this->decoder[token][0]) > '~') {
             text += " ";
             i_flag = 2;
-        }
-        else {
+        } else {
             text += std::string(1, this->decoder[token][0]);
         }
 
@@ -296,7 +296,9 @@ Encoder get_encoder(std::string vocab_file, std::string bpe_file) {
     while (std::getline(infile, line)) {
         std::istringstream iss(line);
         std::string a, b;
-        if (!(iss >> a >> b)) { break; } // error
+        if (!(iss >> a >> b)) {
+            break;
+        }  // error
         bpe_merges.push_back({a, b});
     }
 
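For orientation, the Encoder that get_encoder assembles from the vocab and merges files is used roughly like this (a usage sketch; the asset paths are hypothetical, not taken from this commit):

    Encoder encoder = get_encoder("models/OPT_125m/vocab.json", "models/OPT_125m/merges.txt");
    std::vector<int> ids = encoder.encode("Hello world");
    std::string text = encoder.decode(ids);  // expected to roundtrip to "Hello world"
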
12 changes: 9 additions & 3 deletions experimental/transformer/src/utils.cc
@@ -3,16 +3,22 @@
 #include <stdio.h>
 
 #include <cassert>
+#include <cerrno>   // for errno
 #include <cmath>
 #include <cstdlib>
+#include <cstring>  // for strerror
 #include <iostream>
 
 template <typename T>
 void read_to_array(const char* path, T* array, int size) {
     std::ifstream infile(path, std::ios::binary | std::ios::in);
-    assert(infile);
-    infile.read(reinterpret_cast<char*>(array), size * sizeof(T));
-    infile.close();
+    if (infile.fail()) {
+        std::cout << "Failed to open file: " << strerror(errno) << std::endl;
+        throw("Expected error...");
+    } else {
+        infile.read(reinterpret_cast<char*>(array), size * sizeof(T));
+        infile.close();
+    }
 }
 
 struct max_error_info {
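With this change a missing weight file now reports the OS error instead of relying on assert(infile), which is compiled out under NDEBUG and would have let the read proceed on a bad stream. One caveat for callers: throw("Expected error...") throws a const char*, not a std::exception, so it must be caught as such. A call-site sketch (the path is hypothetical):

    try {
        float weights[64];
        read_to_array("assets/tests/missing.bin", weights, 64);
    } catch (const char *msg) {  // matches the const char* thrown above
        std::cerr << "read_to_array failed: " << msg << std::endl;
    }
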
136 changes: 9 additions & 127 deletions experimental/transformer/tests/test_Int8OPTDecoderLayer.cc
@@ -1,8 +1,8 @@
-#include "Int8OPTDecoderLayer.h"
+#include "Int8OPTDecoder.h"
 #include "operators.h"
 #include "utils.h"
 
-#define MAX_TEST_MEMORY_BUF 1024 * 1024 * 1024  // 1 GB
+#define MAX_TEST_MEMORY_BUF 1 * 1024 * 1024 * 1576  // 1.5 GB
 static char buffer[MAX_TEST_MEMORY_BUF];
 
 class MemoryAllocator {
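A quick arithmetic check on the new buffer size: 1 * 1024 * 1024 * 1576 is 1,652,555,776 bytes, closer to 1.54 GB than the 1.5 GB in the comment (exactly 1.5 GB would be 1536 * 1024 * 1024). A compile-time sketch, not part of the commit:

    static_assert(static_cast<long long>(MAX_TEST_MEMORY_BUF) == 1652555776LL,
                  "1 * 1024 * 1024 * 1576 bytes, i.e. roughly 1.54 GB");
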
@@ -232,66 +232,7 @@ void test_DecoderLayer_generate_cache() {
     const int sqlen = 1, b = 1, past_len = 108, head_dim = embed_dim / num_heads;
     MemoryAllocator mem_buf;
 
-    struct BMM_S8T_S8N_F32T_params qk_bmm;
-    struct BMM_S8T_S8N_S8T_params pv_bmm;
-    struct W8A8B8O8Linear_params k_proj, v_proj, q_proj;
-    Matrix3D<int8_t> k_proj_weight(mem_buf.get_int8buffer(embed_dim * embed_dim), 1, embed_dim, embed_dim);
-    Matrix3D<int8_t> k_proj_bias(mem_buf.get_int8buffer(embed_dim), 1, 1, embed_dim);
-    k_proj.weight = k_proj_weight;
-    k_proj.bias = k_proj_bias;
-    auto k_proj_op = W8A8B8O8Linear(k_proj);
-    Matrix3D<int8_t> v_proj_weight(mem_buf.get_int8buffer(embed_dim * embed_dim), 1, embed_dim, embed_dim);
-    Matrix3D<int8_t> v_proj_bias(mem_buf.get_int8buffer(embed_dim), 1, 1, embed_dim);
-    v_proj.weight = v_proj_weight;
-    v_proj.bias = v_proj_bias;
-    auto v_proj_op = W8A8B8O8Linear(v_proj);
-    Matrix3D<int8_t> q_proj_weight(mem_buf.get_int8buffer(embed_dim * embed_dim), 1, embed_dim, embed_dim);
-    Matrix3D<int8_t> q_proj_bias(mem_buf.get_int8buffer(embed_dim), 1, 1, embed_dim);
-    q_proj.weight = q_proj_weight;
-    q_proj.bias = q_proj_bias;
-    auto q_proj_op = W8A8B8O8Linear(q_proj);
-
-    struct W8A8BFP32OFP32Linear_params out_proj;
-    Matrix3D<int8_t> out_proj_weight(mem_buf.get_int8buffer(embed_dim * embed_dim), 1, embed_dim, embed_dim);
-    Matrix3D<float> out_proj_bias(mem_buf.get_fpbuffer(embed_dim), 1, 1, embed_dim);
-    out_proj.weight = out_proj_weight;
-    out_proj.bias = out_proj_bias;
-    auto out_proj_op = W8A8BFP32OFP32Linear(out_proj);
-
-    struct LayerNormQ_params self_attn_layer_norm, final_layer_norm;
-    Matrix3D<float> self_attn_layer_norm_weight(mem_buf.get_fpbuffer(embed_dim), 1, 1, embed_dim);
-    Matrix3D<float> self_attn_layer_norm_bias(mem_buf.get_fpbuffer(embed_dim), 1, 1, embed_dim);
-    self_attn_layer_norm.weight = self_attn_layer_norm_weight;
-    self_attn_layer_norm.bias = self_attn_layer_norm_bias;
-
-    Matrix3D<float> final_layer_norm_weight(mem_buf.get_fpbuffer(embed_dim), 1, 1, embed_dim);
-    Matrix3D<float> final_layer_norm_bias(mem_buf.get_fpbuffer(embed_dim), 1, 1, embed_dim);
-    final_layer_norm.weight = final_layer_norm_weight;
-    final_layer_norm.bias = final_layer_norm_bias;
-    LayerNormQ self_attn_layer_norm_op = LayerNormQ(self_attn_layer_norm);
-    LayerNormQ final_layer_norm_op = LayerNormQ(final_layer_norm);
-
-    struct W8A8B8O8LinearReLU_params fc1;
-    Matrix3D<int8_t> fc1_weight(mem_buf.get_int8buffer(embed_dim * hidden_dim), 1, hidden_dim, embed_dim);
-    Matrix3D<int8_t> fc1_bias(mem_buf.get_int8buffer(hidden_dim), 1, 1, hidden_dim);
-    fc1.weight = fc1_weight;
-    fc1.bias_int8 = fc1_bias;
-    auto fc1_op = W8A8B8O8LinearReLU(fc1);
-
-    struct W8A8BFP32OFP32Linear_params fc2;
-    Matrix3D<int8_t> fc2_weight(mem_buf.get_int8buffer(embed_dim * hidden_dim), 1, embed_dim, hidden_dim);
-    Matrix3D<float> fc2_bias(mem_buf.get_fpbuffer(embed_dim), 1, 1, embed_dim);
-    fc2.weight = fc2_weight;
-    fc2.bias = fc2_bias;
-    auto fc2_op = W8A8BFP32OFP32Linear(fc2);
-
-    auto qk_bmm_op = BMM_S8T_S8N_F32T(qk_bmm);
-    auto pv_bmm_op = BMM_S8T_S8N_S8T(pv_bmm);
-
-    int layer_idx = 0;
-    Int8OPTDecoderLayer layer = Int8OPTDecoderLayer(
-        "models/OPT_125m/decoder/layer0", get_opt_model_config(OPT_125M), layer_idx, self_attn_layer_norm_op,
-        final_layer_norm_op, fc1_op, fc2_op, qk_bmm_op, pv_bmm_op, k_proj_op, v_proj_op, q_proj_op, out_proj_op);
+    Int8OPTDecoder decoder = Int8OPTDecoder("models/OPT_125m/decoder/", get_opt_model_config(OPT_125M));
 
     int tgz = sqlen + past_len;
     Matrix3D<float> hidden_states(mem_buf.get_fpbuffer(b * sqlen * embed_dim), b, sqlen, embed_dim);
@@ -305,12 +246,12 @@ void test_DecoderLayer_generate_cache() {
 
     struct Int8OPTDecoderLayer_input input = {hidden_states, attention_mask, past_keys, past_value};
 
-    struct Int8OPTDecoderLayer_output output = layer.forward(input);
+    struct Int8OPTDecoderLayer_output output = decoder.layers[0].forward(input);
 
     Matrix3D<float> residualGT(mem_buf.get_fpbuffer(b * sqlen * embed_dim), b, sqlen, embed_dim);
     read_to_array("assets/tests/OPT_125m/test_cache_residual.bin", residualGT.m_data, b * sqlen * embed_dim);
-    // print_first_k_elelment("output.hidden_states.m_data", output.hidden_states.m_data, 64);
-    // print_first_k_elelment("residualGT.m_data", residualGT.m_data, 64);
+    // // print_first_k_elelment("output.hidden_states.m_data", output.hidden_states.m_data, 64);
+    // // print_first_k_elelment("residualGT.m_data", residualGT.m_data, 64);
     int8_t *key_statesGT = mem_buf.get_int8buffer(output.past_key_value.first.length());
     read_to_array("assets/tests/OPT_125m/test_present_key.bin", key_statesGT, output.past_key_value.first.length());
     int8_t *value_statesGT = mem_buf.get_int8buffer(output.past_key_value.second.length());
@@ -334,66 +275,7 @@ void test_DecoderLayer_generate_cache_1_3B() {
     const int sqlen = 1, b = 1, past_len = 108, head_dim = embed_dim / num_heads;
     MemoryAllocator mem_buf;
 
-    struct BMM_S8T_S8N_F32T_params qk_bmm;
-    struct BMM_S8T_S8N_S8T_params pv_bmm;
-    struct W8A8B8O8Linear_params k_proj, v_proj, q_proj;
-    Matrix3D<int8_t> k_proj_weight(mem_buf.get_int8buffer(embed_dim * embed_dim), 1, embed_dim, embed_dim);
-    Matrix3D<int8_t> k_proj_bias(mem_buf.get_int8buffer(embed_dim), 1, 1, embed_dim);
-    k_proj.weight = k_proj_weight;
-    k_proj.bias = k_proj_bias;
-    auto k_proj_op = W8A8B8O8Linear(k_proj);
-    Matrix3D<int8_t> v_proj_weight(mem_buf.get_int8buffer(embed_dim * embed_dim), 1, embed_dim, embed_dim);
-    Matrix3D<int8_t> v_proj_bias(mem_buf.get_int8buffer(embed_dim), 1, 1, embed_dim);
-    v_proj.weight = v_proj_weight;
-    v_proj.bias = v_proj_bias;
-    auto v_proj_op = W8A8B8O8Linear(v_proj);
-    Matrix3D<int8_t> q_proj_weight(mem_buf.get_int8buffer(embed_dim * embed_dim), 1, embed_dim, embed_dim);
-    Matrix3D<int8_t> q_proj_bias(mem_buf.get_int8buffer(embed_dim), 1, 1, embed_dim);
-    q_proj.weight = q_proj_weight;
-    q_proj.bias = q_proj_bias;
-    auto q_proj_op = W8A8B8O8Linear(q_proj);
-
-    struct W8A8BFP32OFP32Linear_params out_proj;
-    Matrix3D<int8_t> out_proj_weight(mem_buf.get_int8buffer(embed_dim * embed_dim), 1, embed_dim, embed_dim);
-    Matrix3D<float> out_proj_bias(mem_buf.get_fpbuffer(embed_dim), 1, 1, embed_dim);
-    out_proj.weight = out_proj_weight;
-    out_proj.bias = out_proj_bias;
-    auto out_proj_op = W8A8BFP32OFP32Linear(out_proj);
-
-    struct LayerNormQ_params self_attn_layer_norm, final_layer_norm;
-    Matrix3D<float> self_attn_layer_norm_weight(mem_buf.get_fpbuffer(embed_dim), 1, 1, embed_dim);
-    Matrix3D<float> self_attn_layer_norm_bias(mem_buf.get_fpbuffer(embed_dim), 1, 1, embed_dim);
-    self_attn_layer_norm.weight = self_attn_layer_norm_weight;
-    self_attn_layer_norm.bias = self_attn_layer_norm_bias;
-
-    Matrix3D<float> final_layer_norm_weight(mem_buf.get_fpbuffer(embed_dim), 1, 1, embed_dim);
-    Matrix3D<float> final_layer_norm_bias(mem_buf.get_fpbuffer(embed_dim), 1, 1, embed_dim);
-    final_layer_norm.weight = final_layer_norm_weight;
-    final_layer_norm.bias = final_layer_norm_bias;
-    LayerNormQ self_attn_layer_norm_op = LayerNormQ(self_attn_layer_norm);
-    LayerNormQ final_layer_norm_op = LayerNormQ(final_layer_norm);
-
-    struct W8A8B8O8LinearReLU_params fc1;
-    Matrix3D<int8_t> fc1_weight(mem_buf.get_int8buffer(embed_dim * hidden_dim), 1, hidden_dim, embed_dim);
-    Matrix3D<int8_t> fc1_bias(mem_buf.get_int8buffer(hidden_dim), 1, 1, hidden_dim);
-    fc1.weight = fc1_weight;
-    fc1.bias_int8 = fc1_bias;
-    auto fc1_op = W8A8B8O8LinearReLU(fc1);
-
-    struct W8A8BFP32OFP32Linear_params fc2;
-    Matrix3D<int8_t> fc2_weight(mem_buf.get_int8buffer(embed_dim * hidden_dim), 1, embed_dim, hidden_dim);
-    Matrix3D<float> fc2_bias(mem_buf.get_fpbuffer(embed_dim), 1, 1, embed_dim);
-    fc2.weight = fc2_weight;
-    fc2.bias = fc2_bias;
-    auto fc2_op = W8A8BFP32OFP32Linear(fc2);
-
-    auto qk_bmm_op = BMM_S8T_S8N_F32T(qk_bmm);
-    auto pv_bmm_op = BMM_S8T_S8N_S8T(pv_bmm);
-
-    int layer_idx = 0;
-    Int8OPTDecoderLayer layer = Int8OPTDecoderLayer(
-        "models/OPT_1.3B/decoder/layer0", get_opt_model_config(OPT_1_3B), layer_idx, self_attn_layer_norm_op,
-        final_layer_norm_op, fc1_op, fc2_op, qk_bmm_op, pv_bmm_op, k_proj_op, v_proj_op, q_proj_op, out_proj_op);
+    Int8OPTDecoder decoder = Int8OPTDecoder("models/OPT_1.3B/decoder/", get_opt_model_config(OPT_1_3B));
 
     int tgz = sqlen + past_len;
     Matrix3D<float> hidden_states(mem_buf.get_fpbuffer(b * sqlen * embed_dim), b, sqlen, embed_dim);
@@ -407,7 +289,7 @@ void test_DecoderLayer_generate_cache_1_3B() {
 
     struct Int8OPTDecoderLayer_input input = {hidden_states, attention_mask, past_keys, past_value};
 
-    struct Int8OPTDecoderLayer_output output = layer.forward(input);
+    struct Int8OPTDecoderLayer_output output = decoder.layers[0].forward(input);
 
     Matrix3D<float> residualGT(mem_buf.get_fpbuffer(b * sqlen * embed_dim), b, sqlen, embed_dim);
     read_to_array("assets/tests/OPT_1.3B/test_cache_residual.bin", residualGT.m_data, b * sqlen * embed_dim);
@@ -526,9 +408,9 @@ void test_DecoderLayer() {
 }
 
 int main() {
-    test_DecoderLayer();
     test_DecoderLayer_generate();
     test_DecoderLayer_generate_1_3B();
     test_DecoderLayer_generate_cache();
     test_DecoderLayer_generate_cache_1_3B();
+    test_DecoderLayer();
 }
