diff --git a/definitions/categories/020/cat020_ref_1.3.json b/definitions/categories/020/cat020_ref_1.3.json index 795162e..f5c0744 100644 --- a/definitions/categories/020/cat020_ref_1.3.json +++ b/definitions/categories/020/cat020_ref_1.3.json @@ -12,7 +12,7 @@ "GVV", "GVA", "TRT", - "TA", + "DA", "-", "-", "-" diff --git a/src/asterix/asterixparser.cpp b/src/asterix/asterixparser.cpp index 2ee4f27..eed2e6c 100644 --- a/src/asterix/asterixparser.cpp +++ b/src/asterix/asterixparser.cpp @@ -315,7 +315,8 @@ std::pair ASTERIXParser::decodeDataBlock (const char* data, nloh if (debug) loginf << "ASTERIXParser: decodeDataBlock: index " << data_block_index << " length " << data_block_length - << " data '" << binary2hex((const unsigned char*)&data[data_block_index], data_block_length) << "'" << logendl; + << " data '" << binary2hex((const unsigned char*)&data[data_block_index], data_block_length) << "'" + << logendl; // try to decode if (records_.count(cat) != 0) @@ -335,7 +336,8 @@ std::pair ASTERIXParser::decodeDataBlock (const char* data, nloh // create records until end of content while (data_block_parsed_bytes < data_block_length) { - //loginf << "asterix parser decoding record " << cnt << " parsed bytes " << parsed_bytes_record << " length " << record_length; + //loginf << "asterix parser decoding record " << cnt << " parsed bytes " << parsed_bytes_record + // << " length " << record_length; record_parsed_bytes = records_.at(cat)->parseItem( data, data_block_index+data_block_parsed_bytes, data_block_length-data_block_parsed_bytes, @@ -361,7 +363,10 @@ std::pair ASTERIXParser::decodeDataBlock (const char* data, nloh (const unsigned char*)&data[data_block_index+data_block_parsed_bytes], record_parsed_bytes); - data_block_parsed_bytes += record_parsed_bytes ; + data_block_content.at("records")[ret.first]["index"] = data_block_index+data_block_parsed_bytes; + data_block_content.at("records")[ret.first]["length"] = record_parsed_bytes; + + data_block_parsed_bytes += record_parsed_bytes; ++ret.first; } diff --git a/src/asterix/record.cpp b/src/asterix/record.cpp index c441f9c..82cba9e 100644 --- a/src/asterix/record.cpp +++ b/src/asterix/record.cpp @@ -259,75 +259,80 @@ size_t Record::parseItem (const char* data, size_t index, size_t size, size_t cu } } - if (special_purpose_field_present) + if (reserved_expansion_field_present) { size_t re_bytes = static_cast (data[index+parsed_bytes]); + parsed_bytes += 1; // read 1 len byte + re_bytes -= 1; // includes 1 len byte - if (spf_) // decode ref + if (ref_) // decode ref { - if (debug) - loginf << "record '"+name_+"' has special purpose field, reading " << re_bytes << " bytes " + loginf << "record '"+name_+"' has reserved expansion field, reading " << re_bytes << " bytes " << logendl; assert (re_bytes > 1); - size_t ref_bytes = spf_->parseItem(data, index+parsed_bytes+1, re_bytes-1, 0, target["SPF"], debug); + size_t ref_bytes = ref_->parseItem(data, index+parsed_bytes, re_bytes, 0, target["REF"], debug); if (debug) - loginf << "record '"+name_+"' parsed special purpose field, read " << ref_bytes << " ref in " + loginf << "record '"+name_+"' parsed reserved expansion field, read " << ref_bytes << " ref in " << re_bytes << " bytes " << logendl; - if (ref_bytes != re_bytes-1) - throw runtime_error ("record item '"+name_+"' special purpose field definition only read " - + to_string(ref_bytes) + " bytes of specified "+to_string(ref_bytes-1)); + if (ref_bytes != re_bytes) + throw runtime_error ("record item '"+name_+"' reserved expansion field definition only read " + + to_string(ref_bytes) + " bytes of specified "+to_string(re_bytes)); - //loginf << "UGA SPF '" << target["SPF"].dump(4) << "'" << logendl; + //loginf << "UGA REF '" << target["REF"].dump(4) << "'" << logendl; } else { if (debug) - loginf << "record '"+name_+"' has special purpose field, reading " << re_bytes << " bytes " + loginf << "record '"+name_+"' has reserved expansion field, reading " << re_bytes << " bytes " << logendl; - target["SPF"] = binary2hex((const unsigned char*)&data[index+parsed_bytes], re_bytes); - + target["REF"] = binary2hex((const unsigned char*)&data[index+parsed_bytes], re_bytes); } parsed_bytes += re_bytes; } - if (reserved_expansion_field_present) + if (special_purpose_field_present) { size_t re_bytes = static_cast (data[index+parsed_bytes]); - if (ref_) // decode ref + parsed_bytes += 1; // read 1 len byte + re_bytes -= 1; // includes 1 len byte + + if (spf_) // decode ref { + if (debug) - loginf << "record '"+name_+"' has reserved expansion field, reading " << re_bytes << " bytes " + loginf << "record '"+name_+"' has special purpose field, reading " << re_bytes << " bytes " << logendl; assert (re_bytes > 1); - size_t ref_bytes = ref_->parseItem(data, index+parsed_bytes+1, re_bytes-1, 0, target["REF"], debug); + size_t ref_bytes = spf_->parseItem(data, index+parsed_bytes, re_bytes, 0, target["SPF"], debug); if (debug) - loginf << "record '"+name_+"' parsed reserved expansion field, read " << ref_bytes << " ref in " + loginf << "record '"+name_+"' parsed special purpose field, read " << ref_bytes << " ref in " << re_bytes << " bytes " << logendl; - if (ref_bytes != re_bytes-1) - throw runtime_error ("record item '"+name_+"' reserved expansion field definition only read " - + to_string(ref_bytes) + " bytes of specified "+to_string(ref_bytes-1)); + if (ref_bytes != re_bytes) + throw runtime_error ("record item '"+name_+"' special purpose field definition only read " + + to_string(ref_bytes) + " bytes of specified "+to_string(re_bytes)); - //loginf << "UGA REF '" << target["REF"].dump(4) << "'" << logendl; + //loginf << "UGA SPF '" << target["SPF"].dump(4) << "'" << logendl; } else { if (debug) - loginf << "record '"+name_+"' has reserved expansion field, reading " << re_bytes << " bytes " + loginf << "record '"+name_+"' has special purpose field, reading " << re_bytes << " bytes " << logendl; - target["REF"] = binary2hex((const unsigned char*)&data[index+parsed_bytes], re_bytes); + target["SPF"] = binary2hex((const unsigned char*)&data[index+parsed_bytes], re_bytes); + } parsed_bytes += re_bytes; diff --git a/src/client/main.cpp b/src/client/main.cpp index c54185a..2618d1f 100644 --- a/src/client/main.cpp +++ b/src/client/main.cpp @@ -20,6 +20,7 @@ #include "logger.h" #include "jsonwriter.h" #include "jasterix/global.h" +#include "string_conv.h" #if USE_OPENSSL #include "utils/hashchecker.h" @@ -48,7 +49,6 @@ namespace po = boost::program_options; #endif using namespace std; -//using namespace jASTERIX; extern jASTERIX::JSONWriter* json_writer; @@ -114,7 +114,8 @@ int main (int argc, char **argv) ("single_thread", po::bool_switch(&jASTERIX::single_thread), "process data in single thread") #if USE_OPENSSL ("add_artas_md5", po::bool_switch(&jASTERIX::add_artas_md5_hash), "add ARTAS MD5 hashes") - ("check_artas_md5", po::bool_switch(&check_artas_md5_hash), "add and check ARTAS MD5 hashes") + ("check_artas_md5", po::value(&check_artas_md5_hash), + "add and check ARTAS MD5 hashes (with record data), stating which categories to check, e.g. 1,20,21,48") #endif ("add_record_data", po::bool_switch(&jASTERIX::add_record_data), "add original record data in hex") ("print", po::bool_switch(&print), "print JSON output") @@ -160,7 +161,7 @@ int main (int argc, char **argv) loginf << "single_thread: process data in single thread" << logendl; #if USE_OPENSSL loginf << "add_artas_md5: add ARTAS MD5 hashes" << logendl; - loginf << "check_artas_md5: add and check ARTAS MD5 hashes" << logendl; + loginf << "add and check ARTAS MD5 hashes (with record data), stating which categories to check, e.g. 1,20,21,48" << logendl; #endif loginf << "add_record_data: add original record data in hex" << logendl; loginf << "print: print JSON output" << logendl; @@ -203,7 +204,7 @@ int main (int argc, char **argv) jASTERIX::add_artas_md5_hash = true; if (find(arguments.begin(), arguments.end(), "--check_artas_md5") != arguments.end()) - check_artas_md5_hash = true; + check_artas_md5_hash = *(find(arguments.begin(), arguments.end(), "--check_artas_md5")+1); #endif if (find(arguments.begin(), arguments.end(), "--add_record_data") != arguments.end()) jASTERIX::add_record_data = true; @@ -223,9 +224,10 @@ int main (int argc, char **argv) #endif #if USE_OPENSSL - if (check_artas_md5_hash) + if (check_artas_md5_hash.size()) { jASTERIX::add_artas_md5_hash = true; + jASTERIX::add_record_data = true; if (write_type.size()) { @@ -233,6 +235,26 @@ int main (int argc, char **argv) return -1; } + std::vector cat_strs; + split(check_artas_md5_hash, ',', cat_strs); + + int cat; + for (auto& cat_str : cat_strs) + { + cat = std::atoi(cat_str.c_str()); + if (cat < 1 || cat > 255) + { + logerr << "jASTERIX client: impossible artas md5 checking cat value '" << cat_str << "'" << logendl; + return -1; + } + check_artas_md5_categories.push_back(cat); + } + if (!check_artas_md5_categories.size()) + { + logerr << "jASTERIX client: no valid artas md5 checking cat values given" << logendl; + return -1; + } + hash_checker = new HashChecker(framing.size()); // true if framing set } #endif @@ -278,11 +300,11 @@ int main (int argc, char **argv) asterix.decodeFile (filename, write_callback); else // printing done via flag #if USE_OPENSSL - if (check_artas_md5_hash) + if (check_artas_md5_hash.size()) asterix.decodeFile (filename, check_callback); - else { + else asterix.decodeFile (filename, empty_callback); - } + #else asterix.decodeFile (filename, empty_callback); #endif @@ -294,17 +316,22 @@ int main (int argc, char **argv) else // printing done via flag { #if USE_OPENSSL - if (check_artas_md5_hash) + if (check_artas_md5_hash.size()) asterix.decodeFile (filename, framing, check_callback); - else { + else asterix.decodeFile (filename, framing, empty_callback); - } + #else asterix.decodeFile (filename, framing, empty_callback); #endif } } +#if USE_OPENSSL + if (hash_checker) + hash_checker->printCollisions(); +#endif + size_t num_frames = asterix.numFrames(); size_t num_records = asterix.numRecords(); @@ -361,8 +388,6 @@ int main (int argc, char **argv) #if USE_OPENSSL if (hash_checker) { - hash_checker->printCollisions(); - delete hash_checker; hash_checker = nullptr; } diff --git a/src/utils/hashchecker.cpp b/src/utils/hashchecker.cpp index c19c7ab..d672a19 100644 --- a/src/utils/hashchecker.cpp +++ b/src/utils/hashchecker.cpp @@ -2,7 +2,10 @@ #include "jasterix.h" #include "logger.h" -bool check_artas_md5_hash {false}; +#include + +std::string check_artas_md5_hash; +std::vector check_artas_md5_categories; HashChecker* hash_checker {nullptr}; using namespace nlohmann; @@ -18,7 +21,7 @@ void check_callback (std::unique_ptr data_chunk, size_t num_fram HashChecker::HashChecker(bool framing_used) : framing_used_(framing_used) { - record_data_present_ = jASTERIX::add_record_data; + assert(jASTERIX::add_record_data); } void HashChecker::process (std::unique_ptr data) @@ -26,8 +29,8 @@ void HashChecker::process (std::unique_ptr data) //loginf << "UGA '" << data->dump(4) << "'" << logendl; unsigned int category; - unsigned int index; - unsigned int length; + // unsigned int index; + // unsigned int length; if (!framing_used_) { @@ -37,18 +40,22 @@ void HashChecker::process (std::unique_ptr data) { category = data_block.at("category"); + if (std::find(check_artas_md5_categories.begin(), check_artas_md5_categories.end(), category) + == check_artas_md5_categories.end()) + continue; + if (!data_block.contains("content")) // data blocks with errors continue; json& data_block_content = data_block.at("content"); - index = data_block_content.at("index"); - length = data_block_content.at("length"); + // index = data_block_content.at("index"); + // length = data_block_content.at("length"); if (data_block_content.contains("records")) { for (json& record : data_block_content.at("records")) - processRecord (category, index, length, record); + processRecord (category, record); } } } @@ -76,71 +83,155 @@ void HashChecker::process (std::unique_ptr data) category = data_block.at("category"); + if (std::find(check_artas_md5_categories.begin(), check_artas_md5_categories.end(), category) + == check_artas_md5_categories.end()) + continue; + if (!data_block.contains("content")) // data block with errors continue; json& data_block_content = data_block.at("content"); - index = data_block_content.at("index"); - length = data_block_content.at("length"); + // index = data_block_content.at("index"); + // length = data_block_content.at("length"); if (data_block_content.contains("records")) { for (json& record : data_block_content.at("records")) - processRecord (category, index, length, record); + processRecord (category, record); } } } } } -void HashChecker::processRecord (unsigned int category, unsigned int index, unsigned int length, nlohmann::json& record) +void HashChecker::processRecord (unsigned int category, nlohmann::json& record) { -// loginf << "UGA cat " << category << " index " << index << " length " << length -// << " '" << record.dump(4) << "'" << logendl; + // loginf << "UGA cat " << category << " index " << index << " length " << length + // << " '" << record.dump(4) << "'" << logendl; assert (record.contains("artas_md5")); + assert (record.contains("index")); + assert (record.contains("length")); + assert (record.contains("record_data")); - if (record_data_present_) - hash_map_[record.at("artas_md5")].emplace_back(category, index, length, record.at("record_data")); - else { - hash_map_[record.at("artas_md5")].emplace_back(category, index, length, ""); - } + hash_map_[record.at("artas_md5")].emplace_back(category, record.at("index"), record.at("length"), + record.at("record_data")); -// "artas_md5": "5073ed0f", -// "record_data": "ff02620f221f645b9c0fff08bcc175c06ddf82" + // "artas_md5": "5073ed0f", + // "record_data": "ff02620f221f645b9c0fff08bcc175c06ddf82" } void HashChecker::printCollisions () { unsigned int record_cnt {0}; - unsigned int collision_cnt {0}; + unsigned int same_data_collision_cnt {0}; + unsigned int different_data_collision_cnt {0}; + + std::stringstream ss_same_data_indexes; + std::map same_data_collisions_per_cat; + std::map, unsigned int> diffent_data_collisions_per_cat; for (auto& hash_it : hash_map_) { if (hash_it.second.size() > 1) { - loginf << "found " << hash_it.second.size() << " collision in hash '" << hash_it.first << "':" << logendl; + loginf << "found " << hash_it.second.size() << " collisions in hash '" << hash_it.first << "':" << logendl; + + // find same data collisions + std::map, unsigned int> data_counts; // (cat, record data) -> cnt + std::pair cat_and_recdata; + + std::vector cleaned_hash_map; // removed same cat & data occurences + ss_same_data_indexes.str(""); for (auto& rec_it : hash_it.second) { - if (record_data_present_) - loginf << "\t cat " << rec_it.category_ << " data block index " << rec_it.index_ << " length " - << rec_it.length_ << " data '" << rec_it.record_data_ << "'" << logendl; - else - loginf << "\t cat " << rec_it.category_ << " data block index " << rec_it.index_ << " length " - << rec_it.length_ << logendl; - ++collision_cnt; // one for each + cat_and_recdata = std::make_pair(rec_it.category_, rec_it.record_data_); + if (data_counts.count(cat_and_recdata)) // same data found, just increase counter + { + data_counts[cat_and_recdata]++; + ss_same_data_indexes << "," << rec_it.index_; + } + else // new data found, add to cleaned map and initialize counter + { + cleaned_hash_map.push_back(rec_it); + data_counts[cat_and_recdata] = 1; + ss_same_data_indexes << rec_it.index_; + } + } + + for (auto& cnt_it : data_counts) + { + if (cnt_it.second > 1) + { + loginf << "\t " << cnt_it.second << " same data collisions in cat " << cnt_it.first.first + << " data '" << cnt_it.first.second << "' at indexes " << ss_same_data_indexes.str() + << logendl; + same_data_collision_cnt += cnt_it.second; + same_data_collisions_per_cat[cnt_it.first.first] += cnt_it.second; + } + } + + if (cleaned_hash_map.size() > 1) + { + loginf << "\t found different collisions:" << logendl; + +// for (auto& rec_it : cleaned_hash_map) // hash_it.second +// { +// loginf << "\t cat " << rec_it.category_ << " data '" << rec_it.record_data_ +// << "' index " << rec_it.index_ << logendl; +// // << " length " << rec_it.length_ +// ++different_data_collision_cnt; // one for each +// } + + for (auto rec_it=cleaned_hash_map.begin(); rec_it!=cleaned_hash_map.end(); ++rec_it) // hash_it.second + { + loginf << "\t cat " << rec_it->category_ << " data '" << rec_it->record_data_ + << "' index " << rec_it->index_ << logendl; + // << " length " << rec_it.length_ + ++different_data_collision_cnt; // one for each + + // iterate over rest of collisions and mark each + for (auto rest_rec_it = rec_it+1; rest_rec_it != cleaned_hash_map.end(); ++rest_rec_it) + { + unsigned int min_cat = std::min(rec_it->category_, rest_rec_it->category_); + unsigned int max_cat = std::max(rec_it->category_, rest_rec_it->category_); + diffent_data_collisions_per_cat [std::make_pair(min_cat, max_cat)] += 1; + } + } } } record_cnt += hash_it.second.size(); } - if (record_cnt) - loginf << "found " << collision_cnt << " collisions in " << record_cnt << " records (" - << 100.0*collision_cnt/record_cnt << "%)" << logendl; - else - loginf << "found " << collision_cnt << " collisions in " << record_cnt << " records" << logendl; + if (!record_cnt) + { + loginf << "no data found" << logendl; + return; + } + + loginf << logendl; + loginf << "found " << same_data_collision_cnt << " same data collisions in " << record_cnt << " records (" + << 100.0*same_data_collision_cnt/record_cnt << "%)" << logendl; + if (same_data_collision_cnt) + { + loginf << "same data collisions per category: " << logendl; + for (auto& cat_it : same_data_collisions_per_cat) + loginf << "\t cat " << cat_it.first << " count " << cat_it.second << " (" + << 100.0*cat_it.second/record_cnt << "%)"<< logendl; + loginf << logendl; + } + + loginf << "found " << different_data_collision_cnt << " different data collisions in " << record_cnt + << " records (" << 100.0*different_data_collision_cnt/record_cnt << "%)" << logendl; + if (different_data_collision_cnt) + { + loginf << "different data collisions per category: " << logendl; + for (auto& cat_it : diffent_data_collisions_per_cat) + loginf << "\t cat <" << cat_it.first.first << "," << cat_it.first.second << "> count " << cat_it.second + << " (" << 100.0*cat_it.second/record_cnt << "%)"<< logendl; + } } diff --git a/src/utils/hashchecker.h b/src/utils/hashchecker.h index 2317d26..1b2ab65 100644 --- a/src/utils/hashchecker.h +++ b/src/utils/hashchecker.h @@ -26,15 +26,15 @@ class HashChecker private: bool framing_used_{false}; - bool record_data_present_ {false}; std::map > hash_map_; // hash -> [RecordInfo] // data block index, length - void processRecord (unsigned int category, unsigned int index, unsigned int length, nlohmann::json& record); + void processRecord (unsigned int category, nlohmann::json& record); }; -extern bool check_artas_md5_hash; +extern std::string check_artas_md5_hash; +extern std::vector check_artas_md5_categories; extern HashChecker* hash_checker; extern void check_callback (std::unique_ptr data_chunk, size_t num_frames, size_t num_records,