diff --git a/.gitignore b/.gitignore index bceedb8d6..5096ee10c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ .DS_Store .vscode/* src/*.o +src/xlcpp/*.o src/*.so src/*.dll src-i386/*.dll diff --git a/DESCRIPTION b/DESCRIPTION index befc45c3d..9aab09023 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,3 +43,5 @@ Roxygen: list(markdown = TRUE) Config/testthat/edition: 3 Config/testthat/parallel: false Config/testthat/start-first: aaa +SystemRequirements: C++20 + diff --git a/R/RcppExports.R b/R/RcppExports.R index 3ab1ad0b4..212d4424d 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,6 +1,10 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 +read_encryption <- function(PATH, OUT, PASSWORD) { + invisible(.Call(`_openxlsx2_read_encryption`, PATH, OUT, PASSWORD)) +} + #' Check if path is to long to be an R file path #' @param path the file path used in file.exists() #' @noRd diff --git a/R/wb_load.R b/R/wb_load.R index 29d9a5309..b2fe24734 100644 --- a/R/wb_load.R +++ b/R/wb_load.R @@ -40,6 +40,7 @@ wb_load <- function( ... ) { + password <- NULL standardize_case_names(...) file <- xlsx_file %||% file @@ -49,6 +50,14 @@ wb_load <- function( stop("File does not exist.") } + pwd <- list(...)$password + + if (!is.null(pwd)) { + unencrypted_xlsx <- temp_xlsx() + read_encryption(PATH = path.expand(file), OUT = unencrypted_xlsx, PASSWORD = pwd) + file <- unencrypted_xlsx + } + ## create temp dir xmlDir <- temp_dir("_openxlsx_wb_load") diff --git a/src/Makevars b/src/Makevars index ee4055f80..1056930a6 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1 +1,34 @@ -PKG_CPPFLAGS = -I. -I../inst/include/pugixml \ No newline at end of file + +PKG_CXXFLAGS = -I. 
-Ixlcpp -I../inst/include/pugixml -DXLCPP_EXPORT + +PKG_LIBS = -Lpugixml -Lxlcpp + +PKGROOT = ./xlcpp + +OBJECTS = decrypt.o \ + helper_functions.o \ + load_workbook.o \ + pugi.o \ + strings_xml.o \ + styles_xml.o \ + write_file.o \ + $(PKGROOT)/aes.o \ + $(PKGROOT)/b64.o \ + $(PKGROOT)/cfbf.o \ + $(PKGROOT)/sha1.o \ + $(PKGROOT)/sha512.o \ + $(PKGROOT)/xlcpp.o \ + $(PKGROOT)/xml-reader.o \ + RcppExports.o + +XLCPP = $(PKGROOT)/aes.cpp \ + $(PKGROOT)/b64.cpp \ + $(PKGROOT)/cfbf.cpp \ + $(PKGROOT)/sha1.cpp \ + $(PKGROOT)/sha512.cpp \ + $(PKGROOT)/xml-reader.cpp \ + $(PKGROOT)/xlcpp.cpp + +PUGIXML = ../inst/include/pugixml/pugixml.cpp + +# CXX_STD=CXX20 diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 22ac1e9d4..fefe5b0ec 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -11,6 +11,18 @@ Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif +// read_encryption +void read_encryption(std::string PATH, std::string OUT, std::string PASSWORD); +RcppExport SEXP _openxlsx2_read_encryption(SEXP PATHSEXP, SEXP OUTSEXP, SEXP PASSWORDSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< std::string >::type PATH(PATHSEXP); + Rcpp::traits::input_parameter< std::string >::type OUT(OUTSEXP); + Rcpp::traits::input_parameter< std::string >::type PASSWORD(PASSWORDSEXP); + read_encryption(PATH, OUT, PASSWORD); + return R_NilValue; +END_RCPP +} // to_long bool to_long(std::string path); RcppExport SEXP _openxlsx2_to_long(SEXP pathSEXP) { @@ -842,6 +854,7 @@ END_RCPP } static const R_CallMethodDef CallEntries[] = { + {"_openxlsx2_read_encryption", (DL_FUNC) &_openxlsx2_read_encryption, 3}, {"_openxlsx2_to_long", (DL_FUNC) &_openxlsx2_to_long, 1}, {"_openxlsx2_openxlsx2_type", (DL_FUNC) &_openxlsx2_openxlsx2_type, 1}, {"_openxlsx2_int_to_col", (DL_FUNC) &_openxlsx2_int_to_col, 1}, diff --git a/src/decrypt.cpp b/src/decrypt.cpp new file mode 100644 index 000000000..6063624e9 
--- /dev/null +++ b/src/decrypt.cpp @@ -0,0 +1,11 @@ +#include + +// [[Rcpp::export]] +void read_encryption(std::string PATH, std::string OUT, std::string PASSWORD) { + + + xlcpp::workbook wb(PATH, PASSWORD, OUT); + + // this creates xlcpp workbook, not the unzipped xlsx file + // wb.save(OUT); +} diff --git a/src/xlcpp/aes.cpp b/src/xlcpp/aes.cpp new file mode 100644 index 000000000..683b2c397 --- /dev/null +++ b/src/xlcpp/aes.cpp @@ -0,0 +1,466 @@ +/* + +This is an implementation of the AES algorithm, specifically ECB, CTR and CBC mode. +Block size can be chosen in aes.h - available choices are AES128, AES192, AES256. + +The implementation is verified against the test vectors in: + National Institute of Standards and Technology Special Publication 800-38A 2001 ED + +ECB-AES128 +---------- + + plain-text: + 6bc1bee22e409f96e93d7e117393172a + ae2d8a571e03ac9c9eb76fac45af8e51 + 30c81c46a35ce411e5fbc1191a0a52ef + f69f2445df4f9b17ad2b417be66c3710 + + key: + 2b7e151628aed2a6abf7158809cf4f3c + + resulting cipher + 3ad77bb40d7a3660a89ecaf32466ef97 + f5d3d58503b9699de785895a96fdbaaf + 43b1cd7f598ece23881b00e3ed030688 + 7b0c785e27e8ad3f8223207104725dd4 + + +NOTE: String length must be evenly divisible by 16byte (str_len % 16 == 0) + You should pad the end of the string with zeros if this is not the case. + For AES192/256 the key size is proportionally larger. + +*/ + +/*****************************************************************************/ +/* Includes: */ +/*****************************************************************************/ +#include "aes.h" +#include + +/*****************************************************************************/ +/* Defines: */ +/*****************************************************************************/ +// The number of columns comprising a state in AES. This is a constant in AES. Value=4 +#define Nb 4 + +#define Nk_128 4 // The number of 32 bit words in a key. +#define Nr_128 10 // The number of rounds in AES Cipher. 
+ +#define Nk_256 8 +#define Nr_256 14 + +/*****************************************************************************/ +/* Private variables: */ +/*****************************************************************************/ +// state - array holding the intermediate results during decryption. +typedef uint8_t state_t[4][4]; + +// The lookup-tables are marked const so they can be placed in read-only storage instead of RAM +// The numbers below can be computed dynamically trading ROM for RAM - +// This can be useful in (embedded) bootloader applications, where ROM is often limited. +static const uint8_t sbox[256] = { + //0 1 2 3 4 5 6 7 8 9 A B C D E F + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 
0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 }; + +static const uint8_t rsbox[256] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d }; + +// The round constant word array, Rcon[i], contains the values given by +// x to the power (i-1) being powers of x (x is denoted as {02}) in the field 
GF(2^8) +static const uint8_t Rcon[11] = { + 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 }; + +// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states. +template +static void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key) { + uint8_t tempa[4]; // Used for the column/row operations + + // The first round key is the key itself. + for (unsigned int i = 0; i < Nk; i++) { + RoundKey[i * 4] = Key[i * 4]; + RoundKey[(i * 4) + 1] = Key[(i * 4) + 1]; + RoundKey[(i * 4) + 2] = Key[(i * 4) + 2]; + RoundKey[(i * 4) + 3] = Key[(i * 4) + 3]; + } + + // All other round keys are found from the previous round keys. + for (unsigned int i = Nk; i < Nb * (Nr + 1); i++) { + tempa[0] = RoundKey[(i * 4) - 4]; + tempa[1] = RoundKey[(i * 4) - 3]; + tempa[2] = RoundKey[(i * 4) - 2]; + tempa[3] = RoundKey[(i * 4) - 1]; + + if (i % Nk == 0) { + // This function shifts the 4 bytes in a word to the left once. + // [a0,a1,a2,a3] becomes [a1,a2,a3,a0] + + // Function RotWord() + { + const uint8_t u8tmp = tempa[0]; + tempa[0] = tempa[1]; + tempa[1] = tempa[2]; + tempa[2] = tempa[3]; + tempa[3] = u8tmp; + } + + // SubWord() is a function that takes a four-byte input word and + // applies the S-box to each of the four bytes to produce an output word. 
+ + // Function Subword() + { + tempa[0] = sbox[tempa[0]]; + tempa[1] = sbox[tempa[1]]; + tempa[2] = sbox[tempa[2]]; + tempa[3] = sbox[tempa[3]]; + } + + tempa[0] = tempa[0] ^ Rcon[i/Nk]; + } + + if constexpr (Nk == Nk_256) { + if (i % Nk == 4) { + // Function Subword() + tempa[0] = sbox[tempa[0]]; + tempa[1] = sbox[tempa[1]]; + tempa[2] = sbox[tempa[2]]; + tempa[3] = sbox[tempa[3]]; + } + } + + RoundKey[i * 4] = RoundKey[(i - Nk) * 4] ^ tempa[0]; + RoundKey[(i * 4) + 1] = RoundKey[((i - Nk) * 4) + 1] ^ tempa[1]; + RoundKey[(i * 4) + 2] = RoundKey[((i - Nk) * 4) + 2] ^ tempa[2]; + RoundKey[(i * 4) + 3] = RoundKey[((i - Nk) * 4) + 3] ^ tempa[3]; + } +} + +void AES128_init_ctx(struct AES_ctx* ctx, const uint8_t* key) { + KeyExpansion(ctx->RoundKey, key); +} + +void AES128_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv) { + KeyExpansion(ctx->RoundKey, key); + memcpy(ctx->Iv, iv, AES_BLOCKLEN); +} + +void AES256_init_ctx(struct AES_ctx* ctx, const uint8_t* key) { + KeyExpansion(ctx->RoundKey, key); +} + +void AES256_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv) { + KeyExpansion(ctx->RoundKey, key); + memcpy(ctx->Iv, iv, AES_BLOCKLEN); +} + +// This function adds the round key to state. +// The round key is added to the state by an XOR function. +static void AddRoundKey(uint8_t round, state_t& state, const uint8_t* RoundKey) { + for (uint8_t i = 0; i < 4; i++) { + for (uint8_t j = 0; j < 4; ++j) { + state[i][j] ^= RoundKey[(round * Nb * 4) + (i * Nb) + j]; + } + } +} + +// The SubBytes Function Substitutes the values in the +// state matrix with values in an S-box. +static void SubBytes(state_t& state) { + for (uint8_t i = 0; i < 4; i++) { + for (uint8_t j = 0; j < 4; j++) { + state[j][i] = sbox[state[j][i]]; + } + } +} + +// The ShiftRows() function shifts the rows in the state to the left. +// Each row is shifted with different offset. +// Offset = Row number. So the first row is not shifted. 
+static void ShiftRows(state_t& state) { + uint8_t temp; + + // Rotate first row 1 columns to left + temp = state[0][1]; + state[0][1] = state[1][1]; + state[1][1] = state[2][1]; + state[2][1] = state[3][1]; + state[3][1] = temp; + + // Rotate second row 2 columns to left + temp = state[0][2]; + state[0][2] = state[2][2]; + state[2][2] = temp; + + temp = state[1][2]; + state[1][2] = state[3][2]; + state[3][2] = temp; + + // Rotate third row 3 columns to left + temp = state[0][3]; + state[0][3] = state[3][3]; + state[3][3] = state[2][3]; + state[2][3] = state[1][3]; + state[1][3] = temp; +} + +static constexpr uint8_t galois_double(uint8_t x) { + return (uint8_t)((x << 1) ^ (((x >> 7) & 1) * 0x1b)); +} + +static_assert(galois_double(0x00) == 0x00); +static_assert(galois_double(0x01) == 0x02); +static_assert(galois_double(0x7f) == 0xfe); +static_assert(galois_double(0x80) == 0x1b); +static_assert(galois_double(0xff) == 0xe5); + +// MixColumns function mixes the columns of the state matrix +static void MixColumns(state_t& state) { + for (uint8_t i = 0; i < 4; i++) { + auto t = state[i][0]; + uint8_t tmp = state[i][0] ^ state[i][1] ^ state[i][2] ^ state[i][3]; + + for (unsigned int j = 0; j < 4; j++) { + uint8_t tm = state[i][j]; + + if (j == 3) + tm ^= t; + else + tm ^= state[i][j+1]; + + tm = galois_double(tm); + state[i][j] ^= tm ^ tmp; + } + } +} + +// Multiply is used to multiply numbers in the field GF(2^8) +static constexpr uint8_t Multiply(uint8_t x, uint8_t y) { + uint8_t ret = (y & 1) * x; + + x = galois_double(x); + + if (y & 2) + ret ^= x; + + x = galois_double(x); + + if (y & 4) + ret ^= x; + + if (y & 8) { + x = galois_double(x); + ret ^= x; + } + + return ret; +} + +// MixColumns function mixes the columns of the state matrix. +// The method used to multiply may be difficult to understand for the inexperienced. +// Please use the references to gain more information. 
+static void InvMixColumns(state_t& state) { + for (unsigned int i = 0; i < 4; i++) { + auto a = state[i][0]; + auto b = state[i][1]; + auto c = state[i][2]; + auto d = state[i][3]; + + state[i][0] = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09); + state[i][1] = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d); + state[i][2] = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b); + state[i][3] = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e); + } +} + + +// The SubBytes Function Substitutes the values in the +// state matrix with values in an S-box. +static void InvSubBytes(state_t& state) { + for (uint8_t i = 0; i < 4; i++) { + for (uint8_t j = 0; j < 4; j++) { + state[j][i] = rsbox[state[j][i]]; + } + } +} + +static void InvShiftRows(state_t& state) { + uint8_t temp; + + // Rotate first row 1 columns to right + temp = state[3][1]; + state[3][1] = state[2][1]; + state[2][1] = state[1][1]; + state[1][1] = state[0][1]; + state[0][1] = temp; + + // Rotate second row 2 columns to right + temp = state[0][2]; + state[0][2] = state[2][2]; + state[2][2] = temp; + + temp = state[1][2]; + state[1][2] = state[3][2]; + state[3][2] = temp; + + // Rotate third row 3 columns to right + temp = state[0][3]; + state[0][3] = state[1][3]; + state[1][3] = state[2][3]; + state[2][3] = state[3][3]; + state[3][3] = temp; +} + +// Cipher is the main function that encrypts the PlainText. +template +static void Cipher(state_t& state, const uint8_t* RoundKey) { + // Add the First round key to the state before starting the rounds. + AddRoundKey(0, state, RoundKey); + + // There will be Nr rounds. + // The first Nr-1 rounds are identical. + // These Nr rounds are executed in the loop below. 
+ // Last one without MixColumns() + for (uint8_t round = 1; ; round++) { + SubBytes(state); + ShiftRows(state); + + if (round == Nr) + break; + + MixColumns(state); + AddRoundKey(round, state, RoundKey); + } + + // Add round key to last round + AddRoundKey(Nr, state, RoundKey); +} + +template +static void InvCipher(state_t& state, const uint8_t* RoundKey) { + // Add the First round key to the state before starting the rounds. + AddRoundKey(Nr, state, RoundKey); + + // There will be Nr rounds. + // The first Nr-1 rounds are identical. + // These Nr rounds are executed in the loop below. + // Last one without InvMixColumn() + for (uint8_t round = Nr - 1; ; round--) { + InvShiftRows(state); + InvSubBytes(state); + AddRoundKey(round, state, RoundKey); + + if (round == 0) + break; + + InvMixColumns(state); + } +} + +/*****************************************************************************/ +/* Public functions: */ +/*****************************************************************************/ + +static void XorWithIv(uint8_t* buf, const uint8_t* Iv) { + for (uint8_t i = 0; i < AES_BLOCKLEN; i++) { // The block in AES is always 128bit no matter the key size + buf[i] ^= Iv[i]; + } +} + +void AES128_ECB_encrypt(const struct AES_ctx* ctx, uint8_t* buf) { + // The next function call encrypts the PlainText with the Key using AES algorithm. + Cipher(*(state_t*)buf, ctx->RoundKey); +} + +void AES128_ECB_decrypt(const struct AES_ctx* ctx, uint8_t* buf) { + // The next function call decrypts the PlainText with the Key using AES algorithm. 
+ InvCipher(*(state_t*)buf, ctx->RoundKey); +} + +void AES128_CBC_encrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length) { + auto Iv = ctx->Iv; + + for (size_t i = 0; i < length; i += AES_BLOCKLEN) { + XorWithIv(buf, Iv); + Cipher(*(state_t*)buf, ctx->RoundKey); + Iv = buf; + buf += AES_BLOCKLEN; + } + /* store Iv in ctx for next call */ + memcpy(ctx->Iv, Iv, AES_BLOCKLEN); +} + +void AES128_CBC_decrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length) { + uint8_t storeNextIv[AES_BLOCKLEN]; + + for (size_t i = 0; i < length; i += AES_BLOCKLEN) { + memcpy(storeNextIv, buf, AES_BLOCKLEN); + InvCipher(*(state_t*)buf, ctx->RoundKey); + XorWithIv(buf, ctx->Iv); + memcpy(ctx->Iv, storeNextIv, AES_BLOCKLEN); + buf += AES_BLOCKLEN; + } +} + +void AES256_ECB_encrypt(const struct AES_ctx* ctx, uint8_t* buf) { + // The next function call encrypts the PlainText with the Key using AES algorithm. + Cipher(*(state_t*)buf, ctx->RoundKey); +} + +void AES256_ECB_decrypt(const struct AES_ctx* ctx, uint8_t* buf) { + // The next function call decrypts the PlainText with the Key using AES algorithm. 
+ InvCipher(*(state_t*)buf, ctx->RoundKey); +} + +void AES256_CBC_encrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length) { + auto Iv = ctx->Iv; + + for (size_t i = 0; i < length; i += AES_BLOCKLEN) { + XorWithIv(buf, Iv); + Cipher(*(state_t*)buf, ctx->RoundKey); + Iv = buf; + buf += AES_BLOCKLEN; + } + /* store Iv in ctx for next call */ + memcpy(ctx->Iv, Iv, AES_BLOCKLEN); +} + +void AES256_CBC_decrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length) { + uint8_t storeNextIv[AES_BLOCKLEN]; + + for (size_t i = 0; i < length; i += AES_BLOCKLEN) { + memcpy(storeNextIv, buf, AES_BLOCKLEN); + InvCipher(*(state_t*)buf, ctx->RoundKey); + XorWithIv(buf, ctx->Iv); + memcpy(ctx->Iv, storeNextIv, AES_BLOCKLEN); + buf += AES_BLOCKLEN; + } +} diff --git a/src/xlcpp/aes.h b/src/xlcpp/aes.h new file mode 100644 index 000000000..bbe1d63c2 --- /dev/null +++ b/src/xlcpp/aes.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + +#define AES_BLOCKLEN 16 // Block length in bytes - AES is 128b block only +#define AES_keyExpSize 240 + +struct AES_ctx { + uint8_t RoundKey[AES_keyExpSize]; + uint8_t Iv[AES_BLOCKLEN]; +}; + +void AES128_init_ctx(struct AES_ctx* ctx, const uint8_t* key); +void AES128_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv); +void AES128_ECB_encrypt(const struct AES_ctx* ctx, uint8_t* buf); +void AES128_ECB_decrypt(const struct AES_ctx* ctx, uint8_t* buf); +void AES128_CBC_encrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length); +void AES128_CBC_decrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length); + +void AES256_init_ctx(struct AES_ctx* ctx, const uint8_t* key); +void AES256_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv); +void AES256_ECB_encrypt(const struct AES_ctx* ctx, uint8_t* buf); +void AES256_ECB_decrypt(const struct AES_ctx* ctx, uint8_t* buf); +void AES256_CBC_encrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length); +void AES256_CBC_decrypt_buffer(struct 
AES_ctx* ctx, uint8_t* buf, size_t length); diff --git a/src/xlcpp/b64.cpp b/src/xlcpp/b64.cpp new file mode 100644 index 000000000..35250950a --- /dev/null +++ b/src/xlcpp/b64.cpp @@ -0,0 +1,106 @@ +/* +* Base64 encoding/decoding (RFC1341) +* Copyright (c) 2005-2011, Jouni Malinen +* +* This software may be distributed under the terms of the BSD license. +* See README for more details. +*/ + +// 2016-12-12 - Gaspard Petit : Slightly modified to return a std::string +// instead of a buffer allocated with malloc. + +#include +#include "b64.h" + +static const unsigned char base64_table[65] = +"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +/** +* base64_encode - Base64 encode +* @src: Data to be encoded +* @len: Length of the data to be encoded +* @out_len: Pointer to output length variable, or %NULL if not used +* Returns: Allocated buffer of out_len bytes of encoded data, +* or empty string on failure +*/ +std::string b64encode(std::string_view sv) { + const unsigned char* src = (const unsigned char*)sv.data(); + size_t len = sv.length(); + unsigned char *out, *pos; + const unsigned char *end, *in; + + size_t olen; + + olen = 4*((len + 2) / 3); /* 3-byte blocks to 4-byte */ + + if (olen < len) + return std::string(); /* integer overflow */ + + std::string outStr; + outStr.resize(olen); + out = (unsigned char*)&outStr[0]; + + end = src + len; + in = src; + pos = out; + while (end - in >= 3) { + *pos++ = base64_table[in[0] >> 2]; + *pos++ = base64_table[((in[0] & 0x03) << 4) | (in[1] >> 4)]; + *pos++ = base64_table[((in[1] & 0x0f) << 2) | (in[2] >> 6)]; + *pos++ = base64_table[in[2] & 0x3f]; + in += 3; + } + + if (end - in) { + *pos++ = base64_table[in[0] >> 2]; + if (end - in == 1) { + *pos++ = base64_table[(in[0] & 0x03) << 4]; + *pos++ = '='; + } + else { + *pos++ = base64_table[((in[0] & 0x03) << 4) | + (in[1] >> 4)]; + *pos++ = base64_table[(in[1] & 0x0f) << 2]; + } + *pos++ = '='; + } + + return outStr; +} + +static const int 
B64index[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 63, 62, 62, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 63, + 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 +}; + +std::string b64decode(std::string_view sv) { + auto p = (unsigned char*)sv.data(); + int pad = sv.length() > 0 && (sv.length() % 4 || p[sv.length() - 1] == '='); + const size_t L = ((sv.length() + 3) / 4 - pad) * 4; + std::string str(L / 4 * 3 + pad, '\0'); + + for (size_t i = 0, j = 0; i < L; i += 4) { + int n = B64index[p[i]] << 18 | B64index[p[i + 1]] << 12 | B64index[p[i + 2]] << 6 | B64index[p[i + 3]]; + str[j++] = (char)(n >> 16); + str[j++] = (char)(n >> 8 & 0xFF); + str[j++] = (char)(n & 0xFF); + } + + if (pad) { + int n = B64index[p[L]] << 18 | B64index[p[L + 1]] << 12; + str[str.size() - 1] = (char)(n >> 16); + + if (sv.length() > L + 2 && p[L + 2] != '=') { + n |= B64index[p[L + 2]] << 6; + str.push_back((char)(n >> 8 & 0xFF)); + } + } + + return str; +} diff --git a/src/xlcpp/b64.h b/src/xlcpp/b64.h new file mode 100644 index 000000000..01dfaa9cb --- /dev/null +++ b/src/xlcpp/b64.h @@ -0,0 +1,6 @@ +#pragma once + +#include + +std::string b64encode(std::string_view sv); +std::string b64decode(std::string_view sv); diff --git a/src/xlcpp/cfbf.cpp b/src/xlcpp/cfbf.cpp new file mode 100644 index 000000000..2729c189b --- /dev/null +++ b/src/xlcpp/cfbf.cpp @@ -0,0 +1,846 @@ +#include "openxlsx2.h" + +#include +#include +#include "cfbf.h" +#include "utf16.h" +#include "sha1.h" +#include "sha512.h" +#include "aes.h" +#include "b64.h" +#include "xlcpp-pimpl.h" + +using namespace std; + +static const uint32_t NOSTREAM = 0xffffffff; + +struct structured_storage_header { + uint64_t sig; + uint8_t clsid[16]; 
+ uint16_t minor_version; + uint16_t major_version; + uint16_t byte_order; + uint16_t sector_shift; + uint16_t mini_sector_shift; + uint16_t reserved1; + uint32_t reserved2; + uint32_t num_sect_dir; + uint32_t num_sect_fat; + uint32_t sect_dir_start; + uint32_t transaction_signature; + uint32_t mini_sector_cutoff; + uint32_t mini_fat_start; + uint32_t num_sect_mini_fat; + uint32_t sect_dif_start; + uint32_t num_sect_dif; + uint32_t sect_dif[109]; +}; + +static_assert(sizeof(structured_storage_header) == 0x200); + +enum class obj_type : uint8_t { + STGTY_INVALID = 0, + STGTY_STORAGE = 1, + STGTY_STREAM = 2, + STGTY_LOCKBYTES = 3, + STGTY_PROPERTY = 4, + STGTY_ROOT = 5 +}; + +enum class tree_colour : uint8_t { + red = 0, + black = 1 +}; + +#pragma pack(push,1) + +struct dirent { + char16_t name[32]; + uint16_t name_len; + obj_type type; + tree_colour colour; + uint32_t sid_left_sibling; + uint32_t sid_right_sibling; + uint32_t sid_child; + uint8_t clsid[16]; + uint32_t user_flags; + uint64_t create_time; + uint64_t modify_time; + uint32_t sect_start; + uint64_t size; +}; + +#pragma pack(pop) + +static_assert(sizeof(dirent) == 0x80); + +static const string_view NS_ENCRYPTION = "http://schemas.microsoft.com/office/2006/encryption"; +static const string_view NS_PASSWORD = "http://schemas.microsoft.com/office/2006/keyEncryptor/password"; + +cfbf::cfbf(span s) : s(s) { + auto& ssh = *(structured_storage_header*)s.data(); + + if (ssh.sig != CFBF_SIGNATURE) + Rcpp::stop("Incorrect signature."); + + auto& de = *(dirent*)(s.data() + (ssh.sect_dir_start + 1) * (1 << ssh.sector_shift)); + + if (de.type != obj_type::STGTY_ROOT) + Rcpp::stop("Root directory entry did not have type STGTY_ROOT."); + + add_entry("", 0, false); +} + +const dirent& cfbf::find_dirent(uint32_t num) { + auto& ssh = *(structured_storage_header*)s.data(); + + auto dirents_per_sector = (1 << ssh.sector_shift) / sizeof(dirent); + auto sector_skip = num / dirents_per_sector; + auto sector = 
ssh.sect_dir_start; + + while (sector_skip > 0) { + sector = next_sector(sector); + sector_skip--; + } + + return *(dirent*)(s.data() + ((sector + 1) << ssh.sector_shift) + ((num % dirents_per_sector) * sizeof(dirent))); +} + +void cfbf::add_entry(string_view path, uint32_t num, bool ignore_right) { + const auto& de = find_dirent(num); + + if (de.sid_left_sibling != NOSTREAM) + add_entry(path, de.sid_left_sibling, true); + + auto name = de.name_len >= sizeof(char16_t) && num != 0 ? utf16_to_utf8(u16string_view(de.name, (de.name_len / sizeof(char16_t)) - 1)) : ""; + + entries.emplace_back(*this, de, string(path) + name); + + if (de.sid_child != NOSTREAM) + add_entry(string(path) + string(name) + "/", de.sid_child, false); + + if (!ignore_right && de.sid_right_sibling != NOSTREAM) + add_entry(path, de.sid_right_sibling, false); +} + +cfbf_entry::cfbf_entry(cfbf& file, const dirent& de, string_view name) : file(file), de(de), name(name) { +} + +uint32_t cfbf::next_sector(uint32_t sector) const { + auto& ssh = *(structured_storage_header*)s.data(); + auto sectors_per_dif = (1 << ssh.sector_shift) / sizeof(uint32_t); + auto fat = (uint32_t*)(s.data() + ((ssh.sect_dif[sector / sectors_per_dif] + 1) << ssh.sector_shift)); + + return fat[sector % sectors_per_dif]; +} + +uint32_t cfbf::next_mini_sector(uint32_t sector) const { + auto& ssh = *(structured_storage_header*)s.data(); + auto mini_fat = (uint32_t*)(s.data() + ((ssh.mini_fat_start + 1) << ssh.sector_shift)); + + return mini_fat[sector]; +} + +size_t cfbf_entry::read(span buf, uint64_t off) const { + auto& ssh = *(structured_storage_header*)file.s.data(); + + if (off >= de.size) + return 0; + + if (off + buf.size() > de.size) + buf = buf.subspan(0, de.size - off); + + size_t read = 0; + + if (de.size < ssh.mini_sector_cutoff) { + auto mini_sector = de.sect_start; + auto mini_sector_skip = off >> ssh.mini_sector_shift; + + for (unsigned int i = 0; i < mini_sector_skip; i++) { + mini_sector = 
file.next_mini_sector(mini_sector); + } + + auto mini_sectors_per_sector = 1 << (ssh.sector_shift - ssh.mini_sector_shift); + + do { + auto mini_stream_sector = mini_sector / mini_sectors_per_sector; + auto sector = file.entries[0].de.sect_start; + + while (mini_stream_sector > 0) { + sector = file.next_sector(sector); + mini_stream_sector--; + } + + auto src = file.s.subspan(((sector + 1) << ssh.sector_shift) + ((mini_sector % mini_sectors_per_sector) << ssh.mini_sector_shift), 1 << ssh.mini_sector_shift); + auto to_copy = min(src.size(), buf.size()); + + memcpy(buf.data(), src.data(), to_copy); + + read += to_copy; + buf = buf.subspan(to_copy); + + if (buf.empty()) + break; + + mini_sector = file.next_mini_sector(mini_sector); + } while (true); + } else { + auto sector = de.sect_start; + auto sector_skip = off >> ssh.sector_shift; + + for (unsigned int i = 0; i < sector_skip; i++) { + sector = file.next_sector(sector); + } + + do { + auto src = file.s.subspan((sector + 1) << ssh.sector_shift, 1 << ssh.sector_shift); + auto to_copy = min(src.size(), buf.size()); + + memcpy(buf.data(), src.data(), to_copy); + + read += to_copy; + buf = buf.subspan(to_copy); + + if (buf.empty()) + break; + + sector = file.next_sector(sector); + } while (true); + } + + return read; +} + +size_t cfbf_entry::get_size() const { + return de.size; +} + +static array generate_key(u16string_view password, span salt, unsigned int spin_count) { + array h; + + { + SHA1_CTX ctx; + + ctx.update(salt); + ctx.update(span((uint8_t*)password.data(), password.size() * sizeof(char16_t))); + + ctx.finalize(h); + } + + for (uint32_t i = 0; i < spin_count; i++) { + SHA1_CTX ctx; + + ctx.update(span((uint8_t*)&i, sizeof(uint32_t))); + ctx.update(h); + + ctx.finalize(h); + } + + { + SHA1_CTX ctx; + uint32_t block = 0; + + ctx.update(h); + ctx.update(span((uint8_t*)&block, sizeof(uint32_t))); + + ctx.finalize(h); + } + + array buf1 = { + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 
0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36 + }; + + for (unsigned int i = 0; auto c : h) { + buf1[i] ^= c; + i++; + } + + auto x1 = sha1(buf1); + + array ret; + memcpy(ret.data(), x1.data(), ret.size()); + + return ret; +} + +static void generate_key44_sha1(u16string_view password, span salt, unsigned int spin_count, + span block_key, span ret) { + array h; + + { + SHA1_CTX ctx; + + ctx.update(salt); + ctx.update(span((uint8_t*)password.data(), password.size() * sizeof(char16_t))); + + ctx.finalize(h); + } + + for (uint32_t i = 0; i < spin_count; i++) { + SHA1_CTX ctx; + + ctx.update(span((uint8_t*)&i, sizeof(uint32_t))); + ctx.update(h); + + ctx.finalize(h); + } + + { + SHA1_CTX ctx; + + ctx.update(h); + ctx.update(block_key); + + ctx.finalize(h); + } + + memcpy(ret.data(), h.data(), 16); +} + +static void generate_key44_sha512(u16string_view password, span salt, unsigned int spin_count, + span block_key, span ret) { + array h; + + { + sha512_state ctx; + + sha_init(ctx); + sha_process(ctx, salt.data(), (uint32_t)salt.size()); + sha_process(ctx, password.data(), (uint32_t)(password.size() * sizeof(char16_t))); + sha_done(ctx, h.data()); + } + + for (uint32_t i = 0; i < spin_count; i++) { + sha512_state ctx; + + sha_init(ctx); + sha_process(ctx, &i, sizeof(uint32_t)); + sha_process(ctx, h.data(), h.size()); + sha_done(ctx, h.data()); + } + + { + sha512_state ctx; + + sha_init(ctx); + sha_process(ctx, h.data(), h.size()); + sha_process(ctx, block_key.data(), (uint32_t)block_key.size()); + sha_done(ctx, h.data()); + } + + memcpy(ret.data(), h.data(), 64); +} + +#pragma pack(push, 1) + +struct encryption_info { + uint16_t major; + uint16_t minor; + uint32_t flags; + uint32_t header_size; 
+}; + +struct encryption_header { + uint32_t flags; + uint32_t size_extra; + uint32_t alg_id; + uint32_t alg_id_hash; + uint32_t key_size; + uint32_t provider_type; + uint32_t reserved1; + uint32_t reserved2; + char16_t csp_name; +}; + +#pragma pack(pop) + +static const uint32_t ALG_ID_AES_128 = 0x660e; +static const uint32_t ALG_ID_SHA_1 = 0x8004; + +void cfbf::check_password(u16string_view password, span salt, + span encrypted_verifier, + span encrypted_verifier_hash) { + auto key = generate_key(password, salt, 50000); + AES_ctx ctx; + array verifier; + array verifier_hash; + + if (encrypted_verifier.size() != verifier.size()) + Rcpp::stop("encrypted_verifier.size() was {}, expected {}", encrypted_verifier.size(), verifier.size()); + + if (encrypted_verifier_hash.size() != verifier_hash.size()) + Rcpp::stop("encrypted_verifier_hash.size() was {}, expected {}", encrypted_verifier_hash.size(), verifier_hash.size()); + + AES128_init_ctx(&ctx, key.data()); + + memcpy(verifier.data(), encrypted_verifier.data(), encrypted_verifier.size()); + + AES128_ECB_decrypt(&ctx, verifier.data()); + +#if 0 + fmt::print("verifier = "); + for (auto c : verifier) { + fmt::print("{:02x} ", c); + } + fmt::print("\n"); +#endif + + memcpy(verifier_hash.data(), encrypted_verifier_hash.data(), encrypted_verifier_hash.size()); + + AES128_ECB_decrypt(&ctx, verifier_hash.data()); + AES128_ECB_decrypt(&ctx, verifier_hash.data() + 16); + +#if 0 + fmt::print("verifier hash = "); + for (auto c : verifier_hash) { + fmt::print("{:02x} ", c); + } + fmt::print("\n"); +#endif + + auto hash = sha1(verifier); + + if (memcmp(hash.data(), verifier_hash.data(), hash.size())) + Rcpp::stop("Incorrect password."); + + key_size = 16; + memcpy(this->key.data(), key.data(), key_size); +} + +void cfbf::parse_enc_info_44(span enc_info, u16string_view password) { + enc_info = enc_info.subspan(sizeof(uint32_t)); + + if (enc_info.size() < sizeof(uint32_t) || *(uint32_t*)enc_info.data() != 0x40) + 
Rcpp::stop("EncryptionInfo reserved value was not 0x40."); + + enc_info = enc_info.subspan(sizeof(uint32_t)); + + xml_reader r(string_view((char*)enc_info.data(), enc_info.size())); + + bool found_root = false, found_key_data = false, found_password = false; + + while (r.read()) { + if (r.node_type() == xml_node::element) { + if (!found_root) { + if (r.local_name() != "encryption" || !r.namespace_uri_raw().cmp(NS_ENCRYPTION)) + Rcpp::stop("Root tag was {{{}}}{}, expected {{{}}}encryption.", + r.namespace_uri_raw().decode(), r.local_name(), NS_ENCRYPTION); + + found_root = true; + } else { + if (r.local_name() == "keyData" && r.namespace_uri_raw().cmp(NS_ENCRYPTION)) { + string salt_value_b64, cipher_algorithm, key_bits_str, cipher_chaining, hash_algorithm; + unsigned int key_bits; + + r.attributes_loop_raw([&](string_view local_name, xml_enc_string_view namespace_uri_raw, + xml_enc_string_view value_raw) { + + if (local_name == "saltValue") + salt_value_b64 = value_raw.decode(); + else if (local_name == "cipherAlgorithm") + cipher_algorithm = value_raw.decode(); + else if (local_name == "keyBits") + key_bits_str = value_raw.decode(); + else if (local_name == "cipherChaining") + cipher_chaining = value_raw.decode(); + else if (local_name == "hashAlgorithm") + hash_algorithm = value_raw.decode(); + + return true; + }); + + if (salt_value_b64.empty()) + Rcpp::stop("saltValue not set"); + + if (cipher_algorithm.empty()) + Rcpp::stop("cipherAlgorithm not set"); + + if (key_bits_str.empty()) + Rcpp::stop("keyBits not set"); + + if (cipher_chaining.empty()) + Rcpp::stop("cipherChaining not set"); + + if (hash_algorithm.empty()) + Rcpp::stop("hashAlgorithm not set"); + + auto salt_value = b64decode(salt_value_b64); + + if (cipher_algorithm != "AES") + Rcpp::stop("cipherAlgorithm was {}, expected AES", cipher_algorithm); + + { + auto [ptr, ec] = from_chars(key_bits_str.data(), key_bits_str.data() + key_bits_str.size(), key_bits); + + if (ptr != key_bits_str.data() + 
key_bits_str.size()) + Rcpp::stop("Could not convert \"{}\" to integer.", key_bits_str); + } + + if (key_bits != 128 && key_bits != 256) + Rcpp::stop("keyBits was {}, expected 128 or 256", key_bits); + + if (cipher_chaining != "ChainingModeCBC") + Rcpp::stop("cipherChaining was {}, expected ChainingModeCBC", cipher_chaining); + + if (hash_algorithm != "SHA1" && hash_algorithm != "SHA512") + Rcpp::stop("hashAlgorithm was {}, expected SHA1 or SHA512", hash_algorithm); + + memcpy(salt.data(), salt_value.data(), min(salt_value.size(), salt.size())); + found_key_data = true; + } else if (r.local_name() == "encryptedKey" && r.namespace_uri_raw().cmp(NS_PASSWORD)) { + string spin_count_str, salt_value_b64, cipher_algorithm, key_bits_str, cipher_chaining, hash_algorithm, + encrypted_verifier_hash_input_b64, encrypted_verifier_hash_value_b64, encrypted_key_value_b64; + unsigned int spin_count, key_bits; + + r.attributes_loop_raw([&](string_view local_name, xml_enc_string_view namespace_uri_raw, + xml_enc_string_view value_raw) { + + if (local_name == "spinCount") + spin_count_str = value_raw.decode(); + else if (local_name == "saltValue") + salt_value_b64 = value_raw.decode(); + else if (local_name == "cipherAlgorithm") + cipher_algorithm = value_raw.decode(); + else if (local_name == "keyBits") + key_bits_str = value_raw.decode(); + else if (local_name == "cipherChaining") + cipher_chaining = value_raw.decode(); + else if (local_name == "hashAlgorithm") + hash_algorithm = value_raw.decode(); + else if (local_name == "encryptedVerifierHashInput") + encrypted_verifier_hash_input_b64 = value_raw.decode(); + else if (local_name == "encryptedVerifierHashValue") + encrypted_verifier_hash_value_b64 = value_raw.decode(); + else if (local_name == "encryptedKeyValue") + encrypted_key_value_b64 = value_raw.decode(); + + return true; + }); + + if (spin_count_str.empty()) + Rcpp::stop("spinCount not set"); + + if (salt_value_b64.empty()) + Rcpp::stop("saltValue not set"); + + if 
(cipher_algorithm.empty()) + Rcpp::stop("cipherAlgorithm not set"); + + if (key_bits_str.empty()) + Rcpp::stop("keyBits not set"); + + if (cipher_chaining.empty()) + Rcpp::stop("cipherChaining not set"); + + if (hash_algorithm.empty()) + Rcpp::stop("hashAlgorithm not set"); + + if (encrypted_verifier_hash_input_b64.empty()) + Rcpp::stop("encryptedVerifierHashInput not set"); + + if (encrypted_verifier_hash_value_b64.empty()) + Rcpp::stop("encryptedVerifierHashValue not set"); + + if (encrypted_key_value_b64.empty()) + Rcpp::stop("encryptedKeyValue not set"); + + { + auto [ptr, ec] = from_chars(spin_count_str.data(), spin_count_str.data() + spin_count_str.size(), spin_count); + + if (ptr != spin_count_str.data() + spin_count_str.size()) + Rcpp::stop("Could not convert \"{}\" to integer.", spin_count_str); + } + + auto salt_value = b64decode(salt_value_b64); + + if (cipher_algorithm != "AES") + Rcpp::stop("cipherAlgorithm was {}, expected AES", cipher_algorithm); + + { + auto [ptr, ec] = from_chars(key_bits_str.data(), key_bits_str.data() + key_bits_str.size(), key_bits); + + if (ptr != key_bits_str.data() + key_bits_str.size()) + Rcpp::stop("Could not convert \"{}\" to integer.", key_bits_str); + } + + if (key_bits != 128 && key_bits != 256) + Rcpp::stop("keyBits was {}, expected 128 or 256", key_bits); + + if (cipher_chaining != "ChainingModeCBC") + Rcpp::stop("cipherChaining was {}, expected ChainingModeCBC", cipher_chaining); + + if (hash_algorithm == "SHA1") + hashalgo = hash_algorithm::sha1; + else if (hash_algorithm == "SHA512") + hashalgo = hash_algorithm::sha512; + else + Rcpp::stop("hashAlgorithm was {}, expected SHA1 or SHA512", hash_algorithm); + + auto encrypted_verifier_hash_input = b64decode(encrypted_verifier_hash_input_b64); + + auto encrypted_verifier_hash_value = b64decode(encrypted_verifier_hash_value_b64); + + auto encrypted_key_value = b64decode(encrypted_key_value_b64); + + static const array block1 = { 0xfe, 0xa7, 0xd2, 0x76, 0x3b, 0x4b, 0x9e, 
0x79 }; + static const array block2 = { 0xd7, 0xaa, 0x0f, 0x6d, 0x30, 0x61, 0x34, 0x4e }; + static const array block3 = { 0x14, 0x6e, 0x0b, 0xe7, 0xab, 0xac, 0xd0, 0xd6 }; + + // FIXME - we can save time by saving the partial hash for key1, key2, and key3 + + array key1, key2, key3; + + if (hashalgo == hash_algorithm::sha512) + generate_key44_sha512(password, span((uint8_t*)salt_value.data(), salt_value.size()), spin_count, block1, key1); + else + generate_key44_sha1(password, span((uint8_t*)salt_value.data(), salt_value.size()), spin_count, block1, key1); + + // FIXME - extend key if short + + AES_ctx ctx; + array verifier; + array verifier_hash; + + if (encrypted_verifier_hash_input.size() != verifier.size()) + Rcpp::stop("encrypted_verifier_hash_input.size() was {}, expected {}", encrypted_verifier_hash_input.size(), verifier.size()); + + if (encrypted_verifier_hash_value.size() > verifier_hash.size()) + Rcpp::stop("encrypted_verifier_hash_value.size() was {}, expected at most {}", encrypted_verifier_hash_value.size(), verifier_hash.size()); + + array iv; + + memcpy(iv.data(), salt_value.data(), min(salt_value.size(), sizeof(iv))); + + if (salt_value.size() < sizeof(iv)) + memset(&iv[salt_value.size()], 0, sizeof(iv) - salt_value.size()); + + memcpy(verifier.data(), encrypted_verifier_hash_input.data(), encrypted_verifier_hash_input.size()); + + if (key_bits == 256) { + AES256_init_ctx_iv(&ctx, key1.data(), iv.data()); + AES256_CBC_decrypt_buffer(&ctx, verifier.data(), verifier.size()); + } else { + AES128_init_ctx_iv(&ctx, key1.data(), iv.data()); + AES128_CBC_decrypt_buffer(&ctx, verifier.data(), verifier.size()); + } + + memcpy(verifier_hash.data(), encrypted_verifier_hash_value.data(), encrypted_verifier_hash_value.size()); + + if (hashalgo == hash_algorithm::sha512) + generate_key44_sha512(password, span((uint8_t*)salt_value.data(), salt_value.size()), spin_count, block2, key2); + else + generate_key44_sha1(password, span((uint8_t*)salt_value.data(), 
salt_value.size()), spin_count, block2, key2); + + if (key_bits == 256) { + AES256_init_ctx_iv(&ctx, key2.data(), iv.data()); + AES256_CBC_decrypt_buffer(&ctx, verifier_hash.data(), verifier_hash.size()); + } else { + AES128_init_ctx_iv(&ctx, key2.data(), iv.data()); + AES128_CBC_decrypt_buffer(&ctx, verifier_hash.data(), verifier_hash.size()); + } + + if (hashalgo == hash_algorithm::sha512) { + array hash; + sha512_state ctx; + + sha_init(ctx); + sha_process(ctx, verifier.data(), (uint32_t)verifier.size()); + sha_done(ctx, hash.data()); + + if (memcmp(hash.data(), verifier_hash.data(), hash.size())) + Rcpp::stop("Incorrect password."); + } else { + auto hash = sha1(verifier); + if (memcmp(hash.data(), verifier_hash.data(), hash.size())) + Rcpp::stop("Incorrect password."); + } + + if (hashalgo == hash_algorithm::sha512) + generate_key44_sha512(password, span((uint8_t*)salt_value.data(), salt_value.size()), spin_count, block3, key3); + else + generate_key44_sha1(password, span((uint8_t*)salt_value.data(), salt_value.size()), spin_count, block3, key3); + + if (key_bits == 256) { + AES256_init_ctx_iv(&ctx, key3.data(), iv.data()); + AES256_CBC_decrypt_buffer(&ctx, (uint8_t*)encrypted_key_value.data(), encrypted_key_value.size()); + } else { + AES128_init_ctx_iv(&ctx, key3.data(), iv.data()); + AES128_CBC_decrypt_buffer(&ctx, (uint8_t*)encrypted_key_value.data(), encrypted_key_value.size()); + } + + key_size = key_bits / 8; + memcpy(key.data(), encrypted_key_value.data(), min((size_t)key_size, encrypted_key_value.size())); + + found_password = true; + } + } + } + } + + if (!found_key_data) + Rcpp::stop("keyData not found"); + + if (!found_password) + Rcpp::stop("encryptedKey not found"); + + agile_enc = true; +} + +void cfbf::parse_enc_info(span enc_info, u16string_view password) { + if (enc_info.size() < sizeof(encryption_info)) + Rcpp::stop("EncryptionInfo was {} bytes, expected at least {}", enc_info.size(), sizeof(encryption_info)); + + auto& ei = 
*(encryption_info*)enc_info.data(); + + if (ei.major == 4 && ei.minor == 4) { + parse_enc_info_44(enc_info, password); + return; + } else if (ei.major != 3 || ei.minor != 2) + Rcpp::stop("Unsupported EncryptionInfo version {}.{}", ei.major, ei.minor); + + if (ei.flags != 0x24) // AES + Rcpp::stop("Unsupported EncryptionInfo flags {:x}", ei.flags); + + if (ei.header_size < offsetof(encryption_header, csp_name)) + Rcpp::stop("Encryption header was {} bytes, expected at least {}", ei.header_size, offsetof(encryption_header, csp_name)); + + if (ei.header_size > enc_info.size() - sizeof(encryption_info)) + Rcpp::stop("Encryption header was {} bytes, but only {} remaining", ei.header_size, enc_info.size() - sizeof(encryption_info)); + + auto& h = *(encryption_header*)(enc_info.data() + sizeof(encryption_info)); + + if (h.alg_id != ALG_ID_AES_128) + Rcpp::stop("Unsupported algorithm ID {:x}", h.alg_id); + + if (h.alg_id_hash != ALG_ID_SHA_1 && h.alg_id_hash != 0) + Rcpp::stop("Unsupported hash algorithm ID {:x}", h.alg_id_hash); + + if (h.key_size != 128) + Rcpp::stop("Key size was {}, expected 128", h.key_size); + + auto sp = enc_info.subspan(sizeof(encryption_info) + ei.header_size); + + if (sp.size() < sizeof(uint32_t)) + Rcpp::stop("Malformed EncryptionInfo"); + + auto salt_size = *(uint32_t*)sp.data(); + sp = sp.subspan(sizeof(uint32_t)); + + if (sp.size() < salt_size) + Rcpp::stop("Malformed EncryptionInfo"); + + auto salt = sp.subspan(0, salt_size); + sp = sp.subspan(salt_size); + + if (sp.size() < 16) + Rcpp::stop("Malformed EncryptionInfo"); + + auto encrypted_verifier = sp.subspan(0, 16); + sp = sp.subspan(16); + + if (sp.size() < sizeof(uint32_t)) + Rcpp::stop("Malformed EncryptionInfo"); + + // skip verifier_hash_size + sp = sp.subspan(sizeof(uint32_t)); + + if (sp.size() < 32) + Rcpp::stop("Malformed EncryptionInfo"); + + auto encrypted_verifier_hash = sp.subspan(0, 32); + + check_password(password, salt, encrypted_verifier, encrypted_verifier_hash); // 
throws if wrong + + memcpy(this->salt.data(), salt.data(), this->salt.size()); +} + +vector cfbf::decrypt44(span enc_package) { + uint32_t segment_no = 0; + vector ret; + + static const size_t SEGMENT_LENGTH = 0x1000; + + ret.resize(enc_package.size()); + auto ptr = ret.data(); + + while (true) { + array h; + AES_ctx ctx; + + auto seg = enc_package.subspan(0, min(SEGMENT_LENGTH, enc_package.size())); + + memcpy(ptr, seg.data(), seg.size()); + + if (hashalgo == hash_algorithm::sha512) { + sha512_state ctx; + + sha_init(ctx); + sha_process(ctx, salt.data(), (uint32_t)salt.size()); + sha_process(ctx, &segment_no, sizeof(segment_no)); + sha_done(ctx, h.data()); + } else { + SHA1_CTX ctx; + + ctx.update(salt); + ctx.update(span((uint8_t*)&segment_no, sizeof(segment_no))); + + ctx.finalize(h); + } + + if (key_size == 32) { + AES256_init_ctx_iv(&ctx, key.data(), h.data()); + AES256_CBC_decrypt_buffer(&ctx, ptr, seg.size()); + } else { + AES128_init_ctx_iv(&ctx, key.data(), h.data()); + AES128_CBC_decrypt_buffer(&ctx, ptr, seg.size()); + } + + if (enc_package.size() == seg.size()) + break; + + enc_package = enc_package.subspan(seg.size()); + segment_no++; + ptr += seg.size(); + } + + return ret; +} + +vector cfbf::decrypt(span enc_package) { + if (enc_package.size() < sizeof(uint64_t)) + Rcpp::stop("EncryptedPackage was {} bytes, expected at least {}", enc_package.size(), sizeof(uint64_t)); + + auto size = *(uint64_t*)enc_package.data(); + + enc_package = enc_package.subspan(sizeof(uint64_t)); + + if (enc_package.size() < size) + Rcpp::stop("EncryptedPackage was {} bytes, expected at least {}", enc_package.size() + sizeof(uint64_t), size + sizeof(uint64_t)); + + if (agile_enc) + return decrypt44(enc_package); + + AES_ctx ctx; + auto buf = enc_package; + + AES128_init_ctx(&ctx, key.data()); + + while (!buf.empty()) { + AES128_ECB_decrypt(&ctx, buf.data()); + + buf = buf.subspan(16); + } + + vector ret; + + ret.assign(enc_package.begin(), enc_package.end()); + + return ret; 
+} diff --git a/src/xlcpp/cfbf.h b/src/xlcpp/cfbf.h new file mode 100644 index 000000000..1d309bd6c --- /dev/null +++ b/src/xlcpp/cfbf.h @@ -0,0 +1,58 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +static const uint64_t CFBF_SIGNATURE = 0xe11ab1a1e011cfd0; + +#define formatted_error(s, ...) _formatted_error(FMT_COMPILE(s), ##__VA_ARGS__) + +class cfbf; +struct dirent; + +class cfbf_entry { +public: + cfbf_entry(cfbf& file, const dirent& de, std::string_view name); + size_t read(std::span buf, uint64_t off) const; + size_t get_size() const; + + cfbf& file; + const dirent& de; + std::string name; +}; + +enum class hash_algorithm { + sha1, + sha512 +}; + +class cfbf { +public: + cfbf(std::span s); + uint32_t next_sector(uint32_t sector) const; + uint32_t next_mini_sector(uint32_t sector) const; + void parse_enc_info(std::span enc_info, std::u16string_view password); + void parse_enc_info_44(std::span enc_info, std::u16string_view password); + std::vector decrypt(std::span enc_package); + + std::vector entries; + std::span s; + +private: + void add_entry(std::string_view path, uint32_t num, bool ignore_right); + void check_password(std::u16string_view password, std::span salt, + std::span encrypted_verifier, + std::span encrypted_verifier_hash); + const dirent& find_dirent(uint32_t num); + std::vector decrypt44(std::span enc_package); + + std::array key; + unsigned int key_size; + std::array salt; + bool agile_enc = false; + enum hash_algorithm hashalgo; +}; diff --git a/src/xlcpp/sha1.cpp b/src/xlcpp/sha1.cpp new file mode 100644 index 000000000..cc251480c --- /dev/null +++ b/src/xlcpp/sha1.cpp @@ -0,0 +1,263 @@ +/* +SHA-1 in C +By Steve Reid +100% Public Domain + +Test Vectors (from FIPS PUB 180-1) +"abc" +A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D +"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" +84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 +A million repetitions of "a" +34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F +*/ 
+ +/* #define LITTLE_ENDIAN * This should be #define'd already, if true. */ +/* #define SHA1HANDSOFF * Copies data before messing with it. */ + +#define SHA1HANDSOFF + +#include +#include +#include +#include + +#include "sha1.h" + +using namespace std; + +constexpr void R0(uint32_t v, uint32_t& w, uint32_t x, uint32_t y, uint32_t& z, uint32_t& bl) { + z += (w & (x^y)) ^ y; + + bl = (rotl(bl,24) & 0xff00ff00) | (rotl(bl,8) & 0x00ff00ff); + z += bl; + + z += 0x5a827999; + z += rotl(v, 5); + + w = rotl(w, 30); +} + +constexpr void R1(uint32_t v, uint32_t& w, uint32_t x, uint32_t y, uint32_t& z, uint32_t i, uint32_t* l) { + z += (w & (x ^ y)) ^ y; + + l[i&15] = rotl(l[(i+13)&15] ^ l[(i+8)&15] ^ l[(i+2)&15] ^ l[i&15], 1); + z += l[i&15]; + + z += 0x5a827999; + z += rotl(v, 5); + + w = rotl(w, 30); +} + +constexpr void R2(uint32_t v, uint32_t& w, uint32_t x, uint32_t y, uint32_t& z, uint32_t i, uint32_t* l) { + z += w ^ x ^ y; + + l[i&15] = rotl(l[(i+13)&15] ^ l[(i+8)&15] ^ l[(i+2)&15] ^ l[i&15], 1); + z += l[i&15]; + + z += 0x6ed9eba1; + z += rotl(v, 5); + + w = rotl(w, 30); +} + +constexpr void R3(uint32_t v, uint32_t& w, uint32_t x, uint32_t y, uint32_t& z, uint32_t i, uint32_t* l) { + z += ((w | x) & y) | (w & x); + + l[i&15] = rotl(l[(i+13)&15] ^ l[(i+8)&15] ^ l[(i+2)&15] ^ l[i&15], 1); + z += l[i&15]; + + z += 0x8f1bbcdc; + z += rotl(v, 5); + + w = rotl(w, 30); +} + +constexpr void R4(uint32_t v, uint32_t& w, uint32_t x, uint32_t y, uint32_t& z, uint32_t i, uint32_t* l) { + z += w ^ x ^ y; + + l[i&15] = rotl(l[(i+13)&15] ^ l[(i+8)&15] ^ l[(i+2)&15] ^ l[i&15], 1); + z += l[i&15]; + + z += 0xca62c1d6; + z += rotl(v,5); + + w = rotl(w,30); +} + + +/* Hash a single 512-bit block. This is the core of the algorithm. 
*/ + +static void SHA1Transform(uint32_t state[5], uint8_t buffer[64]) { + uint32_t a, b, c, d, e; + uint32_t l[16]; + + memcpy(l, buffer, 64); + + /* Copy context->state[] to working vars */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + + /* 4 rounds of 20 operations each. Loop unrolled. */ + R0(a,b,c,d,e, l[0]); + R0(e,a,b,c,d, l[1]); + R0(d,e,a,b,c, l[2]); + R0(c,d,e,a,b, l[3]); + R0(b,c,d,e,a, l[4]); + R0(a,b,c,d,e, l[5]); + R0(e,a,b,c,d, l[6]); + R0(d,e,a,b,c, l[7]); + R0(c,d,e,a,b, l[8]); + R0(b,c,d,e,a, l[9]); + R0(a,b,c,d,e, l[10]); + R0(e,a,b,c,d, l[11]); + R0(d,e,a,b,c, l[12]); + R0(c,d,e,a,b, l[13]); + R0(b,c,d,e,a, l[14]); + R0(a,b,c,d,e, l[15]); + + R1(e,a,b,c,d, 16, l); + R1(d,e,a,b,c, 17, l); + R1(c,d,e,a,b, 18, l); + R1(b,c,d,e,a, 19, l); + + R2(a,b,c,d,e,20, l); + R2(e,a,b,c,d,21, l); + R2(d,e,a,b,c,22, l); + R2(c,d,e,a,b,23, l); + R2(b,c,d,e,a,24, l); + R2(a,b,c,d,e,25, l); + R2(e,a,b,c,d,26, l); + R2(d,e,a,b,c,27, l); + R2(c,d,e,a,b,28, l); + R2(b,c,d,e,a,29, l); + R2(a,b,c,d,e,30, l); + R2(e,a,b,c,d,31, l); + R2(d,e,a,b,c,32, l); + R2(c,d,e,a,b,33, l); + R2(b,c,d,e,a,34, l); + R2(a,b,c,d,e,35, l); + R2(e,a,b,c,d,36, l); + R2(d,e,a,b,c,37, l); + R2(c,d,e,a,b,38, l); + R2(b,c,d,e,a,39, l); + + R3(a,b,c,d,e,40, l); + R3(e,a,b,c,d,41, l); + R3(d,e,a,b,c,42, l); + R3(c,d,e,a,b,43, l); + R3(b,c,d,e,a,44, l); + R3(a,b,c,d,e,45, l); + R3(e,a,b,c,d,46, l); + R3(d,e,a,b,c,47, l); + R3(c,d,e,a,b,48, l); + R3(b,c,d,e,a,49, l); + R3(a,b,c,d,e,50, l); + R3(e,a,b,c,d,51, l); + R3(d,e,a,b,c,52, l); + R3(c,d,e,a,b,53, l); + R3(b,c,d,e,a,54, l); + R3(a,b,c,d,e,55, l); + R3(e,a,b,c,d,56, l); + R3(d,e,a,b,c,57, l); + R3(c,d,e,a,b,58, l); + R3(b,c,d,e,a,59, l); + + R4(a,b,c,d,e,60, l); + R4(e,a,b,c,d,61, l); + R4(d,e,a,b,c,62, l); + R4(c,d,e,a,b,63, l); + R4(b,c,d,e,a,64, l); + R4(a,b,c,d,e,65, l); + R4(e,a,b,c,d,66, l); + R4(d,e,a,b,c,67, l); + R4(c,d,e,a,b,68, l); + R4(b,c,d,e,a,69, l); + R4(a,b,c,d,e,70, l); + R4(e,a,b,c,d,71, 
l); + R4(d,e,a,b,c,72, l); + R4(c,d,e,a,b,73, l); + R4(b,c,d,e,a,74, l); + R4(a,b,c,d,e,75, l); + R4(e,a,b,c,d,76, l); + R4(d,e,a,b,c,77, l); + R4(c,d,e,a,b,78, l); + R4(b,c,d,e,a,79, l); + /* Add the working vars back into context.state[] */ + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; +} + +/* Run your data through this. */ + +void SHA1_CTX::update(std::span data) { + uint32_t i; + + auto j = count[0]; + + count[0] += (uint32_t)(data.size() << 3); + + if (count[0] < j) + count[1]++; + + count[1] += (uint32_t)(data.size() >> 29); + + j = (j >> 3) & 63; + + if (j + data.size() > 63) { + i = 64 - j; + memcpy(&buffer[j], data.data(), i); + SHA1Transform(state, buffer); + for ( ; i + 63 < data.size(); i += 64) { + SHA1Transform(state, (uint8_t*)&data[i]); + } + j = 0; + } else + i = 0; + + memcpy(&buffer[j], &data[i], data.size() - i); +} + + +/* Add padding and return the message digest. */ + +void SHA1_CTX::finalize(span digest) { + unsigned i; + unsigned char finalcount[8]; + unsigned char c; + + for (i = 0; i < 8; i++) { + finalcount[i] = (unsigned char)((count[(i >= 4 ? 
0 : 1)] + >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ + } + + c = 0200; + update(span(&c, 1)); + while ((count[0] & 504) != 448) { + c = 0000; + update(span(&c, 1)); + } + update(span(finalcount, 8)); /* Should cause a SHA1Transform() */ + for (i = 0; i < 20; i++) { + digest[i] = (unsigned char) + ((state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); + } +} + +array sha1(span s) { + array digest; + SHA1_CTX ctx; + + ctx.update(s); + ctx.finalize(digest); + + return digest; +} + diff --git a/src/xlcpp/sha1.h b/src/xlcpp/sha1.h new file mode 100644 index 000000000..9e77d69a3 --- /dev/null +++ b/src/xlcpp/sha1.h @@ -0,0 +1,35 @@ +/* +SHA-1 in C +By Steve Reid +100% Public Domain +*/ + +#pragma once + +#include +#include + +struct SHA1_CTX { + constexpr SHA1_CTX() { + /* SHA1 initialization constants */ + state[0] = 0x67452301; + state[1] = 0xEFCDAB89; + state[2] = 0x98BADCFE; + state[3] = 0x10325476; + state[4] = 0xC3D2E1F0; + count[0] = count[1] = 0; + + for (unsigned int i = 0; i < sizeof(buffer); i++) { + buffer[i] = 0; + } + } + + void update(std::span data); + void finalize(std::span digest); + + uint32_t state[5]; + uint32_t count[2]; + unsigned char buffer[64]; +}; + +std::array sha1(std::span s); diff --git a/src/xlcpp/sha512.cpp b/src/xlcpp/sha512.cpp new file mode 100644 index 000000000..89e6f270f --- /dev/null +++ b/src/xlcpp/sha512.cpp @@ -0,0 +1,174 @@ +// SHA-512. Adapted from LibTomCrypt. 
This code is Public Domain +#include "sha512.h" +#include + +static const uint64_t K[80] = +{ + 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, 0x3956c25bf348b538ULL, + 0x59f111f1b605d019ULL, 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, + 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL, + 0xc19bf174cf692694ULL, 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, + 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, 0x983e5152ee66dfabULL, + 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, + 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, + 0x53380d139d95b3dfULL, 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, + 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, 0xd192e819d6ef5218ULL, + 0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, + 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, 0x5b9cca4f7763e373ULL, + 0x682e6ff3d6b2b8a3ULL, 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, + 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, 0xca273eceea26619cULL, + 0xd186b8c721c0c207ULL, 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, + 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL, + 0x431d67c49c100d4cULL, 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL +}; + +static uint32_t 
min(uint32_t x, uint32_t y) +{ + return x < y ? x : y; +} + +static void store64(uint64_t x, unsigned char* y) +{ + for(int i = 0; i != 8; ++i) + y[i] = (x >> ((7-i) * 8)) & 255; +} + +static uint64_t load64(const unsigned char* y) +{ + uint64_t res = 0; + for(int i = 0; i != 8; ++i) + res |= uint64_t(y[i]) << ((7-i) * 8); + return res; +} + +static uint64_t Ch(uint64_t x, uint64_t y, uint64_t z) { return z ^ (x & (y ^ z)); } +static uint64_t Maj(uint64_t x, uint64_t y, uint64_t z) { return ((x | y) & z) | (x & y); } +static uint64_t Rot(uint64_t x, uint64_t n) { return (x >> (n & 63)) | (x << (64 - (n & 63))); } +static uint64_t Sh(uint64_t x, uint64_t n) { return x >> n; } +static uint64_t Sigma0(uint64_t x) { return Rot(x, 28) ^ Rot(x, 34) ^ Rot(x, 39); } +static uint64_t Sigma1(uint64_t x) { return Rot(x, 14) ^ Rot(x, 18) ^ Rot(x, 41); } +static uint64_t Gamma0(uint64_t x) { return Rot(x, 1) ^ Rot(x, 8) ^ Sh(x, 7); } +static uint64_t Gamma1(uint64_t x) { return Rot(x, 19) ^ Rot(x, 61) ^ Sh(x, 6); } + +static void sha_compress(sha512_state& md, const unsigned char *buf) +{ + uint64_t S[8], W[80], t0, t1; + + // Copy state into S + for(int i = 0; i < 8; i++) + S[i] = md.state[i]; + + // Copy the state into 1024-bits into W[0..15] + for(int i = 0; i < 16; i++) + W[i] = load64(buf + (8*i)); + + // Fill W[16..79] + for(int i = 16; i < 80; i++) + W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; + + // Compress + auto RND = [&](uint64_t a, uint64_t b, uint64_t c, uint64_t& d, uint64_t e, uint64_t f, uint64_t g, uint64_t& h, uint64_t i) + { + t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; + t1 = Sigma0(a) + Maj(a, b, c); + d += t0; + h = t0 + t1; + }; + + for(int i = 0; i < 80; i += 8) + { + RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0); + RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1); + RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2); + RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3); + RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4); + 
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5); + RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6); + RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7); + } + + // Feedback + for(int i = 0; i < 8; i++) + md.state[i] = md.state[i] + S[i]; +} + +// Public interface + +void sha_init(sha512_state& md) +{ + md.curlen = 0; + md.length = 0; + md.state[0] = 0x6a09e667f3bcc908ULL; + md.state[1] = 0xbb67ae8584caa73bULL; + md.state[2] = 0x3c6ef372fe94f82bULL; + md.state[3] = 0xa54ff53a5f1d36f1ULL; + md.state[4] = 0x510e527fade682d1ULL; + md.state[5] = 0x9b05688c2b3e6c1fULL; + md.state[6] = 0x1f83d9abfb41bd6bULL; + md.state[7] = 0x5be0cd19137e2179ULL; +} + +void sha_process(sha512_state& md, const void* src, uint32_t inlen) +{ + const uint32_t block_size = sizeof(sha512_state::buf); + auto in = static_cast(src); + + while(inlen > 0) + { + if(md.curlen == 0 && inlen >= block_size) + { + sha_compress(md, in); + md.length += block_size * 8; + in += block_size; + inlen -= block_size; + } + else + { + uint32_t n = min(inlen, (block_size - md.curlen)); + memcpy(md.buf + md.curlen, in, n); + md.curlen += n; + in += n; + inlen -= n; + + if(md.curlen == block_size) + { + sha_compress(md, md.buf); + md.length += 8*block_size; + md.curlen = 0; + } + } + } +} + +void sha_done(sha512_state& md, void *out) +{ + // Increase the length of the message + md.length += md.curlen * 8ULL; + + // Append the '1' bit + md.buf[md.curlen++] = static_cast(0x80); + + // If the length is currently above 112 bytes we append zeros then compress. + // Then we can fall back to padding zeros and length encoding like normal. + if(md.curlen > 112) + { + while(md.curlen < 128) + md.buf[md.curlen++] = 0; + sha_compress(md, md.buf); + md.curlen = 0; + } + + // Pad upto 120 bytes of zeroes + // note: that from 112 to 120 is the 64 MSB of the length. We assume that + // you won't hash 2^64 bits of data... 
:-) + while(md.curlen < 120) + md.buf[md.curlen++] = 0; + + // Store length + store64(md.length, md.buf+120); + sha_compress(md, md.buf); + + // Copy output + for(int i = 0; i < 8; i++) + store64(md.state[i], static_cast(out)+(8*i)); +} diff --git a/src/xlcpp/sha512.h b/src/xlcpp/sha512.h new file mode 100644 index 000000000..0ba62cb19 --- /dev/null +++ b/src/xlcpp/sha512.h @@ -0,0 +1,16 @@ +// SHA-512. Adapted from LibTomCrypt. This code is Public Domain +#pragma once + +#include + +struct sha512_state +{ + uint64_t length; + uint64_t state[8]; + uint32_t curlen; + unsigned char buf[128]; +}; + +void sha_init(sha512_state& md); +void sha_process(sha512_state& md, const void* in, uint32_t inlen); +void sha_done(sha512_state& md, void* out); diff --git a/src/xlcpp/utf16.h b/src/xlcpp/utf16.h new file mode 100644 index 000000000..8c3c03a95 --- /dev/null +++ b/src/xlcpp/utf16.h @@ -0,0 +1,346 @@ +#pragma once + +#include + +/* Number of UTF-8 bytes utf16_to_utf8_range() writes for sv: 1-3 bytes per non-surrogate unit, 4 per surrogate pair, 3 (U+FFFD) per unpaired surrogate. Must stay in step with utf16_to_utf8_range(). */ template +requires (std::is_same_v || (sizeof(wchar_t) == 2 && std::is_same_v)) +static constexpr size_t utf16_to_utf8_len(std::basic_string_view sv) noexcept { + size_t ret = 0; + + while (!sv.empty()) { + if (sv[0] < 0x80) + ret++; + else if (sv[0] < 0x800) + ret += 2; + else if (sv[0] < 0xd800) + ret += 3; + else if (sv[0] < 0xdc00) { + /* low surrogates are 0xdc00-0xdfff: compare the top six bits (mask 0xfc00); the old 0xdc00 mask also accepted 0xfc00-0xffff */ if (sv.length() < 2 || (sv[1] & 0xfc00) != 0xdc00) { + ret += 3; + sv = sv.substr(1); + continue; + } + + ret += 4; + sv = sv.substr(1); + } else + ret += 3; + + sv = sv.substr(1); + } + + return ret; +} + +/* Array overload: measures a string literal without its terminating NUL. */ template +requires (std::is_same_v || (sizeof(wchar_t) == 2 && std::is_same_v)) +static constexpr size_t utf16_to_utf8_len(const T (&str)[N]) noexcept { + return utf16_to_utf8_len(std::basic_string_view{str, N - 1}); +} + +/* Encodes sv as UTF-8 into the range t and returns as soon as t is full. Unpaired surrogates are written as U+FFFD (ef bf bd). */ template +requires (std::is_same_v || (sizeof(wchar_t) == 2 && std::is_same_v)) && +((std::ranges::output_range && std::is_same_v, char>) || +(std::ranges::output_range && std::is_same_v, char8_t>)) +static constexpr void utf16_to_utf8_range(std::basic_string_view sv, U& t) noexcept { + auto
ptr = t.begin(); + + if (ptr == t.end()) + return; + + while (!sv.empty()) { + if (sv[0] < 0x80) { + *ptr = (uint8_t)sv[0]; + ptr++; + + if (ptr == t.end()) + return; + } else if (sv[0] < 0x800) { + *ptr = (uint8_t)(0xc0 | (sv[0] >> 6)); + ptr++; + + if (ptr == t.end()) + return; + + *ptr = (uint8_t)(0x80 | (sv[0] & 0x3f)); + ptr++; + + if (ptr == t.end()) + return; + } else if (sv[0] < 0xd800) { + *ptr = (uint8_t)(0xe0 | (sv[0] >> 12)); + ptr++; + + if (ptr == t.end()) + return; + + *ptr = (uint8_t)(0x80 | ((sv[0] >> 6) & 0x3f)); + ptr++; + + if (ptr == t.end()) + return; + + *ptr = (uint8_t)(0x80 | (sv[0] & 0x3f)); + ptr++; + + if (ptr == t.end()) + return; + } else if (sv[0] < 0xdc00) { + /* high surrogate without a following low surrogate (0xdc00-0xdfff, mask 0xfc00): emit U+FFFD */ if (sv.length() < 2 || (sv[1] & 0xfc00) != 0xdc00) { + *ptr = (uint8_t)0xef; + ptr++; + + if (ptr == t.end()) + return; + + *ptr = (uint8_t)0xbf; + ptr++; + + if (ptr == t.end()) + return; + + *ptr = (uint8_t)0xbd; + ptr++; + + if (ptr == t.end()) + return; + + sv = sv.substr(1); + continue; + } + + /* 0x10000 must be added, not OR-ed: OR loses the carry and maps U+20000 and above onto lower planes */ char32_t cp = 0x10000 + (((sv[0] & ~0xd800) << 10) | (sv[1] & ~0xdc00)); + + *ptr = (uint8_t)(0xf0 | (cp >> 18)); + ptr++; + + if (ptr == t.end()) + return; + + *ptr = (uint8_t)(0x80 | ((cp >> 12) & 0x3f)); + ptr++; + + if (ptr == t.end()) + return; + + *ptr = (uint8_t)(0x80 | ((cp >> 6) & 0x3f)); + ptr++; + + if (ptr == t.end()) + return; + + *ptr = (uint8_t)(0x80 | (cp & 0x3f)); + ptr++; + + if (ptr == t.end()) + return; + + sv = sv.substr(1); + } else if (sv[0] < 0xe000) { + *ptr = (uint8_t)0xef; + ptr++; + + if (ptr == t.end()) + return; + + *ptr = (uint8_t)0xbf; + ptr++; + + if (ptr == t.end()) + return; + + *ptr = (uint8_t)0xbd; + ptr++; + + if (ptr == t.end()) + return; + } else { + *ptr = (uint8_t)(0xe0 | (sv[0] >> 12)); + ptr++; + + if (ptr == t.end()) + return; + + *ptr = (uint8_t)(0x80 | ((sv[0] >> 6) & 0x3f)); + ptr++; + + if (ptr == t.end()) + return; + + *ptr = (uint8_t)(0x80 | (sv[0] & 0x3f)); + ptr++; + + if (ptr == t.end()) + return; + } + + sv = sv.substr(1); + } +}
+ +/* Converts UTF-16 to a UTF-8 std::string: sizes the result with utf16_to_utf8_len() and fills it with utf16_to_utf8_range(). */ static std::string __inline utf16_to_utf8(std::u16string_view sv) { + if (sv.empty()) + return ""; + + std::string ret(utf16_to_utf8_len(sv), 0); + + utf16_to_utf8_range(sv, ret); + + return ret; +} + +/* Number of UTF-16 code units utf8_to_utf16_range() writes for sv: 1 per 1-3 byte sequence, 2 per valid 4-byte sequence, 1 (U+FFFD) per invalid byte or out-of-range sequence. Must stay in step with utf8_to_utf16_range(). The length checks compare the full size_t; casting it to uint8_t wrapped at 256 and rejected valid sequences. */ static constexpr size_t utf8_to_utf16_len(std::string_view sv) noexcept { + size_t ret = 0; + + while (!sv.empty()) { + if ((uint8_t)sv[0] < 0x80) { + ret++; + sv = sv.substr(1); + } else if (((uint8_t)sv[0] & 0xe0) == 0xc0 && sv.length() >= 2 && ((uint8_t)sv[1] & 0xc0) == 0x80) { + ret++; + sv = sv.substr(2); + } else if (((uint8_t)sv[0] & 0xf0) == 0xe0 && sv.length() >= 3 && ((uint8_t)sv[1] & 0xc0) == 0x80 && ((uint8_t)sv[2] & 0xc0) == 0x80) { + ret++; + sv = sv.substr(3); + } else if (((uint8_t)sv[0] & 0xf8) == 0xf0 && sv.length() >= 4 && ((uint8_t)sv[1] & 0xc0) == 0x80 && ((uint8_t)sv[2] & 0xc0) == 0x80 && ((uint8_t)sv[3] & 0xc0) == 0x80) { + char32_t cp = (char32_t)(((uint8_t)sv[0] & 0x7) << 18) | (char32_t)(((uint8_t)sv[1] & 0x3f) << 12) | (char32_t)(((uint8_t)sv[2] & 0x3f) << 6) | (char32_t)((uint8_t)sv[3] & 0x3f); + + /* overlong (below 0x10000) or beyond U+10FFFF: counted as one replacement unit */ if (cp < 0x10000 || cp > 0x10ffff) { + ret++; + sv = sv.substr(4); + continue; + } + + ret += 2; + sv = sv.substr(4); + } else { + ret++; + sv = sv.substr(1); + } + } + + return ret; +} + +static constexpr size_t utf8_to_utf16_len(std::u8string_view sv) noexcept { + return utf8_to_utf16_len(std::string_view(std::bit_cast(sv.data()), sv.length())); +} + +/* Decodes UTF-8 from sv into the UTF-16 range t and returns as soon as t is full. Invalid bytes, encoded surrogates and out-of-range 4-byte forms are written as U+FFFD. */ template +requires (std::ranges::output_range && std::is_same_v, char16_t>) || + (sizeof(wchar_t) == 2 && std::ranges::output_range && std::is_same_v, wchar_t>) +static constexpr void utf8_to_utf16_range(std::string_view sv, T& t) noexcept { + auto ptr = t.begin(); + + if (ptr == t.end()) + return; + + while (!sv.empty()) { + if ((uint8_t)sv[0] < 0x80) { + *ptr = (uint8_t)sv[0]; + ptr++; + + if (ptr == t.end()) + return; + + sv = sv.substr(1); + } else if (((uint8_t)sv[0] & 0xe0) == 0xc0 && sv.length() >= 2 && ((uint8_t)sv[1] & 0xc0) == 0x80) { + char16_t cp =
(char16_t)(((uint8_t)sv[0] & 0x1f) << 6) | (char16_t)((uint8_t)sv[1] & 0x3f); + + *ptr = cp; + ptr++; + + if (ptr == t.end()) + return; + + sv = sv.substr(2); + } else if (((uint8_t)sv[0] & 0xf0) == 0xe0 && sv.length() >= 3 && ((uint8_t)sv[1] & 0xc0) == 0x80 && ((uint8_t)sv[2] & 0xc0) == 0x80) { + char16_t cp = (char16_t)(((uint8_t)sv[0] & 0xf) << 12) | (char16_t)(((uint8_t)sv[1] & 0x3f) << 6) | (char16_t)((uint8_t)sv[2] & 0x3f); + + if (cp >= 0xd800 && cp <= 0xdfff) { + *ptr = 0xfffd; + ptr++; + + if (ptr == t.end()) + return; + + sv = sv.substr(3); + continue; + } + + *ptr = cp; + ptr++; + + if (ptr == t.end()) + return; + + sv = sv.substr(3); + } else if (((uint8_t)sv[0] & 0xf8) == 0xf0 && sv.length() >= 4 && ((uint8_t)sv[1] & 0xc0) == 0x80 && ((uint8_t)sv[2] & 0xc0) == 0x80 && ((uint8_t)sv[3] & 0xc0) == 0x80) { + char32_t cp = (char32_t)(((uint8_t)sv[0] & 0x7) << 18) | (char32_t)(((uint8_t)sv[1] & 0x3f) << 12) | (char32_t)(((uint8_t)sv[2] & 0x3f) << 6) | (char32_t)((uint8_t)sv[3] & 0x3f); + + /* reject overlong forms too: cp below 0x10000 would underflow in the subtraction further down */ if (cp < 0x10000 || cp > 0x10ffff) { + *ptr = 0xfffd; + ptr++; + + if (ptr == t.end()) + return; + + sv = sv.substr(4); + continue; + } + + cp -= 0x10000; + + *ptr = (char16_t)(0xd800 | (cp >> 10)); + ptr++; + + if (ptr == t.end()) + return; + + *ptr = (char16_t)(0xdc00 | (cp & 0x3ff)); + ptr++; + + if (ptr == t.end()) + return; + + sv = sv.substr(4); + } else { + *ptr = 0xfffd; + ptr++; + + if (ptr == t.end()) + return; + + sv = sv.substr(1); + } + } +} + +template +requires (std::ranges::output_range && std::is_same_v, char16_t>) || + (sizeof(wchar_t) == 2 && std::ranges::output_range && std::is_same_v, wchar_t>) +static constexpr void utf8_to_utf16_range(std::u8string_view sv, T& t) noexcept { + utf8_to_utf16_range(std::string_view((char*)sv.data(), sv.length()), t); +} + +/* Converts UTF-8 to a std::u16string: sizes the result with utf8_to_utf16_len() and fills it with utf8_to_utf16_range(). */ static __inline std::u16string utf8_to_utf16(std::string_view sv) { + if (sv.empty()) + return u""; + + std::u16string ret(utf8_to_utf16_len(sv), 0); + + utf8_to_utf16_range(sv, ret); + + return
ret; +} + +/* char8_t overload: sizes the result with utf8_to_utf16_len() and fills it with utf8_to_utf16_range(). */ static __inline std::u16string utf8_to_utf16(std::u8string_view sv) { + if (sv.empty()) + return u""; + + std::u16string ret(utf8_to_utf16_len(sv), 0); + + utf8_to_utf16_range(sv, ret); + + return ret; +} diff --git a/src/xlcpp/xlcpp-pimpl.h b/src/xlcpp/xlcpp-pimpl.h new file mode 100644 index 000000000..47e2e3f47 --- /dev/null +++ b/src/xlcpp/xlcpp-pimpl.h @@ -0,0 +1,92 @@ +#pragma once + +#include "xlcpp.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace xlcpp { + +struct shared_string { + unsigned int num; +}; + +typedef struct { + std::string content_type; + std::string data; +} file; + + +/* Implementation object that xlcpp::workbook holds through its impl pointer. Both loading constructors end in load_from_memory(). */ class workbook_pimpl { +public: + workbook_pimpl() = default; + workbook_pimpl(std::string& fn, std::string_view password, std::string_view outfile); + workbook_pimpl(std::span sv, std::string_view password, std::string_view outfile); + std::string data() const; + + // void write_archive(struct archive* a) const; + // la_ssize_t write_callback(struct archive* a, const void* buffer, size_t length) const; + void load_archive(struct archive* a); + +private: + void load_from_memory(std::span sv, std::string_view password, std::string_view outfile); +}; + +}; // end namespace xlcpp + + +/* Node kinds reported by xml_reader::node_type(). */ enum class xml_node { + unknown, + text, + whitespace, + element, + end_element, + processing_instruction, + comment, + cdata +}; + +/* Non-owning view of XML text that may still contain entity references. decode() returns a std::string with the entities resolved; cmp() compares the decoded text with a plain string. */ class xml_enc_string_view { +public: + xml_enc_string_view() { } + xml_enc_string_view(std::string_view sv) : sv(sv) { } + + bool empty() const noexcept { + return sv.empty(); + } + + std::string decode() const; + bool cmp(std::string_view str) const; + +private: + std::string_view sv; +}; + +using ns_list = std::vector>; + +/* Pull-style reader over an XML buffer: each read() call moves to the next node, whose kind and contents are then available through the accessors. The reader only keeps views, so the buffer passed to the constructor must outlive it. */ class xml_reader { +public: + xml_reader(std::string_view sv) : sv(sv) { } + bool read(); + enum xml_node node_type() const; + bool is_empty() const; + void attributes_loop_raw(const std::function& func) const; + std::optional get_attribute(std::string_view name, std::string_view ns
= "") const; + xml_enc_string_view namespace_uri_raw() const; + std::string_view name() const; + std::string_view local_name() const; + std::string value() const; + +private: + /* sv: input not yet consumed; node: raw text of the current node */ std::string_view sv, node; + enum xml_node type = xml_node::unknown; + /* assigned by read() for element nodes only; read() and is_empty() test type before reading it */ bool empty_tag; + /* namespace declarations: read() pushes one entry per element and pops it at the matching end tag or after an empty element */ std::vector namespaces; +}; diff --git a/src/xlcpp/xlcpp.cpp b/src/xlcpp/xlcpp.cpp new file mode 100644 index 000000000..27bcd039f --- /dev/null +++ b/src/xlcpp/xlcpp.cpp @@ -0,0 +1,152 @@ +#include "openxlsx2.h" +#include +#include + +#include "xlcpp.h" +#include "xlcpp-pimpl.h" +#include "cfbf.h" +#include "utf16.h" +#include +#include +#include + +#ifdef _WIN32 +#include +// #include +#endif + +/* NOTE(review): BLOCK_SIZE, NUMFMT_OFFSET and the NS_ constants below are not referenced anywhere in the part of xlcpp.cpp visible in this patch - confirm they are still needed. */ #define BLOCK_SIZE 20480 + +using namespace std; + +static const string NS_SPREADSHEET = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; +static const string NS_SPREADSHEET_STRICT = "http://purl.oclc.org/ooxml/spreadsheetml/main"; +static const string NS_RELATIONSHIPS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"; +static const string NS_RELATIONSHIPS_STRICT = "http://purl.oclc.org/ooxml/officeDocument/relationships"; +static const string NS_PACKAGE_RELATIONSHIPS = "http://schemas.openxmlformats.org/package/2006/relationships"; +static const string NS_CONTENT_TYPES = "http://schemas.openxmlformats.org/package/2006/content-types"; + +#define NUMFMT_OFFSET 165 + +namespace xlcpp { + +/* needed???
*/ +#ifdef _WIN32 +/* Windows-only UTF-16 to UTF-8 conversion through WideCharToMultiByte: the first call asks for the required size, the second fills the buffer. Either call returning 0 stops with an R error. */ __inline string utf16_to_utf8(const u16string_view& s) { + string ret; + + if (s.empty()) + return ""; + + auto len = WideCharToMultiByte(CP_UTF8, 0, (const wchar_t*)s.data(), (int)s.length(), nullptr, 0, + nullptr, nullptr); + + if (len == 0) + Rcpp::stop("WideCharToMultiByte 1 failed."); + + ret.resize(len); + + len = WideCharToMultiByte(CP_UTF8, 0, (const wchar_t*)s.data(), (int)s.length(), ret.data(), len, + nullptr, nullptr); + + if (len == 0) + Rcpp::stop("WideCharToMultiByte 2 failed."); + + return ret; +} +#endif + +/* Reads the whole file into a byte vector. The stream is opened positioned at the end so that tellg() yields the size. Stops with an R error when the file cannot be opened or read. NOTE(review): the tellg() result is used as the buffer size without checking for -1 - confirm this cannot happen for the inputs wb_load passes. */ std::vector loadFile(const std::string& filename) { + std::ifstream file(filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + Rcpp::stop("Failed to open file"); + } + + std::streamsize fileSize = file.tellg(); + file.seekg(0, std::ios::beg); + + std::vector buffer(fileSize); + if (file.read(reinterpret_cast(buffer.data()), fileSize)) { + return buffer; + } else { + Rcpp::stop("Failed to read file"); + } +} + +/* Loads the file named fn with loadFile() and passes the bytes to load_from_memory(). */ workbook_pimpl::workbook_pimpl(string& fn, string_view password, string_view outfile) { + + std::string path = fn; + + std::vector mem = loadFile(path); + + load_from_memory(mem, password, outfile); +} + +/* Same as above for a buffer that is already in memory. */ workbook_pimpl::workbook_pimpl(span sv, string_view password, string_view outfile) { + load_from_memory(sv, password, outfile); +} + +/* Decrypts an encrypted workbook held in mem and writes the plaintext to outfile. outfile is opened before the signature check, so input that does not start with CFBF_SIGNATURE leaves an empty output file and raises no error. NOTE(review): confirm that silently producing an empty file is intended. */ void workbook_pimpl::load_from_memory(span mem, string_view password, string_view outfile) { + vector plaintext; + + std::ofstream xlsx((std::string)outfile, ios::out | ios::binary); + if (mem.size() >= sizeof(uint64_t) && *(uint64_t*)mem.data() == CFBF_SIGNATURE) { + cfbf c(mem); + string enc_info, enc_package; + + // FIXME - handle old-style Excel files + + for (unsigned int num = 0; const auto& e : c.entries) { + if (num == 0) { // root + num++; + continue; + } + + if (e.name == "/EncryptionInfo" || e.name == "/EncryptedPackage") { + auto& str = e.name == "/EncryptionInfo" ?
enc_info : enc_package; + + str.resize(e.get_size()); + + uint64_t off = 0; + auto buf = span((std::byte*)str.data(), str.size()); + + /* keep calling e.read() with a growing offset until it reports 0 bytes. NOTE(review): buf always spans the whole string - check in cfbf how e.read() places data for a non-zero offset. */ while (true) { + auto size = e.read(buf, off); + + if (size == 0) + break; + + off += size; + } + } + + num++; + } + + if (enc_info.empty()) + Rcpp::stop("EncryptionInfo not found."); + + /* the password is converted from UTF-8 to UTF-16 before it is handed to parse_enc_info() */ auto u16password = utf8_to_utf16(password); + + c.parse_enc_info(span((uint8_t*)enc_info.data(), enc_info.size()), u16password); + plaintext = c.decrypt(span((uint8_t*)enc_package.data(), enc_package.size())); + + xlsx.write((char *) plaintext.data(), plaintext.size()); + } + xlsx.close(); + +} + +/* Both constructors allocate the workbook_pimpl that does the actual loading; the destructor deletes it. */ workbook::workbook(string& fn, std::string_view password, std::string_view outfile) { + impl = new workbook_pimpl(fn, password, outfile); +} + +workbook::workbook(span sv, std::string_view password, std::string_view outfile) { + impl = new workbook_pimpl(sv, password, outfile); +} + +workbook::~workbook() { + delete impl; +} + +} diff --git a/src/xlcpp/xlcpp.h b/src/xlcpp/xlcpp.h new file mode 100644 index 000000000..aee441495 --- /dev/null +++ b/src/xlcpp/xlcpp.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +// #ifdef _WIN32 +// +// #include +// +// #ifdef XLCPP_EXPORT +// #define XLCPP __declspec(dllexport) +// #elif !defined(XLCPP_STATIC) +// #define XLCPP __declspec(dllimport) +// #else +// #define XLCPP +// #endif +// +// #else + +#ifdef XLCPP_EXPORT +#define XLCPP __attribute__ ((visibility ("default"))) +#elif !defined(XLCPP_STATIC) +#define XLCPP __attribute__ ((dllimport)) +#else +#define XLCPP +#endif + +// #endif + +namespace xlcpp { + +class workbook_pimpl; +class sheet; + +/* Public handle around workbook_pimpl. NOTE(review): impl is a raw owning pointer and copy operations are not deleted, so a copied workbook would delete impl twice - confirm workbook is never copied. NOTE(review): the default constructor is declared here but no definition is visible in xlcpp.cpp. */ class XLCPP workbook { +public: + workbook(); + workbook(std::string& fn, std::string_view password = "", std::string_view outfile = ""); + workbook(std::span sv, std::string_view password = "", std::string_view outfile = ""); + ~workbook(); + + workbook_pimpl* impl; +}; + + +}; diff --git a/src/xlcpp/xml-reader.cpp
b/src/xlcpp/xml-reader.cpp new file mode 100644 index 000000000..8549af33f --- /dev/null +++ b/src/xlcpp/xml-reader.cpp @@ -0,0 +1,398 @@ +#include "openxlsx2.h" + +#include "xlcpp-pimpl.h" +#include + +using namespace std; + +/* True for space, tab, carriage return and line feed. */ static bool __inline is_whitespace(char c) { + return c == ' ' || c == '\t' || c == '\r' || c == '\n'; +} + +/* Walks the attributes of a start tag. node is the raw tag text including its first and last character. The tag name and an optional trailing slash are skipped, then func(name, raw value) is called per attribute until func returns false. Values are passed without their quotes and without entity decoding. */ static void parse_attributes(string_view node, const function& func) { + auto s = node.substr(1, node.length() - 2); + + if (!s.empty() && s.back() == '/') { + s.remove_suffix(1); + } + + /* skip the tag name, then the whitespace after it */ while (!s.empty() && !is_whitespace(s.front())) { + s.remove_prefix(1); + } + + while (!s.empty() && is_whitespace(s.front())) { + s.remove_prefix(1); + } + + while (!s.empty()) { + /* av is narrowed below to the text of the current attribute; its length is what gets consumed from s */ auto av = s; + + auto eq = av.find_first_of('='); + string_view n, v; + + if (eq == string::npos) { + n = av; + v = ""; + } else { + n = av.substr(0, eq); + v = av.substr(eq + 1); + + while (!v.empty() && is_whitespace(v.front())) { + v = v.substr(1); + } + + if (v.length() >= 2 && (v.front() == '"' || v.front() == '\'')) { + auto c = v.front(); + + v.remove_prefix(1); + + auto end = v.find_first_of(c); + + if (end != string::npos) { + v = v.substr(0, end); + /* pointer arithmetic on the shared buffer: cut av just after the closing quote */ av = av.substr(0, v.data() + v.length() - av.data() + 1); + } else { + for (size_t i = 0; i < av.length(); i++) { + if (is_whitespace(av[i])) { + av = av.substr(0, i); + break; + } + } + } + } + } + + while (!n.empty() && is_whitespace(n.back())) { + n.remove_suffix(1); + } + + if (!func(n, v)) + return; + + s.remove_prefix(av.length()); + + while (!s.empty() && is_whitespace(s.front())) { + s.remove_prefix(1); + } + } +} + +/* Advances to the next node and records its raw text in node and its kind in type. Returns false once the input is exhausted. */ bool xml_reader::read() { + if (sv.empty()) + return false; + + // FIXME - DOCTYPE (, ]>) + + if (type == xml_node::element && empty_tag) + namespaces.pop_back(); + + if (sv.front() != '<') { // text + auto pos = sv.find_first_of('<'); + + if (pos == string::npos) { + node = sv; + sv = ""; + } else { + node = sv.substr(0, pos); + sv = sv.substr(pos); + } + + type = xml_node::whitespace; + + for
(auto c : sv) { + if (!is_whitespace(c)) { + type = xml_node::text; + break; + } + } + } else { + if (sv.starts_with(""); + + if (pos == string::npos) { + node = sv; + sv = ""; + } else { + node = sv.substr(0, pos + 2); + sv = sv.substr(pos + 2); + } + + type = xml_node::processing_instruction; + } else if (sv.starts_with("'); + + if (pos == string::npos) { + node = sv; + sv = ""; + } else { + node = sv.substr(0, pos + 1); + sv = sv.substr(pos + 1); + } + + type = xml_node::end_element; + namespaces.pop_back(); + } else if (sv.starts_with(""); + + if (pos == string::npos) + Rcpp::stop("Malformed comment."); + + node = sv.substr(0, pos + 3); + sv = sv.substr(pos + 3); + + type = xml_node::comment; + } else if (sv.starts_with(""); + + if (pos == string::npos) + Rcpp::stop("Malformed CDATA."); + + node = sv.substr(0, pos + 3); + sv = sv.substr(pos + 3); + + type = xml_node::cdata; + } else { + auto pos = sv.find_first_of('>'); + + if (pos == string::npos) { + node = sv; + sv = ""; + } else { + node = sv.substr(0, pos + 1); + sv = sv.substr(pos + 1); + } + + type = xml_node::element; + ns_list ns; + + parse_attributes(node, [&](string_view name, xml_enc_string_view value) { + if (name.starts_with("xmlns:")) + ns.emplace_back(name.substr(6), value); + else if (name == "xmlns") + ns.emplace_back("", value); + + return true; + }); + + namespaces.push_back(ns); + + empty_tag = node.ends_with("/>"); + } + } + + return true; +} + +enum xml_node xml_reader::node_type() const { + return type; +} + +bool xml_reader::is_empty() const { + return type == xml_node::element && empty_tag; +} + +void xml_reader::attributes_loop_raw(const function& func) const { + if (type != xml_node::element) + return; + + parse_attributes(node, [&](string_view name, xml_enc_string_view value_raw) { + auto colon = name.find_first_of(':'); + + if (colon == string::npos) + return func(name, xml_enc_string_view{}, value_raw); + + auto prefix = name.substr(0, colon); + + for (auto it = 
namespaces.rbegin(); it != namespaces.rend(); it++) { + for (const auto& v : *it) { + if (v.first == prefix) + return func(name.substr(colon + 1), v.second, value_raw); + } + } + + /* prefix not declared on any open element: report the attribute with an empty namespace */ return func(name.substr(colon + 1), xml_enc_string_view{}, value_raw); + }); +} + +/* Returns the raw (still entity-encoded) value of the attribute with the given local name and namespace, or nullopt when the current node is not an element or has no such attribute. The first match wins. */ optional xml_reader::get_attribute(string_view name, string_view ns) const { + if (type != xml_node::element) + return nullopt; + + optional xesv; + + attributes_loop_raw([&](string_view local_name, xml_enc_string_view namespace_uri_raw, + xml_enc_string_view value_raw) { + if (local_name == name && namespace_uri_raw.cmp(ns)) { + xesv = value_raw; + /* returning false stops the attribute loop */ return false; + } + + return true; + }); + + return xesv; +} + +/* Namespace URI of the current tag, still entity-encoded: looks up the tag prefix (empty for an unprefixed tag) in the namespace stack, innermost element first. Returns an empty view when nothing matches. */ xml_enc_string_view xml_reader::namespace_uri_raw() const { + auto tag = name(); + auto colon = tag.find_first_of(':'); + string_view prefix; + + if (colon != string::npos) + prefix = tag.substr(0, colon); + + for (auto it = namespaces.rbegin(); it != namespaces.rend(); it++) { + for (const auto& v : *it) { + if (v.first == prefix) + return v.second; + } + } + + return {}; +} + +/* Qualified name (prefix included) of the current start or end tag; empty for every other node kind. */ string_view xml_reader::name() const { + if (type != xml_node::element && type != xml_node::end_element) + return ""; + + /* skip the one or two characters that open the tag */ auto tag = node.substr(type == xml_node::end_element ?
2 : 1); + + tag.remove_suffix(1); + + /* the name ends at the first whitespace, or at the slash of a self-closing tag without attributes (previously the slash stayed in the name) */ for (size_t i = 0; i < tag.length(); i++) { + if (is_whitespace(tag[i]) || tag[i] == '/') { + tag = tag.substr(0, i); + break; + } + } + + return tag; +} + +/* Name of the current start or end tag without its namespace prefix; empty for every other node kind. */ string_view xml_reader::local_name() const { + if (type != xml_node::element && type != xml_node::end_element) + return ""; + + auto tag = name(); + auto pos = tag.find_first_of(':'); + + if (pos == string::npos) + return tag; + else + return tag.substr(pos + 1); +} + +/* Content of the current node: entity-decoded text for text nodes, the payload between the 9-character opener and 3-character terminator for CDATA, empty otherwise. */ string xml_reader::value() const { + switch (type) { + case xml_node::text: + return xml_enc_string_view{node}.decode(); + + case xml_node::cdata: + return string{node.substr(9, node.length() - 12)}; + + default: + return {}; + } +} + +/* Converts the body of a numeric character reference (decimal digits, or x followed by hex digits) to UTF-8. Returns an empty string when the value is 0, unparsable or above U+10FFFF. */ static string esc_char(string_view s) { + uint32_t c = 0; + from_chars_result fcr; + + if (s.starts_with("x")) + fcr = from_chars(s.data() + 1, s.data() + s.length(), c, 16); + else + fcr = from_chars(s.data(), s.data() + s.length(), c); + + if (c == 0 || c > 0x10ffff) + return ""; + + if (c < 0x80) + /* count-and-char constructor: brace initialisation picked the initializer-list constructor and appended a stray 0x01 byte */ return string(1, (char)c); + else if (c < 0x800) { + char t[2]; + + t[0] = (char)(0xc0 | (c >> 6)); + t[1] = (char)(0x80 | (c & 0x3f)); + + return string{string_view(t, 2)}; + } else if (c < 0x10000) { + char t[3]; + + t[0] = (char)(0xe0 | (c >> 12)); + t[1] = (char)(0x80 | ((c >> 6) & 0x3f)); + t[2] = (char)(0x80 | (c & 0x3f)); + + return string{string_view(t, 3)}; + } else { + char t[4]; + + t[0] = (char)(0xf0 | (c >> 18)); + t[1] = (char)(0x80 | ((c >> 12) & 0x3f)); + t[2] = (char)(0x80 | ((c >> 6) & 0x3f)); + t[3] = (char)(0x80 | (c & 0x3f)); + + return string{string_view(t, 4)}; + } +} + +/* Returns the viewed text with the five predefined XML entities and numeric character references replaced; an ampersand that starts none of them is kept literally. */ string xml_enc_string_view::decode() const { + auto v = sv; + string s; + + s.reserve(v.length()); + + while (!v.empty()) { + if (v.front() == '&') { + v.remove_prefix(1); + + if (v.starts_with("amp;")) { + s += "&"; + v.remove_prefix(4); + } else if (v.starts_with("lt;")) { + s += "<"; + v.remove_prefix(3); + } else if (v.starts_with("gt;")) { + s += ">"; + v.remove_prefix(3); + } else if
(v.starts_with("quot;")) { + s += "\""; + v.remove_prefix(5); + } else if (v.starts_with("apos;")) { + s += "'"; + v.remove_prefix(5); + } else if (v.starts_with("#")) { + /* numeric character reference: bit is the text between the hash and the semicolon, or the rest of the input when no semicolon follows */ string_view bit; + + v.remove_prefix(1); + + auto sc = v.find_first_of(';'); + if (sc == string::npos) { + bit = v; + v = ""; + } else { + bit = v.substr(0, sc); + v.remove_prefix(sc + 1); + } + + s += esc_char(bit); + } else + /* not a recognised entity: keep the ampersand and continue with the following characters */ s += "&"; + } else { + s += v.front(); + v.remove_prefix(1); + } + } + + return s; +} + +/* Compares the viewed text with str. The text is entity-decoded first only when it contains an ampersand; otherwise the raw views are compared directly. */ bool xml_enc_string_view::cmp(string_view str) const { + for (auto c : sv) { + if (c == '&') + return decode() == str; + } + + return sv == str; +} diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R index 549a31fc6..b4fc1ab2d 100644 --- a/tests/testthat/helper.R +++ b/tests/testthat/helper.R @@ -1021,7 +1021,8 @@ download_testfiles <- function() { "umlauts.xlsx", "unemployment-nrw202208.xlsx", "update_test.xlsx", - "vml_numbering.xlsx" + "vml_numbering.xlsx", + "openxlsx2_example_pass.xlsx" ) test_path <- testthat::test_path("testfiles") diff --git a/tests/testthat/test-loading_workbook.R b/tests/testthat/test-loading_workbook.R index 8668a9e22..e75c17ea1 100644 --- a/tests/testthat/test-loading_workbook.R +++ b/tests/testthat/test-loading_workbook.R @@ -397,3 +397,15 @@ test_that("sheetView is not switched", { expect_equal(exp, got) }) + +test_that("loading password protected workbooks works", { + + fl <- testfile_path("openxlsx2_example_pass.xlsx") + + wb <- wb_load(fl, password = "openxlsx2") + + exp <- c(10, 9) + got <- dim(wb_to_df(wb)) + expect_equal(exp, got) + +})