Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Speed up Base58 encoding/decoding by 400%/200% via preliminary byte packing #29473

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
197 changes: 113 additions & 84 deletions src/base58.cpp
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
// Copyright (c) 2014-2022 The Bitcoin Core developers
// Copyright (c) 2014-2024 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <base58.h>

#include <hash.h>
#include <uint256.h>
#include <util/strencodings.h>
#include <util/string.h>

#include <assert.h>
#include <string.h>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <vector>
#include <cassert>
#include <cstring>

/** All alphanumeric characters except for "0", "I", "O", and "l" */
static const char* pszBase58 = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
static const int8_t mapBase58[256] = {
// The Base58 character set excluding "0", "I", "O", and "l" for clarity.
static constexpr auto pszBase58 = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
// Each ASCII character and its Base58 value, with -1 indicating an invalid character for Base58.
static constexpr int8_t mapBase58[256] = {
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
Expand All @@ -35,93 +38,119 @@ static const int8_t mapBase58[256] = {
-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
};

[[nodiscard]] static bool DecodeBase58(const char* psz, std::vector<unsigned char>& vch, int max_ret_len)
// Radix of the Base58 alphabet.
static constexpr int base = 58;
// Fixed-point denominator shared by the two logarithm ratios below.
static constexpr int64_t baseScale = 1000;
// log(base)/log(256) in units of 1/baseScale; used to estimate the decoded byte count from a digit count.
static constexpr int64_t log58_256Ratio{733};
// log(256)/log(base) in units of 1/baseScale; used to estimate the encoded digit count from a byte count.
static constexpr int64_t log256_58Ratio{1366};

// How many input symbols are packed into a single 64 bit word before the long division runs:
// bytes per word when encoding, Base58 digits per word when decoding.
static constexpr int encodingBatch{7};
static constexpr int decodingBatch{9};
// base raised to the power decodingBatch, i.e. the radix of one packed decoding word.
static constexpr int64_t decodingPowerOf58 = [] {
    int64_t power{1};
    for (int k{0}; k < decodingBatch; ++k) power *= base;
    return power;
}();

// The ceiling integer division of x by y (y > 0): the smallest integer q with q * y >= x.
// Built from the truncated quotient plus the sign of the remainder instead of
// (x + (y - 1)) / y, so the intermediate sum cannot overflow when x is close to
// INT_MAX and a negative x is still rounded toward +infinity.
static int CeilDiv(const int x, const int y)
{
    // x / y truncates toward zero; a strictly positive remainder means the exact
    // quotient lies above the truncated one, so round up by one.
    return x / y + (x % y > 0 ? 1 : 0);
}

// The floor modulus of x by y: the remainder of x / y moved into [0, y) when it would otherwise be negative.
// Both callers visible in this file pass a positive batch size as y; behaviour for y <= 0 is not exercised here.
static int FloorMod(const int x, const int y)
{
// NOTE(review): the statements from here down to "psz++;" use psz, zeroes and max_ret_len, none of which
// are parameters of this function. They look like removed lines of the previous DecodeBase58 that the
// diff view shows interleaved with the new code - confirm before treating them as part of this body.
// Skip leading spaces.
while (*psz && IsSpace(*psz))
psz++;
// Skip and count leading '1's.
int zeroes = 0;
int length = 0;
while (*psz == '1') {
zeroes++;
if (zeroes > max_ret_len) return false;
psz++;
// The built-in % truncates toward zero, so r takes the sign of x; add y back when it is negative.
const auto r = x % y;
return r < 0 ? r + y : r;
}

// Decodes the NUL-terminated Base58 string `input` into `result`.
// Leading whitespace is skipped, every leading '1' becomes one 0x00 byte, and the remaining digits are
// packed decodingBatch at a time into 64 bit words which are then repeatedly divided by 256.
// Returns false when a character has no mapBase58 value, when a non-space character follows the
// digit run, or when the output would need more than max_ret_len bytes; returns true otherwise.
// NOTE(review): `result` is assigned before validation has finished, so it can already be modified
// when false is returned.
// NOTE(review): lines below that use psz, vch, b256, carry, zeroes, length or it refer to names this
// signature does not declare. They look like the removed previous implementation shown interleaved
// by the diff view - confirm before treating them as part of this body.
[[nodiscard]] static bool DecodeBase58(const char* input, std::vector<unsigned char>& result, const int max_ret_len)
{
// Skip leading whitespace.
while (*input && IsSpace(*input))
++input;

// Count leading '1' characters; each one stands for a leading zero byte of the output.
auto leading{0};
for (; *input == '1'; ++input, ++leading)
if (leading >= max_ret_len) return false;

// Count every remaining non-space character up to the terminator (this also counts characters that
// come after an embedded space; such input is rejected further down).
auto effectiveLength{0};
for (auto p{input}; *p; ++p)
if (!IsSpace(*p)) ++effectiveLength;

// Capacity estimate: digits * log(58)/log(256) in fixed point, plus one.
const auto size = 1 + effectiveLength * log58_256Ratio / baseScale;
result.reserve(leading + static_cast<size_t>(size));
result.assign(leading, 0x00);

// Convert the Base58 string to a 64 bit representation for faster manipulation.
std::vector<int64_t> inputBatched(CeilDiv(effectiveLength, decodingBatch), 0);
// groupOffset pads the digit index so that a short group, if any, lands in the first word.
const auto groupOffset = FloorMod(-effectiveLength, decodingBatch);
for (auto i{0U}; *input && !IsSpace(*input); ++input, ++i) {
const auto digit = mapBase58[static_cast<uint8_t>(*input)];
if (digit == -1) return false;
const auto index = (groupOffset + i) / decodingBatch;
// Append the digit to its word: word = word * 58 + digit.
inputBatched[index] *= base;
inputBatched[index] += digit;
}
// Allocate enough space in big-endian base256 representation.
int size = strlen(psz) * 733 /1000 + 1; // log(58) / log(256), rounded up.
std::vector<unsigned char> b256(size);
// Process the characters.
static_assert(std::size(mapBase58) == 256, "mapBase58.size() should be 256"); // guarantee not out of range
while (*psz && !IsSpace(*psz)) {
// Decode base58 character
int carry = mapBase58[(uint8_t)*psz];
if (carry == -1) // Invalid b58 character
return false;
int i = 0;
for (std::vector<unsigned char>::reverse_iterator it = b256.rbegin(); (carry != 0 || i < length) && (it != b256.rend()); ++it, ++i) {
carry += 58 * (*it);
*it = carry % 256;
carry /= 256;
for (; *input; ++input)
if (!IsSpace(*input)) return false; // Ensure no non-space characters after processing.

// Long division of the packed number by 256; each pass yields the next least significant output byte.
auto resultLength{leading};
for (auto i{0U}; i < inputBatched.size(); ++resultLength) {
int64_t remainder = 0;
for (auto j{i}; j < inputBatched.size(); ++j) { // Calculate next digit, dividing inputBatched
// remainder is below 256 and each word is below decodingPowerOf58, which keeps this sum within int64_t.
const auto accumulator = (remainder * decodingPowerOf58) + inputBatched[j];
inputBatched[j] = accumulator / 256;
remainder = accumulator % 256;
}
assert(carry == 0);
length = i;
if (length + zeroes > max_ret_len) return false;
psz++;
// Refuse to emit a byte that would push the output past max_ret_len.
if (resultLength >= max_ret_len) return false;
result.push_back(remainder);

while (i < inputBatched.size() && inputBatched[i] == 0)
++i; // Skip new leading zeros
}
// Skip trailing spaces.
while (IsSpace(*psz))
psz++;
if (*psz != 0)
return false;
// Skip leading zeroes in b256.
std::vector<unsigned char>::iterator it = b256.begin() + (size - length);
// Copy result into output vector.
vch.reserve(zeroes + (b256.end() - it));
vch.assign(zeroes, 0x00);
while (it != b256.end())
vch.push_back(*(it++));

// Bytes were produced least significant first; flip the part after the leading zero bytes.
std::reverse(result.begin() + leading, result.end());

return true;
}

std::string EncodeBase58(Span<const unsigned char> input)
// Packs the bytes input[start..] into 64 bit words, encodingBatch bytes per word, most significant byte first.
// The words are returned in order; when the byte count is not a multiple of encodingBatch the first
// word is the one that holds fewer bytes.
// NOTE(review): unlike CeilDiv and FloorMod this helper is not declared static - confirm that is intended.
// NOTE(review): lines below that use zeroes, length, size, b58 or carry, or that reassign `input`
// (a const reference here), look like the removed previous EncodeBase58 body shown interleaved by the
// diff view - confirm before treating them as part of this body.
auto BatchInput(const Span<const unsigned char>& input, const int start) -> std::vector<int64_t>
{
// Skip & count leading zeroes.
int zeroes = 0;
int length = 0;
while (input.size() > 0 && input[0] == 0) {
input = input.subspan(1);
zeroes++;
// Number of bytes that actually get packed.
const int effectiveLength = input.size() - start;
std::vector<int64_t> inputBatched(CeilDiv(effectiveLength, encodingBatch), 0);
// Padding that sends a short group to word 0, minus start so the absolute index i can be used directly below.
const int groupOffset = FloorMod(-effectiveLength, encodingBatch) - start;

for (uint32_t i = start; i < input.size(); ++i) {
const int index = (groupOffset + static_cast<int>(i)) / encodingBatch;
// Append the byte to its word: word = (word << 8) | byte.
inputBatched[index] <<= 8;
inputBatched[index] |= input[i];
}
// Allocate enough space in big-endian base58 representation.
int size = input.size() * 138 / 100 + 1; // log(256) / log(58), rounded up.
std::vector<unsigned char> b58(size);
// Process the bytes.
while (input.size() > 0) {
int carry = input[0];
int i = 0;
// Apply "b58 = b58 * 256 + ch".
for (std::vector<unsigned char>::reverse_iterator it = b58.rbegin(); (carry != 0 || i < length) && (it != b58.rend()); it++, i++) {
carry += 256 * (*it);
*it = carry % 58;
carry /= 58;
}

assert(carry == 0);
length = i;
input = input.subspan(1);
return inputBatched;
}

// Encodes `input` as a Base58 string.
// Every leading zero byte becomes one '1'; the remaining bytes are packed by BatchInput into 64 bit
// words and repeatedly divided by 58, each remainder selecting one character from pszBase58.
// NOTE(review): lines below that use b58, size - length, zeroes, str or it refer to names this function
// does not declare. They look like the removed previous implementation shown interleaved by the diff
// view - confirm before treating them as part of this body.
std::string EncodeBase58(const Span<const unsigned char> input)
{
// Count leading zero bytes.
auto leading{0U};
while (leading < input.size() && input[leading] == 0)
++leading;

std::string result;
// Capacity estimate: bytes * log(256)/log(58) in fixed point, plus one (computed from the full input size).
const auto size = 1 + input.size() * log256_58Ratio / baseScale;
result.reserve(leading + static_cast<size_t>(size));
result.assign(leading, '1'); // Fill in leading '1's for each zero byte in input.

// Long division of the packed number by 58; each pass yields the next least significant Base58 digit.
auto inputBatched = BatchInput(input, leading);
for (auto i{0U}; i < inputBatched.size();) {
int64_t remainder{0};
for (auto j{i}; j < inputBatched.size(); ++j) { // Calculate next digit, dividing inputBatched
// Each word holds encodingBatch bytes, so the previous remainder is shifted above those bits before merging.
const auto accumulator = (remainder << (encodingBatch * 8)) | inputBatched[j];
inputBatched[j] = accumulator / base;
remainder = accumulator % base;
}
result += pszBase58[remainder];
while (i < inputBatched.size() && inputBatched[i] == 0)
++i; // Skip new leading zeros
}
// Skip leading zeroes in base58 result.
std::vector<unsigned char>::iterator it = b58.begin() + (size - length);
while (it != b58.end() && *it == 0)
it++;
// Translate the result into a string.
std::string str;
str.reserve(zeroes + (b58.end() - it));
str.assign(zeroes, '1');
while (it != b58.end())
str += pszBase58[*(it++)];
return str;
// Digits were appended least significant first; flip the part after the leading '1's.
std::reverse(result.begin() + leading, result.end());
return result;
}

bool DecodeBase58(const std::string& str, std::vector<unsigned char>& vchRet, int max_ret_len)
Expand Down