bitcoin · paplorinc · Feb 24, 2024 · Feb 24, 2024 · Feb 24, 2024 · Feb 24, 2024
diff --git a/src/base58.cpp b/src/base58.cpp
@@ -1,22 +1,25 @@
-// Copyright (c) 2014-2022 The Bitcoin Core developers
+// Copyright (c) 2014-2024 The Bitcoin Core developers
 // Distributed under the MIT software license, see the accompanying
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
 
 #include <base58.h>
-
 #include <hash.h>
 #include <uint256.h>
 #include <util/strencodings.h>
 #include <util/string.h>
 
-#include <assert.h>
-#include <string.h>
-
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
 #include <limits>
+#include <vector>
+#include <cassert>
+#include <cstring>
 
-/** All alphanumeric characters except for "0", "I", "O", and "l" */
-static const char* pszBase58 = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
-static const int8_t mapBase58[256] = {
+// The Base58 character set excluding "0", "I", "O", and "l" for clarity.
+static constexpr auto pszBase58 = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
+// Each ASCII character and its Base58 value, with -1 indicating an invalid character for Base58.
+static constexpr int8_t mapBase58[256] = {
     -1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
     -1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
     -1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
@@ -35,93 +38,119 @@ static const int8_t mapBase58[256] = {
     -1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,
 };
 
-[[nodiscard]] static bool DecodeBase58(const char* psz, std::vector<unsigned char>& vch, int max_ret_len)
+static constexpr int base{58};
+static constexpr int64_t baseScale{1000LL};
+static constexpr int64_t log58_256Ratio =  733LL;  // Approximation of log(base)/log(256), scaled by baseScale.
+static constexpr int64_t log256_58Ratio = 1366LL; // Approximation of log(256)/log(base), scaled by baseScale.
+
+// Defines the size of groups that fit into 64 bit batches, processed together for encoding and decoding efficiency.
+static constexpr int encodingBatch = 7;
+static constexpr int decodingBatch = 9;
+static constexpr int64_t decodingPowerOf58 = static_cast<int64_t>(base)*base*base*base*base*base*base*base*base; // pow(base, decodingBatch)
+
+// The ceiling integer division of x by y.
+static int CeilDiv(const int x, const int y)
+{
+    return (x + (y - 1)) / y;
+}
+
+// The floor modulus of x by y, adjusting for negative values.
+static int FloorMod(const int x, const int y)
 {
-    // Skip leading spaces.
-    while (*psz && IsSpace(*psz))
-        psz++;
-    // Skip and count leading '1's.
-    int zeroes = 0;
-    int length = 0;
-    while (*psz == '1') {
-        zeroes++;
-        if (zeroes > max_ret_len) return false;
-        psz++;
+    const auto r = x % y;
+    return r < 0 ? r + y : r;
+}
+
+[[nodiscard]] static bool DecodeBase58(const char* input, std::vector<unsigned char>& result, const int max_ret_len)
+{
+    while (*input && IsSpace(*input))
+        ++input;
+
+    auto leading{0};
+    for (; *input == '1'; ++input, ++leading)
+        if (leading >= max_ret_len) return false;
+
+    auto effectiveLength{0};
+    for (auto p{input}; *p; ++p)
+        if (!IsSpace(*p)) ++effectiveLength;
+
+    const auto size = 1 + effectiveLength * log58_256Ratio / baseScale;
+    result.reserve(leading + static_cast<size_t>(size));
+    result.assign(leading, 0x00);
+
+    // Convert the Base58 string to a 64 bit representation for faster manipulation.
+    std::vector<int64_t> inputBatched(CeilDiv(effectiveLength, decodingBatch), 0);
+    const auto groupOffset = FloorMod(-effectiveLength, decodingBatch);
+    for (auto i{0U}; *input && !IsSpace(*input); ++input, ++i) {
+        const auto digit = mapBase58[static_cast<uint8_t>(*input)];
+        if (digit == -1) return false;
+        const auto index = (groupOffset + i) / decodingBatch;
+        inputBatched[index] *= base;
+        inputBatched[index] += digit;
     }
-    // Allocate enough space in big-endian base256 representation.
-    int size = strlen(psz) * 733 /1000 + 1; // log(58) / log(256), rounded up.
-    std::vector<unsigned char> b256(size);
-    // Process the characters.
-    static_assert(std::size(mapBase58) == 256, "mapBase58.size() should be 256"); // guarantee not out of range
-    while (*psz && !IsSpace(*psz)) {
-        // Decode base58 character
-        int carry = mapBase58[(uint8_t)*psz];
-        if (carry == -1)  // Invalid b58 character
-            return false;
-        int i = 0;
-        for (std::vector<unsigned char>::reverse_iterator it = b256.rbegin(); (carry != 0 || i < length) && (it != b256.rend()); ++it, ++i) {
-            carry += 58 * (*it);
-            *it = carry % 256;
-            carry /= 256;
+    for (; *input; ++input)
+        if (!IsSpace(*input)) return false; // Ensure no non-space characters after processing.
+
+    auto resultLength{leading};
+    for (auto i{0U}; i < inputBatched.size(); ++resultLength) {
+        int64_t remainder = 0;
+        for (auto j{i}; j < inputBatched.size(); ++j) { // Calculate next digit, dividing inputBatched
+            const auto accumulator = (remainder * decodingPowerOf58) + inputBatched[j];
+            inputBatched[j] = accumulator / 256;
+            remainder = accumulator % 256;
         }
-        assert(carry == 0);
-        length = i;
-        if (length + zeroes > max_ret_len) return false;
-        psz++;
+        if (resultLength >= max_ret_len) return false;
+        result.push_back(remainder);
+
+        while (i < inputBatched.size() && inputBatched[i] == 0)
+            ++i; // Skip new leading zeros
     }
-    // Skip trailing spaces.
-    while (IsSpace(*psz))
-        psz++;
-    if (*psz != 0)
-        return false;
-    // Skip leading zeroes in b256.
-    std::vector<unsigned char>::iterator it = b256.begin() + (size - length);
-    // Copy result into output vector.
-    vch.reserve(zeroes + (b256.end() - it));
-    vch.assign(zeroes, 0x00);
-    while (it != b256.end())
-        vch.push_back(*(it++));
+
+    std::reverse(result.begin() + leading, result.end());
+
     return true;
 }
 
-std::string EncodeBase58(Span<const unsigned char> input)
+auto BatchInput(const Span<const unsigned char>& input, const int start) -> std::vector<int64_t>
 {
-    // Skip & count leading zeroes.
-    int zeroes = 0;
-    int length = 0;
-    while (input.size() > 0 && input[0] == 0) {
-        input = input.subspan(1);
-        zeroes++;
+    const int effectiveLength = input.size() - start;
+    std::vector<int64_t> inputBatched(CeilDiv(effectiveLength, encodingBatch), 0);
+    const int groupOffset = FloorMod(-effectiveLength, encodingBatch) - start;
+
+    for (uint32_t i = start; i < input.size(); ++i) {
+        const int index = (groupOffset + static_cast<int>(i)) / encodingBatch;
+        inputBatched[index] <<= 8;
+        inputBatched[index] |= input[i];
     }
-    // Allocate enough space in big-endian base58 representation.
-    int size = input.size() * 138 / 100 + 1; // log(256) / log(58), rounded up.
-    std::vector<unsigned char> b58(size);
-    // Process the bytes.
-    while (input.size() > 0) {
-        int carry = input[0];
-        int i = 0;
-        // Apply "b58 = b58 * 256 + ch".
-        for (std::vector<unsigned char>::reverse_iterator it = b58.rbegin(); (carry != 0 || i < length) && (it != b58.rend()); it++, i++) {
-            carry += 256 * (*it);
-            *it = carry % 58;
-            carry /= 58;
-        }
 
-        assert(carry == 0);
-        length = i;
-        input = input.subspan(1);
+    return inputBatched;
+}
+
+std::string EncodeBase58(const Span<const unsigned char> input)
+{
+    auto leading{0U};
+    while (leading < input.size() && input[leading] == 0)
+        ++leading;
+
+    std::string result;
+    const auto size = 1 + input.size() * log256_58Ratio / baseScale;
+    result.reserve(leading + static_cast<size_t>(size));
+    result.assign(leading, '1'); // Fill in leading '1's for each zero byte in input.
+
+    auto inputBatched = BatchInput(input, leading);
+    for (auto i{0U}; i < inputBatched.size();) {
+        int64_t remainder{0};
+        for (auto j{i}; j < inputBatched.size(); ++j) { // Calculate next digit, dividing inputBatched
+            const auto accumulator = (remainder << (encodingBatch * 8)) | inputBatched[j];
+            inputBatched[j] = accumulator / base;
+            remainder = accumulator % base;
+        }
+        result += pszBase58[remainder];
+        while (i < inputBatched.size() && inputBatched[i] == 0)
+            ++i; // Skip new leading zeros
     }
-    // Skip leading zeroes in base58 result.
-    std::vector<unsigned char>::iterator it = b58.begin() + (size - length);
-    while (it != b58.end() && *it == 0)
-        it++;
-    // Translate the result into a string.
-    std::string str;
-    str.reserve(zeroes + (b58.end() - it));
-    str.assign(zeroes, '1');
-    while (it != b58.end())
-        str += pszBase58[*(it++)];
-    return str;
+    std::reverse(result.begin() + leading, result.end());
+    return result;
 }
 
 bool DecodeBase58(const std::string& str, std::vector<unsigned char>& vchRet, int max_ret_len)