refactor: Model the bech32 charlimit as an Enum

Bech32(m) was defined with a 90 character limit so that certain guarantees for error detection could be made for segwit addresses. However, there is nothing about the encoding scheme itself that requires a limit and in practice bech32(m) has been used without the 90 char limit (e.g. lightning invoices). Further, increasing the character limit doesn't do away with error detection, it simply lessens the guarantees. Model charlimit as an Enum, so that if a different address scheme is using bech32(m), the character limit for that address scheme can be used, rather than always using the 90 charlimit defined for segwit addresses.
bitcoin · May 8, 2024 · 696e0a5 · 696e0a5
1 parent 00ac1b9
commit 696e0a5
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 8 deletions.
diff --git a/src/bech32.cpp b/src/bech32.cpp
@@ -370,11 +370,12 @@ std::string Encode(Encoding encoding, const std::string& hrp, const data& values
 }
 
 /** Decode a Bech32 or Bech32m string. */
-DecodeResult Decode(const std::string& str) {
+DecodeResult Decode(const std::string& str, CharLimit limit) {
     std::vector<int> errors;
     if (!CheckCharacters(str, errors)) return {};
     size_t pos = str.rfind('1');
-    if (str.size() > 90 || pos == str.npos || pos == 0 || pos + 7 > str.size()) {
+    if (str.size() > limit) return {};
+    if (pos == str.npos || pos == 0 || pos + 7 > str.size()) {
         return {};
     }
     data values(str.size() - 1 - pos);
@@ -397,12 +398,12 @@ DecodeResult Decode(const std::string& str) {
 }
 
 /** Find index of an incorrect character in a Bech32 string. */
-std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str) {
+std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, CharLimit limit) {
     std::vector<int> error_locations{};
 
-    if (str.size() > 90) {
-        error_locations.resize(str.size() - 90);
-        std::iota(error_locations.begin(), error_locations.end(), 90);
+    if (str.size() > limit) {
+        error_locations.resize(str.size() - limit);
+        std::iota(error_locations.begin(), error_locations.end(), static_cast<int>(limit));
         return std::make_pair("Bech32 string too long", std::move(error_locations));
     }
 

diff --git a/src/bech32.h b/src/bech32.h
@@ -28,6 +28,14 @@ enum class Encoding {
     BECH32M, //!< Bech32m encoding as defined in BIP350
 };
 
+/** Character limits for bech32(m) encoded strings. Character limits are how we provide error location guarantees.
+ *  These values should never exceed 2^31 - 1 (max value for a 32-bit int), since there are places where we may need to
+ *  convert the CharLimit::VALUE to an int. In practice, this should never happen since this CharLimit applies to an address encoding
+ *  and we would never encode an address with such a massive value */
+enum CharLimit : size_t {
+    SEGWIT = 90,            //!< BIP173/350 imposed 90 character limit on Bech32(m) encoded addresses. This guarantees finding up to 4 errors
+};
+
 /** Encode a Bech32 or Bech32m string. If hrp contains uppercase characters, this will cause an
  *  assertion error. Encoding must be one of BECH32 or BECH32M. */
 std::string Encode(Encoding encoding, const std::string& hrp, const std::vector<uint8_t>& values);
@@ -43,10 +51,10 @@ struct DecodeResult
 };
 
 /** Decode a Bech32 or Bech32m string. */
-DecodeResult Decode(const std::string& str);
+DecodeResult Decode(const std::string& str, CharLimit limit = CharLimit::SEGWIT);
 
 /** Return the positions of errors in a Bech32 string. */
-std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str);
+std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, CharLimit limit = CharLimit::SEGWIT);
 
 } // namespace bech32