Replace Unicode.u_unpack with String#codepoints

3fe7ca1d · Norman Clarke · 51648a6f · 3fe7ca1d · 3fe7ca1d · 51648a6f
5 changed files
--- a/activesupport/lib/active_support/multibyte.rb
+++ b/activesupport/lib/active_support/multibyte.rb
@@ -3,7 +3,6 @@

 module ActiveSupport #:nodoc:
  module Multibyte
-    autoload :EncodingError, 'active_support/multibyte/exceptions'
    autoload :Chars, 'active_support/multibyte/chars'
    autoload :Unicode, 'active_support/multibyte/unicode'


--- a/activesupport/lib/active_support/multibyte/chars.rb
+++ b/activesupport/lib/active_support/multibyte/chars.rb
@@ -153,7 +153,7 @@ def normalize(form = nil)
      #   'é'.length                         # => 2
      #   'é'.mb_chars.decompose.to_s.length # => 3
      def decompose
-        chars(Unicode.decompose(:canonical, Unicode.u_unpack(@wrapped_string)).pack('U*'))
+        chars(Unicode.decompose(:canonical, @wrapped_string.codepoints.to_a).pack('U*'))
      end

      # Performs composition on all the characters.
@@ -162,7 +162,7 @@ def decompose
      #   'é'.length                       # => 3
      #   'é'.mb_chars.compose.to_s.length # => 2
      def compose
-        chars(Unicode.compose(Unicode.u_unpack(@wrapped_string)).pack('U*'))
+        chars(Unicode.compose(@wrapped_string.codepoints.to_a).pack('U*'))
      end

      # Returns the number of grapheme clusters in the string.

--- a/activesupport/lib/active_support/multibyte/exceptions.rb
+++ b/activesupport/lib/active_support/multibyte/exceptions.rb
-# encoding: utf-8
-
-module ActiveSupport #:nodoc:
-  module Multibyte #:nodoc:
-    # Raised when a problem with the encoding was found.
-    class EncodingError < StandardError; end
-  end
-end
\ No newline at end of file
--- a/activesupport/lib/active_support/multibyte/unicode.rb
+++ b/activesupport/lib/active_support/multibyte/unicode.rb
@@ -61,19 +61,6 @@ def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
      TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
      LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u

-      # Unpack the string at codepoints boundaries. Raises an EncodingError when the encoding of the string isn't
-      # valid UTF-8.
-      #
-      # Example:
-      #   Unicode.u_unpack('Café') # => [67, 97, 102, 233]
-      def u_unpack(string)
-        begin
-          string.unpack 'U*'
-        rescue ArgumentError
-          raise EncodingError, 'malformed UTF-8 character'
-        end
-      end
-
      # Detect whether the codepoint is in a certain character class. Returns +true+ when it's in the specified
      # character class and +false+ otherwise. Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
      # <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
@@ -89,7 +76,7 @@ def in_char_class?(codepoint, classes)
      #   Unicode.g_unpack('क्षि') # => [[2325, 2381], [2359], [2367]]
      #   Unicode.g_unpack('Café') # => [[67], [97], [102], [233]]
      def g_unpack(string)
-        codepoints = u_unpack(string)
+        codepoints = string.codepoints.to_a
        unpacked = []
        pos = 0
        marker = 0
@@ -283,7 +270,7 @@ def tidy_bytes(string, force = false)
      def normalize(string, form=nil)
        form ||= @default_normalization_form
        # See http://www.unicode.org/reports/tr15, Table 1
-        codepoints = u_unpack(string)
+        codepoints = string.codepoints.to_a
        case form
          when :d
            reorder_characters(decompose(:canonical, codepoints))
@@ -299,7 +286,7 @@ def normalize(string, form=nil)
      end

      def apply_mapping(string, mapping) #:nodoc:
-        u_unpack(string).map do |codepoint|
+        string.each_codepoint.map do |codepoint|
          cp = database.codepoints[codepoint]
          if cp and (ncp = cp.send(mapping)) and ncp > 0
            ncp

--- a/activesupport/test/multibyte_chars_test.rb
+++ b/activesupport/test/multibyte_chars_test.rb
@@ -72,17 +72,6 @@ def test_consumes_utf8_strings
    assert !@proxy_class.consumes?(BYTE_STRING)
  end

-  def test_unpack_utf8_strings
-    assert_equal 4, ActiveSupport::Multibyte::Unicode.u_unpack(UNICODE_STRING).length
-    assert_equal 5, ActiveSupport::Multibyte::Unicode.u_unpack(ASCII_STRING).length
-  end
-
-  def test_unpack_raises_encoding_error_on_broken_strings
-    assert_raise(ActiveSupport::Multibyte::EncodingError) do
-      ActiveSupport::Multibyte::Unicode.u_unpack(BYTE_STRING)
-    end
-  end
-
  def test_concatenation_should_return_a_proxy_class_instance
    assert_equal ActiveSupport::Multibyte.proxy_class, ('a'.mb_chars + 'b').class
    assert_equal ActiveSupport::Multibyte.proxy_class, ('a'.mb_chars << 'b').class