提交 3fe7ca1d 编写于 作者: N Norman Clarke

Replace Unicode.u_unpack with String#codepoints

上级 51648a6f
......@@ -3,7 +3,6 @@
module ActiveSupport #:nodoc:
module Multibyte
autoload :EncodingError, 'active_support/multibyte/exceptions'
autoload :Chars, 'active_support/multibyte/chars'
autoload :Unicode, 'active_support/multibyte/unicode'
......
......@@ -153,7 +153,7 @@ def normalize(form = nil)
# 'é'.length # => 2
# 'é'.mb_chars.decompose.to_s.length # => 3
def decompose
# This diff view shows both sides of the hunk without +/- markers.
# Earlier form: codepoints are obtained through Unicode.u_unpack.
chars(Unicode.decompose(:canonical, Unicode.u_unpack(@wrapped_string)).pack('U*'))
# Replacement form: codepoints are obtained through String#codepoints, then
# canonically decomposed, packed back into a string and wrapped with chars.
chars(Unicode.decompose(:canonical, @wrapped_string.codepoints.to_a).pack('U*'))
end
# Performs composition on all the characters.
......@@ -162,7 +162,7 @@ def decompose
# 'é'.length # => 3
# 'é'.mb_chars.compose.to_s.length # => 2
def compose
# This diff view shows both sides of the hunk without +/- markers.
# Earlier form: codepoints are obtained through Unicode.u_unpack.
chars(Unicode.compose(Unicode.u_unpack(@wrapped_string)).pack('U*'))
# Replacement form: codepoints are obtained through String#codepoints, then
# composed, packed back into a string and wrapped with chars.
chars(Unicode.compose(@wrapped_string.codepoints.to_a).pack('U*'))
end
# Returns the number of grapheme clusters in the string.
......
# encoding: utf-8
module ActiveSupport #:nodoc:
  module Multibyte #:nodoc:
    # Exception signalling that something is wrong with a string's encoding.
    class EncodingError < StandardError
    end
  end
end
\ No newline at end of file
......@@ -61,19 +61,6 @@ def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u
# Splits +string+ into an array holding one integer per Unicode codepoint.
#
# An EncodingError is raised when +string+ is not valid UTF-8.
#
# Example:
#   Unicode.u_unpack('Café') # => [67, 97, 102, 233]
def u_unpack(string)
  string.unpack('U*')
rescue ArgumentError
  # unpack signals malformed input with ArgumentError; re-raise it as an
  # EncodingError carrying a fixed message.
  raise EncodingError, 'malformed UTF-8 character'
end
# Detect whether the codepoint is in a certain character class. Returns +true+ when it's in the specified
# character class and +false+ otherwise. Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
# <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
......@@ -89,7 +76,7 @@ def in_char_class?(codepoint, classes)
# Unicode.g_unpack('क्षि') # => [[2325, 2381], [2359], [2367]]
# Unicode.g_unpack('Café') # => [[67], [97], [102], [233]]
def g_unpack(string)
codepoints = u_unpack(string)
codepoints = string.codepoints.to_a
unpacked = []
pos = 0
marker = 0
......@@ -283,7 +270,7 @@ def tidy_bytes(string, force = false)
def normalize(string, form=nil)
form ||= @default_normalization_form
# See http://www.unicode.org/reports/tr15, Table 1
codepoints = u_unpack(string)
codepoints = string.codepoints.to_a
case form
when :d
reorder_characters(decompose(:canonical, codepoints))
......@@ -299,7 +286,7 @@ def normalize(string, form=nil)
end
def apply_mapping(string, mapping) #:nodoc:
u_unpack(string).map do |codepoint|
string.each_codepoint.map do |codepoint|
cp = database.codepoints[codepoint]
if cp and (ncp = cp.send(mapping)) and ncp > 0
ncp
......
......@@ -72,17 +72,6 @@ def test_consumes_utf8_strings
assert !@proxy_class.consumes?(BYTE_STRING)
end
def test_unpack_utf8_strings
  unicode = ActiveSupport::Multibyte::Unicode
  # u_unpack returns one array entry per codepoint, so length counts codepoints.
  assert_equal 4, unicode.u_unpack(UNICODE_STRING).length
  assert_equal 5, unicode.u_unpack(ASCII_STRING).length
end
def test_unpack_raises_encoding_error_on_broken_strings
  expected_error = ActiveSupport::Multibyte::EncodingError
  # Unpacking BYTE_STRING must raise EncodingError.
  assert_raise(expected_error) { ActiveSupport::Multibyte::Unicode.u_unpack(BYTE_STRING) }
end
def test_concatenation_should_return_a_proxy_class_instance
assert_equal ActiveSupport::Multibyte.proxy_class, ('a'.mb_chars + 'b').class
assert_equal ActiveSupport::Multibyte.proxy_class, ('a'.mb_chars << 'b').class
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册