diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb index 51b870de8cdeaeff372d0cff8dbeb982e3df3a5b..d372b0ab1ff00535bb81eb62003929f268d59369 100644 --- a/activesupport/lib/active_support/multibyte/chars.rb +++ b/activesupport/lib/active_support/multibyte/chars.rb @@ -665,6 +665,11 @@ def tidy_bytes(string) def translate_offset(byte_offset) #:nodoc: return nil if byte_offset.nil? return 0 if @wrapped_string == '' + + if @wrapped_string.respond_to?(:force_encoding) + @wrapped_string = @wrapped_string.dup.force_encoding(Encoding::ASCII_8BIT) + end + begin @wrapped_string[0...byte_offset].unpack('U*').length rescue ArgumentError => e diff --git a/activesupport/test/multibyte_chars_test.rb b/activesupport/test/multibyte_chars_test.rb index fc7b5e50819d0490475c566d907ee801f51e9a6f..4ff74abf61cd90e47a0602ba5271318d09923ccf 100644 --- a/activesupport/test/multibyte_chars_test.rb +++ b/activesupport/test/multibyte_chars_test.rb @@ -100,15 +100,14 @@ class MultibyteCharsUTF8BehaviourTest < Test::Unit::TestCase def setup @chars = UNICODE_STRING.dup.mb_chars - # NEWLINE, SPACE, EM SPACE - @whitespace = "\n#{[32, 8195].pack('U*')}" - - # Ruby 1.9 doesn't recognize EM SPACE as whitespace! - if @whitespace.respond_to?(:force_encoding) - @whitespace.slice!(2) - @whitespace.force_encoding(Encoding::UTF_8) + if RUBY_VERSION < '1.9' + # Multibyte supports all kinds of whitespace (i.e. 
NEWLINE, TAB, SPACE, EM SPACE) + @whitespace = "\n\t#{[32, 8195].pack('U*')}" + else + # Ruby 1.9 only supports basic whitespace + @whitespace = "\n\t ".force_encoding(Encoding::UTF_8) end - + @byte_order_mark = [65279].pack('U') end @@ -497,28 +496,27 @@ def test_capitalize_should_be_unicode_aware end def test_limit_should_not_break_on_blank_strings - chars = ''.mb_chars - - assert_equal '', chars.limit(0) - assert_equal '', chars.limit(1) + example = chars('') + assert_equal example, example.limit(0) + assert_equal example, example.limit(1) end def test_limit_should_work_on_a_multibyte_string - chars = UNICODE_STRING.mb_chars + example = chars(UNICODE_STRING) + bytesize = UNICODE_STRING.respond_to?(:bytesize) ? UNICODE_STRING.bytesize : UNICODE_STRING.size - assert_equal UNICODE_STRING, chars.limit(UNICODE_STRING.length) - assert_equal '', chars.limit(0) - assert_equal '', chars.limit(1) - assert_equal 'こ', chars.limit(3) - assert_equal 'こに', chars.limit(6) - assert_equal 'こに', chars.limit(8) - assert_equal 'こにち', chars.limit(9) - assert_equal 'こにちわ', chars.limit(50) + assert_equal UNICODE_STRING, example.limit(bytesize) + assert_equal '', example.limit(0) + assert_equal '', example.limit(1) + assert_equal 'こ', example.limit(3) + assert_equal 'こに', example.limit(6) + assert_equal 'こに', example.limit(8) + assert_equal 'こにち', example.limit(9) + assert_equal 'こにちわ', example.limit(50) end def test_limit_should_work_on_an_ascii_string - ascii = ASCII_STRING.mb_chars - + ascii = chars(ASCII_STRING) assert_equal ASCII_STRING, ascii.limit(ASCII_STRING.length) assert_equal '', ascii.limit(0) assert_equal 'o', ascii.limit(1) @@ -528,12 +526,12 @@ def test_limit_should_work_on_an_ascii_string end def test_limit_should_keep_under_the_specified_byte_limit - chars = UNICODE_STRING.mb_chars + example = chars(UNICODE_STRING) (1..UNICODE_STRING.length).each do |limit| - assert chars.limit(limit).to_s.length <= limit + assert example.limit(limit).to_s.length <= limit end end - + def 
test_composition_exclusion_is_set_up_properly # Normalization of DEVANAGARI LETTER QA breaks when composition exclusion isn't used correctly qa = [0x915, 0x93c].pack('U*') @@ -647,9 +645,9 @@ def string_from_classes(classes) class MultibyteInternalsTest < ActiveSupport::TestCase include MultibyteTestHelpers - + test "Chars translates a character offset to a byte offset" do - chars = "Puisque c'était son erreur, il m'a aidé".mb_chars + example = chars("Puisque c'était son erreur, il m'a aidé") [ [0, 0], [3, 3], @@ -657,7 +655,7 @@ class MultibyteInternalsTest < ActiveSupport::TestCase [14, 13], [41, 39] ].each do |byte_offset, character_offset| - assert_equal character_offset, chars.send(:translate_offset, byte_offset), + assert_equal character_offset, example.send(:translate_offset, byte_offset), "Expected byte offset #{byte_offset} to translate to #{character_offset}" end end