diff --git a/activesupport/lib/active_support/inflector/transliterate.rb b/activesupport/lib/active_support/inflector/transliterate.rb index ec6e9ccb5989a61c8905a40656e8cee182759a4e..ea7161a6bae62eca2d6a446c4c3f54d84f509cfd 100644 --- a/activesupport/lib/active_support/inflector/transliterate.rb +++ b/activesupport/lib/active_support/inflector/transliterate.rb @@ -5,8 +5,9 @@ module ActiveSupport module Inflector - # Replaces non-ASCII characters with an ASCII approximation, or if none - # exists, a replacement character which defaults to "?". + # Replaces non-ASCII characters in a UTF-8 encoded string with an ASCII + # approximation, or if none exists, a replacement character which + # defaults to "?". # # transliterate('Ærøskøbing') # # => "AEroskobing" @@ -56,8 +57,12 @@ module Inflector # # transliterate('Jürgen', locale: :de) # # => "Juergen" + # + # This method requires that `string` be UTF-8 encoded. Passing an argument + # with a different string encoding will raise an ArgumentError. def transliterate(string, replacement = "?", locale: nil) raise ArgumentError, "Can only transliterate strings. Received #{string.class.name}" unless string.is_a?(String) + raise ArgumentError, "Can only transliterate UTF-8 strings. Received string with encoding #{string.encoding}" unless string.encoding == ::Encoding::UTF_8 I18n.transliterate( ActiveSupport::Multibyte::Unicode.tidy_bytes(string).unicode_normalize(:nfc), diff --git a/activesupport/test/transliterate_test.rb b/activesupport/test/transliterate_test.rb index 9e29a93ea0d5ef8bf9746b922143e1d6be03632f..525b4a8559d874d2e509490026ac8e069d7e4d72 100644 --- a/activesupport/test/transliterate_test.rb +++ b/activesupport/test/transliterate_test.rb @@ -57,4 +57,12 @@ def test_transliterate_handles_unknown_object end assert_equal "Can only transliterate strings. 
Received Object", exception.message
   end
+
+  def test_transliterate_handles_non_unicode_strings
+    ascii_8bit_string = "A".b # String#b returns a copy tagged as ASCII-8BIT (binary)
+    exception = assert_raises ArgumentError do
+      ActiveSupport::Inflector.transliterate(ascii_8bit_string) # expected to raise, so there is no return value to compare
+    end
+    assert_equal "Can only transliterate UTF-8 strings. Received string with encoding ASCII-8BIT", exception.message
+  end
 end