From 64d109e3539ad600f58536d3ecabd2f87b67fd1c Mon Sep 17 00:00:00 2001 From: wycats Date: Sun, 16 May 2010 10:25:55 +0400 Subject: [PATCH] Significantly improved internal encoding heuristics and support. * Default Encoding.default_internal to UTF-8 * Eliminated the use of file-wide magic comments to coerce code evaluated inside the file * Read templates as BINARY, use default_external or template-wide magic comments inside the Template to set the initial encoding * This means that template handlers in Ruby 1.9 will receive Strings encoded in default_internal (UTF-8 by default) * Create a better Exception for encoding issues, and use it when the template source has bytes that are not compatible with the specified encoding * Allow template handlers to opt-into handling BINARY. If they do so, they need to do some of their own manual encoding work * Added a "Configuration Gotchas" section to the intro Rails Guide instructing users to use UTF-8 for everything * Use config.encoding= in Ruby 1.8, and raise if a value that is an invalid $KCODE value is used Also: * Fixed a few tests that were assert() rather than assert_equal() and were caught by Minitest requiring a String for the message * Fixed a test where an assert_select was misformed, also caught by Minitest being more restrictive * Fixed a test where a Rack response was returning a String rather than an Enumerable --- actionpack/lib/action_view.rb | 6 +- actionpack/lib/action_view/template.rb | 201 +++++++++++++++--- actionpack/lib/action_view/template/error.rb | 18 ++ .../lib/action_view/template/handlers/erb.rb | 45 +++- .../lib/action_view/template/resolver.rb | 5 +- actionpack/test/abstract_unit.rb | 4 + .../test/controller/assert_select_test.rb | 4 +- actionpack/test/controller/capture_test.rb | 2 +- actionpack/test/controller/render_test.rb | 4 +- actionpack/test/fixtures/test/content_for.erb | 3 +- .../test/content_for_concatenated.erb | 2 +- .../test/content_for_with_parameter.erb | 2 +- .../test/non_erb_block_content_for.builder | 2 +- actionpack/test/template/render_test.rb | 10 +- actionpack/test/template/template_test.rb | 128 +++++++++++ .../guides/source/getting_started.textile | 21 ++ railties/lib/rails.rb | 1 + .../lib/rails/application/configuration.rb | 10 +- .../test/application/configuration_test.rb | 3 +- 19 files changed, 417 insertions(+), 54 deletions(-) create mode 100644 actionpack/test/template/template_test.rb diff --git a/actionpack/lib/action_view.rb b/actionpack/lib/action_view.rb index 5e3b2ec51b..9f56cca869 100644 --- a/actionpack/lib/action_view.rb +++ b/actionpack/lib/action_view.rb @@ -51,7 +51,9 @@ module ActionView autoload :MissingTemplate, 'action_view/template/error' autoload :ActionViewError, 'action_view/template/error' - autoload :TemplateError, 'action_view/template/error' + autoload :EncodingError, 'action_view/template/error' + autoload :TemplateError, 'action_view/template/error' + autoload :WrongEncodingError, 'action_view/template/error' autoload :TemplateHandler, 'action_view/template' autoload :TemplateHandlers, 'action_view/template' @@ -59,7 +61,7 @@ module ActionView autoload :TestCase, 'action_view/test_case' - ENCODING_FLAG = "#.*coding[:=]\s*(\S+)[ \t]*" + ENCODING_FLAG = '#.*coding[:=]\s*(\S+)[ \t]*' end require 'active_support/i18n' diff --git a/actionpack/lib/action_view/template.rb b/actionpack/lib/action_view/template.rb index ce249e2a96..5d8ac6b115 100644 --- a/actionpack/lib/action_view/template.rb +++ b/actionpack/lib/action_view/template.rb @@ -1,12 +1,89 @@ -# encoding: utf-8 -# This is so that templates compiled in this file are UTF-8 require 'active_support/core_ext/array/wrap' require 'active_support/core_ext/object/blank' +require 'active_support/core_ext/kernel/singleton_class' module ActionView class Template extend ActiveSupport::Autoload + # === Encodings in ActionView::Template + # + # ActionView::Template is one of a few sources of potential + # encoding issues in Rails. This is because the source for + # templates are usually read from disk, and Ruby (like most + # encoding-aware programming languages) assumes that the + # String retrieved through File IO is encoded in the + # default_external encoding. In Rails, the default + # default_external encoding is UTF-8. + # + # As a result, if a user saves their template as ISO-8859-1 + # (for instance, using a non-Unicode-aware text editor), + # and uses characters outside of the ASCII range, their + # users will see diamonds with question marks in them in + # the browser. + # + # To mitigate this problem, we use a few strategies: + # 1. If the source is not valid UTF-8, we raise an exception + # when the template is compiled to alert the user + # to the problem. + # 2. The user can specify the encoding using Ruby-style + # encoding comments in any template engine. If such + # a comment is supplied, Rails will apply that encoding + # to the resulting compiled source returned by the + # template handler. + # 3. In all cases, we transcode the resulting String to + # the default_internal encoding (which defaults + # to UTF-8). + # + # This means that other parts of Rails can always assume + # that templates are encoded in UTF-8, even if the original + # source of the template was not UTF-8. + # + # From a user's perspective, the easiest thing to do is + # to save your templates as UTF-8. If you do this, you + # do not need to do anything else for things to "just work". + # + # === Instructions for template handlers + # + # The easiest thing for you to do is to simply ignore + # encodings. Rails will hand you the template source + # as the default_internal (generally UTF-8), raising + # an exception for the user before sending the template + # to you if it could not determine the original encoding. + # + # For the greatest simplicity, you can support only + # UTF-8 as the default_internal. This means + # that from the perspective of your handler, the + # entire pipeline is just UTF-8. + # + # === Advanced: Handlers with alternate metadata sources + # + # If you want to provide an alternate mechanism for + # specifying encodings (like ERB does via <%# encoding: ... %>), + # you may indicate that you are willing to accept + # BINARY data by implementing self.accepts_binary? + # on your handler. + # + # If you do, Rails will not raise an exception if + # the template's encoding could not be determined, + # assuming that you have another mechanism for + # making the determination. + # + # In this case, make sure you return a String from + # your handler encoded in the default_internal. Since + # you are handling out-of-band metadata, you are + # also responsible for alerting the user to any + # problems with converting the user's data to + # the default_internal. + # + # To do so, simply raise the raise WrongEncodingError + # as follows: + # + # raise WrongEncodingError.new( + # problematic_string, + # expected_encoding + # ) + eager_autoload do autoload :Error autoload :Handler @@ -16,26 +93,22 @@ class Template extend Template::Handlers - attr_reader :source, :identifier, :handler, :virtual_path, :formats + attr_reader :source, :identifier, :handler, :virtual_path, :formats, + :original_encoding - Finalizer = proc do |method_name| + Finalizer = proc do |method_name, mod| proc do - ActionView::CompiledTemplates.module_eval do + mod.module_eval do remove_possible_method method_name end end end def initialize(source, identifier, handler, details) - if source.encoding_aware? && source =~ %r{\A#{ENCODING_FLAG}} - # don't snip off the \n to preserve line numbers - source.sub!(/\A[^\n]*/, '') - source.force_encoding($1).encode - end - - @source = source - @identifier = identifier - @handler = handler + @source = source + @identifier = identifier + @handler = handler + @original_encoding = nil @virtual_path = details[:virtual_path] @method_names = {} @@ -48,7 +121,13 @@ def render(view, locals, &block) # Notice that we use a bang in this instrumentation because you don't want to # consume this in production. This is only slow if it's being listened to. ActiveSupport::Notifications.instrument("!render_template.action_view", :virtual_path => @virtual_path) do - method_name = compile(locals, view) + if view.is_a?(ActionView::CompiledTemplates) + mod = ActionView::CompiledTemplates + else + mod = view.singleton_class + end + + method_name = compile(locals, view, mod) view.send(method_name, locals, &block) end rescue Exception => e @@ -56,7 +135,7 @@ def render(view, locals, &block) e.sub_template_of(self) raise e else - raise Template::Error.new(self, view.assigns, e) + raise Template::Error.new(self, view.respond_to?(:assigns) ? view.assigns : {}, e) end end @@ -81,37 +160,97 @@ def inspect end private - def compile(locals, view) + # Among other things, this method is responsible for properly setting + # the encoding of the source. Until this point, we assume that the + # source is BINARY data. If no additional information is supplied, + # we assume the encoding is the same as Encoding.default_external. + # + # The user can also specify the encoding via a comment on the first + # line of the template (# encoding: NAME-OF-ENCODING). This will work + # with any template engine, as we process out the encoding comment + # before passing the source on to the template engine, leaving a + # blank line in its stead. + # + # Note that after we figure out the correct encoding, we then + # encode the source into Encoding.default_internal. In general, + # this means that templates will be UTF-8 inside of Rails, + # regardless of the original source encoding. + def compile(locals, view, mod) method_name = build_method_name(locals) return method_name if view.respond_to?(method_name) locals_code = locals.keys.map! { |key| "#{key} = local_assigns[:#{key}];" }.join - code = @handler.call(self) - if code.sub!(/\A(#.*coding.*)\n/, '') - encoding_comment = $1 - elsif defined?(Encoding) && Encoding.respond_to?(:default_external) - encoding_comment = "#coding:#{Encoding.default_external}" + if source.encoding_aware? + if source.sub!(/\A#{ENCODING_FLAG}/, '') + encoding = $1 + else + encoding = Encoding.default_external + end + + # Tag the source with the default external encoding + # or the encoding specified in the file + source.force_encoding(encoding) + + # If the original encoding is BINARY, the actual + # encoding is either stored out-of-band (such as + # in ERB <%# %> style magic comments) or missing. + # This is also true if the original encoding is + # something other than BINARY, but it's invalid. + if source.encoding != Encoding::BINARY && source.valid_encoding? + source.encode! + # If the assumed encoding is incorrect, check to + # see whether the handler accepts BINARY. If it + # does, it has another mechanism for determining + # the true encoding of the String. + elsif @handler.respond_to?(:accepts_binary?) && @handler.accepts_binary? + source.force_encoding(Encoding::BINARY) + # If the handler does not accept BINARY, the + # assumed encoding (either the default_external, + # or the explicit encoding specified by the user) + # is incorrect. We raise an exception here. + else + raise WrongEncodingError.new(source, encoding) + end + + # Don't validate the encoding yet -- the handler + # may treat the String as raw bytes and extract + # the encoding some other way end + code = @handler.call(self) + source = <<-end_src def #{method_name}(local_assigns) - _old_virtual_path, @_virtual_path = @_virtual_path, #{@virtual_path.inspect};_old_output_buffer = output_buffer;#{locals_code};#{code} + _old_virtual_path, @_virtual_path = @_virtual_path, #{@virtual_path.inspect};_old_output_buffer = @output_buffer;#{locals_code};#{code} ensure - @_virtual_path, self.output_buffer = _old_virtual_path, _old_output_buffer + @_virtual_path, @output_buffer = _old_virtual_path, _old_output_buffer end end_src - if encoding_comment - source = "#{encoding_comment}\n#{source}" - line = -1 - else - line = 0 + if source.encoding_aware? + # Handlers should return their source Strings in either the + # default_internal or BINARY. If the handler returns a BINARY + # String, we assume its encoding is the one we determined + # earlier, and encode the resulting source in the default_internal. + if source.encoding == Encoding::BINARY + source.force_encoding(Encoding.default_internal) + end + + # In case we get back a String from a handler that is not in + # BINARY or the default_internal, encode it to the default_internal + source.encode! + + # Now, validate that the source we got back from the template + # handler is valid in the default_internal + unless source.valid_encoding? + raise WrongEncodingError.new(@source, Encoding.default_internal) + end end begin - ActionView::CompiledTemplates.module_eval(source, identifier, line) - ObjectSpace.define_finalizer(self, Finalizer[method_name]) + mod.module_eval(source, identifier, 0) + ObjectSpace.define_finalizer(self, Finalizer[method_name, mod]) method_name rescue Exception => e # errors from template code diff --git a/actionpack/lib/action_view/template/error.rb b/actionpack/lib/action_view/template/error.rb index 6866eabf77..d3a53d2147 100644 --- a/actionpack/lib/action_view/template/error.rb +++ b/actionpack/lib/action_view/template/error.rb @@ -4,6 +4,24 @@ module ActionView class ActionViewError < StandardError #:nodoc: end + class EncodingError < StandardError #:nodoc: + end + + class WrongEncodingError < EncodingError #:nodoc: + def initialize(string, encoding) + @string, @encoding = string, encoding + end + + def message + "Your template was not saved as valid #{@encoding}. Please " \ + "either specify #{@encoding} as the encoding for your template " \ + "in your text editor, or mark the template with its " \ + "encoding by inserting the following as the first line " \ + "of the template:\n\n# encoding: .\n\n" \ + "The source of your template was:\n\n#{@string}" + end + end + class MissingTemplate < ActionViewError #:nodoc: attr_reader :path diff --git a/actionpack/lib/action_view/template/handlers/erb.rb b/actionpack/lib/action_view/template/handlers/erb.rb index 17652d6d1f..bbf012ab15 100644 --- a/actionpack/lib/action_view/template/handlers/erb.rb +++ b/actionpack/lib/action_view/template/handlers/erb.rb @@ -5,6 +5,11 @@ module ActionView class OutputBuffer < ActiveSupport::SafeBuffer + def initialize(*) + super + encode! + end + def <<(value) super(value.to_s) end @@ -72,16 +77,50 @@ class ERB < Handler cattr_accessor :erb_implementation self.erb_implementation = Erubis - ENCODING_TAG = Regexp.new("\A(<%#{ENCODING_FLAG}-?%>)[ \t]*") + ENCODING_TAG = Regexp.new("\\A(<%#{ENCODING_FLAG}-?%>)[ \\t]*") + + def self.accepts_binary? + true + end def compile(template) - erb = template.source.gsub(ENCODING_TAG, '') + if template.source.encoding_aware? + # Even though Rails has given us a String tagged with the + # default_internal encoding (likely UTF-8), it is possible + # that the String is actually encoded using a different + # encoding, specified via an ERB magic comment. If the + # String is not actually UTF-8, the regular expression + # engine will (correctly) raise an exception. For now, + # we'll reset the String to BINARY so we can run regular + # expressions against it + template_source = template.source.dup.force_encoding("BINARY") + + # Erubis does not have direct support for encodings. + # As a result, we will extract the ERB-style magic + # comment, give the String to Erubis as BINARY data, + # and then tag the resulting String with the extracted + # encoding later + erb = template_source.gsub(ENCODING_TAG, '') + encoding = $2 + + if !encoding && (template.source.encoding == Encoding::BINARY) + raise WrongEncodingError.new(template_source, Encoding.default_external) + end + end + result = self.class.erb_implementation.new( erb, :trim => (self.class.erb_trim_mode == "-") ).src - result = "#{$2}\n#{result}" if $2 + # If an encoding tag was found, tag the String + # we're returning with that encoding. Otherwise, + # return a BINARY String, which is what ERB + # returns. Note that if a magic comment was + # not specified, we will return the data to + # Rails as BINARY, which will then use its + # own encoding logic to create a UTF-8 String. + result = "\n#{result}".force_encoding(encoding).encode if encoding result end end diff --git a/actionpack/lib/action_view/template/resolver.rb b/actionpack/lib/action_view/template/resolver.rb index a223b3a55f..ef44925951 100644 --- a/actionpack/lib/action_view/template/resolver.rb +++ b/actionpack/lib/action_view/template/resolver.rb @@ -70,7 +70,10 @@ def query(path, exts, formats) Dir[query].reject { |p| File.directory?(p) }.map do |p| handler, format = extract_handler_and_format(p, formats) - Template.new(File.read(p), File.expand_path(p), handler, + + contents = File.open(p, "rb") {|io| io.read } + + Template.new(contents, File.expand_path(p), handler, :virtual_path => path, :format => format) end end diff --git a/actionpack/test/abstract_unit.rb b/actionpack/test/abstract_unit.rb index 89ba0990f1..479e62b23d 100644 --- a/actionpack/test/abstract_unit.rb +++ b/actionpack/test/abstract_unit.rb @@ -12,6 +12,10 @@ ENV['TMPDIR'] = File.join(File.dirname(__FILE__), 'tmp') +if defined?(Encoding.default_internal) + Encoding.default_internal = "UTF-8" +end + require 'test/unit' require 'abstract_controller' require 'action_controller' diff --git a/actionpack/test/controller/assert_select_test.rb b/actionpack/test/controller/assert_select_test.rb index 7012c4c9b0..4f8ad23174 100644 --- a/actionpack/test/controller/assert_select_test.rb +++ b/actionpack/test/controller/assert_select_test.rb @@ -212,12 +212,12 @@ def test_assert_select_text_match assert_nothing_raised { assert_select "div", "bar" } assert_nothing_raised { assert_select "div", /\w*/ } assert_nothing_raised { assert_select "div", :text => /\w*/, :count=>2 } - assert_raise(Assertion) { assert_select "div", :text=>"foo", :count=>2 } + assert_raise(Assertion) { assert_select "div", :text=>"foo", :count=>2 } assert_nothing_raised { assert_select "div", :html=>"bar" } assert_nothing_raised { assert_select "div", :html=>"bar" } assert_nothing_raised { assert_select "div", :html=>/\w*/ } assert_nothing_raised { assert_select "div", :html=>/\w*/, :count=>2 } - assert_raise(Assertion) { assert_select "div", :html=>"foo", :count=>2 } + assert_raise(Assertion) { assert_select "div", :html=>"foo", :count=>2 } end end diff --git a/actionpack/test/controller/capture_test.rb b/actionpack/test/controller/capture_test.rb index d1dbd535c4..47253f22b8 100644 --- a/actionpack/test/controller/capture_test.rb +++ b/actionpack/test/controller/capture_test.rb @@ -68,6 +68,6 @@ def test_proper_block_detection private def expected_content_for_output - "Putting stuff in the title!\n\nGreat stuff!" + "Putting stuff in the title!\nGreat stuff!" end end diff --git a/actionpack/test/controller/render_test.rb b/actionpack/test/controller/render_test.rb index 52049f2a8a..2b1f2a27df 100644 --- a/actionpack/test/controller/render_test.rb +++ b/actionpack/test/controller/render_test.rb @@ -1079,7 +1079,7 @@ def test_rendering_with_conflicting_local_vars def test_action_talk_to_layout get :action_talk_to_layout - assert_equal "Talking to the layout\n\nAction was here!", @response.body + assert_equal "Talking to the layout\nAction was here!", @response.body end # :addressed: @@ -1096,7 +1096,7 @@ def test_template_with_locals def test_yield_content_for assert_not_deprecated { get :yield_content_for } - assert_equal "Putting stuff in the title!\n\nGreat stuff!\n", @response.body + assert_equal "Putting stuff in the title!\nGreat stuff!\n", @response.body end def test_overwritting_rendering_relative_file_with_extension diff --git a/actionpack/test/fixtures/test/content_for.erb b/actionpack/test/fixtures/test/content_for.erb index 0e47ca8c3d..1fb829f54c 100644 --- a/actionpack/test/fixtures/test/content_for.erb +++ b/actionpack/test/fixtures/test/content_for.erb @@ -1,2 +1 @@ -<% content_for :title do %>Putting stuff in the title!<% end %> -Great stuff! \ No newline at end of file +<% content_for :title do -%>Putting stuff in the title!<% end -%>Great stuff! \ No newline at end of file diff --git a/actionpack/test/fixtures/test/content_for_concatenated.erb b/actionpack/test/fixtures/test/content_for_concatenated.erb index fb6b4b05d7..e65f629574 100644 --- a/actionpack/test/fixtures/test/content_for_concatenated.erb +++ b/actionpack/test/fixtures/test/content_for_concatenated.erb @@ -1,3 +1,3 @@ <% content_for :title, "Putting stuff " - content_for :title, "in the title!" %> + content_for :title, "in the title!" -%> Great stuff! \ No newline at end of file diff --git a/actionpack/test/fixtures/test/content_for_with_parameter.erb b/actionpack/test/fixtures/test/content_for_with_parameter.erb index 57aecbac05..aeb6f73ce0 100644 --- a/actionpack/test/fixtures/test/content_for_with_parameter.erb +++ b/actionpack/test/fixtures/test/content_for_with_parameter.erb @@ -1,2 +1,2 @@ -<% content_for :title, "Putting stuff in the title!" %> +<% content_for :title, "Putting stuff in the title!" -%> Great stuff! \ No newline at end of file diff --git a/actionpack/test/fixtures/test/non_erb_block_content_for.builder b/actionpack/test/fixtures/test/non_erb_block_content_for.builder index a94643561c..d539a425a4 100644 --- a/actionpack/test/fixtures/test/non_erb_block_content_for.builder +++ b/actionpack/test/fixtures/test/non_erb_block_content_for.builder @@ -1,4 +1,4 @@ content_for :title do 'Putting stuff in the title!' end -xml << "\nGreat stuff!" +xml << "Great stuff!" diff --git a/actionpack/test/template/render_test.rb b/actionpack/test/template/render_test.rb index d0212024ae..aca96e0a24 100644 --- a/actionpack/test/template/render_test.rb +++ b/actionpack/test/template/render_test.rb @@ -232,13 +232,13 @@ def test_render_with_layout # TODO: Move to deprecated_tests.rb def test_render_with_nested_layout_deprecated assert_deprecated do - assert_equal %(title\n\n\n
column
\n
content
\n), + assert_equal %(title\n\n
column
\n
content
\n), @view.render(:file => "test/deprecated_nested_layout.erb", :layout => "layouts/yield") end end def test_render_with_nested_layout - assert_equal %(title\n\n\n
column
\n
content
\n), + assert_equal %(title\n\n
column
\n
content
\n), @view.render(:file => "test/nested_layout.erb", :layout => "layouts/yield") end @@ -284,7 +284,7 @@ def test_render_utf8_template_with_magic_comment with_external_encoding Encoding::ASCII_8BIT do result = @view.render(:file => "test/utf8_magic.html.erb", :layouts => "layouts/yield") assert_equal Encoding::UTF_8, result.encoding - assert_equal "Русский текст\n\nUTF-8\nUTF-8\nUTF-8\n", result + assert_equal "\nРусский \nтекст\n\nUTF-8\nUTF-8\nUTF-8\n", result end end @@ -302,7 +302,7 @@ def test_render_utf8_template_with_incompatible_external_encoding result = @view.render(:file => "test/utf8.html.erb", :layouts => "layouts/yield") flunk 'Should have raised incompatible encoding error' rescue ActionView::Template::Error => error - assert_match 'invalid byte sequence in Shift_JIS', error.original_exception.message + assert_match 'Your template was not saved as valid Shift_JIS', error.original_exception.message end end end @@ -313,7 +313,7 @@ def test_render_utf8_template_with_partial_with_incompatible_encoding result = @view.render(:file => "test/utf8_magic_with_bare_partial.html.erb", :layouts => "layouts/yield") flunk 'Should have raised incompatible encoding error' rescue ActionView::Template::Error => error - assert_match 'invalid byte sequence in Shift_JIS', error.original_exception.message + assert_match 'Your template was not saved as valid Shift_JIS', error.original_exception.message end end end diff --git a/actionpack/test/template/template_test.rb b/actionpack/test/template/template_test.rb new file mode 100644 index 0000000000..c4a65d84fc --- /dev/null +++ b/actionpack/test/template/template_test.rb @@ -0,0 +1,128 @@ +require "abstract_unit" + +# These are the normal settings that will be set up by Railties +# TODO: Have these tests support other combinations of these values +Encoding.default_internal = "UTF-8" +Encoding.default_external = "UTF-8" + +class TestERBTemplate < ActiveSupport::TestCase + ERBHandler = ActionView::Template::Handlers::ERB + + class Context + def initialize + @output_buffer = "original" + end + + def hello + "Hello" + end + + def partial + ActionView::Template.new( + "<%= @_virtual_path %>", + "partial", + ERBHandler, + :virtual_path => "partial" + ) + end + + def logger + require "logger" + Logger.new(STDERR) + end + + def my_buffer + @output_buffer + end + end + + def new_template(body = "<%= hello %>", handler = ERBHandler, details = {}) + ActionView::Template.new(body, "hello template", ERBHandler, {:virtual_path => "hello"}) + end + + def render(locals = {}) + @template.render(@obj, locals) + end + + def setup + @obj = Context.new + end + + def test_basic_template + @template = new_template + assert_equal "Hello", render + end + + def test_locals + @template = new_template("<%= my_local %>") + assert_equal "I'm a local", render(:my_local => "I'm a local") + end + + def test_restores_buffer + @template = new_template + assert_equal "Hello", render + assert_equal "original", @obj.my_buffer + end + + def test_virtual_path + @template = new_template("<%= @_virtual_path %>" \ + "<%= partial.render(self, {}) %>" \ + "<%= @_virtual_path %>") + assert_equal "hellopartialhello", render + end + + if "ruby".encoding_aware? + def test_resulting_string_is_utf8 + @template = new_template + assert_equal Encoding::UTF_8, render.encoding + end + + def test_no_magic_comment_word_with_utf_8 + @template = new_template("hello \u{fc}mlat") + assert_equal Encoding::UTF_8, render.encoding + assert_equal "hello \u{fc}mlat", render + end + + # This test ensures that if the default_external + # is set to something other than UTF-8, we don't + # get any errors and get back a UTF-8 String. + def test_default_external_works + Encoding.default_external = "ISO-8859-1" + @template = new_template("hello \xFCmlat") + assert_equal Encoding::UTF_8, render.encoding + assert_equal "hello \u{fc}mlat", render + ensure + Encoding.default_external = "UTF-8" + end + + def test_encoding_can_be_specified_with_magic_comment + @template = new_template("# encoding: ISO-8859-1\nhello \xFCmlat") + assert_equal Encoding::UTF_8, render.encoding + assert_equal "\nhello \u{fc}mlat", render + end + + # TODO: This is currently handled inside ERB. The case of explicitly + # lying about encodings via the normal Rails API should be handled + # inside Rails. + def test_lying_with_magic_comment + assert_raises(ActionView::Template::Error) do + @template = new_template("# encoding: UTF-8\nhello \xFCmlat") + render + end + end + + def test_encoding_can_be_specified_with_magic_comment_in_erb + @template = new_template("<%# encoding: ISO-8859-1 %>hello \xFCmlat") + result = render + assert_equal Encoding::UTF_8, render.encoding + assert_equal "hello \u{fc}mlat", render + end + + def test_error_when_template_isnt_valid_utf8 + assert_raises(ActionView::Template::Error, /\xFC/) do + @template = new_template("hello \xFCmlat") + render + end + end + end +end \ No newline at end of file diff --git a/railties/guides/source/getting_started.textile b/railties/guides/source/getting_started.textile index 5da7ff7daa..b7301bff20 100644 --- a/railties/guides/source/getting_started.textile +++ b/railties/guides/source/getting_started.textile @@ -1462,11 +1462,32 @@ Rails also comes with built-in help that you can generate using the rake command * Running +rake doc:guides+ will put a full copy of the Rails Guides in the +doc/guides+ folder of your application. Open +doc/guides/index.html+ in your web browser to explore the Guides. * Running +rake doc:rails+ will put a full copy of the API documentation for Rails in the +doc/api+ folder of your application. Open +doc/api/index.html+ in your web browser to explore the API documentation. +h3. Configuration Gotchas + +The easiest way to work with Rails is to store all external data as UTF-8. If you don't, Ruby libraries and Rails will often be able to convert your native data into UTF-8, but this doesn't always work reliably, so you're better off ensuring that all external data is UTF-8. + +If you have made a mistake in this area, the most common symptom is a black diamond with a question mark inside appearing in the browser. Another common symptom is characters like "ü" appearing instead of "ü". Rails takes a number of internal steps to mitigate common causes of these problems that can be automatically detected and corrected. However, if you have external data that is not stored as UTF-8, it can occasionally result in these kinds of issues that cannot be automatically detected by Rails and corrected. + +Two very common sources of data that are not UTF-8: +* Your text editor: Most text editors (such as Textmate), default to saving files as + UTF-8. If your text editor does not, this can result in special characters that you + enter in your templates (such as é) to appear as a diamond with a question mark inside + in the browser. This also applies to your I18N translation files. + Most editors that do not already default to UTF-8 (such as some versions of + Dreamweaver) offer a way to change the default to UTF-8. Do so. +* Your database. Rails defaults to converting data from your database into UTF-8 at + the boundary. However, if your database is not using UTF-8 internally, it may not + be able to store all characters that your users enter. For instance, if your database + is using Latin-1 internally, and your user enters a Russian, Hebrew, or Japanese + character, the data will be lost forever once it enters the database. If possible, + use UTF-8 as the internal storage of your database. h3. Changelog "Lighthouse ticket":http://rails.lighthouseapp.com/projects/16213-rails-guides/tickets/2 +* May 16, 2010: Added a section on configuration gotchas to address common encoding + problems that people might have * April 30, 2010: Fixes, editing and updating of code samples by "Rohit Arondekar":http://rohitarondekar.com * April 25, 2010: Couple of more minor fixups "Mikel Lindsaar":credits:html#raasdnil * April 1, 2010: Fixed document to validate XHTML 1.0 Strict. "Jaime Iniesta":http://jaimeiniesta.com diff --git a/railties/lib/rails.rb b/railties/lib/rails.rb index 0611b2a9f5..be486ef2ac 100644 --- a/railties/lib/rails.rb +++ b/railties/lib/rails.rb @@ -23,6 +23,7 @@ $KCODE='u' else Encoding.default_external = Encoding::UTF_8 + Encoding.default_internal = Encoding::UTF_8 end module Rails diff --git a/railties/lib/rails/application/configuration.rb b/railties/lib/rails/application/configuration.rb index 9353fbefef..8afe423973 100644 --- a/railties/lib/rails/application/configuration.rb +++ b/railties/lib/rails/application/configuration.rb @@ -1,4 +1,5 @@ require 'active_support/deprecation' +require 'active_support/core_ext/string/encoding' require 'rails/engine/configuration' module Rails @@ -27,8 +28,15 @@ def initialize(*) def encoding=(value) @encoding = value - if defined?(Encoding) && Encoding.respond_to?(:default_external=) + if "ruby".encoding_aware? Encoding.default_external = value + Encoding.default_internal = value + else + $KCODE = value + if $KCODE == "NONE" + raise "The value you specified for config.encoding is " \ + "invalid. The possible values are UTF8, SJIS, or EUC" + end end end diff --git a/railties/test/application/configuration_test.rb b/railties/test/application/configuration_test.rb index dfc4e2359b..c08bd2ef22 100644 --- a/railties/test/application/configuration_test.rb +++ b/railties/test/application/configuration_test.rb @@ -180,7 +180,8 @@ def teardown require "#{app_path}/config/application" unless RUBY_VERSION < '1.9' - assert_equal Encoding.find("utf-8"), Encoding.default_external + assert_equal Encoding::UTF_8, Encoding.default_external + assert_equal Encoding::UTF_8, Encoding.default_internal end end -- GitLab