From 16ee4b4d1b125bd3edb5c191d58c7afdf6d3232e Mon Sep 17 00:00:00 2001
From: wycats <wycats@gmail.com>
Date: Fri, 4 Jun 2010 11:50:34 -0700
Subject: [PATCH] Small optimization of 1.9 unescape. We should make sure that
 inbound ASCII always means UTF-8. It seems so based on a quick survey of
 common browsers, but let's be sure.

---
 activesupport/lib/active_support/core_ext/uri.rb | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/activesupport/lib/active_support/core_ext/uri.rb b/activesupport/lib/active_support/core_ext/uri.rb
index 28eabd2111..b7fe0a6209 100644
--- a/activesupport/lib/active_support/core_ext/uri.rb
+++ b/activesupport/lib/active_support/core_ext/uri.rb
@@ -6,11 +6,15 @@
   str = "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E" # Ni-ho-nn-go in UTF-8, means Japanese.
 
   parser = URI::Parser.new
+
   unless str == parser.unescape(parser.escape(str))
     URI::Parser.class_eval do
       remove_method :unescape
-      def unescape(str, escaped = @regexp[:ESCAPED])
-        enc = (str.encoding == Encoding::US_ASCII) ? Encoding::UTF_8 : str.encoding
+      def unescape(str, escaped = /%[a-fA-F\d]{2}/)
+        # TODO: Are we sure a US-ASCII-tagged string always carries UTF-8 bytes?
+        # YK: My initial experiments say yes, but let's be sure, please.
+        enc = str.encoding
+        enc = Encoding::UTF_8 if enc == Encoding::US_ASCII
         str.gsub(escaped) { [$&[1, 2].hex].pack('C') }.force_encoding(enc)
       end
     end
-- 
GitLab