diff --git a/lib/gitlab/encode.rb b/lib/gitlab/encode.rb
index 1c95a9477bd1d26d5521ffe2b1cfac10abe3f2a3..cee3ace202a83d0efc98f9932a5255b23b47b1ab 100644
--- a/lib/gitlab/encode.rb
+++ b/lib/gitlab/encode.rb
@@ -1,25 +1,33 @@
 # Patch Strings to enable detect_encoding! on views
 require 'charlock_holmes/string'
 module Gitlab
-  module Encode
+  module Encode
     extend self
 
     def utf8 message
+      # return nil if message is nil
       return nil unless message
 
-      detect = CharlockHolmes::EncodingDetector.detect(message) rescue {}
+      # if message is utf-8 encoding, just return it
+      # NOTE: force_encoding retags the caller's string in place (no copy is made)
+      message.force_encoding("utf-8")
+      return message if message.valid_encoding?
 
-      # It's better to default to UTF-8 as sometimes it's wrongly detected as another charset
-      if detect[:encoding] && detect[:confidence] == 100
-        CharlockHolmes::Converter.convert(message, detect[:encoding], 'UTF-8')
-      else
-        message
-      end.force_encoding("utf-8")
+      # if message is not utf-8 encoding, detect and convert it
+      detect = CharlockHolmes::EncodingDetector.detect(message)
+      # guard against a nil result (assumed possible when no charset matches - confirm)
+      if detect && detect[:encoding] && detect[:confidence] > 60
+        message.force_encoding(detect[:encoding])
+        message.encode!("utf-8", detect[:encoding], :undef => :replace, :replace => "", :invalid => :replace)
+      end
 
-      # Prevent app from crash cause of
-      # encoding errors
+      # a bare raise (RuntimeError) hands control to the rescue clause below
+      message.valid_encoding? ? message : raise
+
+      # Prevent the app from crashing because of encoding errors.
+      # detect is still nil if the failure happened before detection ran, so never index it blindly.
     rescue
-      "--broken encoding: #{encoding}"
+      "--broken encoding: #{detect && detect[:encoding]}"
     end
 
     def detect_encoding message