Make String#utf8_encode more robust (fixes #2176)
This commit is contained in:
parent
be11e9a1d5
commit
a93f81e20b
2 changed files with 19 additions and 5 deletions
|
@@ -474,10 +474,15 @@ class String
|
|||
end
|
||||
|
||||
def utf8_encode!(**options)
|
||||
return self if (encoding == Encoding::UTF_8) && valid_encoding?
|
||||
return force_encoding('utf-8') if dup.force_encoding('utf-8').valid_encoding?
|
||||
|
||||
input_encoding = viable_encodings(try_first: options[:from]).first
|
||||
return encode!('utf-8', input_encoding) if input_encoding.present?
|
||||
viable_encodings(try_first: options[:from]).each do |e|
|
||||
begin
|
||||
return encode!('utf-8', e)
|
||||
rescue Encoding::UndefinedConversionError
|
||||
nil
|
||||
end
|
||||
end
|
||||
|
||||
case options[:fallback]
|
||||
when :output_to_binary
|
||||
|
@@ -501,6 +506,7 @@ class String
|
|||
[provided, original, detected]
|
||||
.compact
|
||||
.reject { |e| Encoding.find(e) == Encoding::ASCII_8BIT }
|
||||
.reject { |e| Encoding.find(e) == Encoding::UTF_8 }
|
||||
.select { |e| force_encoding(e).valid_encoding? }
|
||||
.tap { force_encoding(original) } # clean up changes from previous line
|
||||
|
||||
|
|
|
@@ -2,7 +2,7 @@ require 'rails_helper'
|
|||
|
||||
RSpec.describe String do
|
||||
describe '#utf8_encode' do
|
||||
context 'for valid, UTF-8-encoded strings' do
|
||||
context 'on valid, UTF-8-encoded strings' do
|
||||
let(:subject) { 'hello' }
|
||||
|
||||
it 'returns an identical copy' do
|
||||
|
@@ -10,9 +10,17 @@ RSpec.describe String do
|
|||
expect(subject.utf8_encode.encoding).to be(subject.encoding)
|
||||
expect(subject.utf8_encode).not_to be(subject)
|
||||
end
|
||||
|
||||
context 'which are incorrectly set to other, technically valid encodings' do
|
||||
let(:subject) { 'ö'.force_encoding('tis-620') }
|
||||
|
||||
it 'sets input encoding to UTF-8 instead of attempting conversion' do
|
||||
expect(subject.utf8_encode).to eq(subject.force_encoding('utf-8'))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'for strings in other encodings' do
|
||||
context 'on strings in other encodings' do
|
||||
let(:subject) { original_string.encode(input_encoding) }
|
||||
|
||||
context 'with no from: option' do
|
||||
|
|
Loading…
Reference in a new issue