2018-06-01 11:32:59 +00:00
|
|
|
require 'rails_helper'
|
|
|
|
|
|
|
|
RSpec.describe String do
|
|
|
|
describe '#utf8_encode' do
|
2018-08-09 04:05:09 +00:00
|
|
|
# Strings whose bytes are already valid UTF-8 are expected to come back as a
# fresh, equal copy, regardless of the encoding label they currently carry.
context 'on valid, UTF-8-encoded strings' do
  subject { 'hello' }

  it 'returns an identical copy' do
    expect(subject.utf8_encode).to eq(subject)
    expect(subject.utf8_encode.encoding).to be(subject.encoding)
    # `be` compares object identity: the result must be a new String object.
    expect(subject.utf8_encode).not_to be(subject)
  end

  context 'which are incorrectly set to other, technically valid encodings' do
    # `force_encoding` only relabels the bytes; `dup` keeps that relabelling
    # off the literal itself (safe even with frozen string literals).
    subject { 'ö'.dup.force_encoding('tis-620') }

    it 'sets input encoding to UTF-8 instead of attempting conversion' do
      # Relabel a copy rather than the memoized subject: mutating the subject
      # in place would make this comparison pass vacuously if utf8_encode
      # ever returned its receiver instead of a new string.
      expected = subject.dup.force_encoding('utf-8')

      expect(subject.utf8_encode).to eq(expected)
    end
  end
end
|
|
|
|
|
2018-08-09 04:05:09 +00:00
|
|
|
# Input that genuinely is in a non-UTF-8 encoding. The examples below pin
# three behaviours: detection without `from:`, honouring a usable `from:`,
# and falling back to detection when `from:` cannot decode the bytes.
context 'on strings in other encodings' do
  # Each nested context supplies `original_string` and `input_encoding`;
  # the subject is that text transcoded into the given encoding.
  subject { original_string.encode(input_encoding) }

  context 'with no from: option' do
    let(:original_string) { 'Tschüss!' }
    let(:input_encoding) { Encoding::ISO_8859_2 }

    it 'detects the input encoding' do
      expect(subject.utf8_encode).to eq(original_string)
    end
  end

  context 'with a valid from: option' do
    let(:original_string) { 'Tschüss!' }
    let(:input_encoding) { Encoding::ISO_8859_2 }

    it 'uses the specified input encoding' do
      expect(subject.utf8_encode(from: 'iso-8859-2')).to eq(original_string)
    end

    it 'uses any valid input encoding, even if not correct' do
      # The ISO-8859-2 bytes are read as GB18030 here, so the expected
      # result is different text from the original.
      expect(subject.utf8_encode(from: 'gb18030')).to eq('Tsch黶s!')
    end
  end

  context 'with an invalid from: option' do
    let(:original_string) { '―陈志' }
    let(:input_encoding) { Encoding::GB18030 }

    it 'does not try it' do
      # Sanity check: plain Ruby transcoding of these bytes as GB2312 fails.
      expect { subject.encode('utf-8', 'gb2312') }
        .to raise_error(Encoding::InvalidByteSequenceError)

      # Bare `not_to raise_error` on purpose: passing a specific error class
      # to a negated raise_error lets any *other* exception satisfy the
      # expectation, which RSpec warns about as a false-positive risk.
      expect { subject.utf8_encode(from: 'gb2312') }.not_to raise_error
    end

    it 'uses the detected input encoding instead' do
      expect(subject.utf8_encode(from: 'gb2312')).to eq(original_string)
    end
  end
end
|
2018-11-26 19:47:35 +00:00
|
|
|
|
|
|
|
# Coarse wall-clock guards: each example fails with Timeout::Error when the
# conversion of a large string exceeds its budget.
# NOTE: the subject is built lazily, so constructing and encoding the large
# input happens inside the Timeout block and counts toward the budget.
context 'performance' do
  subject { original_string.encode(input_encoding) }

  context 'with utf8_encode in iso-8859-1' do
    let(:original_string) { 'äöü0' * 999_999 }
    let(:input_encoding) { Encoding::ISO_8859_1 }

    # `from:` matches the real input encoding here.
    it 'completes within one second' do
      Timeout.timeout(1) do
        expect(subject.utf8_encode(from: 'iso-8859-1')).to eq(original_string)
      end
    end
  end

  context 'with utf8_encode in utf-8' do
    let(:original_string) { 'äöü0' * 999_999 }
    let(:input_encoding) { Encoding::UTF_8 }

    # Input is already UTF-8 and `from:` says so.
    it 'completes within one second' do
      Timeout.timeout(1) do
        expect(subject.utf8_encode(from: 'utf-8')).to eq(original_string)
      end
    end
  end

  context 'with utf8_encode in iso-8859-1 and charset detection' do
    # Smaller input and a larger budget than the examples above.
    let(:original_string) { 'äöü0' * 199_999 }
    let(:input_encoding) { Encoding::ISO_8859_1 }

    it 'detects the input encoding' do
      Timeout.timeout(12) do
        # `from: 'utf-8'` does not match the ISO-8859-1 input; presumably this
        # is what forces the charset-detection path named in the context.
        expect(subject.utf8_encode(from: 'utf-8')).to eq(original_string)
      end
    end
  end
end
|
2018-06-01 11:32:59 +00:00
|
|
|
end
|
|
|
|
end
|