From bf42e989bb4ebf37e4e0665d70e3de6ab4ca1caa Mon Sep 17 00:00:00 2001
From: Martin Edenhofer
Date: Sun, 1 Jun 2014 10:29:58 +0200
Subject: [PATCH] Because of mysql inno_db limitations, strip 4 bytes utf8
 chars (e. g. emojis). Unfortunately UTF8mb4 will raise other limitations of
 max varchar and lower index sizes. More details:
 http://pjambet.github.io/blog/emojis-and-mysql/

---
 app/models/application_model.rb |  7 ++++++-
 lib/core_ext/string.rb          | 16 ++++++++++++++++
 test/unit/ticket_test.rb        |  4 +++-
 test/unit/twitter_test.rb       |  6 ++++--
 4 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/app/models/application_model.rb b/app/models/application_model.rb
index 1fc31cf78..c79684762 100644
--- a/app/models/application_model.rb
+++ b/app/models/application_model.rb
@@ -863,9 +863,14 @@ check string/varchar size and cut them if needed
         current_length = attribute[1].to_s.length
         if limit < current_length
           puts "WARNING: cut string because of database length #{self.class.to_s}.#{attribute[0]}(#{limit} but is #{current_length}:#{attribute[1].to_s})"
-          self[attribute[0]] = attribute[1][ 0, limit ]
+          self[ attribute[0] ] = attribute[1][ 0, limit ]
         end
       end
+
+      # strip 4 bytes utf8 chars if needed
+      if column && self[ attribute[0] ]
+        self[attribute[0]] = self[ attribute[0] ].utf8_to_3bytesutf8
+      end
     }
   end
 
diff --git a/lib/core_ext/string.rb b/lib/core_ext/string.rb
index 6ada19ba0..e93564fe2 100644
--- a/lib/core_ext/string.rb
+++ b/lib/core_ext/string.rb
@@ -23,4 +23,20 @@ class String
     camel_cased_word = self.to_s
     camel_cased_word.gsub(/::/, '/').downcase
   end
+
+  # because of mysql inno_db limitations, strip 4 bytes utf8 chars (e. g. emojis)
+  # unfortunately UTF8mb4 will raise other limitations of max varchar and lower index sizes
+  # More details: http://pjambet.github.io/blog/emojis-and-mysql/
+  # returns the string itself (unchanged) if the database adapter is not mysql2
+  def utf8_to_3bytesutf8
+    return self if ActiveRecord::Base.connection_config[:adapter] != 'mysql2'
+    self.each_char.select {|c|
+      if c.bytes.count > 3
+        puts "WARNING: strip out 4 bytes utf8 chars '#{c}' of '#{ self }'"
+        next
+      end
+      c
+    }
+    .join('')
+  end
 end
diff --git a/test/unit/ticket_test.rb b/test/unit/ticket_test.rb
index bff20d047..a153f0269 100644
--- a/test/unit/ticket_test.rb
+++ b/test/unit/ticket_test.rb
@@ -25,13 +25,15 @@ class TicketTest < ActiveSupport::TestCase
       :to => 'some_recipient@example.com',
       :subject => 'some subject',
       :message_id => 'some@id',
-      :body => 'some message',
+      :body => 'some message article_inbound 😍😍😍',
       :internal => false,
       :ticket_article_sender => Ticket::Article::Sender.where(:name => 'Customer').first,
       :ticket_article_type => Ticket::Article::Type.where(:name => 'email').first,
       :updated_by_id => 1,
       :created_by_id => 1,
     )
+    assert_equal( article_inbound.body, 'some message article_inbound 😍😍😍'.utf8_to_3bytesutf8, 'article_inbound.body verify - inbound' )
+
     ticket = Ticket.find(ticket.id)
     assert_equal( ticket.article_count, 1, 'ticket.article_count verify - inbound' )
     assert_equal( ticket.last_contact.to_s, article_inbound.created_at.to_s, 'ticket.last_contact verify - inbound' )
diff --git a/test/unit/twitter_test.rb b/test/unit/twitter_test.rb
index 65a329059..9b4fbd4ec 100644
--- a/test/unit/twitter_test.rb
+++ b/test/unit/twitter_test.rb
@@ -96,7 +96,7 @@ class TwitterTest < ActiveSupport::TestCase
     end
 
     reply_hash = '#weather' + rand(9999).to_s
-    reply_text = '@armin_theo on my side the weather is also nice! ' + reply_hash
+    reply_text = '@armin_theo on my side the weather is also nice! 😍😍😍 ' + reply_hash
     tweet = client.update(
       reply_text,
       {
@@ -109,10 +109,12 @@ class TwitterTest < ActiveSupport::TestCase
     # fetch check system account
     Channel.fetch
 
+    reply_text = reply_text.utf8_to_3bytesutf8
+
     # check if follow up article has been created
     assert_equal( article.ticket.articles.count, 2 )
     reply_article = article.ticket.articles.last
-    assert_equal( reply_article.body, reply_text )
+    assert_equal( reply_article.body, reply_text )
   end