Testing: Speed up Twitter tests by paring down VCR cassette content

This commit reduces the execution time of the specs for
Channel::Driver::Twitter#fetch by a little over 50%.

The #fetch method calls out to the Twitter API
and receives a list of tweets (serialized as JSON),
then converts them to tickets and articles.
Even with HTTP caching (via VCR), this is an expensive operation:
each transaction may contain dozens of tweets to be processed.
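For reference, VCR works by recording each HTTP interaction to a
YAML "cassette" file on the first run and replaying it on every run
thereafter. A minimal sketch of the pattern (the cassette name,
library path, and spec shape below are illustrative, not Zammad's
actual setup):

    require 'vcr'

    VCR.configure do |config|
      config.cassette_library_dir = 'spec/cassettes' # assumed path
      config.hook_into :webmock                      # intercept HTTP requests
    end

    # The first run hits api.twitter.com and records the responses;
    # later runs replay them from the cassette file instead.
    VCR.use_cassette('channel_driver_twitter_fetch') do
      channel.fetch
    end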

This commit reduces the processing burden
by manually editing the tests' VCR cassettes,
removing the vast majority of incoming tweets
and keeping only those necessary to run meaningful tests.
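The principle behind the paring can be sketched as a script, though
the actual edits were made by hand. Everything specific below (the
cassette path, the number of tweets kept, and the assumption that
each recorded response body is a Twitter search result with a
'statuses' array) is illustrative:

    require 'json'
    require 'yaml'

    PATH = 'spec/cassettes/twitter_fetch.yml' # hypothetical cassette
    KEEP = 2                                  # tweets to retain per response

    cassette = YAML.load_file(PATH)

    cassette['http_interactions'].each do |interaction|
      body = interaction.dig('response', 'body', 'string')
      next if body.nil?

      payload = begin
        JSON.parse(body)
      rescue JSON::ParserError
        next # not a JSON response; leave it untouched
      end

      # Twitter's search endpoint returns tweets under the 'statuses' key.
      next unless payload.is_a?(Hash) && payload['statuses'].is_a?(Array)

      # Drop all but the first few tweets to shrink processing time.
      payload['statuses'] = payload['statuses'].first(KEEP)
      interaction['response']['body']['string'] = JSON.generate(payload)
    end

    File.write(PATH, cassette.to_yaml)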

The result is a set of tests for the #fetch method that,
on one reference machine, now takes 65 seconds instead of 140.
As an ancillary benefit, the total size of the associated
VCR cassette directory has been reduced from 16MB to under 5MB.
Authored by Ryan Lue on 2020-03-13 01:37:05 +08:00; committed by Thorsten Eckel
parent 2c0b858312
commit 227b2b25b3
16 changed files with 8,669 additions and 97,693 deletions

@@ -821,7 +821,7 @@ RSpec.describe Channel::Driver::Twitter do
     context 'with search term configured (at .options[:sync][:search])' do
       it 'creates an article for each recent tweet' do
         expect { channel.fetch }
-          .to change(Ticket, :count).by(8)
+          .to change(Ticket, :count).by(2)

         expect(Ticket.last.attributes).to include(
           'title' => "Come and join our team to bring Zammad even further forward! It's gonna be ama...",
@@ -831,115 +831,87 @@ RSpec.describe Channel::Driver::Twitter do
         )
       end
+
+      context 'for responses to other tweets' do
+        let(:thread) do
+          Ticket.joins(articles: :type).where(ticket_article_types: { name: 'twitter status' })
+                .group('tickets.id').having(
+                  case ActiveRecord::Base.connection_config[:adapter]
+                  when 'mysql2'
+                    'COUNT("ticket_articles.*") > 1'
+                  when 'postgresql'
+                    'COUNT(ticket_articles.*) > 1'
+                  end
+                ).first
+        end
+
+        it 'creates articles for parent tweets as well' do
+          channel.fetch
+
+          expect(thread.articles.last.body).to match(/zammad/i)       # search result
+          expect(thread.articles.first.body).not_to match(/zammad/i)  # parent tweet
+        end
+      end
+
+      context 'and "track_retweets" option' do
+        context 'is false (default)' do
           it 'skips retweets' do
             expect { channel.fetch }
               .not_to change { Ticket.where('title LIKE ?', 'RT @%').count }.from(0)
           end
-
-          it 'skips tweets 15+ days older than channel itself' do
-            expect { channel.fetch }
-              .not_to change { Ticket.where('title LIKE ?', 'GitHub Trending Archive, 2_ Nov 2018, Ruby. %').count }.from(0)
-          end
         end
+
+        context 'is true' do
+          subject(:channel) { create(:twitter_channel, custom_options: { sync: { track_retweets: true } }) }
+
+          it 'creates an article for each recent tweet/retweet' do
+            expect { channel.fetch }
+              .to change { Ticket.where('title LIKE ?', 'RT @%').count }.by(1)
+              .and change(Ticket, :count).by(3)
+          end
+        end
+      end
+
+      context 'and "import_older_tweets" option (legacy)' do
+        context 'is false (default)' do
+          it 'skips tweets 15+ days older than channel itself' do
+            expect { channel.fetch }
+              .not_to change { Ticket.where('title LIKE ?', 'GitHub Trending Archive, 29 Nov 2018, Ruby. %').count }.from(0)
+          end
+        end
+
+        context 'is true' do
+          subject(:channel) { create(:twitter_channel, :legacy) }
+
+          it 'creates an article for each tweet' do
+            expect { channel.fetch }
+              .to change { Ticket.where('title LIKE ?', 'GitHub Trending Archive, 29 Nov 2018, Ruby. %').count }.by(1)
+              .and change(Ticket, :count).by(3)
+          end
+        end
+      end
+
+      describe 'duplicate handling' do
         context 'when fetched tweets have already been imported' do
           before do
             tweet_ids.each { |tweet_id| create(:ticket_article, message_id: tweet_id) }
           end
 
-          let(:tweet_ids) do
-            [1224440380881428480,
-             1224426978557800449,
-             1224427517869809666,
-             1224427776654135297,
-             1224428510225354753,
-             1223188240078909440,
-             1223273797987508227,
-             1223103807283810304,
-             1223121619561799682,
-             1222872891320143878,
-             1222881209384161283,
-             1222896407524212736,
-             1222237955588227075,
-             1222108036795334657,
-             1222126386334388225,
-             1222109934923460608]
-          end
+          let(:tweet_ids) { [1222126386334388225, 1222109934923460608] } # rubocop:disable Style/NumericLiterals
 
           it 'does not import duplicates' do
             expect { channel.fetch }.not_to change(Ticket::Article, :count)
           end
         end
+      end
 
-      context 'for very common search terms' do
-        subject(:channel) { create(:twitter_channel, custom_options: custom_options) }
-
-        let(:custom_options) do
-          {
-            sync: {
-              search: [
-                {
-                  term: 'coronavirus',
-                  group_id: Group.first.id
-                }
-              ]
-            }
-          }
-        end
-
-        let(:twitter_articles) { Ticket::Article.joins(:type).where(ticket_article_types: { name: 'twitter status' }) }
-
-        it 'stops importing threads after 120 new articles' do
-          channel.fetch
-
-          expect((twitter_articles - Ticket.last.articles).count).to be < 120
-          expect(twitter_articles.count).to be >= 120
-        end
-
-        it 'refuses to import any other tweets for the next 15 minutes' do
-          channel.fetch
-          travel(14.minutes)
-
-          expect { create(:twitter_channel).fetch }
-            .not_to change(Ticket::Article, :count)
-        end
-
-        it 'resumes importing again after 15 minutes' do
-          channel.fetch
-          travel(15.minutes)
-
-          expect { create(:twitter_channel).fetch }
-            .to change(Ticket::Article, :count)
-        end
-      end
-
-      context 'and "track_retweets" option' do
-        subject(:channel) { create(:twitter_channel, custom_options: { sync: { track_retweets: true } }) }
-
-        it 'creates an article for each recent tweet/retweet' do
-          expect { channel.fetch }
-            .to change { Ticket.where('title LIKE ?', 'RT @%').count }
-            .and change(Ticket, :count).by(21)
-        end
-      end
-
-      context 'and "import_older_tweets" option (legacy)' do
-        subject(:channel) { create(:twitter_channel, :legacy) }
-
-        it 'creates an article for each tweet' do
-          expect { channel.fetch }
-            .to change { Ticket.where('title LIKE ?', 'GitHub Trending Archive, 2_ Nov 2018, Ruby. %').count }
-            .and change(Ticket, :count).by(21)
-        end
-      end
 
       describe 'Race condition: when #fetch finds a half-processed, outgoing tweet' do
         subject!(:channel) { create(:twitter_channel, custom_options: custom_options) }
         let(:custom_options) do
           {
             user: {
-              # Must match outgoing tweet author Twitter user ID
-              id: '1205290247124217856',
+              # Must match connected Twitter account's user ID (on Twitter)--
+              # that's how #fetch distinguishes between agents' tweets and other people's.
+              id: '1205290247124217856'
             },
             sync: {
               search: [
@@ -995,6 +967,47 @@ RSpec.describe Channel::Driver::Twitter do
           end
         end
       end
+
+      context 'for very common search terms' do
+        subject(:channel) { create(:twitter_channel, custom_options: custom_options) }
+
+        let(:custom_options) do
+          {
+            sync: {
+              search: [
+                {
+                  term: 'coronavirus',
+                  group_id: Group.first.id
+                }
+              ]
+            }
+          }
+        end
+
+        let(:twitter_articles) { Ticket::Article.joins(:type).where(ticket_article_types: { name: 'twitter status' }) }
+
+        # NOTE: Ordinarily, RSpec examples should be kept as small as possible.
+        # In this case, we bundle these examples together because
+        # separating them would duplicate expensive setup:
+        # even with HTTP caching, this single example takes nearly a minute.
+        it 'imports max. ~120 articles every 15 minutes' do
+          channel.fetch
+
+          expect((twitter_articles - Ticket.last.articles).count).to be <= 120
+          expect(twitter_articles.count).to be > 120
+
+          travel(14.minutes)
+
+          expect { create(:twitter_channel).fetch }
+            .not_to change(Ticket::Article, :count)
+
+          travel(1.minute)
+
+          expect { create(:twitter_channel).fetch }
+            .to change(Ticket::Article, :count)
+        end
+      end
     end
   end
 end

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long