From 9e4fae6360cf677e947a86f533ab6cc9c1b334b6 Mon Sep 17 00:00:00 2001
From: Mantas Masalskis
Date: Tue, 1 Jun 2021 15:57:06 +0000
Subject: [PATCH] Fixes #3164 - Robots.txt should allow indexing Knowledge Base on custom URLs

---
 app/controllers/robots_txt_controller.rb | 37 +++++++++++++
 app/views/robots_txt/index.text.erb      | 12 +++++
 config/routes/robots_txt.rb              |  5 ++
 public/robots.txt                        |  6 ---
 spec/requests/robots_txt_spec.rb         | 67 ++++++++++++++++++++++++
 5 files changed, 121 insertions(+), 6 deletions(-)
 create mode 100644 app/controllers/robots_txt_controller.rb
 create mode 100644 app/views/robots_txt/index.text.erb
 create mode 100644 config/routes/robots_txt.rb
 delete mode 100644 public/robots.txt
 create mode 100644 spec/requests/robots_txt_spec.rb

diff --git a/app/controllers/robots_txt_controller.rb b/app/controllers/robots_txt_controller.rb
new file mode 100644
index 000000000..b08accfbe
--- /dev/null
+++ b/app/controllers/robots_txt_controller.rb
@@ -0,0 +1,37 @@
+# Copyright (C) 2012-2021 Zammad Foundation, http://zammad-foundation.org/
+
+class RobotsTxtController < ApplicationController
+
+  helper_method :custom_address_uri, :custom_path?, :custom_domain_path?
+
+  def index
+    render layout: false, content_type: 'text/plain'
+  end
+
+  private
+
+  def knowledge_base
+    @knowledge_base ||= KnowledgeBase.active.first
+  end
+
+  def custom_address_uri
+    @custom_address_uri ||= knowledge_base&.custom_address_uri
+  end
+
+  def custom_address_host
+    custom_address_uri&.host
+  end
+
+  def custom_path?
+    custom_address_uri && custom_address_host.blank?
+  end
+
+  def custom_domain_path?
+    return false if custom_address_uri.blank?
+
+    given_fqdn = request.headers.env['SERVER_NAME']&.downcase
+    kb_fqdn = custom_address_host&.downcase
+
+    given_fqdn == kb_fqdn
+  end
+end
diff --git a/app/views/robots_txt/index.text.erb b/app/views/robots_txt/index.text.erb
new file mode 100644
index 000000000..c00214c4d
--- /dev/null
+++ b/app/views/robots_txt/index.text.erb
@@ -0,0 +1,12 @@
+# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file
+#
+User-Agent: *
+<% if custom_path? %>
+Allow: <%= custom_address_uri.path %>
+Disallow: /
+<% elsif custom_domain_path? %>
+Allow: /
+<% else %>
+Allow: /help/
+Disallow: /
+<% end %>
diff --git a/config/routes/robots_txt.rb b/config/routes/robots_txt.rb
new file mode 100644
index 000000000..b9ee14082
--- /dev/null
+++ b/config/routes/robots_txt.rb
@@ -0,0 +1,5 @@
+# Copyright (C) 2012-2021 Zammad Foundation, http://zammad-foundation.org/
+
+Zammad::Application.routes.draw do
+  match '/robots.txt', to: 'robots_txt#index', via: :get
+end
diff --git a/public/robots.txt b/public/robots.txt
deleted file mode 100644
index c5ec9b638..000000000
--- a/public/robots.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-# See http://www.robotstxt.org/wc/norobots.html for documentation on how to use the robots.txt file
-#
-# To ban all spiders from the entire site uncomment the next two lines:
-User-Agent: *
-Allow: /help/
-Disallow: /
diff --git a/spec/requests/robots_txt_spec.rb b/spec/requests/robots_txt_spec.rb
new file mode 100644
index 000000000..b9496f802
--- /dev/null
+++ b/spec/requests/robots_txt_spec.rb
@@ -0,0 +1,67 @@
+# Copyright (C) 2012-2021 Zammad Foundation, http://zammad-foundation.org/
+
+require 'rails_helper'
+
+RSpec.describe 'RobotsTxt', type: :request do
+
+  context 'when no Knowledge Base exists' do
+
+    before do
+      get '/robots.txt'
+    end
+
+    it 'returns success' do
+      expect(response).to have_http_status(:ok)
+    end
+
+    it 'returns text' do
+      expect(response.content_type).to eq('text/plain')
+    end
+
+    it 'returns robot instructions' do
+      expect(response.body).to include('Allow:').and(include('Disallow:'))
+    end
+  end
+
+  context 'when Knowledge Base exists' do
+
+    let(:custom_address) { nil }
+    let(:server_name) { Setting.get('fqdn') }
+
+    before do
+      create(:knowledge_base, custom_address: custom_address)
+      get '/robots.txt', headers: { SERVER_NAME: server_name }
+    end
+
+    it 'returns robot instructions' do
+      expect(response.body).to include('Allow:').and(include('Disallow:'))
+    end
+
+    context 'when custom path is configured' do
+      let(:custom_address) { '/knowledge_base' }
+
+      it 'returns rules with custom path' do
+        expect(response.body).to match(%r{^Allow: /knowledge_base$}).and match(%r{^Disallow: /$})
+      end
+    end
+
+    context 'when custom domain is configured' do
+      let(:custom_address) { 'kb.com/knowledge_base' }
+
+      context 'when requesting main domain' do # rubocop:disable RSpec/NestedGroups
+
+        it 'returns default rules' do
+          expect(response.body).to include('Allow:').and(include('Disallow:'))
+        end
+      end
+
+      context 'when requesting KB domain' do # rubocop:disable RSpec/NestedGroups
+        let(:server_name) { 'kb.com' }
+
+        it 'returns domain rules' do
+          expect(response.body).to match(%r{^Allow: /$}).and satisfy { |val| !val.match?(%r{^Disallow}) }
+        end
+      end
+    end
+  end
end
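
For reference, a sketch of the rule sets the new template should emit in its
three branches, using the custom_address values from the spec (the actual
output may also contain blank lines left by the untrimmed ERB tags):

  # custom_address '/knowledge_base' (path only, custom_path? is true):
  User-Agent: *
  Allow: /knowledge_base
  Disallow: /

  # custom_address 'kb.com/knowledge_base', requested via host kb.com
  # (custom_domain_path? is true):
  User-Agent: *
  Allow: /

  # any other host, or no active Knowledge Base (fallback):
  User-Agent: *
  Allow: /help/
  Disallow: /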
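The branching above hinges on how KnowledgeBase#custom_address_uri parses the
custom_address setting; that model method is outside this patch. A minimal
stand-in illustrating the assumed behaviour (a hypothetical helper, not
Zammad's implementation):

  require 'uri'

  # Path-only addresses parse to a URI with no host, so custom_path? is true;
  # host-plus-path addresses need a protocol-relative prefix for URI to pick
  # up the host, which custom_domain_path? compares against SERVER_NAME.
  def parse_custom_address(custom_address)
    return nil if custom_address.to_s.empty?

    custom_address.start_with?('/') ? URI.parse(custom_address) : URI.parse("//#{custom_address}")
  end

  parse_custom_address('/knowledge_base').host       # => nil
  parse_custom_address('kb.com/knowledge_base').host # => "kb.com"
  parse_custom_address('kb.com/knowledge_base').path # => "/knowledge_base"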