Fixes #3164 - Robots.txt should allow indexing Knowledge Base on custom URLs

Author: Mantas Masalskis, 2021-06-01 15:57:06 +00:00 (committed by Thorsten Eckel)
parent 29ecba88aa
commit 9e4fae6360
5 changed files with 121 additions and 6 deletions

@@ -0,0 +1,37 @@
# Copyright (C) 2012-2021 Zammad Foundation, http://zammad-foundation.org/

class RobotsTxtController < ApplicationController
  helper_method :custom_address_uri, :custom_path?, :custom_domain_path?

  def index
    render layout: false, content_type: 'text/plain'
  end

  private

  def knowledge_base
    @knowledge_base ||= KnowledgeBase.active.first
  end

  def custom_address_uri
    @custom_address_uri ||= knowledge_base&.custom_address_uri
  end

  def custom_address_host
    custom_address_uri&.host
  end

  # A custom address without a host (e.g. "/kb") is served on the main domain.
  def custom_path?
    custom_address_uri && custom_address_host.blank?
  end

  # True when the current request targets the Knowledge Base's dedicated domain.
  def custom_domain_path?
    return false if custom_address_uri.blank?

    given_fqdn = request.headers.env['SERVER_NAME']&.downcase
    kb_fqdn    = custom_address_host&.downcase

    given_fqdn == kb_fqdn
  end
end
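
Both predicates hinge on whether the parsed custom address carries a host. A minimal sketch of that distinction, assuming custom_address_uri boils down to standard URI parsing of the configured address (the example values are hypothetical; the exact normalization lives in the KnowledgeBase model and is not shown in this diff):

require 'uri'

# A path-only custom address parses without a host ...
URI.parse('/knowledge_base').host         # => nil      (custom_path?)
# ... while a domain-style address, normalized to a protocol-relative URI,
# carries one.
URI.parse('//kb.com/knowledge_base').host # => "kb.com" (custom_domain_path?)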

@@ -0,0 +1,12 @@
# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file
#
User-Agent: *
<% if custom_path? %>
Allow: <%= custom_address_uri.path %>
Disallow: /
<% elsif custom_domain_path? %>
Allow: /
<% else %>
Allow: /help/
Disallow: /
<% end %>
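
For illustration, with a hypothetical custom path of /knowledge_base the directive portion of the rendered file would be:

User-Agent: *
Allow: /knowledge_base
Disallow: /

so crawlers may index the Knowledge Base while the rest of the instance stays off-limits; on a dedicated Knowledge Base domain, the template opens the whole site instead.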

@@ -0,0 +1,5 @@
# Copyright (C) 2012-2021 Zammad Foundation, http://zammad-foundation.org/

Zammad::Application.routes.draw do
  match '/robots.txt', to: 'robots_txt#index', via: :get
end
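
Rails serves files from public/ ahead of the router, which is why the static robots.txt below is deleted in the same commit: with the file gone, requests for /robots.txt fall through to this route. The mapping can be checked from a Rails console (illustrative, not part of the commit):

Rails.application.routes.recognize_path('/robots.txt')
# => {controller: "robots_txt", action: "index"}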

@@ -1,6 +0,0 @@
# See http://www.robotstxt.org/wc/norobots.html for documentation on how to use the robots.txt file
#
# To ban all spiders from the entire site uncomment the next two lines:
User-Agent: *
Allow: /help/
Disallow: /

@@ -0,0 +1,67 @@
# Copyright (C) 2012-2021 Zammad Foundation, http://zammad-foundation.org/

require 'rails_helper'

RSpec.describe 'RobotsTxt', type: :request do
  context 'when no Knowledge Base exists' do
    before do
      get '/robots.txt'
    end

    it 'returns success' do
      expect(response).to have_http_status(:ok)
    end

    it 'returns text' do
      expect(response.content_type).to eq('text/plain')
    end

    it 'returns robot instructions' do
      expect(response.body).to include('Allow:').and(include('Disallow:'))
    end
  end

  context 'when Knowledge Base exists' do
    let(:custom_address) { nil }
    let(:server_name)    { Setting.get('fqdn') }

    before do
      create(:knowledge_base, custom_address: custom_address)
      get '/robots.txt', headers: { SERVER_NAME: server_name }
    end

    it 'returns robot instructions' do
      expect(response.body).to include('Allow:').and(include('Disallow:'))
    end

    context 'when custom path is configured' do
      let(:custom_address) { '/knowledge_base' }

      it 'returns rules with custom path' do
        expect(response.body).to match(%r{^Allow: /knowledge_base$}).and match(%r{^Disallow: /$})
      end
    end

    context 'when custom domain is configured' do
      let(:custom_address) { 'kb.com/knowledge_base' }

      context 'when requesting main domain' do # rubocop:disable RSpec/NestedGroups
        it 'returns default rules' do
          expect(response.body).to include('Allow:').and(include('Disallow:'))
        end
      end

      context 'when requesting KB domain' do # rubocop:disable RSpec/NestedGroups
        let(:server_name) { 'kb.com' }

        it 'returns domain rules' do
          expect(response.body).to match(%r{^Allow: /$}).and satisfy { |val| !val.match?(%r{^Disallow}) }
        end
      end
    end
  end
end
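
The SERVER_NAME header passed here matches what the controller reads via request.headers.env. An equivalent request-spec idiom would be Rails' host! helper, which pins the host for all subsequent integration requests (a sketch of an alternative, not what this commit uses):

before do
  host! 'kb.com' # integration sessions derive SERVER_NAME from this host
  get '/robots.txt'
end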