Fixes #3164 - Robots.txt should allow indexing Knowledge Base on custom URLs

Author: Mantas Masalskis, 2021-06-01 15:57:06 +00:00 (committed by Thorsten Eckel)
parent 29ecba88aa
commit 9e4fae6360
5 changed files with 121 additions and 6 deletions

@@ -0,0 +1,37 @@
# Copyright (C) 2012-2021 Zammad Foundation, http://zammad-foundation.org/

class RobotsTxtController < ApplicationController
  helper_method :custom_address_uri, :custom_path?, :custom_domain_path?

  def index
    render layout: false, content_type: 'text/plain'
  end

  private

  def knowledge_base
    @knowledge_base ||= KnowledgeBase.active.first
  end

  def custom_address_uri
    @custom_address_uri ||= knowledge_base&.custom_address_uri
  end

  def custom_address_host
    custom_address_uri&.host
  end

  # A custom address without a host (e.g. "/kb") is served on the main domain.
  def custom_path?
    custom_address_uri && custom_address_host.blank?
  end

  # True when the current request targets the Knowledge Base's dedicated domain.
  def custom_domain_path?
    return false if custom_address_uri.blank?

    given_fqdn = request.headers.env['SERVER_NAME']&.downcase
    kb_fqdn    = custom_address_host&.downcase

    given_fqdn == kb_fqdn
  end
end
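
Both predicates hinge on whether the parsed custom address carries a host. A minimal sketch of that distinction, assuming custom_address_uri boils down to standard URI parsing of the configured address (the example values are hypothetical; the exact normalization lives in the KnowledgeBase model and is not shown in this diff):

require 'uri'

# A path-only custom address parses without a host ...
URI.parse('/knowledge_base').host         # => nil      (custom_path?)
# ... while a domain-style address, normalized to a protocol-relative URI,
# carries one.
URI.parse('//kb.com/knowledge_base').host # => "kb.com" (custom_domain_path?)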

@@ -0,0 +1,12 @@
# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file
#
User-Agent: *
<% if custom_path? %>
Allow: <%= custom_address_uri.path %>
Disallow: /
<% elsif custom_domain_path? %>
Allow: /
<% else %>
Allow: /help/
Disallow: /
<% end %>
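
For illustration, with a hypothetical custom path of /knowledge_base the directive portion of the rendered file would be:

User-Agent: *
Allow: /knowledge_base
Disallow: /

so crawlers may index the Knowledge Base while the rest of the instance stays off-limits; on a dedicated Knowledge Base domain, the template opens the whole site instead.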

@@ -0,0 +1,5 @@
# Copyright (C) 2012-2021 Zammad Foundation, http://zammad-foundation.org/

Zammad::Application.routes.draw do
  match '/robots.txt', to: 'robots_txt#index', via: :get
end
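
Rails serves files from public/ ahead of the router, which is why the static robots.txt below is deleted in the same commit: with the file gone, requests for /robots.txt fall through to this route. The mapping can be checked from a Rails console (illustrative, not part of the commit):

Rails.application.routes.recognize_path('/robots.txt')
# => {controller: "robots_txt", action: "index"}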

@@ -1,6 +0,0 @@
# See http://www.robotstxt.org/wc/norobots.html for documentation on how to use the robots.txt file
#
# To ban all spiders from the entire site uncomment the next two lines:
User-Agent: *
Allow: /help/
Disallow: /

@@ -0,0 +1,67 @@
# Copyright (C) 2012-2021 Zammad Foundation, http://zammad-foundation.org/

require 'rails_helper'

RSpec.describe 'RobotsTxt', type: :request do
  context 'when no Knowledge Base exists' do
    before do
      get '/robots.txt'
    end

    it 'returns success' do
      expect(response).to have_http_status(:ok)
    end

    it 'returns text' do
      expect(response.content_type).to eq('text/plain')
    end

    it 'returns robot instructions' do
      expect(response.body).to include('Allow:').and(include('Disallow:'))
    end
  end

  context 'when Knowledge Base exists' do
    let(:custom_address) { nil }
    let(:server_name)    { Setting.get('fqdn') }

    before do
      create(:knowledge_base, custom_address: custom_address)
      get '/robots.txt', headers: { SERVER_NAME: server_name }
    end

    it 'returns robot instructions' do
      expect(response.body).to include('Allow:').and(include('Disallow:'))
    end

    context 'when custom path is configured' do
      let(:custom_address) { '/knowledge_base' }

      it 'returns rules with custom path' do
        expect(response.body).to match(%r{^Allow: /knowledge_base$}).and match(%r{^Disallow: /$})
      end
    end

    context 'when custom domain is configured' do
      let(:custom_address) { 'kb.com/knowledge_base' }

      context 'when requesting main domain' do # rubocop:disable RSpec/NestedGroups
        it 'returns default rules' do
          expect(response.body).to include('Allow:').and(include('Disallow:'))
        end
      end

      context 'when requesting KB domain' do # rubocop:disable RSpec/NestedGroups
        let(:server_name) { 'kb.com' }

        it 'returns domain rules' do
          expect(response.body).to match(%r{^Allow: /$}).and satisfy { |val| !val.match?(%r{^Disallow}) }
        end
      end
    end
  end
end
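
The SERVER_NAME header passed here matches what the controller reads via request.headers.env. An equivalent request-spec idiom would be Rails' host! helper, which pins the host for all subsequent integration requests (a sketch of an alternative, not what this commit uses):

before do
  host! 'kb.com' # integration sessions derive SERVER_NAME from this host
  get '/robots.txt'
end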