2017-02-01 11:48:50 +00:00
|
|
|
# Sanitizes HTML fragments against the tag, attribute and CSS property
# whitelists stored in the Rails configuration.
class HtmlSanitizer

  # URL schemes that can execute script when used inside an attribute value.
  SCRIPT_SCHEME = /(javascript|livescript|vbscript):/i

  # Prefixes that identify a reference to a remote resource.
  REMOTE_PREFIXES = %w(http ftp //).freeze

  private_constant :SCRIPT_SCHEME, :REMOTE_PREFIXES

  # Scrub an HTML fragment.
  #
  # Reads the following settings from Rails.configuration:
  #   html_sanitizer_tags_remove_content      - tags dropped together with their subtree
  #   html_sanitizer_tags_whitelist           - tags that are kept; others are replaced by their children
  #   html_sanitizer_attributes_whitelist     - allowed attributes, keyed by :all and by tag name
  #   html_sanitizer_css_properties_whitelist - allowed properties inside style attributes
  #
  # string - HTML fragment to sanitize
  #
  # Returns the sanitized fragment serialized as a String.
  def self.strict(string)

    # config
    tags_remove_content      = Rails.configuration.html_sanitizer_tags_remove_content
    tags_whitelist           = Rails.configuration.html_sanitizer_tags_whitelist
    attributes_whitelist     = Rails.configuration.html_sanitizer_attributes_whitelist
    css_properties_whitelist = Rails.configuration.html_sanitizer_css_properties_whitelist

    scrubber = Loofah::Scrubber.new do |node|

      # remove tags with subtree
      if tags_remove_content.include?(node.name)
        node.remove

        # the node is detached now, nothing done to it (or below it) can
        # reach the output anymore
        next Loofah::Scrubber::STOP
      end

      # replace tags, keep subtree
      if !tags_whitelist.include?(node.name)
        traversal(node, scrubber)

        # traversal already scrubbed copies of the children and detached the node
        next Loofah::Scrubber::STOP
      end

      # replace tags with a script or remote src, keep subtree
      if node['src']
        src = cleanup(node['src'])
        if src =~ SCRIPT_SCHEME || src.start_with?(*REMOTE_PREFIXES)
          traversal(node, scrubber)
          next Loofah::Scrubber::STOP
        end
      end

      # clean style / only use allowed style properties
      if node['style']
        style = filter_style(node['style'], css_properties_whitelist)
        if style.empty?
          node.delete('style')
        else
          node['style'] = style
        end
      end

      # scan for invalid link content
      %w(href style).each do |attribute_name|
        value = node[attribute_name]
        next if !value
        node.delete(attribute_name) if cleanup(value) =~ SCRIPT_SCHEME
      end

      # remove attributes if not whitelisted
      allowed_for_tag = attributes_whitelist[node.name]
      node.each do |attribute, _value|
        attribute_name = attribute.downcase
        next if attributes_whitelist[:all].include?(attribute_name)
        next if allowed_for_tag && allowed_for_tag.include?(attribute_name)
        node.delete(attribute)
      end

      # prepare links (only remote links that survived the checks above)
      if node['href'] && cleanup(node['href']).start_with?(*REMOTE_PREFIXES)
        node.set_attribute('rel', 'nofollow')
        node.set_attribute('target', '_blank')
      end

      Loofah::Scrubber::CONTINUE
    end

    Loofah.fragment(string).scrub!(scrubber).to_s
  end

  # Replace +node+ by its children: every child is inserted in front of the
  # node (CDATA as plain text, anything else as a freshly scrubbed fragment),
  # then the node itself is removed.
  #
  # node     - node to be replaced
  # scrubber - scrubber applied to the re-parsed children
  def self.traversal(node, scrubber)
    node.children.each do |child|
      if child.instance_of?(Nokogiri::XML::CDATA)

        # use the content of this child only, the content of the whole node
        # would be inserted once per CDATA child
        node.before Nokogiri::XML::Text.new(child.content, node.document)
      else
        node.before Loofah.fragment(child.to_s).scrub!(scrubber)
      end
    end
    node.remove
  end

  # Normalize an attribute value before scanning it for script schemes:
  # downcase it and strip all whitespace, CSS comments (/* */), HTML comments
  # (<!-- -->) and bracketed sections ([...]).
  #
  # string - raw attribute value
  #
  # Returns the normalized String.
  def self.cleanup(string)
    string.downcase
          .gsub(/[[:space:]]/, '') # [[:space:]] already covers \t, \n and \r
          .gsub(%r{/\*.*?\*/}, '')
          .gsub(/<!--.*?-->/, '')
          .gsub(/\[.+?\]/, '')
  end

  # Keep only the declarations of a style attribute whose property name is
  # whitelisted. The value is downcased and tabs/line breaks are dropped
  # before it is split on ";".
  #
  # style                    - raw value of the style attribute
  # css_properties_whitelist - list of allowed (lower case) property names
  #
  # Returns the remaining declarations, each terminated by ";" ('' if none).
  def self.filter_style(style, css_properties_whitelist)
    declarations = style.downcase.gsub(/\t|\n|\r/, '').split(';')
    kept = declarations.select do |declaration|
      property = declaration.split(':').first
      property && css_properties_whitelist.include?(property.strip)
    end
    kept.map { |declaration| "#{declaration};" }.join
  end

  private_class_method :traversal
  private_class_method :cleanup
  private_class_method :filter_style

end
|