Add two-pass redirection rewriter

... to avoid having to maintain huge lists of redirects. This works by doing a first pass to detect which internal URLs are redirected and where to, followed by a second (normal) pass that rewrites all of these URLs (links) to point to their final destination. There's a bit of monkey-patching I'm not proud of, but this works(tm).
pull/200/head
Thibaut 10 years ago
parent 87763ac07a
commit 018628ea7d

@ -100,6 +100,7 @@ module Docs
(options[:only] ||= []).concat initial_paths + (root_path? ? [root_path] : ['', '/']) (options[:only] ||= []).concat initial_paths + (root_path? ? [root_path] : ['', '/'])
end end
options.merge!(additional_options) if respond_to?(:additional_options, true)
options.freeze options.freeze
end end
end end

@ -28,5 +28,66 @@ module Docs
def process_response?(response) def process_response?(response)
response.success? && response.html? && base_url.contains?(response.effective_url) response.success? && response.html? && base_url.contains?(response.effective_url)
end end
# Mixin that adds an optional preliminary pass to the page build: the first
# pass records every page whose effective path differs from the requested
# path, and the resulting mapping is exposed to the regular build through
# #additional_options under the :redirections key.
module FixRedirectionsBehavior
  # Hook run on `include`; installs the class-level API on the host class.
  def self.included(base)
    base.extend(ClassMethods)
  end

  module ClassMethods
    # Opt-in switch: when falsy, store_pages defers straight to super.
    attr_accessor :fix_redirections
    # Mapping produced by the first pass; set only while with_redirections
    # is yielding, nil otherwise.
    attr_reader :redirections

    # Wraps the inherited store_pages. When fix_redirections is set, the
    # redirections are fetched first and the inherited implementation then
    # runs while they are available through #redirections.
    def store_pages(store)
      if fix_redirections
        instrument 'info.doc', msg: 'Fetching redirections...'
        with_redirections do
          instrument 'info.doc', msg: 'Building pages...'
          super
        end
      else
        super
      end
    end

    private

    # Computes the redirections with a fresh instance, yields, and always
    # clears the stored mapping afterwards (even if an error is raised).
    def with_redirections
      begin
        @redirections = new.fetch_redirections
        yield
      ensure
        @redirections = nil
      end
    end
  end

  # Builds the pages with a reduced filter list and returns a Hash mapping
  # each downcased requested path to the effective path it ended up at.
  # Pages whose two paths are equal are left out.
  def fetch_redirections
    redirects = {}
    with_filters('container', 'normalize_urls', 'internal_urls') do
      build_pages do |page|
        requested = page[:response_path]
        effective = page[:response_effective_path]
        redirects[requested.downcase] = effective unless effective == requested
      end
    end
    redirects
  end

  private

  # Adds the requested and effective paths of the response to the hash
  # returned by the inherited implementation (mutated in place).
  def process_response(response)
    page = super
    page.merge!(response_effective_path: response.effective_path, response_path: response.path)
  end

  # Extra option carrying the class-level redirections mapping.
  def additional_options
    mapping = self.class.redirections
    { redirections: mapping }
  end

  # Overwrites the pipeline's @filters with the given filters for the
  # duration of the block, then discards the memoized pipeline.
  def with_filters(*filters)
    begin
      stack = FilterStack.new.tap { |fresh| fresh.push(*filters) }
      pipeline.instance_variable_set(:@filters, stack.to_a.freeze)
      yield
    ensure
      @pipeline = nil
    end
  end
end
include FixRedirectionsBehavior
end end
end end

@ -19,8 +19,12 @@ module Docs
def normalize_url(str) def normalize_url(str)
url = to_absolute_url(str) url = to_absolute_url(str)
fix_url(url)
fix_url_string(url.to_s) while new_url = fix_url(url)
url = new_url
end
url.to_s
rescue URI::InvalidURIError rescue URI::InvalidURIError
'#' '#'
end end
@ -31,18 +35,40 @@ module Docs
end end
def fix_url(url) def fix_url(url)
return unless context[:replace_paths] if context[:redirections]
path = subpath_to(url) url = URL.parse(url)
path = url.path.downcase
if context[:replace_paths].has_key?(path) if context[:redirections].key?(path)
url.path = url.path.sub %r[#{path}\z], context[:replace_paths][path] url.path = context[:redirections][path]
return url
end
end end
end
def fix_url_string(str) if context[:replace_paths]
str = context[:replace_urls][str] || str if context[:replace_urls] url = URL.parse(url)
str = context[:fix_urls].call(str) || str if context[:fix_urls] path = subpath_to(url)
str
if context[:replace_paths].key?(path)
url.path = url.path.sub %r[#{path}\z], context[:replace_paths][path]
return url
end
end
if context[:replace_urls]
url = url.to_s
if context[:replace_urls].key?(url)
return context[:replace_urls][url]
end
end
if context[:fix_urls]
url = url.to_s
orig_url = url.dup
new_url = context[:fix_urls].call(url)
return new_url if new_url != orig_url
end
end end
end end
end end

@ -16,6 +16,10 @@ module Docs
log_diff before.keys, after.keys log_diff before.keys, after.keys
end end
# Logs the :msg entry of the event's payload.
def info(event)
  message = event.payload[:msg]
  log(message)
end
private private
def parse_payload(event) def parse_payload(event)

@ -116,7 +116,7 @@ class NormalizeUrlsFilterTest < MiniTest::Spec
end end
it "calls the block with each absolute url" do it "calls the block with each absolute url" do
context[:fix_urls] = ->(arg) { (@args ||= []).push(arg) } context[:fix_urls] = ->(arg) { (@args ||= []).push(arg); nil }
@body += link_to '/path?#' @body += link_to '/path?#'
filter.call filter.call
assert_equal ['http://example.com/path?#'] * 2, @args assert_equal ['http://example.com/path?#'] * 2, @args
@ -139,4 +139,28 @@ class NormalizeUrlsFilterTest < MiniTest::Spec
refute @called refute @called
end end
end end
context "when context[:redirections] is a hash" do
  before do
    @body = link_to('http://example.com/path?query#frag')
  end

  # The lookup key is the downcased path, so '/PATH' matches the '/path'
  # entry; a path that merely starts with '/path' is left alone.
  it "replaces the path of matching urls, case-insensitive" do
    context[:redirections] = { '/path' => '/fixed' }
    matching_link = link_to('http://example.com/PATH?query#frag')
    other_link = link_to('http://example.com/path/two')
    @body = matching_link + other_link

    rewritten_link = link_to('http://example.com/fixed?query#frag')
    untouched_link = link_to('http://example.com/path/two')
    expected = rewritten_link + untouched_link
    assert_equal expected, filter_output_string
  end

  # The redirection yields '/Fixed'; the :fix_urls callback is then expected
  # to see that rewritten url and normalize both its host and its path.
  it "does a multi pass with context[:fix_urls]" do
    context[:redirections] = { '/path' => '/Fixed' }
    context[:fix_urls] = lambda do |url|
      url.sub! 'example.com', 'example.org'
      url.sub! '/Fixed', '/fixed'
      url
    end
    @body = link_to('http://example.com/path')

    assert_equal link_to('http://example.org/fixed'), filter_output_string
  end
end
end end

Loading…
Cancel
Save