Improve MDN scrapers

Closes #488.
Closes #572.
pull/570/merge
Thibaut Courouble 8 years ago
parent 1d3abd0c6c
commit 476c69e419

@ -126,7 +126,7 @@ module Docs
(options[:only] ||= []).concat initial_paths + (root_path? ? [root_path] : ['', '/'])
end
options.merge!(additional_options) if respond_to?(:additional_options, true)
options.merge!(additional_options)
options.freeze
end
end
@ -197,18 +197,31 @@ module Docs
@pipeline = nil
end
# Default hook for extra scraper options: returns a new, empty hash on every
# call. Overrides elsewhere in this listing call `super.merge!` on the result,
# which is safe because each call hands back a fresh hash.
def additional_options
{}
end
module FixInternalUrlsBehavior
# Ruby hook run when this module is included into `base`: extends `base` with
# ClassMethods so that module's methods become class-level methods of `base`.
def self.included(base)
base.extend ClassMethods
end
# Ruby hook run when this module is prepended to `base`: prepends ClassMethods
# to the singleton class of `base`, so its class-level methods sit ahead of
# the ones `base` already has and can reach them through `super`.
def self.prepended(base)
class << base
prepend ClassMethods
end
end
module ClassMethods
# NOTE(review): this attr_reader and the explicit method below define the same
# reader. This listing looks like a diff with its +/- markers stripped, so
# presumably only one of the two exists in the real file — confirm.
attr_reader :internal_urls
# Returns the @internal_urls instance variable of the receiver (nil while it
# has not been assigned).
def internal_urls
@internal_urls
end
# Runs the next store_pages implementation in the ancestor chain inside a
# with_internal_urls block, emitting 'info.doc' instrumentation messages
# before and inside that block.
def store_pages(store)
instrument 'info.doc', msg: 'Building internal urls...'
with_internal_urls do
# NOTE(review): 'Building pages...' and 'Continuing...' both appear here;
# presumably one replaced the other in the underlying diff — confirm.
instrument 'info.doc', msg: 'Building pages...'
# NOTE(review): prints @internal_urls straight to stdout instead of going
# through `instrument`; looks like leftover debug output — confirm intent.
puts @internal_urls
instrument 'info.doc', msg: 'Continuing...'
# Bare `super` forwards `store` unchanged.
super
end
end
@ -226,7 +239,7 @@ module Docs
# Builds the pages and returns an array with an entry for each page whose
# :entries value is present.
def fetch_internal_urls
result = []
build_pages do |page|
# NOTE(review): two appends sit back to back — one derived from
# page[:response_url] via base_url.subpath_to, one taken from page[:subpath].
# This listing looks like a diff with its +/- markers stripped, so presumably
# only one of them exists in the real file — confirm which.
result << base_url.subpath_to(page[:response_url]) if page[:entries].present?
result << page[:subpath] if page[:entries].present?
end
result
end
@ -240,16 +253,15 @@ module Docs
# When the class has internal_urls: supplies an :only set built from them,
# sets only_patterns, skip, skip_patterns and skip_links to nil, and sets
# fixed_internal_urls to true. Otherwise falls back to the no-extra-options
# case.
# NOTE(review): the `{` / `}` / `{}` lines next to `super.merge! \` / `super`
# look like removed and added diff lines interleaved; the body is not valid
# Ruby as shown. Presumably the real method is either the hash-literal form or
# the `super.merge!` form — confirm against the actual file.
def additional_options
if self.class.internal_urls
{
super.merge! \
only: self.class.internal_urls.to_set,
only_patterns: nil,
skip: nil,
skip_patterns: nil,
skip_links: nil,
fixed_internal_urls: true
}
else
{}
super
end
end

@ -106,13 +106,21 @@ module Docs
base.extend ClassMethods
end
# Ruby hook run when this module is prepended to `base`: prepends ClassMethods
# to the singleton class of `base`, so its class-level methods sit ahead of
# the ones `base` already has and can reach them through `super`.
def self.prepended(base)
class << base
prepend ClassMethods
end
end
module ClassMethods
# NOTE(review): this attr_reader and the explicit method below define the same
# reader. This listing looks like a diff with its +/- markers stripped, so
# presumably only one of the two exists in the real file — confirm.
attr_reader :redirections
# Returns the @redirections instance variable of the receiver (nil while it
# has not been assigned).
def redirections
@redirections
end
# Runs the next store_pages implementation in the ancestor chain inside a
# with_redirections block, emitting 'info.doc' instrumentation messages
# before and inside that block.
def store_pages(store)
instrument 'info.doc', msg: 'Fetching redirections...'
with_redirections do
# NOTE(review): 'Building pages...' and 'Continuing...' both appear here;
# presumably one replaced the other in the underlying diff — confirm.
instrument 'info.doc', msg: 'Building pages...'
instrument 'info.doc', msg: 'Continuing...'
# Bare `super` forwards `store` unchanged.
super
end
end
@ -145,7 +153,7 @@ module Docs
end
# Supplies the class-level redirections under the :redirections option key.
# NOTE(review): as listed, the hash literal on the first body line is
# evaluated and discarded, and the method returns the inherited options merged
# with :redirections. Presumably the literal is the removed line of the
# underlying diff — confirm.
def additional_options
{ redirections: self.class.redirections }
super.merge! redirections: self.class.redirections
end
end
end

@ -3,9 +3,12 @@
module Docs
class InternalUrlsFilter < Filter
# Filter entry point: records the current subpath in result[:subpath], then,
# unless skip_links? is true, rewrites the links and returns doc.
def call
result[:subpath] = subpath
unless skip_links?
# Follow the links as well as updating them only when follow_links? is true.
follow_links? ? update_and_follow_links : update_links
end
doc
end

@ -1,6 +1,7 @@
module Docs
class Dom < Mdn
include FixRedirectionsBehavior
prepend FixInternalUrlsBehavior
prepend FixRedirectionsBehavior
self.name = 'DOM'
self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/API'

@ -1,6 +1,7 @@
module Docs
class Javascript < Mdn
include FixRedirectionsBehavior
prepend FixInternalUrlsBehavior
prepend FixRedirectionsBehavior
self.name = 'JavaScript'
self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference'

@ -1,6 +1,7 @@
module Docs
class Svg < Mdn
include FixRedirectionsBehavior
prepend FixInternalUrlsBehavior
prepend FixRedirectionsBehavior
self.name = 'SVG'
self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/SVG'

Loading…
Cancel
Save