Decouple Relay scraper from React scraper

pull/667/merge
Thibaut Courouble 7 years ago
parent 94044b7da8
commit 6d580a9255

@@ -0,0 +1,63 @@
module Docs
  class Relay
    # HTML clean-up filter for scraped Relay documentation pages.
    class CleanHtmlFilter < Filter
      # Rewrites the page markup in place and returns the resulting document.
      #
      # The steps run in a fixed order because later selectors operate on the
      # markup produced by earlier ones.
      def call
        # Restrict the document to the main content container.
        @doc = at_css('.inner-content, article.withtoc')

        at_css('h1').content = 'Relay Documentation' if root_page?

        # Drop navigation links, edit links, anchors' hash markers and rules.
        css('.docs-prevnext', '.hash-link', '.edit-page-link', '.edit-github', 'a.hash', '.edit-page-block', 'a.show', 'a.hide', 'hr').remove

        # Replace each table that wraps a heading with the heading itself.
        css('table h1', 'table h2', 'table h3').each do |heading|
          enclosing = heading
          enclosing = enclosing.parent while enclosing.name != 'table'
          enclosing.replace(heading)
        end

        # Give the anchor's parent an id (unless it already has one), taken
        # from the anchor's name or id.
        css('a.anchor', 'a.hashref').each do |anchor|
          holder = anchor.parent
          holder['id'] ||= anchor['name'] || anchor['id']
        end

        # Turn highlighted blocks into <pre> elements tagged with a language,
        # falling back to 'js' when no data-lang attribute is found.
        css('.highlight').each do |block|
          block.name = 'pre'
          block.css('.gutter').remove
          lang_node = block.at_css('[data-lang]')
          block['data-language'] = (lang_node && lang_node['data-lang']) || 'js'
          block.content = block.content.strip
        end

        css('table.highlighttable').each do |wrapper|
          wrapper.replace(wrapper.at_css('pre.highlight'))
        end

        css('.prism').each do |block|
          block.name = 'pre'
          block['data-language'] = block['class'][/(?<=language\-)(\w+)/]
          # Re-assign the content to itself (presumably to flatten any inner
          # markup down to plain text -- confirm against Nokogiri semantics).
          block.content = block.content
        end

        # Remove leading "Note:" paragraphs from blockquotes.
        css('blockquote > p:first-child').each do |paragraph|
          next unless paragraph.content.strip == 'Note:'
          paragraph.remove
        end

        # Promote the props/methods headings and the prop titles one level.
        css('h3#props', 'h3#methods').each do |heading|
          heading.name = 'h2'
        end
        css('h4.propTitle').each do |heading|
          heading.name = 'h3'
        end

        # Unwrap container elements: move the children in front of the
        # wrapper, then remove the (now empty) wrapper.
        css('> div > div', '> div', 'div > span', '.props', '.prop').each do |wrapper|
          wrapper.before(wrapper.children)
          wrapper.remove
        end

        css('a pre', 'h3 .propType').each do |inline|
          inline.name = 'code'
        end

        css('a[target]').each do |link|
          link.remove_attribute('target')
        end

        doc
      end
    end
  end
end

@@ -0,0 +1,43 @@
module Docs
  class Relay
    # Derives index entries (name, type and extra entries) for Relay pages.
    class EntriesFilter < Docs::EntriesFilter
      # Page name: the text that sits directly inside the <h1>, ignoring any
      # text that belongs to nested elements.
      def get_name
        direct_text = at_css('h1').children.select { |child| child.text? }
        direct_text.map { |child| child.content }.join.strip
      end

      # Page type: the <h3> text of the navigation section that contains the
      # active link.
      def get_type
        active_link = at_css('.nav-docs-section .active, .toc .active')
        nav_section = active_link.ancestors('.nav-docs-section, section').first
        nav_section.at_css('h3').content.strip
      end

      # Extra entries collected from the page.
      #
      # Returns an array holding, in order:
      # * [name, id, 'Reference'] for every <code> inside an h3/h4 of
      #   .inner-content (the heading also receives the generated id);
      # * [name, id] for every in-page link listed in .apiIndex.
      def additional_entries
        reference_entries = css('.inner-content h3 code, .inner-content h4 code').map do |code|
          # Strip '#', parentheses and any "word:" prefix from the label.
          label = code.content.remove(%r{[#\(\)]}).remove(%r{\w+\:}).strip
          anchor = label.parameterize
          code.parent['id'] = anchor
          [label, anchor, 'Reference']
        end

        api_entries = []
        css('.apiIndex a pre').each do |pre|
          href = pre.parent['href']
          # Only links that point inside the current page are indexed.
          next unless href.start_with?('#')

          anchor = href.remove('#')
          signature = pre.content.strip
          # Static members are joined with '.', everything else with '#'.
          separator = signature.start_with?('static') ? '.' : '#'
          signature = signature.remove(%r{(abstract|static) }).sub(%r{\(.*\)}, '()')
          api_entries << [self.name + separator + signature, anchor]
        end

        reference_entries + api_entries
      end
    end
  end
end

@@ -1,6 +1,6 @@
module Docs module Docs
class Relay < React class Relay < UrlScraper
self.type = 'react' self.type = 'simple'
self.release = '1.4.1' self.release = '1.4.1'
self.base_url = 'https://facebook.github.io/relay/docs/' self.base_url = 'https://facebook.github.io/relay/docs/'
self.root_path = 'getting-started.html' self.root_path = 'getting-started.html'
@@ -9,8 +9,9 @@ module Docs
code: 'https://github.com/facebook/relay' code: 'https://github.com/facebook/relay'
} }
options[:root_title] = 'Relay Documentation' html_filters.push 'relay/entries', 'relay/clean_html'
options[:only_patterns] = nil
options[:container] = '.documentationContent'
options[:skip] = %w(videos.html graphql-further-reading.html) options[:skip] = %w(videos.html graphql-further-reading.html)
options[:attribution] = <<-HTML options[:attribution] = <<-HTML

Loading…
Cancel
Save