Clean up, version, and improve Relay scraper

pull/1405/head
Phil Scherer 4 years ago
parent 37d5fd3bbb
commit 7c01b590f0

@ -2,38 +2,17 @@ module Docs
class Relay class Relay
class CleanHtmlFilter < Filter class CleanHtmlFilter < Filter
def call def call
@doc = at_css('.post')
if slug == 'index' header = at_css('h1')
css('img').remove header.parent.before(header).remove
css('.projectTitle').each do |node| css('footer').remove
node.name = 'h1'
node.content = 'Relay'
end
css('pre').remove
end
css('.docLastUpdate').remove
css('.docs-prevnext').remove
css('.edit-page-link').remove
css('h2, h3').each do |node| css('h2, h3').each do |node|
node.css('a').remove node['id'] = node.at_css('a.anchor')['id']
node['id'] = node.content.gsub(/\s/, '-').downcase
end end
css('.onPageNav').remove
css('#docsNav').remove
css('.fixedHeaderContainer').remove
css('footer').remove
# syntax highlight # syntax highlight
css('pre').each do |node| css('pre').each do |node|
node['data-language'] = 'javascript' node['data-language'] = 'javascript'

@ -1,51 +1,47 @@
module Docs module Docs
class Relay class Relay
class EntriesFilter < Docs::EntriesFilter class EntriesFilter < Docs::EntriesFilter
ONLY_SECTIONS = ['API Reference', 'Principles & Architecture']
ONLY_SLUGS = []
def get_name def call
if slug == 'index' if root_page?
return 'Relay' css('.navGroup > h3').each do |node|
next if not ONLY_SECTIONS.include? node.content
node.next_element.css('a').each do |anchor|
ONLY_SLUGS << anchor['href'].split('/').last.strip
end
end
end
super
end end
def get_name
at_css('h1').content at_css('h1').content
end end
def get_type def get_type
if slug == 'index'
return 'Relay'
end
at_css('h1').content at_css('h1').content
end end
def additional_entries def include_default_entry?
entries = [] ONLY_SLUGS.include? slug
if slug == 'index'
return entries
end end
## avoid adding non-desired entries removing tags def additional_entries
# remove header which contains a <h2> tag return [] if not include_default_entry?
css('.fixedHeaderContainer').remove
# remove table of content whose title is an <h2> tag
css('.toc').remove
##
css('h2, h3').each do |node| css('article h2, article h3').each_with_object [] do |node, entries|
next if node.content.include?('Argument') next if node.content.include?('Argument') ||
entry_name = node.content node.content.starts_with?('Example')
if entry_name.include?('(') name = node.content
entry_name = entry_name.match(/.*\(/)[0] + ')' if name.include?('(')
name = name.match(/.*\(/)[0] + ')'
end end
id = node.at_css('a.anchor')['id']
entry_id = node.content.gsub(/\s/, '-').downcase entries << [name, id]
entries << [entry_name, entry_id]
end end
entries
end end
end end

@ -1,9 +1,7 @@
module Docs module Docs
class Relay < UrlScraper class Relay < UrlScraper
self.type = 'simple' self.type = 'simple'
self.release = '10.1.0' self.root_path = 'introduction-to-relay'
self.base_url = 'https://relay.dev'
self.root_path = 'index.html'
self.links = { self.links = {
home: 'https://relay.dev/', home: 'https://relay.dev/',
code: 'https://github.com/facebook/relay' code: 'https://github.com/facebook/relay'
@ -11,19 +9,7 @@ module Docs
html_filters.push 'relay/entries', 'relay/clean_html' html_filters.push 'relay/entries', 'relay/clean_html'
options[:only] = [ options[:skip] = %w(videos)
'/docs/en/graphql-in-relay',
'/docs/en//relay-environment',
'/docs/en/network-layer',
'/docs/en/query-renderer',
'/docs/en/fragment-container',
'/docs/en/refetch-container',
'/docs/en/pagination-container',
'/docs/en/mutations',
'/docs/en/subscriptions',
'/docs/en/relay-store',
'/docs/en/fetch-query'
]
options[:attribution] = <<-HTML options[:attribution] = <<-HTML
&copy; 2020&ndash;present Facebook Inc.<br> &copy; 2020&ndash;present Facebook Inc.<br>
@ -34,5 +20,21 @@ module Docs
get_latest_github_release('facebook', 'relay', opts) get_latest_github_release('facebook', 'relay', opts)
end end
version '10' do
self.release = '10.1.0'
self.base_url = "https://relay.dev/docs/en/"
# For some reason, the most-recent version isn't available at a versioned URL
end
version '9' do
self.release = '9.1.0'
self.base_url = "https://relay.dev/docs/en/v#{self.release}/"
end
version '8' do
self.release = '8.0.0'
self.base_url = "https://relay.dev/docs/en/v#{self.release}/"
end
end end
end end

Loading…
Cancel
Save