Cleanup, version, and improve Relay scraper

pull/1405/head
Phil Scherer 4 years ago
parent 37d5fd3bbb
commit 7c01b590f0

@ -2,38 +2,17 @@ module Docs
class Relay
class CleanHtmlFilter < Filter
def call
@doc = at_css('.post')
if slug == 'index'
css('img').remove
header = at_css('h1')
header.parent.before(header).remove
css('.projectTitle').each do |node|
node.name = 'h1'
node.content = 'Relay'
end
css('pre').remove
end
css('.docLastUpdate').remove
css('.docs-prevnext').remove
css('.edit-page-link').remove
css('footer').remove
css('h2, h3').each do |node|
node.css('a').remove
node['id'] = node.content.gsub(/\s/, '-').downcase
node['id'] = node.at_css('a.anchor')['id']
end
css('.onPageNav').remove
css('#docsNav').remove
css('.fixedHeaderContainer').remove
css('footer').remove
# syntax highlight
css('pre').each do |node|
node['data-language'] = 'javascript'

@ -1,51 +1,47 @@
module Docs
class Relay
class EntriesFilter < Docs::EntriesFilter
def get_name
if slug == 'index'
return 'Relay'
ONLY_SECTIONS = ['API Reference', 'Principles & Architecture']
ONLY_SLUGS = []
# Runs the filter.
#
# On the root page, walks the sidebar ('.navGroup > h3' headings) and, for
# every heading whose text is listed in ONLY_SECTIONS, records the last path
# segment of each link under the heading's next sibling element in ONLY_SLUGS.
# It then defers to the parent implementation and returns its result.
def call
  if root_page?
    css('.navGroup > h3').each do |heading|
      next unless ONLY_SECTIONS.include?(heading.content)

      link_list = heading.next_element
      # A heading with no following element has no links to collect.
      next unless link_list

      link_list.css('a').each do |anchor|
        href = anchor['href']
        # Anchors without an href attribute cannot be mapped to a slug.
        next unless href

        page_slug = href.split('/').last
        # An href of "" or "/" has no path segment at all.
        next unless page_slug

        page_slug = page_slug.strip
        # ONLY_SLUGS is a constant and keeps its contents between calls, so
        # skip slugs that were already recorded by an earlier pass.
        ONLY_SLUGS << page_slug unless ONLY_SLUGS.include?(page_slug)
      end
    end
  end
  super
end
# Entry name for the page: the text content of the node that
# at_css('h1') returns.
def get_name
  heading = at_css('h1')
  heading.content
end
# Entry type for the page: the fixed string 'Relay' for the 'index' slug,
# otherwise the text content of the node that at_css('h1') returns.
def get_type
  return 'Relay' if slug == 'index'

  at_css('h1').content
end
def additional_entries
entries = []
if slug == 'index'
return entries
end
## Avoid adding unwanted entries by removing these tags first
# remove the header, which contains an <h2> tag
css('.fixedHeaderContainer').remove
# Whether the current page gets a default entry: true only when the page's
# slug is listed in ONLY_SLUGS.
def include_default_entry?
  current_slug = slug
  ONLY_SLUGS.include?(current_slug)
end
# remove the table of contents, whose title is an <h2> tag
css('.toc').remove
##
def additional_entries
return [] if not include_default_entry?
css('h2, h3').each do |node|
next if node.content.include?('Argument')
entry_name = node.content
css('article h2, article h3').each_with_object [] do |node, entries|
next if node.content.include?('Argument') ||
node.content.starts_with?('Example')
if entry_name.include?('(')
entry_name = entry_name.match(/.*\(/)[0] + ')'
name = node.content
if name.include?('(')
name = name.match(/.*\(/)[0] + ')'
end
entry_id = node.content.gsub(/\s/, '-').downcase
entries << [entry_name, entry_id]
id = node.at_css('a.anchor')['id']
entries << [name, id]
end
entries
end
end

@ -1,9 +1,7 @@
module Docs
class Relay < UrlScraper
self.type = 'simple'
self.release = '10.1.0'
self.base_url = 'https://relay.dev'
self.root_path = 'index.html'
self.root_path = 'introduction-to-relay'
self.links = {
home: 'https://relay.dev/',
code: 'https://github.com/facebook/relay'
@ -11,19 +9,7 @@ module Docs
html_filters.push 'relay/entries', 'relay/clean_html'
options[:only] = [
'/docs/en/graphql-in-relay',
'/docs/en//relay-environment',
'/docs/en/network-layer',
'/docs/en/query-renderer',
'/docs/en/fragment-container',
'/docs/en/refetch-container',
'/docs/en/pagination-container',
'/docs/en/mutations',
'/docs/en/subscriptions',
'/docs/en/relay-store',
'/docs/en/fetch-query'
]
options[:skip] = %w(videos)
options[:attribution] = <<-HTML
&copy; 2020&ndash;present Facebook Inc.<br>
@ -34,5 +20,21 @@ module Docs
get_latest_github_release('facebook', 'relay', opts)
end
# Per-version scraper settings. Each block sets the release number and the
# base URL the documentation for that version is fetched from.

# Version 10 is read from the unversioned docs URL (see the note below).
version '10' do
self.release = '10.1.0'
self.base_url = "https://relay.dev/docs/en/"
# For some reason, the most recent version isn't available at a versioned URL,
# so no "v<release>" path segment is used here.
end
# Versions 9 and 8 interpolate the release set on the previous line into the
# URL, e.g. https://relay.dev/docs/en/v9.1.0/.
version '9' do
self.release = '9.1.0'
self.base_url = "https://relay.dev/docs/en/v#{self.release}/"
end
version '8' do
self.release = '8.0.0'
self.base_url = "https://relay.dev/docs/en/v#{self.release}/"
end
end
end

Loading…
Cancel
Save