From 6d580a9255a5bdd281d604ee2c9bd03ac9bc0b47 Mon Sep 17 00:00:00 2001
From: Thibaut Courouble
Date: Mon, 9 Oct 2017 10:41:09 -0400
Subject: [PATCH] Decouple Relay scraper from React scraper

---
 lib/docs/filters/relay/clean_html.rb | 72 ++++++++++++++++++++++++++++
 lib/docs/filters/relay/entries.rb    | 51 ++++++++++++++++++++
 lib/docs/scrapers/relay.rb           |  9 ++--
 3 files changed, 128 insertions(+), 4 deletions(-)
 create mode 100644 lib/docs/filters/relay/clean_html.rb
 create mode 100644 lib/docs/filters/relay/entries.rb

diff --git a/lib/docs/filters/relay/clean_html.rb b/lib/docs/filters/relay/clean_html.rb
new file mode 100644
index 00000000..a89a9c72
--- /dev/null
+++ b/lib/docs/filters/relay/clean_html.rb
@@ -0,0 +1,72 @@
+module Docs
+  class Relay
+    # Strips navigation chrome from a scraped Relay documentation page and
+    # normalizes its code blocks, headings and anchors.
+    class CleanHtmlFilter < Filter
+      # Applies every clean-up step in order and returns the document.
+      def call
+        @doc = at_css('.inner-content, article.withtoc')
+
+        if root_page?
+          at_css('h1').content = 'Relay Documentation'
+        end
+
+        css('.docs-prevnext', '.hash-link', '.edit-page-link', '.edit-github', 'a.hash', '.edit-page-block', 'a.show', 'a.hide', 'hr').remove
+
+        # Replace each table that wraps a heading with the heading itself.
+        css('table h1', 'table h2', 'table h3').each do |node|
+          table = node
+          table = table.parent until table.name == 'table'
+          table.replace(node)
+        end
+
+        # Copy the anchor's name (or id) onto its parent unless the parent already has an id.
+        css('a.anchor', 'a.hashref').each do |node|
+          node.parent['id'] ||= node['name'] || node['id']
+        end
+
+        css('.highlight').each do |node|
+          node.name = 'pre'
+          node.css('.gutter').remove
+          node['data-language'] = node.at_css('[data-lang]').try(:[], 'data-lang') || 'js'
+          node.content = node.content.strip
+        end
+
+        css('table.highlighttable').each do |node|
+          code = node.at_css('pre.highlight')
+          # A table without a code block has nothing to be replaced with.
+          node.replace(code) if code
+        end
+
+        css('.prism').each do |node|
+          node.name = 'pre'
+          node['data-language'] = node['class'][/(?<=language\-)(\w+)/]
+          # Not a no-op: assigning the text back replaces the node's child markup.
+          node.content = node.content
+        end
+
+        css('blockquote > p:first-child').each do |node|
+          node.remove if node.content.strip == 'Note:'
+        end
+
+        css('h3#props', 'h3#methods').each { |node| node.name = 'h2' }
+        css('h4.propTitle').each { |node| node.name = 'h3' }
+
+        # Unwrap wrapper elements: hoist their children, then drop the wrapper.
+        css('> div > div', '> div', 'div > span', '.props', '.prop').each do |node|
+          node.before(node.children).remove
+        end
+
+        css('a pre', 'h3 .propType').each do |node|
+          node.name = 'code'
+        end
+
+        css('a[target]').each do |node|
+          node.remove_attribute('target')
+        end
+
+        doc
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/relay/entries.rb b/lib/docs/filters/relay/entries.rb
new file mode 100644
index 00000000..0c486323
--- /dev/null
+++ b/lib/docs/filters/relay/entries.rb
@@ -0,0 +1,51 @@
+module Docs
+  class Relay
+    # Derives the index entries (page name, page type and in-page anchors)
+    # of a Relay documentation page.
+    class EntriesFilter < Docs::EntriesFilter
+      # Page name: the text directly inside the <h1>, ignoring nested elements.
+      def get_name
+        at_css('h1').children.select(&:text?).map(&:content).join.strip
+      end
+
+      # Page type: the <h3> text of the section that contains the active navigation link.
+      def get_type
+        link = at_css('.nav-docs-section .active, .toc .active')
+        section = link.ancestors('.nav-docs-section, section').first
+        section.at_css('h3').content.strip
+      end
+
+      # Extra entries found in the page, as [name, id] or [name, id, type] arrays.
+      def additional_entries
+        entries = []
+
+        css('.inner-content h3 code, .inner-content h4 code').each do |node|
+          name = node.content
+          name.remove! %r{[#\(\)]}
+          name.remove! %r{\w+\:}
+          name.strip!
+          # Nothing is left to index once the punctuation has been removed.
+          next if name.empty?
+          id = name.parameterize
+          node.parent['id'] = id
+          entries << [name, id, 'Reference']
+        end
+
+        css('.apiIndex a pre').each do |node|
+          # The <pre> may sit deeper than the link, so its parent may have no href.
+          href = node.parent['href']
+          next unless href && href.start_with?('#')
+          id = href.remove('#')
+          name = node.content.strip
+          sep = name.start_with?('static') ? '.' : '#'
+          name.remove! %r{(abstract|static) }
+          name.sub! %r{\(.*\)}, '()'
+          name.prepend(self.name + sep)
+          entries << [name, id]
+        end
+
+        entries
+      end
+    end
+  end
+end
diff --git a/lib/docs/scrapers/relay.rb b/lib/docs/scrapers/relay.rb
index 238721b5..a020e9d7 100644
--- a/lib/docs/scrapers/relay.rb
+++ b/lib/docs/scrapers/relay.rb
@@ -1,6 +1,6 @@
 module Docs
-  class Relay < React
-    self.type = 'react'
+  class Relay < UrlScraper
+    self.type = 'simple'
     self.release = '1.4.1'
     self.base_url = 'https://facebook.github.io/relay/docs/'
     self.root_path = 'getting-started.html'
@@ -9,8 +9,9 @@ module Docs
       code: 'https://github.com/facebook/relay'
     }
 
-    options[:root_title] = 'Relay Documentation'
-    options[:only_patterns] = nil
+    html_filters.push 'relay/entries', 'relay/clean_html'
+
+    options[:container] = '.documentationContent'
     options[:skip] = %w(videos.html graphql-further-reading.html)
 
     options[:attribution] = <<-HTML