rxjs: finish scraper and filters

pull/954/head
Jasper van Merle 6 years ago
parent 20438856d1
commit 17528d9845

@ -211,8 +211,7 @@ credits = [
'2017 Cypress.io', '2017 Cypress.io',
'MIT', 'MIT',
'https://raw.githubusercontent.com/cypress-io/cypress-documentation/develop/LICENSE.md' 'https://raw.githubusercontent.com/cypress-io/cypress-documentation/develop/LICENSE.md'
], ], [
[
'D', 'D',
'1999-2018 The D Language Foundation', '1999-2018 The D Language Foundation',
'Boost', 'Boost',
@ -572,8 +571,7 @@ credits = [
'2016-2018, The Pony Developers & 2014-2015, Causality Ltd.', '2016-2018, The Pony Developers & 2014-2015, Causality Ltd.',
'BSD', 'BSD',
'https://raw.githubusercontent.com/ponylang/ponyc/master/LICENSE' 'https://raw.githubusercontent.com/ponylang/ponyc/master/LICENSE'
], ], [
[
'PostgreSQL', 'PostgreSQL',
'1996-2018 The PostgreSQL Global Development Group<br>&copy; 1994 The Regents of the University of California', '1996-2018 The PostgreSQL Global Development Group<br>&copy; 1994 The Regents of the University of California',
'PostgreSQL', 'PostgreSQL',
@ -648,13 +646,17 @@ credits = [
'2010 The Rust Project Developers', '2010 The Rust Project Developers',
'MIT', 'MIT',
'https://raw.githubusercontent.com/rust-lang/rust/master/LICENSE-MIT' 'https://raw.githubusercontent.com/rust-lang/rust/master/LICENSE-MIT'
], [
'RxJS',
'2015-2018 Google, Inc., Netflix, Inc., Microsoft Corp. and contributors',
'Apache',
'https://raw.githubusercontent.com/ReactiveX/rxjs/master/LICENSE.txt'
], [ ], [
'Salt Stack', 'Salt Stack',
'2019 SaltStack', '2019 SaltStack',
'Apache', 'Apache',
'https://raw.githubusercontent.com/saltstack/salt/develop/LICENSE' 'https://raw.githubusercontent.com/saltstack/salt/develop/LICENSE'
], ], [
[
'Sass', 'Sass',
'2006-2016 Hampton Catlin, Nathan Weizenbaum, and Chris Eppstein', '2006-2016 Hampton Catlin, Nathan Weizenbaum, and Chris Eppstein',
'MIT', 'MIT',
@ -664,8 +666,7 @@ credits = [
'2002-2019 EPFL, with contributions from Lightbend', '2002-2019 EPFL, with contributions from Lightbend',
'Apache', 'Apache',
'https://raw.githubusercontent.com/scala/scala-lang/master/license.md' 'https://raw.githubusercontent.com/scala/scala-lang/master/license.md'
], ], [
[
'scikit-image', 'scikit-image',
'2011 the scikit-image team', '2011 the scikit-image team',
'BSD', 'BSD',
@ -765,8 +766,7 @@ credits = [
'2003-2019 WordPress Foundation', '2003-2019 WordPress Foundation',
'GPLv2+', 'GPLv2+',
'https://wordpress.org/about/license/' 'https://wordpress.org/about/license/'
], ], [
[
'Yarn', 'Yarn',
'2016-present Yarn Contributors', '2016-present Yarn Contributors',
'BSD', 'BSD',

@ -7,6 +7,11 @@ module Docs
at_css('h1').content = 'RxJS Documentation' at_css('h1').content = 'RxJS Documentation'
end end
if at_css('h1').nil?
title = subpath.rpartition('/').last.titleize
doc.prepend_child("<h1>#{title}</h1>")
end
css('br', 'hr', '.material-icons', '.header-link', '.breadcrumb').remove css('br', 'hr', '.material-icons', '.header-link', '.breadcrumb').remove
css('.content', 'article', '.api-header', 'section', '.instance-member').each do |node| css('.content', 'article', '.api-header', 'section', '.instance-member').each do |node|
@ -65,6 +70,16 @@ module Docs
if node['class'] && node['class'].include?('api-heading') if node['class'] && node['class'].include?('api-heading')
node.name = 'h3' node.name = 'h3'
unless node.ancestors('.instance-method').empty?
matches = node.inner_html.scan(/([^(& ]+)[(&]/)
unless matches.empty? || matches[0][0] == 'constructor'
node['name'] = matches[0][0]
node['id'] = node['name'].downcase + '-'
end
end
node.inner_html = "<code>#{node.inner_html}</code>" node.inner_html = "<code>#{node.inner_html}</code>"
end end
@ -77,25 +92,48 @@ module Docs
node.remove_attribute('class') node.remove_attribute('class')
end end
css('h1[class]').remove_attr('class') css('td > .overloads').each do |node|
css('table[class]').remove_attr('class') node.replace node.at_css('.detail-contents')
css('table[width]').remove_attr('width') end
css('tr[style]').remove_attr('style')
css('td.short-description p').each do |node|
signature = node.parent.parent.next_element.at_css('h3[id]')
signature.after(node) unless signature.nil?
end
if at_css('.api-type-label.module') css('.method-table').each do |node|
at_css('h1').content = subpath.remove('api/') node.replace node.at_css('tbody')
end end
css('th h3').each do |node| css('.api-body > table > caption').each do |node|
node.name = 'span' node.name = 'center'
lift_out_of_table node
end end
css('.api-body > table > tbody > tr:not([class]) > td > *').each do |node|
lift_out_of_table node
end
css('.api-body > table').each do |node|
node.remove if node.content.strip.blank?
end
css('h1[class]').remove_attr('class')
css('table[class]').remove_attr('class')
css('table[width]').remove_attr('width')
css('tr[style]').remove_attr('style')
css('code code').each do |node| css('code code').each do |node|
node.before(node.children).remove node.before(node.children).remove
end end
doc doc
end end
# Moves +node+ out of the table that contains it, re-inserting it as a
# sibling placed ahead of that table.
#
# The table used is the first element returned by
# <tt>node.ancestors('table')</tt>. When the table has a preceding element,
# +node+ is inserted directly after that element (the original behaviour).
# When the table has no preceding element, +node+ is inserted directly
# before the table instead of raising NoMethodError on +nil+.
#
# node - the node to relocate; must respond to +ancestors+.
#
# Returns nil without touching the document when +node+ has no +table+
# ancestor; otherwise returns whatever the insertion call returns.
def lift_out_of_table(node)
  table = node.ancestors('table').first
  return if table.nil?

  anchor = table.previous_element
  # A table that is the first element child has no previous element to
  # anchor on, so insert right before the table itself.
  anchor ? anchor.after(node) : table.before(node)
end
end end
end end
end end

@ -2,22 +2,28 @@ module Docs
class Rxjs class Rxjs
class EntriesFilter < Docs::EntriesFilter class EntriesFilter < Docs::EntriesFilter
def get_name def get_name
name = at_css('h1').content title = at_css('h1')
name = title.nil? ? subpath.rpartition('/').last.titleize : title.content
name.prepend "#{$1}. " if subpath =~ /\-pt(\d+)/ name.prepend "#{$1}. " if subpath =~ /\-pt(\d+)/
name += '()' unless at_css('.api-type-label.function').nil?
name name
end end
def get_type def get_type
if slug.start_with?('guide') if slug.start_with?('guide')
'Guide' 'Guide'
elsif at_css('.api-type-label.module')
name.split('/').first
elsif slug.start_with?('api/') elsif slug.start_with?('api/')
slug.split('/').second slug.split('/').second
else else
'Miscellaneous' 'Miscellaneous'
end end
end end
# Builds one extra index entry per member heading on the page.
#
# Only <tt>h3[id]</tt> headings that also carry a non-empty +name+
# attribute are used. Headings with an id but no +name+ are skipped,
# because they would otherwise produce a malformed entry such as
# <tt>"Observable.()"</tt>.
#
# Returns an Array of <tt>[entry_name, fragment_id]</tt> pairs, where
# +entry_name+ is <tt>"#{name}.#{member}()"</tt> and +fragment_id+ is the
# heading's +id+ attribute.
def additional_entries
  member_headings = css('h3[id]').select { |node| !node['name'].to_s.empty? }

  member_headings.map do |node|
    ["#{name}.#{node['name']}()", node['id']]
  end
end
end end
end end
end end

@ -4,11 +4,26 @@ module Docs
class Rxjs < UrlScraper class Rxjs < UrlScraper
self.name = 'RxJS' self.name = 'RxJS'
self.type = 'rxjs' self.type = 'rxjs'
self.release = '6.5.2'
self.base_url = 'https://rxjs.dev/'
self.root_path = 'guide/overview'
self.links = { self.links = {
home: 'https://rxjs.dev/', home: 'https://rxjs.dev/',
code: 'https://github.com/ReactiveX/rxjs' code: 'https://github.com/ReactiveX/rxjs'
} }
html_filters.push 'rxjs/clean_html', 'rxjs/entries'
options[:follow_links] = false
options[:only_patterns] = [/guide\//, /api\//]
options[:skip_patterns] = [/api\/([^\/]+)\.json/]
options[:fix_urls_before_parse] = ->(url) do
url.sub! %r{\Aguide/}, '/guide/'
url.sub! %r{\Aapi/}, '/api/'
url.sub! %r{\Agenerated/}, '/generated/'
url
end
options[:max_image_size] = 256_000 options[:max_image_size] = 256_000
options[:attribution] = <<-HTML options[:attribution] = <<-HTML
@ -16,69 +31,54 @@ module Docs
Code licensed under an Apache-2.0 License. Documentation licensed under CC BY 4.0. Code licensed under an Apache-2.0 License. Documentation licensed under CC BY 4.0.
HTML HTML
module Common def get_latest_version(opts)
private json = fetch_json('https://rxjs.dev/generated/navigation.json', opts)
json['__versionInfo']['raw']
end
def initial_urls private
initial_urls = []
Request.run "#{self.class.base_url}generated/navigation.json" do |response| def initial_urls
data = JSON.parse(response.body) initial_urls = []
dig = ->(entry) do
initial_urls << url_for("generated/docs/#{entry['url']}.json") if entry['url'] && entry['url'] != 'api'
entry['children'].each(&dig) if entry['children']
end
data['SideNav'].each(&dig)
end
Request.run "#{self.class.base_url}generated/docs/api/api-list.json" do |response| Request.run "#{self.class.base_url}generated/navigation.json" do |response|
data = JSON.parse(response.body) data = JSON.parse(response.body)
dig = ->(entry) do dig = ->(entry) do
initial_urls << url_for("generated/docs/#{entry['path']}.json") if entry['path'] initial_urls << url_for("generated/docs/#{entry['url']}.json") if entry['url'] && entry['url'] != 'api'
initial_urls << url_for("generated/docs/api/#{entry['name']}.json") if entry['name'] && !entry['path'] entry['children'].each(&dig) if entry['children']
entry['items'].each(&dig) if entry['items']
end
data.each(&dig)
end end
data['SideNav'].each(&dig)
initial_urls
end end
def handle_response(response) Request.run "#{self.class.base_url}generated/docs/api/api-list.json" do |response|
if response.mime_type.include?('json') data = JSON.parse(response.body)
begin dig = ->(entry) do
response.options[:response_body] = JSON.parse(response.body)['contents'] initial_urls << url_for("generated/docs/#{entry['path']}.json") if entry['path']
rescue JSON::ParserError initial_urls << url_for("generated/docs/api/#{entry['name']}.json") if entry['name'] && !entry['path']
response.options[:response_body] = '' entry['items'].each(&dig) if entry['items']
end
response.headers['Content-Type'] = 'text/html'
response.url.path = response.url.path.sub('/generated/docs/', '/').remove('.json')
response.effective_url.path = response.effective_url.path.sub('/generated/docs/', '/').remove('.json')
end end
super data.each(&dig)
end end
end
version do initial_urls.select do |url|
self.release = '6.3.3' options[:only_patterns].any? { |pattern| url =~ pattern } &&
self.base_url = 'https://rxjs.dev/' options[:skip_patterns].none? { |pattern| url =~ pattern }
self.root_path = 'guide/overview'
html_filters.push 'rxjs/clean_html', 'rxjs/entries'
options[:follow_links] = false
options[:only_patterns] = [/\Aguide/, /\Aapi/]
options[:fix_urls_before_parse] = ->(url) do
url.sub! %r{\Aguide/}, '/guide/'
url.sub! %r{\Aapi/}, '/api/'
url.sub! %r{\Agenerated/}, '/generated/'
url
end end
include Docs::Rxjs::Common
end end
private def handle_response(response)
if response.mime_type.include?('json')
begin
response.options[:response_body] = JSON.parse(response.body)['contents']
rescue JSON::ParserError
response.options[:response_body] = ''
end
response.headers['Content-Type'] = 'text/html'
response.url.path = response.url.path.sub('/generated/docs/', '/').remove('.json')
response.effective_url.path = response.effective_url.path.sub('/generated/docs/', '/').remove('.json')
end
super
end
def parse(response) def parse(response)
response.body.gsub! '<code-example', '<pre' response.body.gsub! '<code-example', '<pre'

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

After

Width:  |  Height:  |  Size: 1.5 KiB

@ -1 +1 @@
http://reactivex.io/ https://github.com/ReactiveX/reactivex.github.io/blob/develop/favicon.ico

Loading…
Cancel
Save