The current RxJS documentation site is https://rxjs.dev/.

It is very similar to the Angular documentation site (https://angular.io/), so I reused most of the code.

Images on the documentation site seem to be broken, so the scraper cannot download them. You can see an example of a broken image at https://rxjs.dev/api/operators/buffer.

Related to https://github.com/freeCodeCamp/devdocs/issues/939
pull/954/head
Filipe Silva 6 years ago
parent b2608bb7d8
commit 5da0214717

@ -94,6 +94,7 @@
'pages/rfc',
'pages/rubydoc',
'pages/rust',
'pages/rxjs',
'pages/sinon',
'pages/socketio',
'pages/sphinx',

@ -0,0 +1,24 @@
// Styles for RxJS documentation pages; every rule is scoped under `._rxjs`.
// Each selector mostly borrows an existing placeholder via @extend.
._rxjs {
@extend %simple;
// `.pre-title` is the <div> that the RxJS CleanHtmlFilter inserts before a
// <pre> that carried a `title` attribute.
.pre-title { @extend %pre-heading; }
.breadcrumbs { @extend %note; }
.banner { @extend %note-green; }
// Status classes on <code> elements, mapped to green / orange / red labels.
code.stable { @extend %label-green; }
code.experimental { @extend %label-orange; }
code.deprecated { @extend %label-red; }
// Callout boxes.
.alert.is-important { @extend %note-red; }
// NOTE(review): `.breadcrumbs` already extends %note above and extends
// %note-blue here as well — confirm that both are intended.
.alert.is-helpful, .breadcrumbs { @extend %note-blue; }
.breadcrumbs { padding-left: 2em; }
img { margin: 1em 0; }
.location-badge {
font-style: italic;
text-align: right;
}
// `!important` forces the zero margin on headings inside table cells.
td h3 { margin: 0 !important; }
}

@ -0,0 +1,101 @@
module Docs
  class Rxjs
    # Rewrites the HTML of a scraped RxJS page into simpler markup:
    # removes decorative nodes, unwraps layout containers, normalizes
    # heading levels, and turns code samples / file trees into plain <pre>
    # blocks tagged with a `data-language` attribute.
    class CleanHtmlFilter < Filter
      # Suffix of a <pre> `path` attribute => value written to `data-language`.
      # The suffixes are mutually exclusive, so at most one entry matches.
      LANGUAGE_BY_EXTENSION = {
        '.ts' => 'typescript',
        '.html' => 'html',
        '.css' => 'css',
        '.js' => 'js',
        '.json' => 'json'
      }.freeze

      # Attributes stripped from every <pre> after they have been consumed.
      PRE_ATTRIBUTES = %w[path region linenums title language hidecopy class].freeze

      # Applies all clean-up steps in order and returns the mutated document.
      # The steps are order-dependent (later selectors rely on earlier renames
      # and unwrapping), so they are kept in one sequential method.
      def call
        if root_page?
          css('.card-container').remove
          at_css('h1').content = 'RxJS Documentation'
        end

        css('br', 'hr', '.material-icons', '.header-link', '.breadcrumb').remove

        # Unwrap layout containers: hoist the children, drop the wrapper.
        css('.content', 'article', '.api-header', 'section', '.instance-member').each do |node|
          node.before(node.children).remove
        end

        css('label', 'h2 > em', 'h3 > em').each do |node|
          node.name = 'code'
        end

        # Gather the run of <code> elements that directly follows the <h1>
        # into a single new <p>, separated by spaces.
        css('h1 + code').each do |node|
          node.before('<p></p>')
          paragraph = node.previous_element
          # `next_element` is nil when the <code> is the last element sibling;
          # guard against it instead of calling `.name` on nil.
          while (sibling = node.next_element) && sibling.name == 'code'
            paragraph << ' '
            paragraph << sibling
          end
          paragraph.prepend_child(node)
        end

        # Demote secondary headings to <h4>.
        css('td h3', '.l-sub-section > h3', '.alert h3', '.row-margin > h3', '.api-heading ~ h3', '.api-heading + h2', '.metadata-member h3').each do |node|
          node.name = 'h4'
        end

        css('.l-sub-section', '.alert', '.banner').each do |node|
          node.name = 'blockquote'
        end

        # File trees: trim each entry, indent nested entries by one space,
        # then flatten the whole tree into a newline-separated <pre>.
        css('.file').each do |node|
          node.content = node.content.strip
        end

        css('.filetree .children').each do |node|
          node.css('.file').each do |file|
            file.content = " #{file.content}"
          end
        end

        css('.filetree').each do |node|
          node.content = node.css('.file').map(&:inner_html).join("\n")
          node.name = 'pre'
          node.remove_attribute('class')
        end

        css('pre').each do |node|
          node.content = node.content.strip

          # Language guessed from the `path` suffix ...
          path = node['path']
          if path
            _suffix, language = LANGUAGE_BY_EXTENSION.find { |suffix, _| path.end_with?(suffix) }
            node['data-language'] = language if language
          end
          # ... overridden by an explicit `language` attribute ...
          node['data-language'] = node['language'].sub(/\Ats/, 'typescript').strip if node['language']
          # ... and defaulting to TypeScript when the snippet starts with '@'.
          node['data-language'] ||= 'typescript' if node.content.start_with?('@')

          node.before(%(<div class="pre-title">#{node['title']}</div>)) if node['title']

          # API signatures become headings wrapping a <code>.
          if node['class'] && node['class'].include?('api-heading')
            node.name = 'h3'
            node.inner_html = "<code>#{node.inner_html}</code>"
          end

          PRE_ATTRIBUTES.each { |attribute| node.remove_attribute(attribute) }
        end

        css('h1[class]').remove_attr('class')
        css('table[class]').remove_attr('class')
        css('table[width]').remove_attr('width')
        css('tr[style]').remove_attr('style')

        # Pages carrying the module label get their subpath (minus 'api/') as title.
        if at_css('.api-type-label.module')
          at_css('h1').content = subpath.remove('api/')
        end

        css('th h3').each do |node|
          node.name = 'span'
        end

        # Flatten nested <code> elements.
        css('code code').each do |node|
          node.before(node.children).remove
        end

        doc
      end
    end
  end
end

@ -0,0 +1,23 @@
module Docs
  class Rxjs
    # Derives the index entry (name and type) for each scraped RxJS page.
    class EntriesFilter < Docs::EntriesFilter
      # Entry name: the page's <h1> text, prefixed with "<n>. " when the
      # subpath contains a "-pt<n>" part marker.
      def get_name
        title = at_css('h1').content
        part = /\-pt(\d+)/.match(subpath)
        title.prepend "#{part[1]}. " if part
        title
      end

      # Entry type: 'Guide' for guide pages, the first name segment for
      # module pages, the second slug segment for other api/ pages, and
      # 'Miscellaneous' for everything else.
      def get_type
        return 'Guide' if slug.start_with?('guide')
        return name.split('/').first if at_css('.api-type-label.module')
        return slug.split('/').second if slug.start_with?('api/')

        'Miscellaneous'
      end
    end
  end
end

@ -0,0 +1,94 @@
require 'yajl/json_gem'
module Docs
  # Scraper for the RxJS documentation site. Pages are fetched as the JSON
  # documents under `generated/docs/` and unwrapped into HTML before the
  # normal filter pipeline runs.
  class Rxjs < UrlScraper
    self.name = 'RxJS'
    self.type = 'rxjs'
    self.links = {
      home: 'https://rxjs.dev/',
      code: 'https://github.com/ReactiveX/rxjs'
    }

    options[:max_image_size] = 256_000

    # Heredoc body is kept flush-left so the emitted string is unchanged.
    options[:attribution] = <<-HTML
&copy; 2015&ndash;2018 Google, Inc., Netflix, Inc., Microsoft Corp. and contributors.<br>
Code licensed under an Apache-2.0 License. Documentation licensed under CC BY 4.0.
    HTML

    # Behaviour shared by version definitions: URL discovery and unwrapping
    # of JSON responses.
    module Common
      private

      # Builds the list of JSON document URLs to scrape from the site's
      # navigation file and its API list.
      def initial_urls
        urls = []

        Request.run "#{self.class.base_url}generated/navigation.json" do |response|
          # Recursively visit navigation entries; the bare 'api' entry is skipped.
          walk_nav = lambda do |entry|
            target = entry['url']
            urls << url_for("generated/docs/#{target}.json") if target && target != 'api'
            entry['children'].each(&walk_nav) if entry['children']
          end

          JSON.parse(response.body)['SideNav'].each(&walk_nav)
        end

        Request.run "#{self.class.base_url}generated/docs/api/api-list.json" do |response|
          # Prefer an entry's explicit path; fall back to api/<name>.
          walk_api = lambda do |entry|
            if entry['path']
              urls << url_for("generated/docs/#{entry['path']}.json")
            elsif entry['name']
              urls << url_for("generated/docs/api/#{entry['name']}.json")
            end
            entry['items'].each(&walk_api) if entry['items']
          end

          JSON.parse(response.body).each(&walk_api)
        end

        urls
      end

      # For JSON responses: replaces the body with the document's 'contents'
      # value (empty string when the JSON cannot be parsed), marks it as HTML,
      # and rewrites both URLs to the path without '/generated/docs/' and
      # '.json'. Then defers to the parent implementation.
      def handle_response(response)
        if response.mime_type.include?('json')
          response.options[:response_body] =
            begin
              JSON.parse(response.body)['contents']
            rescue JSON::ParserError
              ''
            end

          response.headers['Content-Type'] = 'text/html'

          [response.url, response.effective_url].each do |uri|
            uri.path = uri.path.sub('/generated/docs/', '/').remove('.json')
          end
        end

        super
      end
    end

    version do
      self.release = '6.3.3'
      self.base_url = 'https://rxjs.dev/'
      self.root_path = 'guide/overview'

      html_filters.push 'rxjs/clean_html', 'rxjs/entries'

      options[:follow_links] = false
      options[:only_patterns] = [/\Aguide/, /\Aapi/]
      # Turn site-relative links into root-relative ones before URL parsing.
      options[:fix_urls_before_parse] = lambda do |url|
        url.sub! %r{\Aguide/}, '/guide/'
        url.sub! %r{\Aapi/}, '/api/'
        url.sub! %r{\Agenerated/}, '/generated/'
        url
      end

      include Docs::Rxjs::Common
    end

    private

    # Replaces the site's custom elements with standard tags before parsing.
    # Order matters: the empty <live-example> pair must be handled before the
    # generic '<live-example' prefix.
    def parse(response)
      [
        ['<code-example', '<pre'],
        ['</code-example', '</pre'],
        ['<code-pane', '<pre'],
        ['</code-pane', '</pre'],
        ['<live-example></live-example>', 'live example'],
        ['<live-example', '<span'],
        ['</live-example', '</span']
      ].each do |custom, standard|
        response.body.gsub! custom, standard
      end

      super
    end
  end
end

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.0 KiB

@ -0,0 +1 @@
http://reactivex.io/
Loading…
Cancel
Save