mirror of https://github.com/freeCodeCamp/devdocs
Core scraper changes: *) Add regexp support to stub *) Add support for fragments in internal_urls, normalized_paths => done externally.pull/513/head
parent
f0211029ef
commit
b1ecc6c964
@ -0,0 +1,38 @@
|
|||||||
|
module Docs
  class Immutablejs
    # Cleans up a scraped Immutable.js page: narrows the document to its
    # container element, strips React bookkeeping attributes, gives every
    # member heading an id, rewrites in-page links to "Type#member" form and
    # turns code blocks into highlighted <pre> elements.
    #
    # Relies on `at_css`, `css` and `doc`, which are not defined in this class
    # (presumably inherited from Filter).
    class CleanHtmlFilter < Filter
      # Matches hrefs that point outside the documentation: anything starting
      # with a URI scheme ("https:", "mailto:", ...) or protocol-relative "//".
      EXTERNAL_HREF = %r{\A(?:[a-z][a-z0-9+.\-]*:|//)}i

      # Runs all clean-up steps.
      #
      # @return the cleaned document (`doc`)
      def call
        # Skip the container "div"
        @doc = at_css('div')

        # Remove data-reactid attributes for cleaner html
        css('*[data-reactid]').each do |node|
          node.delete 'data-reactid'
        end

        # Add id to member label, so we can navigate among them
        css('h3.memberLabel').each do |label|
          label['id'] = label.content.strip.chomp('()')
        end

        css('a').each do |link|
          href = link['href']

          # Anchors without an href have nothing to rewrite, and external URLs
          # must be left untouched: replacing their "/" with "#" would turn
          # "https://host/path" into "https:##host#path".
          next if href.nil? || href =~ EXTERNAL_HREF

          # Remove "/" (or "#/") from the start
          href = href.sub(%r{\A#?/}, '')

          # We need to convert links - from Iterable/butLast to Iterable#butLast
          link['href'] = href.split('/').join('#')
        end

        # Replace code blocks tag code with pre, and add stylings.
        css('code.codeBlock').each do |block|
          block.name = 'pre'
          block['data-language'] = 'javascript'
          block['class'] = 'language-javascript'
        end

        doc
      end
    end
  end
end
|
@ -0,0 +1,31 @@
|
|||||||
|
module Docs
  class Immutablejs
    # Derives index entries for an Immutable.js page from its
    # `h1.typeHeader` heading and its `h3.memberLabel` member headings.
    class EntriesFilter < Docs::EntriesFilter
      # @return [String, nil] text of the page's `h1.typeHeader`, or nil when
      #   the page has no such heading
      def name
        header = at_css('h1.typeHeader')
        header.content if header
      end

      # @return [String, nil] text of the page's `h1.typeHeader`, or nil when
      #   the page has no such heading
      def type
        # TODO: Is this ok? Without a type header this is the index page, and
        # it probably should not have its own type, hence nil.
        header = at_css('h1.typeHeader')
        header.content if header
      end

      # Builds one entry per member heading on pages reached through a URL
      # fragment.
      #
      # @return [Array<Array(String, String)>] pairs of
      #   ["<type>#<member label>", "<member label without trailing ()>"];
      #   empty when the current URL has no fragment
      def additional_entries
        return [] if current_url.fragment.nil?

        # `type` queries the document, so resolve it once instead of once per
        # member.
        page_type = type

        css('h3.memberLabel').map do |label|
          # Strip before chomping so the second element equals the id built as
          # `content.strip.chomp('()')` for the same heading; without the strip
          # surrounding whitespace would also keep chomp('()') from matching.
          member = label.content.strip
          ["#{page_type}##{member}", member.chomp('()')]
        end
      end
    end
  end
end
|
@ -0,0 +1,23 @@
|
|||||||
|
module Docs
  class Immutablejs
    # Internal-URL handling for Immutable.js, whose links are written as
    # "#/Name" fragments.
    class InternalUrlsFilter < Docs::InternalUrlsFilter
      # Records every URL yielded by `update_links` (as a string) under
      # result[:internal_urls], then de-duplicates the list in place.
      #
      # The return value is that of Array#uniq!, i.e. nil when the list
      # contained no duplicates.
      def update_and_follow_links
        collected = result[:internal_urls] = []
        update_links { |url| collected.push(url.to_s) }
        collected.uniq!
      end

      # Resolves a link target. Anything that does not start with "#/" is
      # passed to the parent implementation unchanged. A "#/" link is accepted
      # only when it has a single path segment, and is then prefixed with the
      # root URL before being passed on.
      #
      # @param str [String] the raw link target
      # @return nil for "#/" links with more than one segment (or none),
      #   otherwise whatever the parent implementation returns
      def to_internal_url(str)
        return super(str) unless str.start_with?('#/')
        return nil unless str =~ /^#\/[^\/]+$/

        super(root_url.to_s + str)
      end
    end
  end
end
|
@ -0,0 +1,28 @@
|
|||||||
|
module Docs
  class Immutablejs
    # Path normalization for Immutable.js, where the URL fragment is used as
    # the page path.
    class NormalizePathsFilter < Docs::NormalizePathsFilter
      # Matches an href that starts with "#" or ".#", i.e. a fragment url.
      FRAGMENT_REGEX = /^(\.)?#/

      # When the current URL carries a fragment, seeds @path with that fragment
      # (minus a leading "/") before delegating to the parent implementation.
      def path
        fragment = current_url.fragment
        # Remove "/" from the start
        @path = fragment.sub(/^\//, '') if fragment

        super
      end

      # Strips the leading "#" / ".#" from fragment hrefs; every other href is
      # handled by the parent implementation.
      #
      # @param href [String] the link target to normalize
      def normalize_href(href)
        if href =~ FRAGMENT_REGEX
          href.gsub(FRAGMENT_REGEX, '')
        else
          super
        end
      end
    end
  end
end
|
@ -0,0 +1,38 @@
|
|||||||
|
module Docs
  # Scraper definition for the Immutable.js documentation hosted under
  # `base_url`. Pages are fetched through a browser session (see the `stub`
  # block) instead of being read as plain HTTP responses.
  class Immutablejs < UrlScraper
    self.name = "ImmutableJS"
    self.type = "immutablejs"
    self.release = "3.8.1"
    self.base_url = "https://facebook.github.io/immutable-js/docs/"

    #
    # Replacing core html filters with our own, so we can handle fragments in
    # URLs (the Immutablejs filters work on "#/..." style links).
    #
    html_filters.replace 'internal_urls', 'immutablejs/internal_urls'
    html_filters.replace 'normalize_paths', 'immutablejs/normalize_paths'

    # Appended after the filters already in the stack; 'immutablejs/clean_html'
    # is listed before 'immutablejs/entries'.
    html_filters.push 'immutablejs/clean_html', 'immutablejs/entries'

    # NOTE: a `<<-` heredoc keeps the leading whitespace of its body verbatim.
    options[:attribution] = <<-HTML
This documentation is generated from <a href="https://github.com/facebook/immutable-js/blob/master/type-definitions/Immutable.d.ts">Immutable.d.ts</a>.
Pull requests and <a href="https://github.com/facebook/immutable-js/issues">Issues</a> welcome.
    HTML

    # The /.*/ pattern matches every URL, so every page goes through this block.
    stub(/.*/) do |url|
      #
      # Reuse capybara sessions, since we scrape all pages..
      # by visiting 'about:blank' we reset the oldest session.
      #
      # NOTE(review): the session stored in @capybara is never closed in the
      # code visible here - confirm it is cleaned up elsewhere.
      @capybara ||= load_capybara_selenium
      @capybara.visit 'about:blank'
      @capybara.visit url

      # The block's value is the inner HTML of the ".docContents" element.
      # NOTE(review): this relies on execute_script handing back the script's
      # `return` value - confirm for the driver in use (evaluate_script is the
      # call documented to return a result).
      @capybara.execute_script 'return document.querySelector(".docContents").innerHTML'
    end
  end
end
|
After Width: | Height: | Size: 758 B |
After Width: | Height: | Size: 1.5 KiB |
Loading…
Reference in new issue