mirror of https://github.com/freeCodeCamp/devdocs
Core scraper changes: *) Add regexp support to stub *) Add support for fragments in internal_urls, normalize_paths => done externally.pull/513/head
parent
f0211029ef
commit
b1ecc6c964
@ -0,0 +1,38 @@
|
||||
module Docs
  class Immutablejs
    # Tidies the scraped ImmutableJS markup: unwraps the container element,
    # drops React bookkeeping attributes, gives member headings an id,
    # rewrites in-site link targets and turns code blocks into highlighted
    # <pre> elements.
    class CleanHtmlFilter < Filter
      # Runs every clean-up step and returns the resulting document.
      def call
        # Skip the container "div"
        @doc = at_css('div')

        # Remove data-reactid attributes for cleaner html
        css('*[data-reactid]').each do |react_el|
          react_el.delete 'data-reactid'
        end

        # Add id to member label, so we can navigate among them
        css('h3.memberLabel').each do |member_label|
          member_label['id'] = member_label.content.strip.chomp('()')
        end

        # Hrefs that carry a scheme ("https:", "mailto:") or are
        # protocol-relative ("//host") point outside the documentation.
        external = %r{\A(?:[a-z][a-z0-9+.\-]*:|//)}i

        css('a').each do |link|
          href = link['href']

          # Anchors without an href have nothing to rewrite, and external
          # URLs must be left untouched: replacing their "/" with "#" would
          # turn "https://host/x" into "https:##host#x".
          next if href.nil? || href =~ external

          # Remove "/" (or "#/") from the start
          href = href.sub(%r{\A#?/}, '')

          # We need to convert links - from Iterable/butLast to Iterable#butLast
          link['href'] = href.split('/').join('#')
        end

        # Replace the code tag of code blocks with pre, and add stylings.
        css('code.codeBlock').each do |code_block|
          code_block.name = 'pre'
          code_block['data-language'] = 'javascript'
          code_block['class'] = 'language-javascript'
        end

        doc
      end
    end
  end
end
|
@ -0,0 +1,31 @@
|
||||
module Docs
  class Immutablejs
    # Builds the index entries for an ImmutableJS page: one entry for the
    # documented type plus one entry per member heading.
    class EntriesFilter < Docs::EntriesFilter
      # Entry name: the text of the page's "h1.typeHeader", or nil when the
      # page has no such header.
      def name
        type_header_text
      end

      # Entry type: the text of the page's "h1.typeHeader".
      #
      # TODO: Is this ok? The index page has no type header, so nil is
      # returned for it - it probably should not have its own type.
      def type
        type_header_text
      end

      # Returns [entry_name, fragment_id] pairs, one per "h3.memberLabel".
      # Pages whose url has no fragment contribute no additional entries.
      def additional_entries
        return [] if current_url.fragment.nil?

        # Look the type up once instead of once per member.
        type_name = type

        css('h3.memberLabel').map do |member_label|
          # Strip surrounding whitespace before removing the "()" suffix so
          # the id is computed the same way as in the clean_html filter
          # ("content.strip.chomp('()')"); otherwise padded labels would
          # yield ids that match no element.
          label = member_label.content.strip
          ["#{type_name}##{label}", label.chomp('()')]
        end
      end

      private

      # Text of the "h1.typeHeader" element, or nil when it is absent.
      def type_header_text
        header = at_css('h1.typeHeader')
        header.content if header
      end
    end
  end
end
|
@ -0,0 +1,23 @@
|
||||
module Docs
  class Immutablejs
    # Variant of the core internal_urls filter that understands the
    # "#/Page" fragment links used by the ImmutableJS documentation.
    class InternalUrlsFilter < Docs::InternalUrlsFilter
      # Rewrites the links and records every url yielded by update_links
      # (as a string) in result[:internal_urls], without duplicates.
      #
      # The return value is that of Array#uniq!, i.e. nil when the list
      # held no duplicates.
      def update_and_follow_links
        urls = result[:internal_urls] = []
        update_links { |url| urls << url.to_s }
        urls.uniq!
      end

      # Converts a "#/Page" href into a url under root_url before handing it
      # to the parent implementation.
      #
      # Returns nil for "#/" hrefs that contain a further "/" (for example
      # "#/Iterable/butLast"); every other value, including nil, is passed
      # to super unchanged.
      def to_internal_url(str)
        # Guard against nil so that an anchor without an href is left to the
        # parent implementation instead of raising NoMethodError here.
        if str && str.start_with?('#/')
          # \A and \z anchor the whole string rather than a single line.
          return nil unless str =~ %r{\A#/[^/]+\z}

          str = root_url.to_s + str
        end

        super(str)
      end
    end
  end
end
|
@ -0,0 +1,28 @@
|
||||
module Docs
  class Immutablejs
    # Variant of the core normalize_paths filter that derives the page path
    # from the url fragment and strips fragment prefixes from hrefs.
    class NormalizePathsFilter < Docs::NormalizePathsFilter
      # Matches a url that starts with "#" or ".#", which means it is a
      # fragment url.
      FRAGMENT_REGEX = /^(\.)?#/

      # When the current url has a fragment, that fragment (minus a leading
      # "/") is stored in @path before deferring to the parent
      # implementation.
      def path
        fragment = current_url.fragment
        @path = fragment.sub(/^\//, '') if fragment

        super
      end

      # Fragment hrefs have their "#" / ".#" prefix removed; any other href
      # is handled by the parent implementation.
      def normalize_href(href)
        if href =~ FRAGMENT_REGEX
          href.gsub(FRAGMENT_REGEX, '')
        else
          super
        end
      end
    end
  end
end
|
@ -0,0 +1,38 @@
|
||||
module Docs
  # Scraper definition for the ImmutableJS documentation.
  class Immutablejs < UrlScraper
    self.name = "ImmutableJS"
    self.type = "immutablejs"
    self.release = "3.8.1"
    self.base_url = "https://facebook.github.io/immutable-js/docs/"

    # Replace the core html filters with our own, so we can handle
    # fragments in urls.
    html_filters.replace('internal_urls', 'immutablejs/internal_urls')
    html_filters.replace('normalize_paths', 'immutablejs/normalize_paths')

    html_filters.push('immutablejs/clean_html', 'immutablejs/entries')

    options[:attribution] = <<-HTML
      This documentation is generated from <a href="https://github.com/facebook/immutable-js/blob/master/type-definitions/Immutable.d.ts">Immutable.d.ts</a>.
      Pull requests and <a href="https://github.com/facebook/immutable-js/issues">Issues</a> welcome.
    HTML

    # Every url is stubbed: the page is loaded in a browser session and the
    # inner HTML of its ".docContents" element is used as the page body.
    stub(/.*/) do |url|
      # A single capybara session is reused, since all pages are scraped;
      # visiting 'about:blank' first resets what the previous visit left
      # behind.
      @capybara ||= load_capybara_selenium
      @capybara.visit('about:blank')
      @capybara.visit(url)

      @capybara.execute_script('return document.querySelector(".docContents").innerHTML')
    end
  end
end
|
After Width: | Height: | Size: 758 B |
After Width: | Height: | Size: 1.5 KiB |
Loading…
Reference in new issue