Finish Babel scraper

pull/712/merge
Thibaut Courouble 7 years ago
parent 1f2030de24
commit 7731a599e1

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 39 KiB

After

Width:  |  Height:  |  Size: 40 KiB

@ -1,7 +1,7 @@
[
[
"2018-2-4",
"New documentations: <a href=\"/jekyll/\">Jekyll</a> and <a href=\"/jsdoc/\">JSDoc</a>"
"New documentations: <a href=\"/babel/\">Babel</a>, <a href=\"/jekyll/\">Jekyll</a> and <a href=\"/jsdoc/\">JSDoc</a>"
], [
"2017-11-26",
"New documentations: <a href=\"/bluebird/\">Bluebird</a>, <a href=\"/eslint/\">ESLint</a> and <a href=\"/homebrew/\">Homebrew</a>"

@ -105,6 +105,11 @@ credits = [
'2010-2017 Caolan McMahon',
'MIT',
'https://raw.githubusercontent.com/caolan/async/master/LICENSE'
], [
'Babel',
'2018 Sebastian McKenzie',
'MIT',
'https://raw.githubusercontent.com/babel/website/master/LICENSE'
], [
'Backbone.js',
'2010-2016 Jeremy Ashkenas, DocumentCloud',

@ -35,7 +35,6 @@
'pages/angularjs',
'pages/apache',
'pages/async',
'pages/babel',
'pages/bootstrap',
'pages/c',
'pages/cakephp',

@ -35,7 +35,6 @@
'pages/angularjs',
'pages/apache',
'pages/async',
'pages/babel',
'pages/bootstrap',
'pages/c',
'pages/cakephp',

@ -169,3 +169,4 @@
._icon-eslint:before { background-position: -9rem -2rem; @extend %doc-icon-2; }
._icon-homebrew:before { background-position: 0 -3rem; @extend %doc-icon-2; }
._icon-jekyll:before { background-position: -1rem -3rem; @extend %doc-icon-2; }
._icon-babel:before { background-position: -2rem -3rem; @extend %doc-icon-2; }

@ -1,10 +0,0 @@
// Styles scoped to ._babel: extends the %simple placeholder and sets a
// 0.5em vertical / 0 horizontal margin on any h1-h6 nested in a ._note
// when that heading is the first child of its parent.
._babel {
@extend %simple;
._note {
h1, h2, h3, h4, h5, h6 {
&:first-child {
margin: 0.5em 0;
}
}
}
}

@ -2,72 +2,34 @@ module Docs
class Babel
class CleanHtmlFilter < Filter
def call
css('.btn-clipboard').remove
css('div.highlighter-rouge').each do |node|
pre = node.at_css('pre')
# copy over the highlighting metadata
match = /language-(\w+)/.match(node['class'])
if match
lang = match[1]
if lang == 'sh'
lang = 'bash'
end
pre['class'] = nil
pre['data-language'] = lang
end
# Remove the server-rendered syntax highlighting
code = pre.at_css('code')
code.content = code.text
# Remove the div.highlighter-rouge and div.highlight wrapping the <pre>
node.add_next_sibling pre
node.remove
if root_page?
doc.inner_html = '<h1>Babel</h1>'
return doc
end
header = at_css('.docs-header .col-md-12')
@doc = at_css('.docs-content')
doc.prepend_child(header)
css('blockquote').each do |node|
node.name = 'div'
node['class'] = '_note'
end
css('.btn-clipboard', '.package-links').remove
css((1..6).map { |n| "h#{n}" }).each do |header|
return unless header.at_css('a')
header.content = header.at_css('a').content
css('.col-md-12', 'h1 a', 'h2 a', 'h3 a', 'h4 a', 'h5 a', 'h5 a').each do |node|
node.before(node.children).remove
end
css('div.highlighter-rouge').each do |node|
pre = node.at_css('pre')
header = doc # .docs-content
.parent # .row
.parent # .container
.previous_element # .docs_header
lang = node['class'][/language-(\w+)/, 1]
lang = 'bash' if lang == 'sh'
pre['data-language'] = lang
toc = doc # .docs-content
.parent # .row
.at_css('.sidebar')
toc['class'] = '_toc'
toc.css('a').each do |a|
a['class'] = '_toc-link'
a.parent.remove if a.content == 'Community Discussion'
pre.remove_attribute('class')
pre.content = pre.content
node.replace(pre)
end
toc.css('ul').attr 'class', '_toc-list'
h1 = header.at_css('h1')
h1.content = h1.content
.titleize
.sub(/\bEnv\b/, 'env')
.sub(/\.[A-Z]/) { |s| s.downcase }
.sub(/\.babelrc/i, '.babelrc')
.sub('Common Js', 'CommonJS')
.sub('J Script', 'JScript')
.sub(/regexp/i, 'RegExp')
.sub(/api|Es(\d+)|cli|jsx?|[au]md/i) { |s| s.upcase }
doc.children.before toc
doc.children.before header.at_css 'p'
doc.children.before h1
css('code').remove_attr('class')
doc
end

@ -2,29 +2,38 @@ module Docs
class Babel
class EntriesFilter < Docs::EntriesFilter
def get_name
at_css('h1').content.sub /^(minify|syntax)|(transform|preset)$/i, ''
at_css('h1').content
end
def get_type
if subpath.start_with? 'plugins/preset'
if subpath.start_with?('plugins/preset')
'Presets'
elsif subpath.start_with? 'plugins/transform'
elsif subpath.start_with?('plugins/transform')
'Transform Plugins'
elsif subpath.start_with? 'plugins/minify'
elsif subpath.start_with?('plugins/minify')
'Minification'
elsif subpath.start_with? 'plugins/syntax'
elsif subpath.start_with?('plugins/syntax')
'Syntax Plugins'
elsif subpath.start_with? 'plugins'
elsif subpath.start_with?('plugins')
'Plugins'
elsif subpath.start_with? 'usage/'
elsif subpath.start_with?('usage/')
'Usage'
elsif subpath.start_with?('core-packages/')
'Core Packages'
else
'Docs'
'Miscellaneous'
end
end
def path
super
# Collects extra index entries from the <h2> headings of a page whose slug
# contains 'api'. Only headings whose stripped text starts with 'babel.' are
# kept; anything from the first '(' onward is collapsed to '()'.
# Returns an array of [name, heading id] pairs (empty for other pages).
def additional_entries
  return [] unless slug.include?('api')

  css('h2').each_with_object([]) do |heading, collected|
    label = heading.content.strip
    next unless label.start_with?('babel.')

    # Drop the argument list so the entry reads e.g. "babel.transform()".
    collected << [label.sub(%r{\(.*}, '()'), heading['id']]
  end
end
end
end

@ -1,10 +1,9 @@
module Docs
class Babel < UrlScraper
self.type = 'babel'
self.type = 'simple'
self.base_url = 'http://babeljs.io/docs/'
self.root_path = '/plugins/'
self.release = '6.26.0'
self.initial_paths = %w[faq tour usage/babel-register core-packages editors usage/caveats]
self.release = '6.26.1'
self.initial_paths = %w(core-packages/)
self.links = {
home: 'https://babeljs.io/',
code: 'https://github.com/babel/babel'
@ -13,19 +12,15 @@ module Docs
html_filters.push 'babel/clean_html', 'babel/entries'
options[:trailing_slash] = true
options[:container] = '.docs-content'
options[:skip] = %w{setup/ community/videos/}
options[:fix_urls] = ->(url) do
return url unless url.start_with? self.base_url
url.sub %r{/(index\.\w+)?$}, ''
end
options[:skip] = %w{setup/ editors/ community/videos/}
options[:attribution] = <<-HTML
&copy; 2018 Sebastian McKenzie<br>
Licensed under the
<a href="https://github.com/babel/website/blob/master/LICENSE">
MIT License
</a>
Licensed under the MIT License.
HTML
stub '' do
'<div></div>'
end
end
end

Binary file not shown.

After

Width:  |  Height:  |  Size: 653 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

@ -0,0 +1 @@
https://github.com/babel/website/tree/master/website/static/img
Loading…
Cancel
Save