Improve Ember.js scraper

pull/481/head
Thibaut Courouble 9 years ago
parent 3a7cc9c535
commit 9386a2d368

@ -47,6 +47,21 @@
@extend %heading-box;
}
// Heading strip meant to sit flush on top of the <pre> that immediately follows it.
%pre-heading {
padding: .375rem .625rem;
line-height: 1.5;
// Square off the bottom corners so the strip joins the block below it.
border-bottom-left-radius: 0;
border-bottom-right-radius: 0;
@extend %heading-box;
// Adjacent-sibling rule: the <pre> right after the heading drops its top
// corners, top border and top margin.
+ pre {
border-top-left-radius: 0;
border-top-right-radius: 0;
border-top: 0;
margin-top: 0;
}
}
//
// Notes
//

@ -15,22 +15,14 @@
.text-danger { @extend %label, %label-red; }
.bs-example {
p.bs-example {
padding: .375rem .625rem;
line-height: 1.5;
@extend %heading-box;
}
div.bs-example {
border-bottom-left-radius: 0;
border-bottom-right-radius: 0;
+ pre {
border-top-left-radius: 0;
border-top-right-radius: 0;
border-top: 0;
margin-top: 0;
}
@extend %pre-heading;
}
a.thumbnail {

@ -1,53 +1,22 @@
._ember {
> .class-info { @extend %note, %note-blue; }
> .class-info > p { margin: 0; }
@extend %simple;
> .description > h2, > .description > h3 { font-size: 1rem; }
blockquote.class-info { @extend %note-blue; }
blockquote.class-info > p { margin: 0; }
.item-entry { padding-left: 1rem; }
.pre-title { @extend %pre-heading; }
.title {
margin-left: -1rem;
@extend %block-heading;
> h2, > .args, > .flag {
display: inline-block;
vertical-align: top;
margin: 0;
line-height: inherit;
font-size: inherit;
}
> .flag { // "static"
margin-left: .5em;
color: $textColorLight;
}
> .type {
float: right;
font-weight: normal;
}
}
.meta { // "defined in"
h2 > .flag, h2 > .type {
margin-left: .5em;
color: $textColorLight;
margin-bottom: 1em;
font-weight: normal;
}
.return, .params {
margin-top: 1.5em;
h2 > .type { float: right; }
> h3 {
display: inline-block;
vertical-align: top;
margin: 0 0 1em;
font-size: inherit;
@extend %label, %label-blue;
}
}
.meta { color: $textColorLight; }
dl { margin: 0 1em; }
dt + dt, dd + dt { margin-top: .5em; }
p > code { @extend %label; }
dt > code { @extend %label; }
}

@ -2,7 +2,23 @@ module Docs
class Ember
class CleanHtmlFilter < Filter
def call
root_page? ? root : other
css('hr', '.edit-page').remove
# Remove code highlighting
css('.highlight').each do |node|
node.before(%(<div class="pre-title"><code>#{node.at_css('thead').content.strip}</code></div>)) if node.at_css('thead')
node.content = node.at_css('.code pre').content
node.name = 'pre'
node['data-language'] = node['class'][/(javascript|js|html|hbs|handlebars)/, 1]
node['data-language'] = node['data-language'].sub(/(hbs|handlebars)/, 'html')
end
if base_url.path.start_with?('/api')
root_page? ? root : api
else
guide
end
doc
end
@ -28,8 +44,8 @@ module Docs
end
end
def other
css(*%w(hr .edit-page #api-options .toc-anchor .inherited .protected .private .deprecated)).remove
def api
css('#api-options', '.toc-anchor', '.inherited').remove
# Remove tabs and "Index"
css('.tabs').each do |node|
@ -41,24 +57,45 @@ module Docs
css('.method', '.property', '.event').remove_attr('id')
css('h3[data-id]').each do |node|
# Put id attributes on headings
node.name = 'h2'
node['id'] = node['data-id']
node.remove_attribute 'data-id'
node.content = node.content
# Move headings, span.args, etc. into a div.title
div = Nokogiri::XML::Node.new 'div', doc
div['class'] = 'title'
node.before(div).parent = div
div.add_child(div.next_element) while div.next_element.name == 'span'
heading = Nokogiri::XML::Node.new 'h2', doc
heading['id'] = node['data-id']
node.before(heading).remove
heading.content = node.content
heading.add_child(heading.next_element) while heading.next_element.name == 'span'
end
# Remove code highlighting
css('.highlight').each do |node|
node.content = node.at_css('.code pre').content
node.name = 'pre'
node['data-language'] = node['class'][/(javascript|js|html)/, 1]
css('> .class-info').each do |node|
node.name = 'blockquote'
end
css('div.meta').each do |node|
node.name = 'p'
end
css('span.type').each do |node|
node.name = 'code'
end
css('.pane', '.item-entry').each do |node|
node.before(node.children).remove
end
end
# Cleans up a guide page:
# - makes the <article> element the working document,
# - drops the previous/next navigation links,
# - re-roots image URLs from the public guides host onto base_url,
# - promotes h3-h5 by one level when the page has no h2,
# - unwraps <em> elements that sit directly inside a blockquote paragraph.
def guide
  @doc = at_css('article')

  css('.previous-guide', '.next-guide').remove

  css('img').each do |img|
    img['src'] = img['src'].sub('https://guides.emberjs.com/', base_url.to_s)
  end

  # Without an h2 the heading hierarchy would start too deep, so shift it up.
  unless at_css('h2')
    css('h3, h4, h5').each do |heading|
      heading.name = heading.name.sub(/\d/) { |level| level.to_i.pred }
    end
  end

  # Keep the emphasized text but discard the <em> wrapper itself.
  css('blockquote > p > em').each { |em| em.before(em.children).remove }
end
end

@ -2,40 +2,52 @@ module Docs
class Ember
class EntriesFilter < Docs::EntriesFilter
def get_name
name = at_css('.api-header').content.split.first
# Remove "Ember." prefix if the next character is uppercase
name.sub! %r{\AEmber\.([A-Z])(?!EATURES)}, '\1'
name == 'Handlebars.helpers' ? 'Handlebars Helpers' : name
if base_url.path.start_with?('/api')
name = at_css('.api-header').content.split.first
# Remove "Ember." prefix if the next character is uppercase
name.sub! %r{\AEmber\.([A-Z])(?!EATURES)}, '\1'
name == 'Handlebars.helpers' ? 'Handlebars Helpers' : name
else
name = at_css('article h1').content.remove('Edit Page').strip
name = at_css('li.toc-level-0.selected > a').content if name == 'Introduction'
name
end
end
def get_type
group = if css('p').any? { |node| node.content.include?('PRIVATE') }
'Private'
elsif css('p').any? { |node| node.content.include?('DEPRECATED') }
'Deprecated'
end
if at_css('.api-header').content.include?('Module')
'Modules'
elsif name.start_with? 'DS'
group ? "Data (#{group})" : 'Data'
elsif name.start_with? 'RSVP'
'RSVP'
elsif name.start_with? 'Test'
'Test'
if base_url.path.start_with?('/api')
if at_css('.api-header').content.include?('Module')
'Modules'
elsif name.start_with? 'DS'
'Data'
elsif name.start_with? 'RSVP'
'RSVP'
elsif name.start_with? 'Test'
'Test'
elsif name.start_with?('Ember')
name.split('.')[0..1].join('.')
else
name.split('.').first
end
else
group || name
if node = at_css('li.toc-level-0.selected > a')
"Guide: #{node.content.strip}"
else
'Guide'
end
end
end
def additional_entries
css('.item-entry').map do |node|
heading = node.at_css('h2')
return [] unless base_url.path.start_with?('/api')
css('.item-entry:not(.inherited)').map do |node|
heading = node.at_css('h3[data-id]')
name = heading.content.strip
if self.name == 'Handlebars Helpers'
name << ' (handlebars helper)'
next [name, heading['id']]
next [name, heading['data-id']]
end
# Give their own type to "Ember.platform", "Ember.run", etc.
@ -51,7 +63,7 @@ module Docs
name << '()' if node['class'].include? 'method'
name << ' event' if node['class'].include? 'event'
[name, heading['id'], type]
[name, heading['data-id'], type]
end
end
end

@ -1,32 +1,67 @@
module Docs
class Ember < UrlScraper
class << self
attr_accessor :guide_url
end
self.name = 'Ember.js'
self.slug = 'ember'
self.type = 'ember'
self.release = '2.7.0'
self.base_url = 'http://emberjs.com/api/'
self.guide_url = "https://guides.emberjs.com/v#{self.release}/"
self.initial_urls = [guide_url]
self.links = {
home: 'http://emberjs.com/',
code: 'https://github.com/emberjs/ember.js'
}
html_filters.push 'ember/clean_html', 'ember/entries', 'title'
html_filters.push 'ember/entries', 'ember/clean_html', 'title'
options[:trailing_slash] = false
options[:title] = false
options[:root_title] = 'Ember.js'
options[:container] = ->(filter) do
filter.root_page? ? '#toc-list' : '#content'
if filter.base_url.path.start_with?('/api')
filter.root_page? ? '#toc-list' : '#content'
else
'main'
end
end
# Duplicates
options[:skip] = %w(classes/String.html data/classes/DS.html)
options[:skip_patterns] = [/\._/]
options[:skip_patterns] = [/\._/, /contributing/]
options[:attribution] = <<-HTML
&copy; 2016 Yehuda Katz, Tom Dale and Ember.js contributors<br>
Licensed under the MIT License.
HTML
# Parsed form of the class-level guide URL; built on first use and then reused.
def guide_url
  @guide_url = URL.parse(self.class.guide_url) unless @guide_url
  @guide_url
end
private
# True when +url+ falls under either the API base URL or the guide URL.
# The guide URL is only consulted when the base URL does not contain +url+.
def process_url?(url)
  within_base = base_url.contains?(url)
  within_base || guide_url.contains?(url)
end
# Processes +response+ with @base_url temporarily rewritten to match the URL the
# response was actually served from, then restores the original values.
# NOTE(review): presumably this lets downstream code distinguish API pages from
# guide pages by inspecting base_url.path — confirm against the parent class.
def process_response(response)
# Remember the configured base URL parts so the ensure clause can restore them.
original_scheme = @base_url.scheme
original_host = @base_url.host
original_path = @base_url.path
@base_url.scheme = response.effective_url.scheme
@base_url.host = response.effective_url.host
# Use the leading "/v<digits and dots>/" segment of the effective path when
# present; otherwise fall back to '/api/'.
@base_url.path = response.effective_url.path[/\A\/v[\d\.]+\//, 0] || '/api/'
# Bare super forwards the same +response+ argument to the parent implementation.
super
ensure
# Always put the original base URL back, even if the parent implementation raised.
@base_url.scheme = original_scheme
@base_url.host = original_host
@base_url.path = original_path
end
end
end

Loading…
Cancel
Save