From 9386a2d368491ada5aac58a29276e25d89b558ef Mon Sep 17 00:00:00 2001 From: Thibaut Courouble Date: Sun, 4 Sep 2016 12:35:42 -0400 Subject: [PATCH] Improve Ember.js scraper --- assets/stylesheets/global/_classes.scss | 15 +++++ assets/stylesheets/pages/_bootstrap.scss | 12 +--- assets/stylesheets/pages/_ember.scss | 51 ++++------------ lib/docs/filters/ember/clean_html.rb | 75 ++++++++++++++++++------ lib/docs/filters/ember/entries.rb | 58 ++++++++++-------- lib/docs/scrapers/ember.rb | 43 ++++++++++++-- 6 files changed, 157 insertions(+), 97 deletions(-) diff --git a/assets/stylesheets/global/_classes.scss b/assets/stylesheets/global/_classes.scss index d928e464..f75c29e2 100644 --- a/assets/stylesheets/global/_classes.scss +++ b/assets/stylesheets/global/_classes.scss @@ -47,6 +47,21 @@ @extend %heading-box; } +%pre-heading { + padding: .375rem .625rem; + line-height: 1.5; + border-bottom-left-radius: 0; + border-bottom-right-radius: 0; + @extend %heading-box; + + + pre { + border-top-left-radius: 0; + border-top-right-radius: 0; + border-top: 0; + margin-top: 0; + } +} + // // Notes // diff --git a/assets/stylesheets/pages/_bootstrap.scss b/assets/stylesheets/pages/_bootstrap.scss index 06931ccf..0deb79b7 100644 --- a/assets/stylesheets/pages/_bootstrap.scss +++ b/assets/stylesheets/pages/_bootstrap.scss @@ -15,22 +15,14 @@ .text-danger { @extend %label, %label-red; } - .bs-example { + p.bs-example { padding: .375rem .625rem; line-height: 1.5; @extend %heading-box; } div.bs-example { - border-bottom-left-radius: 0; - border-bottom-right-radius: 0; - - + pre { - border-top-left-radius: 0; - border-top-right-radius: 0; - border-top: 0; - margin-top: 0; - } + @extend %pre-heading; } a.thumbnail { diff --git a/assets/stylesheets/pages/_ember.scss b/assets/stylesheets/pages/_ember.scss index cea85417..40771e6e 100644 --- a/assets/stylesheets/pages/_ember.scss +++ b/assets/stylesheets/pages/_ember.scss @@ -1,53 +1,22 @@ ._ember { - > .class-info { @extend %note, %note-blue; } - > .class-info > p { margin: 0; } + @extend %simple; - > .description > h2, > .description > h3 { font-size: 1rem; } + blockquote.class-info { @extend %note-blue; } + blockquote.class-info > p { margin: 0; } - .item-entry { padding-left: 1rem; } + .pre-title { @extend %pre-heading; } - .title { - margin-left: -1rem; - @extend %block-heading; - - > h2, > .args, > .flag { - display: inline-block; - vertical-align: top; - margin: 0; - line-height: inherit; - font-size: inherit; - } - - > .flag { // "static" - margin-left: .5em; - color: $textColorLight; - } - - > .type { - float: right; - font-weight: normal; - } - } - - .meta { // "defined in" + h2 > .flag, h2 > .type { + margin-left: .5em; color: $textColorLight; - margin-bottom: 1em; + font-weight: normal; } - .return, .params { - margin-top: 1.5em; + h2 > .type { float: right; } - > h3 { - display: inline-block; - vertical-align: top; - margin: 0 0 1em; - font-size: inherit; - @extend %label, %label-blue; - } - } + .meta { color: $textColorLight; } dl { margin: 0 1em; } dt + dt, dd + dt { margin-top: .5em; } - - p > code { @extend %label; } + dt > code { @extend %label; } } diff --git a/lib/docs/filters/ember/clean_html.rb b/lib/docs/filters/ember/clean_html.rb index 21d82ada..641029fe 100644 --- a/lib/docs/filters/ember/clean_html.rb +++ b/lib/docs/filters/ember/clean_html.rb @@ -2,7 +2,23 @@ module Docs class Ember class CleanHtmlFilter < Filter def call - root_page? ? root : other + css('hr', '.edit-page').remove + + # Remove code highlighting + css('.highlight').each do |node| + node.before(%(
#{node.at_css('thead').content.strip}
)) if node.at_css('thead') + node.content = node.at_css('.code pre').content + node.name = 'pre' + node['data-language'] = node['class'][/(javascript|js|html|hbs|handlebars)/, 1] + node['data-language'] = node['data-language'].sub(/(hbs|handlebars)/, 'html') + end + + if base_url.path.start_with?('/api') + root_page? ? root : api + else + guide + end + doc end @@ -28,8 +44,8 @@ module Docs end end - def other - css(*%w(hr .edit-page #api-options .toc-anchor .inherited .protected .private .deprecated)).remove + def api + css('#api-options', '.toc-anchor', '.inherited').remove # Remove tabs and "Index" css('.tabs').each do |node| @@ -41,24 +57,45 @@ module Docs css('.method', '.property', '.event').remove_attr('id') css('h3[data-id]').each do |node| - # Put id attributes on headings - node.name = 'h2' - node['id'] = node['data-id'] - node.remove_attribute 'data-id' - node.content = node.content - - # Move headings, span.args, etc. into a div.title - div = Nokogiri::XML::Node.new 'div', doc - div['class'] = 'title' - node.before(div).parent = div - div.add_child(div.next_element) while div.next_element.name == 'span' + heading = Nokogiri::XML::Node.new 'h2', doc + heading['id'] = node['data-id'] + node.before(heading).remove + heading.content = node.content + heading.add_child(heading.next_element) while heading.next_element.name == 'span' end - # Remove code highlighting - css('.highlight').each do |node| - node.content = node.at_css('.code pre').content - node.name = 'pre' - node['data-language'] = node['class'][/(javascript|js|html)/, 1] + css('> .class-info').each do |node| + node.name = 'blockquote' + end + + css('div.meta').each do |node| + node.name = 'p' + end + + css('span.type').each do |node| + node.name = 'code' + end + + css('.pane', '.item-entry').each do |node| + node.before(node.children).remove + end + end + + def guide + @doc = at_css('article') + + css('.previous-guide', '.next-guide').remove + + css('img').each do |node| + node['src'] = node['src'].sub('https://guides.emberjs.com/', base_url.to_s) + end + + css('h3, h4, h5').each do |node| + node.name = node.name.sub(/\d/) { |i| i.to_i - 1 } + end unless at_css('h2') + + css('blockquote > p > em').each do |node| + node.before(node.children).remove end end end diff --git a/lib/docs/filters/ember/entries.rb b/lib/docs/filters/ember/entries.rb index 5fd08a59..301618d0 100644 --- a/lib/docs/filters/ember/entries.rb +++ b/lib/docs/filters/ember/entries.rb @@ -2,40 +2,52 @@ module Docs class Ember class EntriesFilter < Docs::EntriesFilter def get_name - name = at_css('.api-header').content.split.first - # Remove "Ember." prefix if the next character is uppercase - name.sub! %r{\AEmber\.([A-Z])(?!EATURES)}, '\1' - name == 'Handlebars.helpers' ? 'Handlebars Helpers' : name + if base_url.path.start_with?('/api') + name = at_css('.api-header').content.split.first + # Remove "Ember." prefix if the next character is uppercase + name.sub! %r{\AEmber\.([A-Z])(?!EATURES)}, '\1' + name == 'Handlebars.helpers' ? 'Handlebars Helpers' : name + else + name = at_css('article h1').content.remove('Edit Page').strip + name = at_css('li.toc-level-0.selected > a').content if name == 'Introduction' + name + end end def get_type - group = if css('p').any? { |node| node.content.include?('PRIVATE') } - 'Private' - elsif css('p').any? { |node| node.content.include?('DEPRECATED') } - 'Deprecated' - end - - if at_css('.api-header').content.include?('Module') - 'Modules' - elsif name.start_with? 'DS' - group ? "Data (#{group})" : 'Data' - elsif name.start_with? 'RSVP' - 'RSVP' - elsif name.start_with? 'Test' - 'Test' + if base_url.path.start_with?('/api') + if at_css('.api-header').content.include?('Module') + 'Modules' + elsif name.start_with? 'DS' + 'Data' + elsif name.start_with? 'RSVP' + 'RSVP' + elsif name.start_with? 'Test' + 'Test' + elsif name.start_with?('Ember') + name.split('.')[0..1].join('.') + else + name.split('.').first + end else - group || name + if node = at_css('li.toc-level-0.selected > a') + "Guide: #{node.content.strip}" + else + 'Guide' + end end end def additional_entries - css('.item-entry').map do |node| - heading = node.at_css('h2') + return [] unless base_url.path.start_with?('/api') + + css('.item-entry:not(.inherited)').map do |node| + heading = node.at_css('h3[data-id]') name = heading.content.strip if self.name == 'Handlebars Helpers' name << ' (handlebars helper)' - next [name, heading['id']] + next [name, heading['data-id']] end # Give their own type to "Ember.platform", "Ember.run", etc. @@ -51,7 +63,7 @@ module Docs name << '()' if node['class'].include? 'method' name << ' event' if node['class'].include? 'event' - [name, heading['id'], type] + [name, heading['data-id'], type] end end end diff --git a/lib/docs/scrapers/ember.rb b/lib/docs/scrapers/ember.rb index 9c5040d1..37b2708d 100644 --- a/lib/docs/scrapers/ember.rb +++ b/lib/docs/scrapers/ember.rb @@ -1,32 +1,67 @@ module Docs class Ember < UrlScraper + class << self + attr_accessor :guide_url + end + self.name = 'Ember.js' self.slug = 'ember' self.type = 'ember' self.release = '2.7.0' self.base_url = 'http://emberjs.com/api/' + self.guide_url = "https://guides.emberjs.com/v#{self.release}/" + self.initial_urls = [guide_url] self.links = { home: 'http://emberjs.com/', code: 'https://github.com/emberjs/ember.js' } - html_filters.push 'ember/clean_html', 'ember/entries', 'title' + html_filters.push 'ember/entries', 'ember/clean_html', 'title' + + options[:trailing_slash] = false options[:title] = false options[:root_title] = 'Ember.js' options[:container] = ->(filter) do - filter.root_page? ? '#toc-list' : '#content' + if filter.base_url.path.start_with?('/api') + filter.root_page? ? '#toc-list' : '#content' + else + 'main' + end end # Duplicates options[:skip] = %w(classes/String.html data/classes/DS.html) - - options[:skip_patterns] = [/\._/] + options[:skip_patterns] = [/\._/, /contributing/] options[:attribution] = <<-HTML © 2016 Yehuda Katz, Tom Dale and Ember.js contributors
Licensed under the MIT License. HTML + + def guide_url + @guide_url ||= URL.parse(self.class.guide_url) + end + + private + + def process_url?(url) + base_url.contains?(url) || guide_url.contains?(url) + end + + def process_response(response) + original_scheme = @base_url.scheme + original_host = @base_url.host + original_path = @base_url.path + @base_url.scheme = response.effective_url.scheme + @base_url.host = response.effective_url.host + @base_url.path = response.effective_url.path[/\A\/v[\d\.]+\//, 0] || '/api/' + super + ensure + @base_url.scheme = original_scheme + @base_url.host = original_host + @base_url.path = original_path + end end end