diff --git a/assets/javascripts/news.json b/assets/javascripts/news.json index e61a924f..01974bf8 100644 --- a/assets/javascripts/news.json +++ b/assets/javascripts/news.json @@ -1,5 +1,8 @@ [ [ + "2017-01-22", + "New HTTP documentation (thanks Mozilla)" + ], [ "2016-12-04", "New documentations: SQLite, Codeception and CodeceptJS" ], [ diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee index 4ebd4bea..d8b7a2e9 100644 --- a/assets/javascripts/templates/pages/about_tmpl.coffee +++ b/assets/javascripts/templates/pages/about_tmpl.coffee @@ -180,7 +180,7 @@ credits = [ 'Apache', 'https://raw.githubusercontent.com/apache/cordova-docs/master/LICENSE' ], [ - 'CSS
DOM
HTML
JavaScript
SVG
XPath', + 'CSS
DOM
HTTP
HTML
JavaScript
SVG
XPath', '2005-2017 Mozilla Developer Network and individual contributors', 'CC BY-SA', 'https://creativecommons.org/licenses/by-sa/2.5/' diff --git a/lib/docs/filters/http/clean_html.rb b/lib/docs/filters/http/clean_html.rb index ffe19857..956233f4 100644 --- a/lib/docs/filters/http/clean_html.rb +++ b/lib/docs/filters/http/clean_html.rb @@ -2,11 +2,29 @@ module Docs class Http class CleanHtmlFilter < Filter def call - if root_page? - doc.inner_html = '

Hypertext Transfer Protocol

' - return doc + current_url.host == 'tools.ietf.org' ? ietf : mdn + doc + end + + def mdn + css('.column-container', '.column-half').each do |node| + node.before(node.children).remove + end + + css('p > code + strong').each do |node| + code = node.previous_element + if code.content =~ /\A[\s\d]+\z/ + code.content = "#{code.content.strip} #{node.content.strip}" + node.remove + end end + css('strong > code').each do |node| + node.parent.before(node.parent.children).remove + end + end + + def ietf doc.child.remove while doc.child.name != 'pre' css('span.grey', '.invisible', '.noprint', 'a[href^="#page-"]').remove @@ -33,8 +51,6 @@ module Docs html.remove! %r[\.{2,}$] html.gsub! %r[(^\n$){3,}], "\n" doc.inner_html = %(
#{html}
) - - doc end end end diff --git a/lib/docs/filters/http/entries.rb b/lib/docs/filters/http/entries.rb index e08c43be..1d4f98c8 100644 --- a/lib/docs/filters/http/entries.rb +++ b/lib/docs/filters/http/entries.rb @@ -2,95 +2,40 @@ module Docs class Http class EntriesFilter < Docs::EntriesFilter def get_name - name = at_css('h1').content - name.remove! %r{\A.+\:} - name.remove! %r{\A.+\-\-} - "#{rfc}: #{name.strip}" + if current_url.host == 'tools.ietf.org' + name = at_css('h1').content + name.remove! %r{\A.+\:} + name.remove! %r{\A.+\-\-} + rfc = slug.sub('rfc', 'RFC ') + "#{rfc}: #{name.strip}" + elsif slug.start_with?('Status') + at_css('code').content + else + name = super + name.remove! %r{\A\w+\.} + name.remove! 'Basics of HTTP.' + name.sub! 'Content-Security-Policy.', 'CSP.' + name.sub! '.', ': ' + name.sub! '1: x', '1.x' + name + end end def get_type - 'RFC' - end - - def rfc - slug.sub('rfc', 'RFC ') - end - - SECTIONS = { - 'rfc2616' => [ - [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15], - [14], - [] - ], - 'rfc4918' => [ - [], - [11], - [] - ], - 'rfc7230' => [ - (2..9).to_a, - [], - [] - ], - 'rfc7231' => [ - [3, 8, 9], - [], - [4, 5, 6, 7] - ], - 'rfc7232' => [ - [5, 6, 7, 8], - [2, 3, 4], - [] - ], - 'rfc7233' => [ - [5, 6], - [2, 3, 4], - [] - ], - 'rfc7234' => [ - [3, 6, 7, 8], - [4, 5], - [] - ], - 'rfc7235' => [ - [2, 5, 6], - [3, 4], - [] - ] - } - - LEVEL_1 = /\A(\d+)\z/ - LEVEL_2 = /\A(\d+)\.\d+\z/ - LEVEL_3 = /\A(\d+)\.\d+\.\d+\z/ - - def additional_entries - return [] if root_page? - type = nil - - css('a[href^="#section-"]').each_with_object([]) do |node, entries| - id = node['href'].remove('#') - break entries if entries.any? { |e| e[1] == id } - - content = node.next.content.strip - content.remove! %r{\s*\.+\d*\z} - content.remove! %r{\A[\.\s]+} - - name = "#{content} (#{rfc})" - number = node.content.strip - - if number =~ LEVEL_1 - if SECTIONS[slug][0].include?($1.to_i) - entries << [name, id, self.name] - end - - type = content.sub(/\ Definitions\z/, 's') - type = 'Request Header Fields' if type.include?('Header Fields') && type.exclude?('Response') - type = 'Response Status Codes' if type.include?('Status Codes') - type = self.name unless type.start_with?('Request ') || type.start_with?('Response ') - elsif (number =~ LEVEL_2 && SECTIONS[slug][1].include?($1.to_i)) || - (number =~ LEVEL_3 && SECTIONS[slug][2].include?($1.to_i)) - entries << [name, id, (name =~ /\A\d\d\d/ ? 'Response Status Codes' : type )] - end + return 'RFC' if current_url.host == 'tools.ietf.org' + + if slug.start_with?('Headers/Content-Security-Policy') + 'CSP' + elsif slug.start_with?('Headers') + 'Headers' + elsif slug.start_with?('Methods') + 'Methods' + elsif slug.start_with?('Status') + 'Status' + elsif slug.start_with?('Basics_of_HTTP') + 'Guides: Basics' + else + 'Guides' end end end diff --git a/lib/docs/filters/mdn/contribute_link.rb b/lib/docs/filters/mdn/contribute_link.rb index 66d8ce79..1444bdff 100644 --- a/lib/docs/filters/mdn/contribute_link.rb +++ b/lib/docs/filters/mdn/contribute_link.rb @@ -2,6 +2,8 @@ module Docs class Mdn class ContributeLinkFilter < Filter def call + return html if current_url.host != 'developer.mozilla.org' + html << <<-HTML.strip_heredoc

@@ -9,6 +11,7 @@ module Docs

HTML + html end end diff --git a/lib/docs/scrapers/http.rb b/lib/docs/scrapers/http.rb index 2bca45dd..d1c83577 100644 --- a/lib/docs/scrapers/http.rb +++ b/lib/docs/scrapers/http.rb @@ -1,17 +1,40 @@ module Docs - class Http < UrlScraper + class Http < Mdn + include MultipleBaseUrls + self.name = 'HTTP' - self.type = 'rfc' - self.base_url = 'https://tools.ietf.org/html/' - self.initial_paths = %w(rfc2616 rfc4918 rfc7230 rfc7231 - rfc7232 rfc7233 rfc7234 rfc7235) + self.base_urls = ['https://developer.mozilla.org/en-US/docs/Web/HTTP', 'https://tools.ietf.org/html/'] + + html_filters.push 'http/clean_html', 'http/entries', 'title' - html_filters.push 'http/clean_html', 'http/entries' + options[:root_title] = 'HTTP' + options[:title] = ->(filter) { + filter.current_url.host == 'tools.ietf.org' ? false : filter.default_title + } + options[:container] = ->(filter) { + filter.current_url.host == 'tools.ietf.org' ? '.content' : nil + } + options[:skip_links] = ->(filter) { + filter.current_url.host == 'tools.ietf.org' ? true : false + } + options[:attribution] = ->(filter) { + if filter.current_url.host == 'tools.ietf.org' + "© document authors. All rights reserved." + else + Docs::Mdn.options[:attribution] + end + } - options[:container] = '.content' - options[:skip_links] = true - options[:attribution] = <<-HTML - © document authors. All rights reserved. - HTML + def initial_urls + %w(https://developer.mozilla.org/en-US/docs/Web/HTTP + https://tools.ietf.org/html/rfc2616 + https://tools.ietf.org/html/rfc4918 + https://tools.ietf.org/html/rfc7230 + https://tools.ietf.org/html/rfc7231 + https://tools.ietf.org/html/rfc7232 + https://tools.ietf.org/html/rfc7233 + https://tools.ietf.org/html/rfc7234 + https://tools.ietf.org/html/rfc7235) + end end end