Add HTTP documentation from MDN

pull/566/merge
Thibaut Courouble 8 years ago
parent 9a97067e06
commit 8a59a660cf

@ -1,5 +1,8 @@
[
[
"2017-01-22",
"New <a href=\"/http/\">HTTP</a> documentation (thanks Mozilla)"
], [
"2016-12-04",
"New documentations: <a href=\"/sqlite/\">SQLite</a>, <a href=\"/codeception/\">Codeception</a> and <a href=\"/codeceptjs/\">CodeceptJS</a>"
], [

@ -180,7 +180,7 @@ credits = [
'Apache',
'https://raw.githubusercontent.com/apache/cordova-docs/master/LICENSE'
], [
'CSS<br>DOM<br>HTML<br>JavaScript<br>SVG<br>XPath',
'CSS<br>DOM<br>HTTP<br>HTML<br>JavaScript<br>SVG<br>XPath',
'2005-2017 Mozilla Developer Network and individual contributors',
'CC BY-SA',
'https://creativecommons.org/licenses/by-sa/2.5/'

@ -2,11 +2,29 @@ module Docs
class Http
class CleanHtmlFilter < Filter
def call
if root_page?
doc.inner_html = '<h1>Hypertext Transfer Protocol</h1>'
return doc
current_url.host == 'tools.ietf.org' ? ietf : mdn
doc
end
# Cleans up markup on pages that are not served from tools.ietf.org
# (see the host check in `call`). Performs three independent passes over
# the document and returns the result of the last `each`.
def mdn
  # Unwrap layout columns: the wrapper's children are passed to `before`
  # on the wrapper, and the wrapper itself is then removed.
  # NOTE(review): relies on `before` returning the receiver — confirm
  # against the Nokogiri version in use.
  css('.column-container', '.column-half').each do |wrapper|
    wrapper.before(wrapper.children).remove
  end

  # A <code> made up only of digits/whitespace followed by a <strong>
  # (presumably a status code and its bold label): fold the label text
  # into the <code> and drop the <strong>.
  css('p > code + strong').each do |label|
    number = label.previous_element
    next unless number.content =~ /\A[\s\d]+\z/

    number.content = "#{number.content.strip} #{label.content.strip}"
    label.remove
  end

  # <code> nested in <strong>: hoist the <strong>'s children out in front
  # of it and remove the now-redundant <strong>.
  css('strong > code').each do |inner|
    bold = inner.parent
    bold.before(bold.children).remove
  end
end
def ietf
doc.child.remove while doc.child.name != 'pre'
css('span.grey', '.invisible', '.noprint', 'a[href^="#page-"]').remove
@ -33,8 +51,6 @@ module Docs
html.remove! %r[\.{2,}$]
html.gsub! %r[(^\n$){3,}], "\n"
doc.inner_html = %(<div class="_rfc-pre">#{html}</div>)
doc
end
end
end

@ -2,95 +2,40 @@ module Docs
class Http
class EntriesFilter < Docs::EntriesFilter
def get_name
name = at_css('h1').content
name.remove! %r{\A.+\:}
name.remove! %r{\A.+\-\-}
"#{rfc}: #{name.strip}"
if current_url.host == 'tools.ietf.org'
name = at_css('h1').content
name.remove! %r{\A.+\:}
name.remove! %r{\A.+\-\-}
rfc = slug.sub('rfc', 'RFC ')
"#{rfc}: #{name.strip}"
elsif slug.start_with?('Status')
at_css('code').content
else
name = super
name.remove! %r{\A\w+\.}
name.remove! 'Basics of HTTP.'
name.sub! 'Content-Security-Policy.', 'CSP.'
name.sub! '.', ': '
name.sub! '1: x', '1.x'
name
end
end
def get_type
'RFC'
end
def rfc
slug.sub('rfc', 'RFC ')
end
SECTIONS = {
'rfc2616' => [
[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15],
[14],
[]
],
'rfc4918' => [
[],
[11],
[]
],
'rfc7230' => [
(2..9).to_a,
[],
[]
],
'rfc7231' => [
[3, 8, 9],
[],
[4, 5, 6, 7]
],
'rfc7232' => [
[5, 6, 7, 8],
[2, 3, 4],
[]
],
'rfc7233' => [
[5, 6],
[2, 3, 4],
[]
],
'rfc7234' => [
[3, 6, 7, 8],
[4, 5],
[]
],
'rfc7235' => [
[2, 5, 6],
[3, 4],
[]
]
}
LEVEL_1 = /\A(\d+)\z/
LEVEL_2 = /\A(\d+)\.\d+\z/
LEVEL_3 = /\A(\d+)\.\d+\.\d+\z/
def additional_entries
return [] if root_page?
type = nil
css('a[href^="#section-"]').each_with_object([]) do |node, entries|
id = node['href'].remove('#')
break entries if entries.any? { |e| e[1] == id }
content = node.next.content.strip
content.remove! %r{\s*\.+\d*\z}
content.remove! %r{\A[\.\s]+}
name = "#{content} (#{rfc})"
number = node.content.strip
if number =~ LEVEL_1
if SECTIONS[slug][0].include?($1.to_i)
entries << [name, id, self.name]
end
type = content.sub(/\ Definitions\z/, 's')
type = 'Request Header Fields' if type.include?('Header Fields') && type.exclude?('Response')
type = 'Response Status Codes' if type.include?('Status Codes')
type = self.name unless type.start_with?('Request ') || type.start_with?('Response ')
elsif (number =~ LEVEL_2 && SECTIONS[slug][1].include?($1.to_i)) ||
(number =~ LEVEL_3 && SECTIONS[slug][2].include?($1.to_i))
entries << [name, id, (name =~ /\A\d\d\d/ ? 'Response Status Codes' : type )]
end
return 'RFC' if current_url.host == 'tools.ietf.org'
if slug.start_with?('Headers/Content-Security-Policy')
'CSP'
elsif slug.start_with?('Headers')
'Headers'
elsif slug.start_with?('Methods')
'Methods'
elsif slug.start_with?('Status')
'Status'
elsif slug.start_with?('Basics_of_HTTP')
'Guides: Basics'
else
'Guides'
end
end
end

@ -2,6 +2,8 @@ module Docs
class Mdn
class ContributeLinkFilter < Filter
def call
return html if current_url.host != 'developer.mozilla.org'
html << <<-HTML.strip_heredoc
<div class="_attribution">
<p class="_attribution-p">
@ -9,6 +11,7 @@ module Docs
</p>
</div>
HTML
html
end
end

@ -1,17 +1,40 @@
module Docs
class Http < UrlScraper
class Http < Mdn
include MultipleBaseUrls
self.name = 'HTTP'
self.type = 'rfc'
self.base_url = 'https://tools.ietf.org/html/'
self.initial_paths = %w(rfc2616 rfc4918 rfc7230 rfc7231
rfc7232 rfc7233 rfc7234 rfc7235)
self.base_urls = ['https://developer.mozilla.org/en-US/docs/Web/HTTP', 'https://tools.ietf.org/html/']
html_filters.push 'http/clean_html', 'http/entries', 'title'
html_filters.push 'http/clean_html', 'http/entries'
options[:root_title] = 'HTTP'
options[:title] = ->(filter) {
filter.current_url.host == 'tools.ietf.org' ? false : filter.default_title
}
options[:container] = ->(filter) {
filter.current_url.host == 'tools.ietf.org' ? '.content' : nil
}
options[:skip_links] = ->(filter) {
filter.current_url.host == 'tools.ietf.org' ? true : false
}
options[:attribution] = ->(filter) {
if filter.current_url.host == 'tools.ietf.org'
"&copy; document authors. All rights reserved."
else
Docs::Mdn.options[:attribution]
end
}
options[:container] = '.content'
options[:skip_links] = true
options[:attribution] = <<-HTML
&copy; document authors. All rights reserved.
HTML
# Returns the URLs to start from: the MDN HTTP landing page followed by
# one tools.ietf.org HTML page per RFC listed below.
def initial_urls
  rfc_urls = %w[rfc2616 rfc4918 rfc7230 rfc7231 rfc7232 rfc7233 rfc7234 rfc7235].map do |rfc|
    "https://tools.ietf.org/html/#{rfc}"
  end

  ['https://developer.mozilla.org/en-US/docs/Web/HTTP', *rfc_urls]
end
end
end

Loading…
Cancel
Save