trio: finish scraper and filters

pull/1002/head
Jasper van Merle 6 years ago
parent 5d224ce579
commit 3a6a158f14

1
.gitignore vendored

@ -7,4 +7,3 @@ public/fonts
public/docs/**/* public/docs/**/*
docs/**/* docs/**/*
!docs/*.md !docs/*.md
vendor

@ -721,6 +721,11 @@ credits = [
'2018 HashiCorp', '2018 HashiCorp',
'MPL', 'MPL',
'https://raw.githubusercontent.com/hashicorp/terraform-website/master/LICENSE.md' 'https://raw.githubusercontent.com/hashicorp/terraform-website/master/LICENSE.md'
], [
'Trio',
'2017 Nathaniel J. Smith',
'MIT',
'https://raw.githubusercontent.com/python-trio/trio/master/LICENSE.MIT'
], [ ], [
'Twig', 'Twig',
'2009-2018 The Twig Team', '2009-2018 The Twig Team',

@ -3,6 +3,7 @@ module Docs
class CleanHtmlFilter < Filter class CleanHtmlFilter < Filter
def call def call
@doc = at_css('div[role="main"]') @doc = at_css('div[role="main"]')
css('.section, [itemprop=articleBody]').each do |node| css('.section, [itemprop=articleBody]').each do |node|
node.replace node.children node.replace node.children
end end
@ -11,9 +12,11 @@ module Docs
css('dt').each do |node| css('dt').each do |node|
node.name = 'h3' node.name = 'h3'
if node.parent.classes.include? 'field-list' if node.parent.classes.include? 'field-list'
node.name = 'h4' node.name = 'h4'
node['style'] = 'margin: 0' node['style'] = 'margin: 0'
if node.text == 'Parameters' or node.text == 'Raises' if node.text == 'Parameters' or node.text == 'Raises'
node.next_element.css('strong').each do |n| node.next_element.css('strong').each do |n|
n.name = 'code' n.name = 'code'
@ -21,30 +24,31 @@ module Docs
end end
else else
code = doc.document.create_element 'code' code = doc.document.create_element 'code'
if em = node.at_css('.property') if em = node.at_css('.property')
code.inner_html = "<em>#{em.text.strip}</em> " code.inner_html = "<em>#{em.text.strip}</em> "
em.remove em.remove
end end
code.inner_html += node.inner_text.strip code.inner_html += node.inner_text.strip
node.inner_html = code node.inner_html = code
end end
end end
css('pre').each do |node| css('pre').each do |node|
node.content = node.content.strip
classes = node.parent.parent.classes classes = node.parent.parent.classes
if classes.include? 'highlight-python3' if classes.include? 'highlight-python3'
node['class'] = 'language-python'
node['data-language'] = 'python' node['data-language'] = 'python'
end end
node.parent.parent.replace(node) node.parent.parent.replace(node)
end end
css('.admonition').each do |node| css('.admonition').each do |node|
node.name = 'blockquote' node.name = 'blockquote'
node.at_css('.admonition-title').name = 'h4' node.at_css('.admonition-title').name = 'h4'
# new_node = node.document.create_element 'blockquote'
# new_node.inner_html = node.inner_html
# node.replace new_node
end end
doc doc

@ -11,7 +11,6 @@ module Docs
def additional_entries def additional_entries
css('.descname').each_with_object [] do |node, entries| css('.descname').each_with_object [] do |node, entries|
name = node.text name = node.text
if node.previous.classes.include? 'descclassname' if node.previous.classes.include? 'descclassname'
name = node.previous.text + name name = node.previous.text + name
@ -25,9 +24,11 @@ module Docs
or dl.classes.include?('data') or dl.classes.include?('data')
parent = dl.parent.previous_element parent = dl.parent.previous_element
cls = '' cls = ''
if n = parent.at_css('.descclassname') if n = parent.at_css('.descclassname')
cls += n.text cls += n.text
end end
if n = parent.at_css('.descname') if n = parent.at_css('.descname')
if n.text == "The nursery interface" if n.text == "The nursery interface"
cls += "Nursery." cls += "Nursery."
@ -35,6 +36,7 @@ module Docs
cls += n.text + '.' cls += n.text + '.'
end end
end end
name = cls + name name = cls + name
end end

@ -1,8 +1,8 @@
module Docs module Docs
class Trio < UrlScraper class Trio < UrlScraper
self.type = 'simple' self.type = 'simple'
self.release = '0.11' self.release = '0.12.1'
self.base_url = 'https://trio.readthedocs.io/en/latest/' self.base_url = 'https://trio.readthedocs.io/en/v0.12.1/'
self.root_path = 'index.html' self.root_path = 'index.html'
self.links = { self.links = {
home: 'https://trio.readthedocs.io/', home: 'https://trio.readthedocs.io/',
@ -19,9 +19,13 @@ module Docs
] ]
options[:attribution] = <<-HTML options[:attribution] = <<-HTML
&copy; 2017-2019 Nathaniel J. Smith<br> &copy; 2017 Nathaniel J. Smith<br>
Licensed under MIT and Apache2. Licensed under the MIT License.
HTML HTML
# Looks up the newest published version from the "stable" documentation page.
#
# Fetches https://trio.readthedocs.io/en/stable/ via fetch_doc, takes the
# first link inside `.rst-other-versions` whose href starts with "/en/v",
# and returns that link's text with its first character dropped
# (presumably the leading "v" of a tag such as "v0.12.1" — confirm against
# the live page markup).
#
# opts is forwarded untouched to fetch_doc.
def get_latest_version(opts)
  stable_page = fetch_doc('https://trio.readthedocs.io/en/stable/', opts)
  version_link = stable_page.at_css('.rst-other-versions a[href^="/en/v"]')
  link_text = version_link.content
  link_text[1..-1]
end
end end
end end

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

Loading…
Cancel
Save