trio: finish scraper and filters

pull/1002/head
Jasper van Merle 6 years ago
parent 5d224ce579
commit 3a6a158f14

1
.gitignore vendored

@ -7,4 +7,3 @@ public/fonts
public/docs/**/*
docs/**/*
!docs/*.md
vendor

@ -721,6 +721,11 @@ credits = [
'2018 HashiCorp',
'MPL',
'https://raw.githubusercontent.com/hashicorp/terraform-website/master/LICENSE.md'
], [
'Trio',
'2017 Nathaniel J. Smith',
'MIT',
'https://raw.githubusercontent.com/python-trio/trio/master/LICENSE.MIT'
], [
'Twig',
'2009-2018 The Twig Team',

@ -3,6 +3,7 @@ module Docs
class CleanHtmlFilter < Filter
def call
@doc = at_css('div[role="main"]')
css('.section, [itemprop=articleBody]').each do |node|
node.replace node.children
end
@ -11,9 +12,11 @@ module Docs
css('dt').each do |node|
node.name = 'h3'
if node.parent.classes.include? 'field-list'
node.name = 'h4'
node['style'] = 'margin: 0'
if node.text == 'Parameters' or node.text == 'Raises'
node.next_element.css('strong').each do |n|
n.name = 'code'
@ -21,30 +24,31 @@ module Docs
end
else
code = doc.document.create_element 'code'
if em = node.at_css('.property')
code.inner_html = "<em>#{em.text.strip}</em> "
em.remove
end
code.inner_html += node.inner_text.strip
node.inner_html = code
end
end
css('pre').each do |node|
node.content = node.content.strip
classes = node.parent.parent.classes
if classes.include? 'highlight-python3'
node['class'] = 'language-python'
node['data-language'] = 'python'
end
node.parent.parent.replace(node)
end
css('.admonition').each do |node|
node.name = 'blockquote'
node.at_css('.admonition-title').name = 'h4'
# new_node = node.document.create_element 'blockquote'
# new_node.inner_html = node.inner_html
# node.replace new_node
end
doc

@ -11,7 +11,6 @@ module Docs
def additional_entries
css('.descname').each_with_object [] do |node, entries|
name = node.text
if node.previous.classes.include? 'descclassname'
name = node.previous.text + name
@ -25,9 +24,11 @@ module Docs
or dl.classes.include?('data')
parent = dl.parent.previous_element
cls = ''
if n = parent.at_css('.descclassname')
cls += n.text
end
if n = parent.at_css('.descname')
if n.text == "The nursery interface"
cls += "Nursery."
@ -35,6 +36,7 @@ module Docs
cls += n.text + '.'
end
end
name = cls + name
end

@ -1,8 +1,8 @@
module Docs
class Trio < UrlScraper
self.type = 'simple'
self.release = '0.11'
self.base_url = 'https://trio.readthedocs.io/en/latest/'
self.release = '0.12.1'
self.base_url = 'https://trio.readthedocs.io/en/v0.12.1/'
self.root_path = 'index.html'
self.links = {
home: 'https://trio.readthedocs.io/',
@ -19,9 +19,13 @@ module Docs
]
options[:attribution] = <<-HTML
&copy; 2017-2019 Nathaniel J. Smith<br>
Licensed under MIT and Apache2.
&copy; 2017 Nathaniel J. Smith<br>
Licensed under the MIT License.
HTML
# Returns the latest version string for this documentation set.
#
# Fetches the "stable" Read the Docs page via fetch_doc, takes the first
# link under `.rst-other-versions` whose href begins with "/en/v", and
# returns that link's text with its first character removed.
#
# opts is passed through to fetch_doc unchanged.
def get_latest_version(opts)
  stable_page = fetch_doc('https://trio.readthedocs.io/en/stable/', opts)
  version_link = stable_page.at_css('.rst-other-versions a[href^="/en/v"]')
  # NOTE(review): the dropped first character is presumably a leading "v"
  # (e.g. "v0.12.1" -> "0.12.1") — confirm against the live page.
  version_link.content[1..-1]
end
end
end

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

Loading…
Cancel
Save