Finish Ansible scraper

pull/382/head
Thibaut Courouble 9 years ago
parent 16955d1277
commit c17932e811

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 104 KiB

After

Width:  |  Height:  |  Size: 106 KiB

@ -7,7 +7,7 @@ class app.collections.Types extends app.Collection
(result[@_groupFor(type)] ||= []).push(type) (result[@_groupFor(type)] ||= []).push(type)
result.filter (e) -> e.length > 0 result.filter (e) -> e.length > 0
GUIDES_RGX = /(^|[\s\(])(guide|guides|tutorial|reference|getting\ started)($|[\s\):])/i GUIDES_RGX = /(^|[\s\(])(guide|guides|tutorial|reference|playbooks|getting\ started)($|[\s\):])/i
_groupFor: (type) -> _groupFor: (type) ->
if GUIDES_RGX.test(type.name) if GUIDES_RGX.test(type.name)

@ -1,7 +1,7 @@
[ [
[ [
"2016-03-06", "2016-03-06",
"New documentation: <a href=\"/tensorflow/\">TensorFlow</a> and <a href=\"/haxe/\">Haxe</a>" "New documentation: <a href=\"/tensorflow/\">TensorFlow</a>, <a href=\"/haxe/\">Haxe</a> and <a href=\"/ansible/\">Ansible</a>"
], [ ], [
"2016-02-28", "2016-02-28",
"New documentations: <a href=\"/codeigniter/\">CodeIgniter</a>, <a href=\"/nginx_lua_module/\">nginx Lua Module</a> and <a href=\"/influxdata/\">InfluxData</a>" "New documentations: <a href=\"/codeigniter/\">CodeIgniter</a>, <a href=\"/nginx_lua_module/\">nginx Lua Module</a> and <a href=\"/influxdata/\">InfluxData</a>"

@ -79,6 +79,11 @@ credits = [
'2010-2016 Google, Inc.', '2010-2016 Google, Inc.',
'CC BY', 'CC BY',
'https://creativecommons.org/licenses/by/4.0/' 'https://creativecommons.org/licenses/by/4.0/'
], [
'Ansible',
'2012-2016 Michael DeHaan',
'GPLv3',
'https://raw.githubusercontent.com/ansible/ansible/devel/COPYING'
], [ ], [
'Apache HTTP Server', 'Apache HTTP Server',
'2016 The Apache Software Foundation', '2016 The Apache Software Foundation',

@ -131,3 +131,4 @@
._icon-influxdata:before { background-position: -5rem -10rem; @extend %darkIconFix !optional; } ._icon-influxdata:before { background-position: -5rem -10rem; @extend %darkIconFix !optional; }
._icon-tensorflow:before { background-position: -6rem -10rem; } ._icon-tensorflow:before { background-position: -6rem -10rem; }
._icon-haxe:before { background-position: -7rem -10rem; } ._icon-haxe:before { background-position: -7rem -10rem; }
._icon-ansible:before { background-position: -8rem -10rem; @extend %darkIconFix !optional; }

@ -2,16 +2,16 @@ module Docs
class Ansible class Ansible
class CleanHtmlFilter < Filter class CleanHtmlFilter < Filter
def call def call
# Remove 'Permalink to this headline' @doc = at_css('#page-content')
css('.headerlink').remove
# Make proper table headers css('blockquote > div > pre:first-child:last-child', 'blockquote > div > ul:first-child:last-child').each do |node|
css('th.head').each do |node| node.ancestors('blockquote').first.before(node).remove
node.name = 'th'
end end
css('table').each do |node|
node.remove_attribute('border') css('a > em').each do |node|
node.remove_attribute('cellpadding') node.before(node.children).remove
end end
doc doc
end end
end end

@ -1,62 +1,29 @@
module Docs module Docs
class Ansible class Ansible
class EntriesFilter < Docs::EntriesFilter class EntriesFilter < Docs::EntriesFilter
TYPES = {
'intro' => 'Basic Topics',
'modules' => 'Basic Topics',
'common' => 'Basic Topics',
'playbooks' => 'Playbooks',
'become' => 'Playbooks',
'test' => 'Playbooks',
'YAMLSyntax' => 'Playbooks',
'list' => 'Module Categories',
'guide' => 'Advanced Topics',
'developing' => 'Advanced Topics',
'galaxy' => 'Advanced Topics'
}
HIDE_SLUGS = [
'playbooks',
'playbooks_special_topics',
'list_of_all_modules.html',
'modules_by_category',
'modules'
]
def get_name def get_name
node = at_css('h1') name = at_css('h1').content.strip
name = node.content.strip name.remove! "\u{00B6}"
case name.remove! %r{ \- .*}
when name.empty? name.remove! 'Introduction To '
super name.remove! %r{ Guide\z}
when slug.eql?('modules_intro')
name = 'Modules'
when name.eql?('Introduction')
name = '#Introduction'
when name.eql?('Getting Started')
name = '#Getting Started'
when name.eql?('Introduction To Ad-Hoc Commands')
name = 'Ad-Hoc Commands'
end
name name
end end
def get_type def get_type
if HIDE_SLUGS.include?(slug) if slug.include?('module')
type = nil if name =~ /\A[a-z]/ && node = css('.toctree-l2.current').last
"Modules: #{node.content.remove(' Modules')}"
else else
akey = slug.split('_').first 'Modules'
type = TYPES.key?(akey) ? TYPES[akey] : 'Modules Reference'
end
type
end end
elsif slug.include?('playbook')
def additional_entries 'Playbooks'
[] elsif slug.include?('guide')
'Guides'
else
'Miscellaneous'
end end
def include_default_entry?
true
end end
end end
end end

@ -16,6 +16,7 @@ module Docs
css('table').each do |node| css('table').each do |node|
node.remove_attribute 'border' node.remove_attribute 'border'
node.remove_attribute 'cellpadding'
end end
css('.section').each do |node| css('.section').each do |node|

@ -1,30 +1,27 @@
module Docs module Docs
class Ansible < UrlScraper class Ansible < UrlScraper
self.name = 'Ansible' self.name = 'Ansible'
self.type = 'ansible' self.type = 'sphinx'
self.release = '2.1.0' self.release = '2.0.1'
self.base_url = 'http://docs.ansible.com/ansible/' self.base_url = 'https://docs.ansible.com/ansible/'
self.root_path = 'intro.html'
self.links = { self.links = {
home: 'http://docs.ansible.com', home: 'https://www.ansible.com/',
code: 'https://github.com/ansible/ansible' code: 'https://github.com/ansible/ansible'
} }
html_filters.push 'ansible/clean_html', 'ansible/entries' html_filters.push 'ansible/entries', 'ansible/clean_html', 'codeigniter/clean_html'
options[:title] = 'Ansible' options[:skip] = %w(
options[:container] = '#page-content' glossary.html
options[:skip] = [ faq.html
'glossary.html', community.html
'faq.html', tower.html
'community.html', quickstart.html
'tower.html', list_of_all_modules.html)
'quickstart.html'
]
options[:attribution] = <<-HTML options[:attribution] = <<-HTML
&copy; Michael DeHaan<br> &copy; 2012&ndash;2016 Michael DeHaan<br>
Licensed under the GNU General Public License v.3. Licensed under the GNU General Public License version 3.
HTML HTML
end end
end end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 471 B

After

Width:  |  Height:  |  Size: 620 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 946 B

After

Width:  |  Height:  |  Size: 1.2 KiB

@ -0,0 +1 @@
https://www.ansible.com/logos
Loading…
Cancel
Save