Finish Ansible scraper

pull/382/head
Thibaut Courouble 9 years ago
parent 16955d1277
commit c17932e811

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 104 KiB

After

Width:  |  Height:  |  Size: 106 KiB

@ -7,7 +7,7 @@ class app.collections.Types extends app.Collection
(result[@_groupFor(type)] ||= []).push(type)
result.filter (e) -> e.length > 0
GUIDES_RGX = /(^|[\s\(])(guide|guides|tutorial|reference|getting\ started)($|[\s\):])/i
GUIDES_RGX = /(^|[\s\(])(guide|guides|tutorial|reference|playbooks|getting\ started)($|[\s\):])/i
_groupFor: (type) ->
if GUIDES_RGX.test(type.name)

@ -1,7 +1,7 @@
[
[
"2016-03-06",
"New documentation: <a href=\"/tensorflow/\">TensorFlow</a> and <a href=\"/haxe/\">Haxe</a>"
"New documentation: <a href=\"/tensorflow/\">TensorFlow</a>, <a href=\"/haxe/\">Haxe</a> and <a href=\"/ansible/\">Ansible</a>"
], [
"2016-02-28",
"New documentations: <a href=\"/codeigniter/\">CodeIgniter</a>, <a href=\"/nginx_lua_module/\">nginx Lua Module</a> and <a href=\"/influxdata/\">InfluxData</a>"

@ -79,6 +79,11 @@ credits = [
'2010-2016 Google, Inc.',
'CC BY',
'https://creativecommons.org/licenses/by/4.0/'
], [
'Ansible',
'2012-2016 Michael DeHaan',
'GPLv3',
'https://raw.githubusercontent.com/ansible/ansible/devel/COPYING'
], [
'Apache HTTP Server',
'2016 The Apache Software Foundation',

@ -131,3 +131,4 @@
._icon-influxdata:before { background-position: -5rem -10rem; @extend %darkIconFix !optional; }
._icon-tensorflow:before { background-position: -6rem -10rem; }
._icon-haxe:before { background-position: -7rem -10rem; }
._icon-ansible:before { background-position: -8rem -10rem; @extend %darkIconFix !optional; }

@ -2,16 +2,16 @@ module Docs
class Ansible
class CleanHtmlFilter < Filter
def call
# Remove 'Permalink to this headline'
css('.headerlink').remove
# Make proper table headers
css('th.head').each do |node|
node.name = 'th'
@doc = at_css('#page-content')
css('blockquote > div > pre:first-child:last-child', 'blockquote > div > ul:first-child:last-child').each do |node|
node.ancestors('blockquote').first.before(node).remove
end
css('table').each do |node|
node.remove_attribute('border')
node.remove_attribute('cellpadding')
css('a > em').each do |node|
node.before(node.children).remove
end
doc
end
end

@ -1,62 +1,29 @@
module Docs
class Ansible
class EntriesFilter < Docs::EntriesFilter
TYPES = {
'intro' => 'Basic Topics',
'modules' => 'Basic Topics',
'common' => 'Basic Topics',
'playbooks' => 'Playbooks',
'become' => 'Playbooks',
'test' => 'Playbooks',
'YAMLSyntax' => 'Playbooks',
'list' => 'Module Categories',
'guide' => 'Advanced Topics',
'developing' => 'Advanced Topics',
'galaxy' => 'Advanced Topics'
}
HIDE_SLUGS = [
'playbooks',
'playbooks_special_topics',
'list_of_all_modules.html',
'modules_by_category',
'modules'
]
def get_name
node = at_css('h1')
name = node.content.strip
case
when name.empty?
super
when slug.eql?('modules_intro')
name = 'Modules'
when name.eql?('Introduction')
name = '#Introduction'
when name.eql?('Getting Started')
name = '#Getting Started'
when name.eql?('Introduction To Ad-Hoc Commands')
name = 'Ad-Hoc Commands'
end
name = at_css('h1').content.strip
name.remove! "\u{00B6}"
name.remove! %r{ \- .*}
name.remove! 'Introduction To '
name.remove! %r{ Guide\z}
name
end
def get_type
if HIDE_SLUGS.include?(slug)
type = nil
if slug.include?('module')
if name =~ /\A[a-z]/ && node = css('.toctree-l2.current').last
"Modules: #{node.content.remove(' Modules')}"
else
'Modules'
end
elsif slug.include?('playbook')
'Playbooks'
elsif slug.include?('guide')
'Guides'
else
akey = slug.split('_').first
type = TYPES.key?(akey) ? TYPES[akey] : 'Modules Reference'
'Miscellaneous'
end
type
end
def additional_entries
[]
end
def include_default_entry?
true
end
end
end

@ -16,6 +16,7 @@ module Docs
css('table').each do |node|
node.remove_attribute 'border'
node.remove_attribute 'cellpadding'
end
css('.section').each do |node|

@ -1,30 +1,27 @@
module Docs
class Ansible < UrlScraper
self.name = 'Ansible'
self.type = 'ansible'
self.release = '2.1.0'
self.base_url = 'http://docs.ansible.com/ansible/'
self.root_path = 'intro.html'
self.type = 'sphinx'
self.release = '2.0.1'
self.base_url = 'https://docs.ansible.com/ansible/'
self.links = {
home: 'http://docs.ansible.com',
home: 'https://www.ansible.com/',
code: 'https://github.com/ansible/ansible'
}
html_filters.push 'ansible/clean_html', 'ansible/entries'
html_filters.push 'ansible/entries', 'ansible/clean_html', 'codeigniter/clean_html'
options[:title] = 'Ansible'
options[:container] = '#page-content'
options[:skip] = [
'glossary.html',
'faq.html',
'community.html',
'tower.html',
'quickstart.html'
]
options[:skip] = %w(
glossary.html
faq.html
community.html
tower.html
quickstart.html
list_of_all_modules.html)
options[:attribution] = <<-HTML
&copy; Michael DeHaan<br>
Licensed under the GNU General Public License v.3.
&copy; 2012&ndash;2016 Michael DeHaan<br>
Licensed under the GNU General Public License version 3.
HTML
end
end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 471 B

After

Width:  |  Height:  |  Size: 620 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 946 B

After

Width:  |  Height:  |  Size: 1.2 KiB

@ -0,0 +1 @@
https://www.ansible.com/logos
Loading…
Cancel
Save