From 949417022aa28e1475ca3606fe9390b94910488f Mon Sep 17 00:00:00 2001 From: MasterEnoc Date: Mon, 30 Nov 2020 14:21:08 -0600 Subject: [PATCH] Add Chef version 16 - Add new filters due to changes in the Chef documentation pages. - Rename the old filters used for older versions of Chef. --- lib/docs/filters/chef/clean_html.rb | 31 ++++------- lib/docs/filters/chef/clean_html_old.rb | 37 +++++++++++++ lib/docs/filters/chef/entries.rb | 72 +++++++------------------ lib/docs/filters/chef/entries_old.rb | 68 +++++++++++++++++++++++ lib/docs/scrapers/chef.rb | 50 ++++++++++++++--- 5 files changed, 177 insertions(+), 81 deletions(-) create mode 100644 lib/docs/filters/chef/clean_html_old.rb create mode 100644 lib/docs/filters/chef/entries_old.rb diff --git a/lib/docs/filters/chef/clean_html.rb b/lib/docs/filters/chef/clean_html.rb index 364d113e..3076e680 100644 --- a/lib/docs/filters/chef/clean_html.rb +++ b/lib/docs/filters/chef/clean_html.rb @@ -2,33 +2,24 @@ module Docs class Chef class CleanHtmlFilter < Filter def call - @doc = at_css('div[role="main"]') + @doc = at_css('#main-content-col') - css('.headerlink').remove - - css('em', 'div.align-center', 'a[href$=".svg"]').each do |node| - node.before(node.children).remove + if root_page? 
+ css('img').remove end - css('.section').each do |node| - node.first_element_child['id'] = node['id'] if node['id'] - node.before(node.children).remove - end + css('pre').each do |node| + node.remove_attribute('style') - css('tt').each do |node| - node.content = node.content.strip - node.name = 'code' + if !(node.classes.include?('highlight')) + node.add_class('highlight') + node['data-language'] = 'ruby' + end end - css('table[border]').each do |node| - node.remove_attribute('border') - end + css('#feedback').remove - css('div[class*="highlight-"]').each do |node| - node.content = node.content.strip - node.name = 'pre' - node['data-language'] = node['class'][/highlight\-(\w+)/, 1] - end + css('.mini-toc-header').remove doc end diff --git a/lib/docs/filters/chef/clean_html_old.rb b/lib/docs/filters/chef/clean_html_old.rb new file mode 100644 index 00000000..ec5e3fbe --- /dev/null +++ b/lib/docs/filters/chef/clean_html_old.rb @@ -0,0 +1,37 @@ +module Docs + class Chef + class CleanHtmlOldFilter < Filter + def call + @doc = at_css('div[role="main"]') + + css('.headerlink').remove + + css('em', 'div.align-center', 'a[href$=".svg"]').each do |node| + node.before(node.children).remove + end + + css('.section').each do |node| + node.first_element_child['id'] = node['id'] if node['id'] + node.before(node.children).remove + end + + css('tt').each do |node| + node.content = node.content.strip + node.name = 'code' + end + + css('table[border]').each do |node| + node.remove_attribute('border') + end + + css('div[class*="highlight-"]').each do |node| + node.content = node.content.strip + node.name = 'pre' + node['data-language'] = node['class'][/highlight\-(\w+)/, 1] + end + + doc + end + end + end +end diff --git a/lib/docs/filters/chef/entries.rb b/lib/docs/filters/chef/entries.rb index 311ea769..14c4d341 100644 --- a/lib/docs/filters/chef/entries.rb +++ b/lib/docs/filters/chef/entries.rb @@ -1,68 +1,34 @@ module Docs class Chef class EntriesFilter < Docs::EntriesFilter + 
def get_name - name = at_css('.body h1').content - name.remove! "\u{00b6}" - name.remove! 'About the ' - name.remove! 'About ' - name + at_css('h1').content end - CLIENT_TYPE_BY_SLUG_END_WITH = { - 'knife_common_options' => 'Workflow Tools', - 'knife_using' => 'Workflow Tools', - 'resource_common' => 'Cookbooks', - 'config_rb_knife_optional_settings' => 'Workflow Tools', - 'knife_index_rebuild' => 'Workflow Tools', - 'handlers' => 'Extend Chef', - 'dsl_recipe' => 'Extend Chef', - 'resource' => 'Extend Chef' - } - - SERVER_TYPE_BY_SLUG_END_WITH = { - 'auth' => 'Theory & Concepts', - 'install_server' => 'Setup & Config', - 'install_server_pre' => 'Setup & Config', - 'config_rb_server_optional_settings' => 'Manage the Server', - 'ctl_chef_server' => 'Manage the Server' - } - def get_type - if server_page? - SERVER_TYPE_BY_SLUG_END_WITH.each do |key, value| - return "Chef Server / #{value}" if slug.end_with?(key) - end + + case slug + when /automate/ + 'Chef Automate' + when /compliance/ + 'Chef Compliance' + when /desktop/ + 'Chef Desktop' + when /habitat/ + 'Chef Habitat' + when /inspec/ + 'Chef InSpec' + when /workstation/ + 'Chef Workstation' + when /effortless/ + 'Effortless Pattern' else - CLIENT_TYPE_BY_SLUG_END_WITH.each do |key, value| - return value if slug.end_with?(key) - end + 'Chef Infra' end - path = nav_path - path.delete('Reference') - path = path[0..0] - path.unshift('Chef Server') if server_page? - - type = path.join(' / ') - type.sub 'Cookbooks / Cookbook', 'Cookbooks /' - type end - def server_page? 
- slug.start_with?(context[:server_path]) - end - - def nav_path - node = at_css(".nav-docs a[href='#{result[:path].split('/').last}']") - path = [] - until node['class'] && node['class'].include?('main-item') - path.unshift(node.first_element_child.content.strip) if node['class'] && node['class'].include?('has-sub-items') - node = node.parent - end - path.unshift(node.first_element_child.content.strip) - path - end end end end diff --git a/lib/docs/filters/chef/entries_old.rb b/lib/docs/filters/chef/entries_old.rb new file mode 100644 index 00000000..aa3eeae0 --- /dev/null +++ b/lib/docs/filters/chef/entries_old.rb @@ -0,0 +1,68 @@ +module Docs + class Chef + class EntriesOldFilter < Docs::EntriesFilter + def get_name + name = at_css('.body h1').content + name.remove! "\u{00b6}" + name.remove! 'About the ' + name.remove! 'About ' + name + end + + CLIENT_TYPE_BY_SLUG_END_WITH = { + 'knife_common_options' => 'Workflow Tools', + 'knife_using' => 'Workflow Tools', + 'resource_common' => 'Cookbooks', + 'config_rb_knife_optional_settings' => 'Workflow Tools', + 'knife_index_rebuild' => 'Workflow Tools', + 'handlers' => 'Extend Chef', + 'dsl_recipe' => 'Extend Chef', + 'resource' => 'Extend Chef' + } + + SERVER_TYPE_BY_SLUG_END_WITH = { + 'auth' => 'Theory & Concepts', + 'install_server' => 'Setup & Config', + 'install_server_pre' => 'Setup & Config', + 'config_rb_server_optional_settings' => 'Manage the Server', + 'ctl_chef_server' => 'Manage the Server' + } + + def get_type + if server_page? + SERVER_TYPE_BY_SLUG_END_WITH.each do |key, value| + return "Chef Server / #{value}" if slug.end_with?(key) + end + else + CLIENT_TYPE_BY_SLUG_END_WITH.each do |key, value| + return value if slug.end_with?(key) + end + end + + path = nav_path + path.delete('Reference') + path = path[0..0] + path.unshift('Chef Server') if server_page? + + type = path.join(' / ') + type.sub 'Cookbooks / Cookbook', 'Cookbooks /' + type + end + + def server_page? 
+ slug.start_with?(context[:server_path]) + end + + def nav_path + node = at_css(".nav-docs a[href='#{result[:path].split('/').last}']") + path = [] + until node['class'] && node['class'].include?('main-item') + path.unshift(node.first_element_child.content.strip) if node['class'] && node['class'].include?('has-sub-items') + node = node.parent + end + path.unshift(node.first_element_child.content.strip) + path + end + end + end +end diff --git a/lib/docs/scrapers/chef.rb b/lib/docs/scrapers/chef.rb index 3295f2e4..5435e8b6 100644 --- a/lib/docs/scrapers/chef.rb +++ b/lib/docs/scrapers/chef.rb @@ -1,20 +1,16 @@ module Docs class Chef < UrlScraper self.type = 'sphinx_simple' - self.base_url = 'https://docs-archive.chef.io/release/' + self.base_url = 'https://docs.chef.io' self.links = { home: 'https://www.chef.io/', code: 'https://github.com/chef/chef' } - html_filters.push 'chef/entries', 'chef/clean_html' - options[:skip_patterns] = [ - /\A[^\/]+\/\z/, - /\A[^\/]+\/index\.html\z/, - /\A[^\/]+\/release_notes\.html\z/, - /\Aserver[^\/]+\/chef_overview\.html\z/, - /\A[\d\-]+\/server_components\.html\z/ ] + /release_notes/, + /feedback/ + ] options[:attribution] = <<-HTML © Chef Software, Inc.
@@ -24,12 +20,38 @@ module Docs We are not affiliated with, endorsed or sponsored by Chef Inc. HTML + version '16' do + self.release = '16.7.61' + + options[:container] = '.off-canvas-wrapper' + + options[:skip] = [ + '/automate/api/', + '/habitat/supervisor_api/', + '/habitat/builder_api/' + ] + + html_filters.push 'chef/entries', 'chef/clean_html' + + end + version '12' do self.release = '12.13' + self.base_url = 'https://docs-archive.chef.io/release/' + + html_filters.push 'chef/entries_old', 'chef/clean_html_old' options[:client_path] = client_path = '12-13' options[:server_path] = server_path = 'server_12-8' + options[:skip_patterns] = [ + /\A[^\/]+\/\z/, + /\A[^\/]+\/index\.html\z/, + /\A[^\/]+\/release_notes\.html\z/, + /\Aserver[^\/]+\/chef_overview\.html\z/, + /\A[\d\-]+\/server_components\.html\z/ + ] + self.root_path = "#{client_path}/chef_overview.html" self.initial_paths = ["#{server_path}/server_components.html"] @@ -38,10 +60,21 @@ module Docs version '11' do self.release = '11.18' + self.base_url = 'https://docs-archive.chef.io/release/' + + html_filters.push 'chef/entries_old', 'chef/clean_html_old' options[:client_path] = client_path = '11-18' options[:server_path] = server_path = 'server_12-8' + options[:skip_patterns] = [ + /\A[^\/]+\/\z/, + /\A[^\/]+\/index\.html\z/, + /\A[^\/]+\/release_notes\.html\z/, + /\Aserver[^\/]+\/chef_overview\.html\z/, + /\A[\d\-]+\/server_components\.html\z/ + ] + self.root_path = "#{client_path}/chef_overview.html" self.initial_paths = ["#{server_path}/server_components.html"] @@ -52,5 +85,6 @@ module Docs doc = fetch_doc('https://downloads.chef.io/products/infra', opts) doc.at_css('#versions > option').content.strip end + end end