From b7075dd51ac8c3c21046fbf293b744e75a19aa60 Mon Sep 17 00:00:00 2001 From: Boris Bera Date: Tue, 25 Sep 2018 23:46:15 -0400 Subject: [PATCH] Implement working crawling and section building for salt --- lib/docs/filters/salt_stack/clean_html.rb | 2 ++ lib/docs/filters/salt_stack/entries.rb | 19 +++++++++++++++++-- lib/docs/scrapers/salt_stack.rb | 7 ++++++- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/lib/docs/filters/salt_stack/clean_html.rb b/lib/docs/filters/salt_stack/clean_html.rb index ac53a94c..0f084519 100644 --- a/lib/docs/filters/salt_stack/clean_html.rb +++ b/lib/docs/filters/salt_stack/clean_html.rb @@ -2,6 +2,8 @@ module Docs class SaltStack class CleanHtmlFilter < Filter def call + css('.headerlink').remove + doc end end diff --git a/lib/docs/filters/salt_stack/entries.rb b/lib/docs/filters/salt_stack/entries.rb index dda9871d..51bf17d2 100644 --- a/lib/docs/filters/salt_stack/entries.rb +++ b/lib/docs/filters/salt_stack/entries.rb @@ -1,12 +1,27 @@ module Docs class SaltStack class EntriesFilter < Docs::EntriesFilter + SALT_REF_RGX = /salt\.([^\.]+)\.([^\s]+)/ + def get_name - at_css('h1').content + header = at_css('h1').content + + ref_match = SALT_REF_RGX.match(header) + if ref_match + ns, mod = ref_match.captures + "#{ns}.#{mod}" + else + header + end end def get_type - 'TODO' + type, _ = slug.split('/', 2) + type + end + + def include_default_entry? + !subpath.end_with?('index.html') end end end diff --git a/lib/docs/scrapers/salt_stack.rb b/lib/docs/scrapers/salt_stack.rb index 3196a18a..a4c974b2 100644 --- a/lib/docs/scrapers/salt_stack.rb +++ b/lib/docs/scrapers/salt_stack.rb @@ -1,9 +1,14 @@ module Docs class SaltStack < UrlScraper + self.type = 'salt_stack' self.release = '2018.3.2' self.base_url = 'https://docs.saltstack.com/en/latest/ref/' - html_filters.push 'salt_stack/entries', 'salt_stack/clean_html' + html_filters.push 'salt_stack/clean_html', 'salt_stack/entries' + + options[:only_patterns] = [ + %r{[^/]+/all/} + ] options[:container] = '.body-content'