diff --git a/lib/docs/filters/bash/entries.rb b/lib/docs/filters/bash/entries.rb index 3489020f..01348e2c 100644 --- a/lib/docs/filters/bash/entries.rb +++ b/lib/docs/filters/bash/entries.rb @@ -1,13 +1,22 @@ module Docs class Bash class EntriesFilter < Docs::EntriesFilter + def get_name - name = at_css('hr + a + *').content.gsub(/(\d+\.?)+/, '') + name = at_css('h1','h2', 'h3', 'h4').content.gsub(/(\d+\.?)+/, '') + + # remove 'E.' notation for appendixes + if name.match?(/[[:upper:]]\./) + # remove 'E.' + name.sub!(/[[:upper:]]\./, '') + # remove all dots (.) + name.gsub!(/\./, '') + # remove all numbers + name.gsub!(/[[:digit:]]/, '') + end - # Remove the "D. " from names like "D. Concept Index" and "D. Function Index" - name = name[3..-1] if name.start_with?('D. ') + name.strip - name end def get_type @@ -44,13 +53,14 @@ module Docs end # Construct path to the page which the index links to - entry_path = '/html_node/' + page + '#' + hash + entry_path = page + '#' + hash entries << [entry_name, entry_path, entry_type] end entries end + end end end diff --git a/lib/docs/scrapers/bash.rb b/lib/docs/scrapers/bash.rb index ba4135ec..ab4a7bd9 100644 --- a/lib/docs/scrapers/bash.rb +++ b/lib/docs/scrapers/bash.rb @@ -1,9 +1,9 @@ module Docs class Bash < UrlScraper self.type = 'bash' - self.release = '5.0' - self.base_url = 'https://www.gnu.org/software/bash/manual' - self.root_path = '/html_node/index.html' + self.release = '5.1' + self.base_url = 'https://www.gnu.org/software/bash/manual/html_node' + self.root_path = 'index.html' self.links = { home: 'https://www.gnu.org/software/bash/', code: 'http://git.savannah.gnu.org/cgit/bash.git' @@ -11,8 +11,6 @@ module Docs html_filters.push 'bash/entries', 'bash/clean_html' - options[:only_patterns] = [/\/html_node\//] - options[:attribution] = <<-HTML Copyright © 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
Licensed under the GNU Free Documentation License.