diff --git a/docs/file-scrapers.md b/docs/file-scrapers.md
index 727576c7..52e4e0f9 100644
--- a/docs/file-scrapers.md
+++ b/docs/file-scrapers.md
@@ -142,6 +142,13 @@
 $GS = '/usr/local/opt/ghostscript/bin/gs'; # GhostScript
 ```
 
+## Man
+
+```sh
+wget --recursive --no-parent https://man7.org/linux/man-pages/
+mv man7.org/linux/man-pages/ docs/man/
+```
+
 ## NumPy
 
 ```sh
diff --git a/lib/docs/filters/man/clean_html.rb b/lib/docs/filters/man/clean_html.rb
new file mode 100644
index 00000000..1aab6fbd
--- /dev/null
+++ b/lib/docs/filters/man/clean_html.rb
@@ -0,0 +1,25 @@
+module Docs
+  class Man
+    # Removes the man7.org elements matched by REMOVED_SELECTORS from a page.
+    class CleanHtmlFilter < Filter
+      REMOVED_SELECTORS = [
+        '.page-top',
+        '.nav-bar',
+        '.nav-end',
+        '.sec-table',
+        'a[href="#top_of_page"]',
+        '.end-man-text',
+        '.start-footer',
+        '.footer',
+        '.end-footer',
+        'form[action="https://www.google.com/search"]'
+      ].freeze
+
+      # Drops every match of each selector, then returns the cleaned document.
+      def call
+        REMOVED_SELECTORS.each { |selector| css(selector).remove }
+        doc
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/man/entries.rb b/lib/docs/filters/man/entries.rb
new file mode 100644
index 00000000..66af74d4
--- /dev/null
+++ b/lib/docs/filters/man/entries.rb
@@ -0,0 +1,37 @@
+module Docs
+  class Man
+    # Builds the index entries for the Linux man pages scraped from man7.org.
+    class EntriesFilter < Docs::EntriesFilter
+      # Name of a page: its <h1> text with the site-wide title suffix removed.
+      def get_name
+        at_css('h1').content.sub(' — Linux manual page', '')
+      end
+
+      def get_type
+        'Linux manual page'
+      end
+
+      # Only the dir_by_project page gets custom entries: one per link whose
+      # href starts with "man", filed under the text of the closest preceding
+      # <h2>. Links seen before the first <h2> are skipped. Every other page
+      # falls back to the default implementation.
+      def entries
+        return super unless slug == 'dir_by_project'
+
+        section = nil
+        css('*').each_with_object([]) do |node, result|
+          if node.name == 'h2'
+            section = node.content
+          elsif section && node.name == 'a' && node['href'].to_s.start_with?('man')
+            # The node right after the link completes the entry name; a link
+            # that is the last child has no next sibling, so guard against nil
+            # instead of raising NoMethodError.
+            trailing = node.next_sibling
+            name = trailing ? node.content + trailing.content : node.content
+            result << Entry.new(name, node['href'], section)
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/docs/scrapers/man.rb b/lib/docs/scrapers/man.rb
new file mode 100644
index 00000000..07ac40d2
--- /dev/null
+++ b/lib/docs/scrapers/man.rb
@@ -0,0 +1,18 @@
+module Docs
+  # Scraper definition for the Linux man pages mirrored from man7.org
+  # (download steps are in docs/file-scrapers.md).
+  class Man < FileScraper
+    self.name = 'Linux man pages'
+    self.type = 'simple'
+    self.slug = 'man'
+    self.base_url = "https://man7.org/linux/man-pages/"
+    self.initial_paths = %w(dir_by_project.html)
+    self.links = {
+      home: 'https://man7.org/linux/man-pages/',
+    }
+    html_filters.push 'man/entries', 'man/clean_html'
+    options[:attribution] = <<-HTML
+      ...
+    HTML
+  end
+end