diff --git a/lib/docs/filters/elisp/clean_html.rb b/lib/docs/filters/elisp/clean_html.rb
new file mode 100644
index 00000000..985191b2
--- /dev/null
+++ b/lib/docs/filters/elisp/clean_html.rb
@@ -0,0 +1,69 @@
+module Docs
+  class Elisp
+    # Strips navigation, boilerplate and section numbering from scraped pages
+    # and gives every defun section an anchor id.
+    class CleanHtmlFilter < Filter
+      def call
+        clean_root_page if current_url == root_url
+
+        # remove navigation bar
+        css('.node').remove
+
+        # replace every header with its text, stripped of section numbering
+        css('h2', 'h3', 'h4', 'h5', 'h6').each do |node|
+          node.content = clean_heading(node.content)
+        end
+
+        # Add an id to each defun section (function, macro, etc.). The entries
+        # filter downcases the whole "slug#name" path, so the id is downcased
+        # too; otherwise a name containing capitals gets an anchor that the
+        # entry path never matches.
+        css('.defun').each do |node|
+          name_node = node.first_element_child
+          node['id'] = name_node.content.downcase if name_node
+        end
+
+        doc
+      end
+
+      private
+
+      # Removes the parts of the index page that are not wanted in the output.
+      def clean_root_page
+        # remove copyright header
+        css('table ~ p').remove
+
+        # remove "Detailed Node Listing" header
+        css('h3').remove
+
+        # remove "Detailed Node Listing" table (skipped without a second table)
+        css('table')[1]&.remove
+
+        # remove copyright
+        css('blockquote').remove
+
+        # remove index page in the index table
+        css('tbody tr:last-child').remove
+      end
+
+      # Returns the header text without its numbering. Mirrors
+      # EntriesFilter#clean_heading so headers and entry names stay identical.
+      def clean_heading(text)
+        # remove numbers at the beginning; keep the text unchanged when the
+        # pattern finds nothing, rather than ending up with nil
+        heading = text.slice(/[[:alpha:]]...*/) || text
+
+        # remove 'Appendix' word (plus the two characters following it)
+        heading = heading.sub(/Appendix.{2}/, '')
+
+        # remove 'E.' notation for appendixes, then all dots and all numbers
+        if heading.match?(/[[:upper:]]\./)
+          heading = heading.sub(/[[:upper:]]\./, '').delete('.').gsub(/[[:digit:]]/, '')
+        end
+
+        # the removals above can leave surrounding spaces behind
+        heading.strip
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/elisp/entries.rb b/lib/docs/filters/elisp/entries.rb
new file mode 100644
index 00000000..4961e36d
--- /dev/null
+++ b/lib/docs/filters/elisp/entries.rb
@@ -0,0 +1,78 @@
+module Docs
+  class Elisp
+    # Produces one 'Manual' entry per page and one entry per defun section.
+    class EntriesFilter < Docs::EntriesFilter
+      # Keyword looked for in a defun section => type of the resulting entry.
+      # Order matters: the first keyword found wins, so when several keywords
+      # appear in the same text 'Constant' beats 'Command', which beats
+      # 'Special Form', and so on down to 'Function'.
+      ENTRY_TYPES = [
+        ['Constant', 'Builtin Constants'],
+        ['Command', 'Builtin Commands'],
+        ['Special Form', 'Builtin Special Forms'],
+        ['User Option', 'Builtin User Options'],
+        ['Variable', 'Builtin Variables'],
+        ['Macro', 'Builtin Macros'],
+        ['Function', 'Builtin Functions']
+      ].freeze
+
+      def get_name
+        clean_heading(at_css('h2', 'h3', 'h4', 'h5', 'h6').content)
+      end
+
+      def get_type
+        'Manual'
+      end
+
+      def additional_entries
+        css('.defun').each_with_object([]) do |node, entries|
+          name_node = node.first_element_child
+          next unless name_node # nothing to name the entry after
+
+          entry_name = name_node.content
+          entry_path = "#{slug}##{entry_name}".downcase
+          entries << [entry_name, entry_path, entry_type_for(node)]
+        end
+      end
+
+      private
+
+      # Returns the page title without its numbering. Mirrors
+      # CleanHtmlFilter#clean_heading so entry names and headers stay identical.
+      def clean_heading(text)
+        # remove numbers at the beginning; keep the text unchanged when the
+        # pattern finds nothing, rather than calling String methods on nil
+        heading = text.slice(/[[:alpha:]]...*/) || text
+
+        # remove 'Appendix' word (plus the two characters following it)
+        heading = heading.sub(/Appendix.{2}/, '')
+
+        # remove 'E.' notation for appendixes, then all dots and all numbers
+        if heading.match?(/[[:upper:]]\./)
+          heading = heading.sub(/[[:upper:]]\./, '').delete('.').gsub(/[[:digit:]]/, '')
+        end
+
+        # the removals above can leave surrounding spaces behind
+        heading.strip
+      end
+
+      # Picks the entry type for a defun section, or nil when no keyword matches.
+      #
+      # The text placed before the first child element is checked first, so that
+      # a keyword merely mentioned in the description cannot override it; the
+      # whole section text is the fallback.
+      # NOTE(review): assumes the category label ("Function:", "Macro:", ...) is
+      # that leading text - confirm against the scraped HTML.
+      def entry_type_for(node)
+        label = node.children.take_while { |child| !child.element? }.map(&:content).join
+        type_matching(label) || type_matching(node.content)
+      end
+
+      # Returns the type of the first ENTRY_TYPES keyword found in +text+, if any.
+      def type_matching(text)
+        ENTRY_TYPES.find { |keyword, _type| text.include?(keyword) }&.last
+      end
+    end
+  end
+end
diff --git a/lib/docs/scrapers/elisp.rb b/lib/docs/scrapers/elisp.rb
new file mode 100644
index 00000000..0a103bdc
--- /dev/null
+++ b/lib/docs/scrapers/elisp.rb
@@ -0,0 +1,70 @@
+module Docs
+  # Scraper for the Emacs Lisp manual published on gnu.org.
+  class Elisp < UrlScraper
+    # Duplicate pages, mapped to the page each one is replaced with.
+    DUPLICATE_PAGES = {
+      'Window-Group.html' => 'Basic-Windows.html',
+      'Local-defvar-example.html' => 'Using-Lexical-Binding.html',
+      'Defining-Lisp-variables-in-C.html' => 'Writing-Emacs-Primitives.html',
+      'describe_002dsymbols-example.html' => 'Accessing-Documentation.html',
+      'The-interactive_002donly-property.html' => 'Defining-Commands.html',
+      'Text-help_002decho.html' => 'Special-Properties.html',
+      'Help-display.html' => 'Special-Properties.html',
+      'autoload-cookie.html' => 'Autoload.html',
+      'external_002ddebugging_002doutput.html' => 'Output-Streams.html',
+      'modifier-bits.html' => 'Other-Char-Bits.html',
+      'message_002dbox.html' => 'Displaying-Messages.html',
+      'abbreviate_002dfile_002dname.html' => 'Directory-Names.html',
+      'Inhibit-point-motion-hooks.html' => 'Special-Properties.html',
+      'Coding-systems-for-a-subprocess.html' => 'Process-Information.html',
+      'Process-Filter-Example.html' => 'Filter-Functions.html',
+      'Docstring-hyperlinks.html' => 'Documentation-Tips.html',
+      'seq_002dlet.html' => 'Sequence-Functions.html',
+      'should_005fquit.html' => 'Module-Misc.html',
+      'Display-Face-Attribute-Testing.html' => 'Display-Feature-Testing.html',
+      'module-initialization-function.html' => 'Module-Initialization.html',
+      'pcase_002dsymbol_002dcaveats.html' => 'pcase-Macro.html',
+      'intern.html' => 'Module-Misc.html',
+      'pcase_002dexample_002d1.html' => 'pcase-Macro.html'
+    }.freeze
+
+    self.type = 'elisp'
+    self.release = '26.3'
+    self.base_url = 'https://www.gnu.org/software/emacs/manual/html_node/elisp/'
+    self.root_path = 'index.html'
+    self.links = {
+      home: 'https://www.gnu.org/software/emacs/manual/elisp',
+      code: 'https://git.savannah.gnu.org/cgit/emacs.git'
+    }
+
+    html_filters.push 'elisp/entries', 'elisp/clean_html'
+
+    # some files that were not skipped by skip patterns
+    options[:skip] = [
+      'Coding-Conventions.html',
+      'Key-Binding-Conventions.html',
+      'Library-Headers.html'
+    ]
+
+    # some non essential sections
+    options[:skip_patterns] = [
+      /Introduction\.html/,
+      /Antinews\.html/,
+      /GNU-Free-Documentation-License\.html/,
+      /GPL\.html/,
+      /Tips\.html/,
+      /Definition-of-/
+    ]
+
+    # fix duplicates; the url is rewritten in place and returned
+    options[:fix_urls] = ->(url) do
+      DUPLICATE_PAGES.each { |duplicate, target| url.sub!(duplicate, target) }
+      url
+    end
+
+    options[:attribution] = <<-HTML
+      Copyright © 1990-1996, 1998-2019 Free Software Foundation, Inc.
+      Licensed under the GNU GPL license.
+    HTML
+  end
+end