From 63af2153d843af2ec553604448d26e083ba32b08 Mon Sep 17 00:00:00 2001 From: Jed Fox Date: Sat, 11 Nov 2017 12:40:55 -0500 Subject: [PATCH] Extract a common superclass out of the Python EntriesFilter classes --- lib/docs/filters/python/entries_common.rb | 78 +++++++++++++++++++++++ lib/docs/filters/python/entries_v2.rb | 64 ++----------------- lib/docs/filters/python/entries_v3.rb | 66 ++----------------- 3 files changed, 87 insertions(+), 121 deletions(-) create mode 100644 lib/docs/filters/python/entries_common.rb diff --git a/lib/docs/filters/python/entries_common.rb b/lib/docs/filters/python/entries_common.rb new file mode 100644 index 00000000..0a3e921c --- /dev/null +++ b/lib/docs/filters/python/entries_common.rb @@ -0,0 +1,78 @@ +module Docs + class Python + class CommonEntriesFilter < Docs::EntriesFilter + def get_name + name = at_css('h1').content + name.remove! %r{\A[\d\.]+ } # remove list number + name.remove! "\u{00B6}" # remove pilcrow sign + name.remove! %r{ [\u{2013}\u{2014}].+\z} # remove text after em/en dash + name.remove! 'Built-in' + name.strip! + name + end + + def get_type + return 'Logging' if slug.start_with? 'library/logging' + + type = at_css('.related a[accesskey="U"]').content + + original_type = type + type = parse_type type + + if type == original_type + if type == 'The Python Standard Library' + type = at_css('h1').content + elsif type.include?('I/O') || %w(select selectors).include?(name) + type = 'Input/ouput' + end + end + + type.remove! %r{\A\d+\.\s+} # remove list number + type.remove! "\u{00b6}" # remove paragraph character + type.sub! ' and ', ' & ' + [' Services', ' Modules', ' Specific', 'Python '].each { |str| type.remove!(str) } + + self.class.const_get(:REPLACE_TYPES)[type] || type + end + + def parse_type(_type = nil) # abstract hook: get_type calls this with the raw type string; subclasses override it + raise Error.new('Python::CommonEntriesFilter is an abstract filter. Subclass it before using it.') + end + + def include_default_entry? 
+ !at_css('.body > .section:only-child > .toctree-wrapper:last-child') && type != 'Superseded' + end + + def additional_entries + return [] if root_page? || !include_default_entry? || name == 'errno' + clean_id_attributes + entries = [] + + css('.class > dt[id]', '.exception > dt[id]', '.attribute > dt[id]').each do |node| + entries << [node['id'], node['id']] + end + + css('.data > dt[id]').each do |node| + if node['id'].split('.').last.upcase! # skip constants + entries << [node['id'], node['id']] + end + end + + css('.function > dt[id]', '.method > dt[id]', '.staticmethod > dt[id]', '.classmethod > dt[id]').each do |node| + entries << [node['id'] + '()', node['id']] + end + + entries + end + + def clean_id_attributes + css('.section > .target[id]').each do |node| + if dt = node.at_css('+ dl > dt') + dt['id'] ||= node['id'].remove(/\w+\-/) + end + node.remove + end + end + end + end +end diff --git a/lib/docs/filters/python/entries_v2.rb b/lib/docs/filters/python/entries_v2.rb index 35168aac..3858b176 100644 --- a/lib/docs/filters/python/entries_v2.rb +++ b/lib/docs/filters/python/entries_v2.rb @@ -1,6 +1,6 @@ module Docs class Python - class EntriesV2Filter < Docs::EntriesFilter + class EntriesV2Filter < CommonEntriesFilter REPLACE_TYPES = { 'compiler package' => 'Compiler', 'Cryptographic' => 'Cryptography', @@ -15,72 +15,18 @@ module Docs 'Program Frameworks' => 'Frameworks', 'Structured Markup Processing Tools' => 'Structured Markup' } - def get_name - name = at_css('h1').content - name.remove! %r{\A[\d\.]+ } # remove list number - name.remove! "\u{00B6}" # remove pilcrow sign - name.remove! %r{ [\u{2013}\u{2014}].+\z} # remove text after em/en dash - name.remove! 'Built-in' - name.strip! - name - end - - def get_type - return 'Logging' if slug.start_with? 
'library/logging' - - type = at_css('.related a[accesskey="U"]').content - - if type == 'The Python Standard Library' - type = at_css('h1').content - elsif type.include?('I/O') || %w(select selectors).include?(name) - type = 'Input/ouput' - elsif type.start_with? '18' + def parse_type(type) + if type.start_with? '18' type = 'Internet Data Handling' elsif type.include? 'Mac' type = 'Mac OS' end - type.remove! %r{\A\d+\.\s+} # remove list number - type.remove! "\u{00b6}" # remove paragraph character - type.sub! ' and ', ' & ' - [' Services', ' Modules', ' Specific', 'Python '].each { |str| type.remove!(str) } - - REPLACE_TYPES[type] || type + type end def include_default_entry? - !at_css('.body > .section:only-child > .toctree-wrapper:last-child') && !type.in?(%w(Superseded SunOS)) - end - - def additional_entries - return [] if root_page? || !include_default_entry? || name == 'errno' - clean_id_attributes - entries = [] - - css('.class > dt[id]', '.exception > dt[id]', '.attribute > dt[id]').each do |node| - entries << [node['id'], node['id']] - end - - css('.data > dt[id]').each do |node| - if node['id'].split('.').last.upcase! 
# skip constants - entries << [node['id'], node['id']] - end - end - - css('.function > dt[id]', '.method > dt[id]', '.staticmethod > dt[id]', '.classmethod > dt[id]').each do |node| - entries << [node['id'] + '()', node['id']] - end - - entries - end - - def clean_id_attributes - css('.section > .target[id]').each do |node| - if dt = node.at_css('+ dl > dt') - dt['id'] ||= node['id'].remove(/\w+\-/) - end - node.remove - end + super && type != 'SunOS' end end end diff --git a/lib/docs/filters/python/entries_v3.rb b/lib/docs/filters/python/entries_v3.rb index 759e244a..5ce0121d 100644 --- a/lib/docs/filters/python/entries_v3.rb +++ b/lib/docs/filters/python/entries_v3.rb @@ -1,6 +1,6 @@ module Docs class Python - class EntriesV3Filter < Docs::EntriesFilter + class EntriesV3Filter < CommonEntriesFilter REPLACE_TYPES = { 'Cryptographic' => 'Cryptography', 'Custom Interpreters' => 'Interpreters', @@ -13,70 +13,12 @@ module Docs 'Program Frameworks' => 'Frameworks', 'Structured Markup Processing Tools' => 'Structured Markup' } - def get_name - name = at_css('h1').content - name.remove! %r{\A[\d\.]+ } # remove list number - name.remove! "\u{00B6}" # remove pilcrow sign - name.remove! %r{ [\u{2013}\u{2014}].+\z} # remove text after em/en dash - name.remove! 'Built-in' - name.strip! - name - end - - def get_type - return 'Logging' if slug.start_with? 'library/logging' - - type = at_css('.related a[accesskey="U"]').content - - if type == 'The Python Standard Library' - type = at_css('h1').content - elsif type.include?('I/O') || %w(select selectors).include?(name) - type = 'Input/ouput' - elsif type.start_with? '19' + def parse_type(type) + if type.start_with? '19' type = 'Internet Data Handling' end - type.remove! %r{\A\d+\.\s+} # remove list number - type.remove! "\u{00b6}" # remove paragraph character - type.sub! 
' and ', ' & ' - [' Services', ' Modules', ' Specific', 'Python '].each { |str| type.remove!(str) } - - REPLACE_TYPES[type] || type - end - - def include_default_entry? - !at_css('.body > .section:only-child > .toctree-wrapper:last-child') && !type.in?(%w(Superseded)) - end - - def additional_entries - return [] if root_page? || !include_default_entry? || name == 'errno' - clean_id_attributes - entries = [] - - css('.class > dt[id]', '.exception > dt[id]', '.attribute > dt[id]').each do |node| - entries << [node['id'], node['id']] - end - - css('.data > dt[id]').each do |node| - if node['id'].split('.').last.upcase! # skip constants - entries << [node['id'], node['id']] - end - end - - css('.function > dt[id]', '.method > dt[id]', '.staticmethod > dt[id]', '.classmethod > dt[id]').each do |node| - entries << [node['id'] + '()', node['id']] - end - - entries - end - - def clean_id_attributes - css('.section > .target[id]').each do |node| - if dt = node.at_css('+ dl > dt') - dt['id'] ||= node['id'].remove(/\w+\-/) - end - node.remove - end + type end end end