diff --git a/lib/docs/filters/python/entries_v3.rb b/lib/docs/filters/python/entries_v3.rb
index 033427f4..90c9a970 100644
--- a/lib/docs/filters/python/entries_v3.rb
+++ b/lib/docs/filters/python/entries_v3.rb
@@ -31,6 +31,7 @@ module Docs
         return 'Tutorial' if slug.start_with? 'tutorial'
         return 'Software Packaging & Distribution' if slug.start_with? 'distributing'
         return 'Software Packaging & Distribution' if slug.start_with? 'distutils'
+        return 'Glossary' if slug.start_with? 'glossary'
 
         return 'Basics' unless slug.start_with? 'library/'
         return 'Basics' if slug.start_with? 'library/index'
@@ -57,15 +58,17 @@ module Docs
       end
 
       def include_h2?
-        return slug.start_with?('reference') || slug.start_with?('tutorial') || slug.start_with?('using')
+        slug.start_with?('library', 'reference', 'tutorial', 'using')
       end
 
       def include_default_entry?
+        return false if slug.start_with?('genindex')
         return true if slug == 'library/asyncio'
         !at_css('.body > .section:only-child > .toctree-wrapper:last-child') && !type.in?(%w(Superseded))
       end
 
       def additional_entries
+        return additional_entries_index if slug.start_with?('genindex')
         return [] if root_page? || slug.start_with?('library/index') || !include_default_entry? || name == 'errno'
         clean_id_attributes
         entries = []
@@ -74,6 +77,10 @@ module Docs
           entries << [node['id'], node['id']]
         end
 
+        css('.glossary > dt[id]').each do |node|
+          entries << [node.content, node['id']]
+        end
+
         css('.function > dt[id]', '.method > dt[id]', '.staticmethod > dt[id]', '.classmethod > dt[id]').each do |node|
           entries << [node['id'] + '()', node['id']]
         end
@@ -81,7 +88,10 @@ module Docs
         if include_h2?
           css('section[id] > h2').each do |node|
             name = node.content.remove("\u{00b6}")
+            name.concat " (#{self.name})" if slug.start_with?('library')
             entries << [name, node.parent['id']]
+            statement = name[/The (.+) statement/, 1]
+            entries << ["#{statement} (statement)", node.parent['id'], 'Statements'] if statement && slug.start_with?('reference')
           end
         end
 
@@ -96,6 +106,35 @@ module Docs
           node.remove
         end
       end
+
+      # Builds the entries for the general index page (slug starting with
+      # 'genindex'). Returns an array of [name, href, type] triples, one per
+      # sub-item link of each kept index term. The type is 'Operators',
+      # 'Regular Expression' or 'Symbols', chosen from the sub-item label.
+      def additional_entries_index
+        css('.genindextable td > ul > li').each_with_object([]) do |node, entries|
+          name = node.children.first
+          # Skip items with no children or whose first child is not a text node.
+          next unless name&.text?
+          name = name.text.strip
+          # Skip terms starting with a word character or with dashes followed by one.
+          next if name[/^\w/] || name[/^-+\w/]
+          node.css('> ul > li > a').each do |inner_node|
+            inner_name = inner_node.text.strip
+            # Skip sub-items whose label contains a bracketed number such as "[1]".
+            next if inner_name[/\[\d+\]/]
+            type = case inner_name
+                   when 'operator'
+                     'Operators'
+                   when 'in regular expressions'
+                     'Regular Expression'
+                   else
+                     'Symbols'
+                   end
+            entries << ["#{name} (#{inner_name})", inner_node['href'], type]
+          end
+        end
+      end
     end
   end
 end
diff --git a/lib/docs/scrapers/python.rb b/lib/docs/scrapers/python.rb
index 6f3c6f1e..09d84204 100644
--- a/lib/docs/scrapers/python.rb
+++ b/lib/docs/scrapers/python.rb
@@ -7,7 +7,7 @@ module Docs
       code: 'https://github.com/python/cpython'
     }
 
-    options[:skip_patterns] = [/genindex/, /whatsnew/]
+    options[:skip_patterns] = [/whatsnew/]
     options[:skip] = %w(
       library/2to3.html
       library/formatter.html