From b46cb9598d70bba893f0c6710be681e55f976066 Mon Sep 17 00:00:00 2001 From: Tim Lim Date: Mon, 16 Oct 2023 20:49:38 +0800 Subject: [PATCH 1/2] Allow Python scraper to keep empty spans with ids --- lib/docs/filters/sphinx/clean_html.rb | 8 +++++--- lib/docs/scrapers/python.rb | 6 ++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/lib/docs/filters/sphinx/clean_html.rb b/lib/docs/filters/sphinx/clean_html.rb index 126ff47a..36bb96d9 100644 --- a/lib/docs/filters/sphinx/clean_html.rb +++ b/lib/docs/filters/sphinx/clean_html.rb @@ -36,9 +36,11 @@ module Docs node.replace(pre) end - css('span[id]:empty').each do |node| - (node.next_element || node.previous_element)['id'] ||= node['id'] if node.next_element || node.previous_element - node.remove + unless context[:sphinx_keep_empty_ids] + css('span[id]:empty').each do |node| + (node.next_element || node.previous_element)['id'] ||= node['id'] if node.next_element || node.previous_element + node.remove + end end css('.section').each do |node| diff --git a/lib/docs/scrapers/python.rb b/lib/docs/scrapers/python.rb index 39448bd5..52b2505f 100644 --- a/lib/docs/scrapers/python.rb +++ b/lib/docs/scrapers/python.rb @@ -7,6 +7,12 @@ module Docs code: 'https://github.com/python/cpython' } + # bypass the clean_text filter as it removes empty span with ids + options[:clean_text] = false + + # bypass sphinx modifying empty ids + options[:sphinx_keep_empty_ids] = true + options[:skip_patterns] = [/whatsnew/] options[:skip] = %w( library/2to3.html From 4862e15775c45cc7a63fa8d1f048e0952405bdce Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Fri, 5 Jan 2024 17:54:31 +0100 Subject: [PATCH 2/2] Update Python documentation (3.12.1) --- lib/docs/scrapers/python.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/docs/scrapers/python.rb b/lib/docs/scrapers/python.rb index 52b2505f..3988bba3 100644 --- a/lib/docs/scrapers/python.rb +++ b/lib/docs/scrapers/python.rb @@ -28,14 +28,14 @@ module Docs HTML version '3.12' do - self.release = '3.12.0' + self.release = '3.12.1' self.base_url = "https://docs.python.org/#{self.version}/" html_filters.push 'python/entries_v3', 'sphinx/clean_html', 'python/clean_html' end version '3.11' do - self.release = '3.11.5' + self.release = '3.11.7' self.base_url = "https://docs.python.org/#{self.version}/" html_filters.push 'python/entries_v3', 'sphinx/clean_html', 'python/clean_html'