From b46cb9598d70bba893f0c6710be681e55f976066 Mon Sep 17 00:00:00 2001 From: Tim Lim Date: Mon, 16 Oct 2023 20:49:38 +0800 Subject: [PATCH] Allow Python scraper to keep empty spans with ids --- lib/docs/filters/sphinx/clean_html.rb | 8 +++++--- lib/docs/scrapers/python.rb | 6 ++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/lib/docs/filters/sphinx/clean_html.rb b/lib/docs/filters/sphinx/clean_html.rb index 126ff47a..36bb96d9 100644 --- a/lib/docs/filters/sphinx/clean_html.rb +++ b/lib/docs/filters/sphinx/clean_html.rb @@ -36,9 +36,11 @@ module Docs node.replace(pre) end - css('span[id]:empty').each do |node| - (node.next_element || node.previous_element)['id'] ||= node['id'] if node.next_element || node.previous_element - node.remove + unless context[:sphinx_keep_empty_ids] + css('span[id]:empty').each do |node| + (node.next_element || node.previous_element)['id'] ||= node['id'] if node.next_element || node.previous_element + node.remove + end end css('.section').each do |node| diff --git a/lib/docs/scrapers/python.rb b/lib/docs/scrapers/python.rb index 39448bd5..52b2505f 100644 --- a/lib/docs/scrapers/python.rb +++ b/lib/docs/scrapers/python.rb @@ -7,6 +7,12 @@ module Docs code: 'https://github.com/python/cpython' } + # bypass the clean_text filter as it removes empty spans with ids + options[:clean_text] = false + + # stop the sphinx clean_html filter from removing empty spans with ids + options[:sphinx_keep_empty_ids] = true + options[:skip_patterns] = [/whatsnew/] options[:skip] = %w( library/2to3.html