diff --git a/lib/docs/filters/pytorch/clean_html.rb b/lib/docs/filters/pytorch/clean_html.rb
index 465ae3a5..dd19c3e0 100644
--- a/lib/docs/filters/pytorch/clean_html.rb
+++ b/lib/docs/filters/pytorch/clean_html.rb
@@ -2,18 +2,9 @@ module Docs
   class Pytorch
     class CleanHtmlFilter < Filter
       def call
-        breadcrumbs = at_css('.pytorch-breadcrumbs')
-        type_name = breadcrumbs.css('li')[1].content
-
         @doc = at_css('.pytorch-article')
         # Show katex-mathml nodes and remove katex-html nodes
         css('.katex-html').remove
-
-        # pass type_name to following filters as a new node
-        node = Nokogiri::XML::Node.new 'meta', doc
-        node.content = type_name
-        doc.child.before node
-
         doc
       end
     end
diff --git a/lib/docs/filters/pytorch/entries.rb b/lib/docs/filters/pytorch/entries.rb
index ba92e222..4a4580da 100644
--- a/lib/docs/filters/pytorch/entries.rb
+++ b/lib/docs/filters/pytorch/entries.rb
@@ -2,9 +2,10 @@ module Docs
   class Pytorch
     class EntriesFilter < Docs::EntriesFilter
       def get_name
-        # retrive the name in breadcrumb from the auxiliary node
-        name_in_breadcrumb = doc.child.content
-        doc.child.remove
+        breadcrumbs = at_css('.pytorch-breadcrumbs')
+        name_in_breadcrumb = breadcrumbs.css('li')[1].content
+
+        article = at_css('.pytorch-article')
 
         # hard-coded name replacements, for better presentation.
         name_replacements = {
@@ -14,9 +15,11 @@ module Docs
         # The id of the container `div.section` indicates the page type.
         # If the id starts with `module-`, then it's an API reference,
         # otherwise it is a note or design doc.
-        # After the `sphinx/clean_html` filter, that id is assigned to the second element.
-        if doc.element_children[1]['id']&.starts_with? 'module-'
-          /\Amodule-(.*)/.match(doc.element_children[1]['id'])[1]
+        # This filter runs before the clean_html filters, so the id is read from the
+        # section itself. A section without an id falls through to the breadcrumb name.
+        article_id = article.at_css('div.section')['id']
+        if article_id&.starts_with? 'module-'
+          /\Amodule-(.*)/.match(article_id)[1]
         else
           name_in_breadcrumb = name_in_breadcrumb.delete_suffix(' >')
           name_in_breadcrumb = name_replacements.fetch(name_in_breadcrumb, name_in_breadcrumb)
diff --git a/lib/docs/scrapers/pytorch.rb b/lib/docs/scrapers/pytorch.rb
index f370502a..29b480d6 100644
--- a/lib/docs/scrapers/pytorch.rb
+++ b/lib/docs/scrapers/pytorch.rb
@@ -9,7 +9,7 @@ module Docs
       code: 'https://github.com/pytorch/pytorch'
     }
 
-    html_filters.push 'pytorch/clean_html', 'sphinx/clean_html', 'pytorch/entries'
+    html_filters.push 'pytorch/entries', 'pytorch/clean_html', 'sphinx/clean_html'
 
     options[:skip] = ['cpp_index.html', 'packages.html', 'py-modindex.html', 'genindex.html']
     options[:skip_patterns] = [/\Acommunity/, /\A_modules/, /\Anotes/, /\Aorg\/pytorch\//]