PyTorch 1.6+ scraper code cleanup

pull/1364/head
Phil Scherer 4 years ago
parent 59c6c75519
commit 5afcd785d7

@@ -1,27 +1,23 @@
module Docs module Docs
class Pytorch class Pytorch
class EntriesFilter < Docs::EntriesFilter class EntriesFilter < Docs::EntriesFilter
def get_name NAME_REPLACEMENTS = {
breadcrumbs = at_css('.pytorch-breadcrumbs')
name_in_breadcrumb = breadcrumbs.css('li')[1].content
article = at_css('.pytorch-article')
# hard-coded name replacements, for better presentation.
name_replacements = {
"Distributed communication package - torch.distributed" => "torch.distributed" "Distributed communication package - torch.distributed" => "torch.distributed"
} }
def get_breadcrumbs()
css('.pytorch-breadcrumbs > li').map { |node| node.content.delete_suffix(' >') }
end
def get_name
# The id of the container `div.section` indicates the page type. # The id of the container `div.section` indicates the page type.
# If the id starts with `module-`, then it's an API reference, # If the id starts with `module-`, then it's an API reference,
# otherwise it is a note or design doc. # otherwise it is a note or design doc.
article_id = article.at_css('div.section')['id'] if at_css('.section')['id'].starts_with? 'module-'
if article_id.starts_with? 'module-'
/\Amodule-(.*)/.match(article_id)[1] /\Amodule-(.*)/.match(article_id)[1]
else else
name_in_breadcrumb = name_in_breadcrumb.delete_suffix(' >') name = get_breadcrumbs()[1]
name_in_breadcrumb = name_replacements.fetch(name_in_breadcrumb, name_in_breadcrumb) NAME_REPLACEMENTS.fetch(name, name)
name_in_breadcrumb
end end
end end

Loading…
Cancel
Save