From 83256157ab91dc16703cc6fa87c2d655b9456703 Mon Sep 17 00:00:00 2001 From: David Chen Date: Sat, 23 May 2020 14:24:23 -0700 Subject: [PATCH] Update Elixir's scraper --- lib/docs/filters/elixir/clean_html.rb | 47 +++++++++++++-------------- lib/docs/filters/elixir/entries.rb | 14 +++++--- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/lib/docs/filters/elixir/clean_html.rb b/lib/docs/filters/elixir/clean_html.rb index cf703389..47cec84c 100644 --- a/lib/docs/filters/elixir/clean_html.rb +++ b/lib/docs/filters/elixir/clean_html.rb @@ -25,41 +25,38 @@ module Docs end def api - css('footer', '.view-source', 'h1 .visible-xs').remove + css('.hover-link', '.view-source', 'footer').remove - css('section section.docstring h2').each do |node| - node.name = 'h4' + css('.summary').each do |node| + node.name = 'dl' end - css('h1 .hover-link', '.detail-link').each do |node| - node.parent['id'] = node['href'].remove('#') - node.remove + css('.summary h2').each do |node| + node.content = node.inner_text + node.parent.before(node) end - css('.details-list').each do |list| - type = list['id'].remove(/s\z/) if list['id'] - list.css('.detail-header').each do |node| - node.name = 'h3' - node['class'] += " #{type}" if type - end + css('.summary-signature').each do |node| + node.name = 'dt' end - css('.summary h2').each { |node| node.parent.before(node) } - css('.summary').each { |node| node.name = 'dl' } - css('.summary-signature').each { |node| node.name = 'dt' } - css('.summary-synopsis').each { |node| node.name = 'dd' } - - css('section', 'div:not(.type-detail)', 'h2 a').each do |node| - node.before(node.children).remove + css('.summary-synopsis').each do |node| + node.name = 'dd' end - css('.detail-header > pre').each do |node| - node.parent.after(node) - end + css('section.detail').each do |detail| + id = detail['id'] + detail.remove_attribute('id') - css('.signature').each do |node| - non_text_children = node.xpath('node()[not(self::text())]') - non_text_children.to_a.reverse.each { |child| node.parent.add_next_sibling(child) } + detail.css('.detail-header').each do |node| + node.name = 'h3' + node['id'] = id + node.content = node.at_css('.signature').inner_text + end + + detail.css('.docstring h2').each do |node| + node.name = 'h4' + end end css('pre').each do |node| diff --git a/lib/docs/filters/elixir/entries.rb b/lib/docs/filters/elixir/entries.rb index 72794f87..24fd9415 100644 --- a/lib/docs/filters/elixir/entries.rb +++ b/lib/docs/filters/elixir/entries.rb @@ -41,21 +41,25 @@ module Docs end def additional_entries - return [] if type == 'Exceptions' || type == 'Guide' + return [] if type == 'Exceptions' || type == 'Guide' || root_page? - css('.detail-header .signature').map do |node| - id = node.parent['id'] + css('.detail-header').map do |node| + id = node['id'] name = node.content.strip + name.remove! %r{\(.*\)} name.remove! 'left ' name.remove! ' right' name.sub! 'sigil_', '~' - unless node.parent['class'].end_with?('macro') || self.name.start_with?('Kernel') + if self.name && !self.name.start_with?('Kernel') name.prepend "#{self.name}." end - name << " (#{id.split('/').last})" if id =~ /\/\d+\z/ + if id =~ %r{/\d+\z} + arity = id.split('/').last + name << " (#{arity})" + end [name, id] end