Update Elixir's scraper

pull/1238/head
David Chen 5 years ago
parent 4eb4bfe5da
commit 83256157ab

@ -25,41 +25,38 @@ module Docs
end end
def api def api
css('footer', '.view-source', 'h1 .visible-xs').remove css('.hover-link', '.view-source', 'footer').remove
css('section section.docstring h2').each do |node| css('.summary').each do |node|
node.name = 'h4' node.name = 'dl'
end end
css('h1 .hover-link', '.detail-link').each do |node| css('.summary h2').each do |node|
node.parent['id'] = node['href'].remove('#') node.content = node.inner_text
node.remove node.parent.before(node)
end end
css('.details-list').each do |list| css('.summary-signature').each do |node|
type = list['id'].remove(/s\z/) if list['id'] node.name = 'dt'
list.css('.detail-header').each do |node|
node.name = 'h3'
node['class'] += " #{type}" if type
end
end end
css('.summary h2').each { |node| node.parent.before(node) } css('.summary-synopsis').each do |node|
css('.summary').each { |node| node.name = 'dl' } node.name = 'dd'
css('.summary-signature').each { |node| node.name = 'dt' }
css('.summary-synopsis').each { |node| node.name = 'dd' }
css('section', 'div:not(.type-detail)', 'h2 a').each do |node|
node.before(node.children).remove
end end
css('.detail-header > pre').each do |node| css('section.detail').each do |detail|
node.parent.after(node) id = detail['id']
end detail.remove_attribute('id')
css('.signature').each do |node| detail.css('.detail-header').each do |node|
non_text_children = node.xpath('node()[not(self::text())]') node.name = 'h3'
non_text_children.to_a.reverse.each { |child| node.parent.add_next_sibling(child) } node['id'] = id
node.content = node.at_css('.signature').inner_text
end
detail.css('.docstring h2').each do |node|
node.name = 'h4'
end
end end
css('pre').each do |node| css('pre').each do |node|

@ -41,21 +41,25 @@ module Docs
end end
def additional_entries def additional_entries
return [] if type == 'Exceptions' || type == 'Guide' return [] if type == 'Exceptions' || type == 'Guide' || root_page?
css('.detail-header .signature').map do |node| css('.detail-header').map do |node|
id = node.parent['id'] id = node['id']
name = node.content.strip name = node.content.strip
name.remove! %r{\(.*\)} name.remove! %r{\(.*\)}
name.remove! 'left ' name.remove! 'left '
name.remove! ' right' name.remove! ' right'
name.sub! 'sigil_', '~' name.sub! 'sigil_', '~'
unless node.parent['class'].end_with?('macro') || self.name.start_with?('Kernel') if self.name && !self.name.start_with?('Kernel')
name.prepend "#{self.name}." name.prepend "#{self.name}."
end end
name << " (#{id.split('/').last})" if id =~ /\/\d+\z/ if id =~ %r{/\d+\z}
arity = id.split('/').last
name << " (#{arity})"
end
[name, id] [name, id]
end end

Loading…
Cancel
Save