Improve D scraper

pull/666/merge
Thibaut Courouble 7 years ago
parent 3e0193cdea
commit 8225e1d7e6

@@ -2,6 +2,8 @@
h2 { @extend %block-heading; }
h3, .d_decl { @extend %block-label, %label-blue; }
.d_decl { @extend %code; }
.d_decl > small { color: $textColorLight; }
.d_decl > strong { font-weight: $bolderFontWeight; }
p > code, li > code, td > code, dd > code { @extend %label; }

@@ -28,17 +28,40 @@ module Docs
node.replace("<dl><dt>#{dt}</dt><dd>#{dd}</dd></dl>")
end
css('.description > .blankline:first-child + .quickindex').each do |node|
node.next_element.remove if node.next_element && node.next_element['class'] == 'blankline'
node.previous_element.remove
node.parent.before(node)
end
css('div.summary', 'div.description').each do |node|
node.name = 'p' unless node.at_css('p')
node.css('.blankline').each { |n| n.replace('<br><br>') }
end
css('.d_decl').each do |node|
node['id'] = node.at_css('.def-anchor')['id'].remove(/\A\./)
constraints = node.css('.constraint').remove
node.content = node.content.strip
node.inner_html = node.inner_html.gsub(/;\s*/, '<br>').remove(/<br>\z/)
node << "<br><br> Constraints:<br> #{constraints.map(&:content).join('<br> ')}" unless constraints.empty?
node['id'] ||= node.at_css('.quickindex[id]')['id'].remove('quickindex.')
node.css('.def-anchor[id]').each do |n|
n.next_element['id'] ||= n['id']
end
node.css('.constraint').each do |n|
n.content = " Constraints: #{n.content}#{n.next.remove.content if n.next.text?}"
n.name = 'small'
n.remove_attribute('class')
end
node.css('code[id]').each do |n|
n.name = 'strong'
n.remove_attribute('class')
end
node.css('*').each do |n|
n.before(n.children).remove unless n.name == 'br' || n.name == 'small' || n.name == 'strong'
end
node.inner_html = node.inner_html.remove(/<br>\z/)
end
css('pre').each do |node|

@@ -29,14 +29,10 @@ module Docs
entries = []
css('.book > tr > td > a').each do |node|
entries << ["#{self.name}.#{node.content}", node['href'].remove(/\A#/).remove(/\A\./)]
end
if entries.empty?
css('.quickindex[id]').each do |node|
name = node['id'].remove(/quickindex\.?/)
next if name.empty? || name =~ /\.\d+\z/
next if name.empty? || name =~ /\.\d+\z/ || name =~ /\A([^\.]+)\.\1\z/
entries << ["#{self.name}.#{name}", name]
end
end

Loading…
Cancel
Save