ocaml: polish scraper

pull/1236/head
Simon Legner 4 years ago
parent c39f1a138b
commit 7128d2d988

@ -2,21 +2,32 @@ module Docs
class Ocaml class Ocaml
class CleanHtmlFilter < Filter class CleanHtmlFilter < Filter
def call def call
css('pre').each do |node|
css('pre, .caml-example').each do |node|
span = node.at_css('span[id]')
node['id'] = span['id'] if span
node['data-type'] = "#{span.content} [#{at_css('h1').content}]" if span
node['data-language'] = 'ocaml' node['data-language'] = 'ocaml'
node.name = 'pre'
node.content = node.content
end end
css('.caml-input').each do |node| css('.caml-input').each do |node|
node.content = '# ' + node.content.strip node.content = '# ' + node.content.strip
end end
css('.caml-example').each do |node| css('.maintitle *[style]').each do |node|
node.name = 'pre' node.remove_attribute 'style'
node.traverse { |n| n.remove if n.text? && n.text !~ /\S/ } end
node['data-language'] = 'ocaml' css('h1').each do |node|
node.content = node.content
table = node.ancestors('table.center')
table.first.before(node).remove if table.present?
end end
css('.navbar').remove
doc doc
end end
end end

@ -37,11 +37,8 @@ module Docs
module_node = css('h1').at_css('span') module_node = css('h1').at_css('span')
css('pre').each do |node| css('pre > span[id]').each do |span|
next unless span = node.at_css('span') if span['id'].start_with?('VAL')
if span['id'].nil?
next
elsif span['id'].start_with?('VAL')
entry_type = 'Values' entry_type = 'Values'
elsif span['id'].start_with?('MODULE') elsif span['id'].start_with?('MODULE')
entry_type = 'Modules' entry_type = 'Modules'
@ -52,12 +49,9 @@ module Docs
end end
name = span.content name = span.content
if not module_node.nil? name += " [#{module_node.content}]" unless module_node.nil?
name = "#{name} [#{module_node.content}]"
end
entries << [name, span['id'], entry_type] entries << [name, span['id'], entry_type]
end end
entries entries
end end
end end

Loading…
Cancel
Save