From 31d5b9d1e063959d0c2b344be4cc7872169952ba Mon Sep 17 00:00:00 2001 From: Thibaut Date: Sun, 16 Nov 2014 18:00:54 -0500 Subject: [PATCH] Improve C and C++ scrapers Fixes #138. --- assets/stylesheets/pages/_c.scss | 24 ++++++++++++++++++++---- lib/docs/filters/c/clean_html.rb | 23 ++++++++++++++++------- lib/docs/filters/cpp/entries.rb | 2 ++ lib/docs/scrapers/c.rb | 5 +++++ lib/docs/scrapers/cpp.rb | 5 +++++ 5 files changed, 48 insertions(+), 11 deletions(-) diff --git a/assets/stylesheets/pages/_c.scss b/assets/stylesheets/pages/_c.scss index 20fa61f5..5f68c61b 100644 --- a/assets/stylesheets/pages/_c.scss +++ b/assets/stylesheets/pages/_c.scss @@ -1,7 +1,13 @@ ._c { > h2, > h3 { @extend %block-heading; } > h4 { @extend %block-label, %label-blue; } - > p > code { @extend %label; } + .fmbox { @extend %note; } + code, .t-mark, .t-mark-rev { @extend %label; } + + .t-mark, .t-mark-rev { + white-space: nowrap; + @extend %label-green; + } .t-dcl-begin pre { margin: 0; @@ -19,9 +25,19 @@ } .t-sdsc-nopad dl, .t-sdsc-nopad dd { margin: 0; } - td > h5 { - margin: 0; - line-height: inherit; + td { + > h3, > h5 { + margin: 0; + line-height: inherit; + } + + > ul { margin: 0; } + + > .t-dsc-member-div > div { // utility/functional + float: left; + + + div { margin-left: .5em; } + } } .t-inheritance-diagram { diff --git a/lib/docs/filters/c/clean_html.rb b/lib/docs/filters/c/clean_html.rb index 1a637b73..6c417084 100644 --- a/lib/docs/filters/c/clean_html.rb +++ b/lib/docs/filters/c/clean_html.rb @@ -2,13 +2,11 @@ module Docs class C class CleanHtmlFilter < Filter def call - if root_page? - doc.inner_html = ' ' - return doc - end + css('h1').remove if root_page? - css('#siteSub', '#contentSub', '.printfooter', '.t-navbar', '.editsection', '#toc', '.t-dsc-sep', '.t-dcl-sep', - '#catlinks', '.ambox-notice', '.mw-cite-backlink', '.t-sdsc-sep:first-child:last-child').remove + css('#siteSub', '#contentSub', '.printfooter', '.t-navbar', '.editsection', '#toc', + '.t-dsc-sep', '.t-dcl-sep', '#catlinks', '.ambox-notice', '.mw-cite-backlink', + '.t-sdsc-sep:first-child:last-child', '.t-example-live-link').remove css('#bodyContent', '.mw-content-ltr', 'span[style]').each do |node| node.before(node.children).remove @@ -26,10 +24,16 @@ module Docs node.content = ' ' if node.content.empty? end - css('tt').each do |node| + css('tt', 'span > span.source-cpp').each do |node| node.name = 'code' end + css('div > span.source-cpp').each do |node| + node.name = 'pre' + node.inner_html = node.inner_html.gsub('
', "\n") + node.content = node.content + end + css('div > a > img[alt="About this image"]').each do |node| node.parent.parent.remove end @@ -38,6 +42,11 @@ module Docs node['href'] = node['href'].remove('.html') end + css('h1 ~ .fmbox').each do |node| + node.name = 'div' + node.content = node.content + end + doc end end diff --git a/lib/docs/filters/cpp/entries.rb b/lib/docs/filters/cpp/entries.rb index fcdd64a0..b2a6a1fc 100644 --- a/lib/docs/filters/cpp/entries.rb +++ b/lib/docs/filters/cpp/entries.rb @@ -22,6 +22,8 @@ module Docs def get_type if at_css('#firstHeading').content.include?('C++ keyword') 'Keywords' + elsif subpath.start_with?('experimental') + 'Experimental libraries' elsif type = at_css('.t-navbar > div:nth-child(4) > :first-child').try(:content) type.strip! type.remove! ' library' diff --git a/lib/docs/scrapers/c.rb b/lib/docs/scrapers/c.rb index a294e698..a4af5910 100644 --- a/lib/docs/scrapers/c.rb +++ b/lib/docs/scrapers/c.rb @@ -14,6 +14,11 @@ module Docs options[:root_title] = 'C Programming Language' options[:skip] = %w(language/history.html) + options[:fix_urls] = ->(url) do + url.sub! %r{\A.+/http%3A/}, "http://" + url + end + options[:attribution] = <<-HTML © cppreference.com
Licensed under the Creative Commons Attribution-ShareAlike Unported License v3.0. diff --git a/lib/docs/scrapers/cpp.rb b/lib/docs/scrapers/cpp.rb index 55992808..8066966b 100644 --- a/lib/docs/scrapers/cpp.rb +++ b/lib/docs/scrapers/cpp.rb @@ -22,6 +22,11 @@ module Docs ) options[:only_patterns] = [/\.html\z/] + options[:fix_urls] = ->(url) do + url.sub! %r{\A.+/http%3A/}, "http://" + url + end + options[:attribution] = <<-HTML © cppreference.com
Licensed under the Creative Commons Attribution-ShareAlike Unported License v3.0.