Merge pull request #1606 from MasterEnoc/cpp

C/C++ update
pull/1763/head
Simon Legner 3 years ago committed by GitHub
commit 45d6e22dca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -39,7 +39,7 @@
'pages/async', 'pages/async',
'pages/bash', 'pages/bash',
'pages/bootstrap', 'pages/bootstrap',
'pages/c', 'pages/cppref',
'pages/cakephp', 'pages/cakephp',
'pages/clojure', 'pages/clojure',
'pages/codeception', 'pages/codeception',

@ -1,4 +1,4 @@
._c { ._cppref {
> h2, > h3 { @extend %block-heading; } > h2, > h3 { @extend %block-heading; }
> h4 { @extend %block-label, %label-blue; } > h4 { @extend %block-label, %label-blue; }
.fmbox { @extend %note; } .fmbox { @extend %note; }

@ -22,6 +22,9 @@ module Docs
end end
def get_type def get_type
return "C keywords" if slug =~ /keyword/
type = at_css('.t-navbar > div:nth-child(4) > :first-child').try(:content) type = at_css('.t-navbar > div:nth-child(4) > :first-child').try(:content)
type.strip! type.strip!
type.remove! ' library' type.remove! ' library'

@ -1,11 +0,0 @@
module Docs
  class C
    # Text filter that rewrites absolute links pointing into the C docs
    # (everything under C.base_url).
    class FixUrlsFilter < Filter
      # Rewrites the links in `html` in place and returns `html`.
      def call
        base = C.base_url

        # A link to the root page becomes the base URL minus its last
        # character (the trailing slash, given the configured base_url).
        html.gsub! File.join(base, C.root_path), base[0..-2]

        # Strip the ".html" suffix from every other link under the base URL.
        # The base URL is escaped so that its "." characters only match a
        # literal dot instead of any character.
        html.gsub! %r{#{Regexp.escape(base)}([^"']+?)\.html}, "#{base}\\1"

        html
      end
    end
  end
end

@ -1,6 +1,8 @@
module Docs module Docs
class Cpp class Cpp
class EntriesFilter < Docs::EntriesFilter class EntriesFilter < Docs::EntriesFilter
@@duplicate_names = []
REPLACE_NAMES = { REPLACE_NAMES = {
'Error directive' => '#error directive', 'Error directive' => '#error directive',
'Filename and line information' => '#line directive', 'Filename and line information' => '#line directive',
@ -11,7 +13,8 @@ module Docs
def get_name def get_name
name = at_css('#firstHeading').content.strip name = at_css('#firstHeading').content.strip
name = format_name(name) name = format_name(name)
name.split(',').first name = name.split(',').first
name
end end
def get_type def get_type
@ -61,6 +64,21 @@ module Docs
REPLACE_NAMES[name] || name REPLACE_NAMES[name] || name
end end
# Avoid duplicate pages. Several functions that behave similarly but have
# different names are documented on one shared page, so the same name can
# show up more than once. Only the first page seen for a given name
# produces entries.
#
# Returns the built entries for a name seen for the first time, or an empty
# array when the name is already recorded in @@duplicate_names.
def entries
  # Return an array in the duplicate case as well; an `if` without `else`
  # would make this method return nil.
  return [] if @@duplicate_names.include?(name)

  @@duplicate_names.push(name)

  list = []
  list << default_entry if root_page? || include_default_entry?
  list.concat(additional_entries)
  build_entries(list)
end
end end
end end
end end

@ -1,11 +0,0 @@
module Docs
  class Cpp
    # Text filter that rewrites absolute links pointing into the C++ docs
    # (everything under Cpp.base_url).
    class FixUrlsFilter < Filter
      # Rewrites the links in `html` in place and returns `html`.
      def call
        base = Cpp.base_url

        # A link to the root page becomes the base URL minus its last
        # character (the trailing slash, given the configured base_url).
        html.gsub! File.join(base, Cpp.root_path), base[0..-2]

        # Strip the ".html" suffix from every other link under the base URL.
        # The base URL is escaped so that its "." characters only match a
        # literal dot instead of any character.
        html.gsub! %r{#{Regexp.escape(base)}([^"']+?)\.html}, "#{base}\\1"

        html
      end
    end
  end
end

@ -1,5 +1,5 @@
module Docs module Docs
class C class Cppref
class CleanHtmlFilter < Filter class CleanHtmlFilter < Filter
def call def call
css('h1').remove if root_page? css('h1').remove if root_page?
@ -109,6 +109,20 @@ module Docs
node['src'] = node['src'].sub! %r{http://en.cppreference.com/common/([^"']+?)\.svg}, 'http://upload.cppreference.com/mwiki/\1.svg' node['src'] = node['src'].sub! %r{http://en.cppreference.com/common/([^"']+?)\.svg}, 'http://upload.cppreference.com/mwiki/\1.svg'
end end
# Temporary workaround due to the lack of MathJax/MathML support
css('.t-mfrac').each do |node|
fraction = Nokogiri::XML::Node.new('span', doc)
node.css('td').each do |node|
fraction.add_child("<span>#{node.content}</span>")
end
fraction.last_element_child().before("<span>/</span>")
node.before(fraction)
node.remove
end
doc doc
end end
end end

@ -1,5 +1,5 @@
module Docs module Docs
class C class Cppref
class FixCodeFilter < Filter class FixCodeFilter < Filter
def call def call
css('div > span.source-c', 'div > span.source-cpp').each do |node| css('div > span.source-c', 'div > span.source-cpp').each do |node|

@ -1,42 +0,0 @@
module Docs
  # File-based scraper configuration for the C section of cppreference.com.
  class C < FileScraper
    self.type = 'c'
    self.base_url = 'http://en.cppreference.com/w/c/'
    self.root_path = 'header.html'

    html_filters.insert_before 'clean_html', 'c/fix_code'
    html_filters.push 'c/entries', 'c/clean_html', 'title'
    text_filters.push 'c/fix_urls'

    options[:decode_and_clean_paths] = true
    options[:container] = '#content'
    options[:title] = false
    options[:root_title] = 'C Programming Language'
    options[:skip] = %w(language/history.html)
    options[:skip_patterns] = [/experimental/]

    # Repairs URLs in place: collapses everything up to an embedded
    # "/http%3A/" into "http://", and fixes the upload host that ends up
    # nested under en.cppreference.com.
    options[:fix_urls] = ->(url) do
      url.sub! %r{\A.+/http%3A/}, 'http://'
      url.sub! 'http://en.cppreference.com/upload.cppreference.com', 'http://upload.cppreference.com'
      url
    end

    # The heredoc body is kept flush-left so the attribution string itself
    # is unchanged.
    options[:attribution] = <<-HTML
&copy; cppreference.com<br>
Licensed under the Creative Commons Attribution-ShareAlike Unported License v3.0.
    HTML

    # Returns the latest upstream version as a Unix timestamp (Integer).
    #
    # Reads the cppreference archives page, takes the first link whose title
    # starts with "File:", and parses the first run of digits followed by a
    # dot in its text as a %Y%m%d date.
    def get_latest_version(opts)
      doc = fetch_doc('https://en.cppreference.com/w/Cppreference:Archives', opts)
      link = doc.at_css('a[title^="File:"]')
      date = link.content.scan(/(\d+)\./)[0][0]
      DateTime.strptime(date, '%Y%m%d').to_time.to_i
    end

    private

    # Percent-decodes the path computed by the superclass.
    def file_path_for(*)
      # URI.unescape was removed in Ruby 3.0; the default parser's #unescape
      # does the same percent-decoding and leaves "+" untouched.
      URI::DEFAULT_PARSER.unescape(super)
    end
  end
end

@ -1,52 +0,0 @@
module Docs
  # File-based scraper configuration for the C++ section of cppreference.com.
  # It uses the C scraper's 'c/fix_code' and 'c/clean_html' filters.
  class Cpp < FileScraper
    self.name = 'C++'
    self.slug = 'cpp'
    self.type = 'c'
    self.base_url = 'http://en.cppreference.com/w/cpp/'
    self.root_path = 'header.html'

    html_filters.insert_before 'clean_html', 'c/fix_code'
    html_filters.push 'cpp/entries', 'c/clean_html', 'title'
    text_filters.push 'cpp/fix_urls'

    options[:decode_and_clean_paths] = true
    options[:container] = '#content'
    options[:title] = false
    options[:root_title] = 'C++ Programming Language'
    options[:skip] = %w(
      language/extending_std.html
      language/history.html
      regex/ecmascript.html
      regex/regex_token_iterator/operator_cmp.html
    )
    options[:skip_patterns] = [/experimental/]
    options[:only_patterns] = [/\.html\z/]

    # Repairs URLs: works on a CGI-unescaped copy, collapses everything up to
    # an embedded "/http%3A/" into "http://", and fixes the upload host that
    # ends up nested under en.cppreference.com.
    options[:fix_urls] = ->(url) do
      url = CGI.unescape(url)
      url.sub! %r{\A.+/http%3A/}, 'http://'
      url.sub! 'http://en.cppreference.com/upload.cppreference.com', 'http://upload.cppreference.com'
      url
    end

    # The heredoc body is kept flush-left so the attribution string itself
    # is unchanged.
    options[:attribution] = <<-HTML
&copy; cppreference.com<br>
Licensed under the Creative Commons Attribution-ShareAlike Unported License v3.0.
    HTML

    # Same as get_latest_version in lib/docs/scrapers/c.rb
    #
    # Returns the latest upstream version as a Unix timestamp (Integer).
    # Reads the cppreference archives page, takes the first link whose title
    # starts with "File:", and parses the first run of digits followed by a
    # dot in its text as a %Y%m%d date.
    def get_latest_version(opts)
      doc = fetch_doc('https://en.cppreference.com/w/Cppreference:Archives', opts)
      link = doc.at_css('a[title^="File:"]')
      date = link.content.scan(/(\d+)\./)[0][0]
      DateTime.strptime(date, '%Y%m%d').to_time.to_i
    end

    private

    # Percent-decodes the path computed by the superclass.
    def file_path_for(*)
      # URI.unescape was removed in Ruby 3.0; the default parser's #unescape
      # does the same percent-decoding and leaves "+" untouched.
      URI::DEFAULT_PARSER.unescape(super)
    end
  end
end

@ -0,0 +1,12 @@
module Docs
  # Scraper configuration for the C docs; shared settings come from Cppref.
  class C < Cppref
    # Capitalized to agree with the root title below and with the 'C++'
    # name used by the sibling Cpp scraper.
    self.name = 'C'
    self.slug = 'c'
    self.base_url = 'https://en.cppreference.com/w/c/'

    # Register the C entries filter ahead of 'cppref/clean_html'.
    html_filters.insert_before 'cppref/clean_html', 'c/entries'

    options[:root_title] = 'C Programming Language'
  end
end

@ -0,0 +1,19 @@
module Docs
  # Scraper configuration for the C++ docs; shared settings come from Cppref.
  class Cpp < Cppref
    self.name = 'C++'
    self.slug = 'cpp'
    self.base_url = 'https://en.cppreference.com/w/cpp/'

    # Register the C++ entries filter ahead of 'cppref/clean_html'.
    html_filters.insert_before 'cppref/clean_html', 'cpp/entries'

    options[:root_title] = 'C++ Programming Language'

    # Paths assigned to the :skip option for this scraper.
    options[:skip] = [
      'language/extending_std.html',
      'language/history.html',
      'regex/ecmascript.html',
      'regex/regex_token_iterator/operator_cmp.html'
    ]
  end
end

@ -0,0 +1,33 @@
module Docs
  # Abstract base scraper holding the settings shared by the
  # cppreference.com scrapers.
  class Cppref < UrlScraper
    self.abstract = true
    self.type = 'cppref'
    self.root_path = 'header'

    html_filters.insert_before 'clean_html', 'cppref/fix_code'
    html_filters.push 'cppref/clean_html', 'title'

    options[:decode_and_clean_paths] = true
    options[:container] = '#content'
    options[:title] = false
    options[:skip] = %w(language/history.html)
    options[:skip_patterns] = [/experimental/]

    # The heredoc body is kept flush-left so the attribution string itself
    # is unchanged.
    options[:attribution] = <<-HTML
&copy; cppreference.com<br>
Licensed under the Creative Commons Attribution-ShareAlike Unported License v3.0.
    HTML

    # Check if the 'headers' page has changed.
    #
    # Fetches the root page, reads the text of its '#footer-info-lastmod'
    # element, extracts a "<day> <month name> <year>" date from it and
    # returns that date as a Unix timestamp (Integer).
    def get_latest_version(opts)
      page = fetch_doc(base_url + root_path, opts)
      footer_text = page.at_css('#footer-info-lastmod').content
      # No match yields an empty string, which strptime then rejects.
      stamp = footer_text[/[[:digit:]]{1,2} .* [[:digit:]]{4}/].to_s
      DateTime.strptime(stamp, '%e %B %Y').to_time.to_i
    end
  end
end
Loading…
Cancel
Save