Finish Nim scraper

pull/669/head
Thibaut Courouble 7 years ago
parent 09eea66d57
commit a4ba32ebca

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

After

Width:  |  Height:  |  Size: 31 KiB

@ -1,5 +1,8 @@
[ [
[ [
"2017-09-03",
"New documentation: <a href=\"/nim/\">Nim</a>"
], [
"2017-07-23", "2017-07-23",
"New documentation: <a href=\"/godot/\">Godot</a>" "New documentation: <a href=\"/godot/\">Godot</a>"
], [ ], [

@ -423,6 +423,11 @@ credits = [
'2009-2016 Xiaozhe Wang (chaoslawful)<br>&copy; 2009-2017 Yichun "agentzh" Zhang (章亦春), OpenResty Inc.', '2009-2016 Xiaozhe Wang (chaoslawful)<br>&copy; 2009-2017 Yichun "agentzh" Zhang (章亦春), OpenResty Inc.',
'BSD', 'BSD',
'https://github.com/openresty/lua-nginx-module#copyright-and-license' 'https://github.com/openresty/lua-nginx-module#copyright-and-license'
], [
'Nim',
'2006-2017 Andreas Rumpf',
'MIT',
'https://github.com/nim-lang/Nim#license'
], [ ], [
'Node.js', 'Node.js',
'Joyent, Inc. and other Node contributors<br>Node.js is a trademark of Joyent, Inc.', 'Joyent, Inc. and other Node contributors<br>Node.js is a trademark of Joyent, Inc.',

@ -1,4 +1,4 @@
/* http://prismjs.com/download.html?themes=prism&languages=markup+css+clike+javascript+c+cpp+coffeescript+ruby+elixir+erlang+go+java+json+kotlin+lua+nginx+perl+php+python+crystal+rust+scss+sql+typescript */ /* http://prismjs.com/download.html?themes=prism&languages=markup+css+clike+javascript+c+cpp+coffeescript+ruby+elixir+erlang+go+java+json+kotlin+lua+nginx+nim+perl+php+python+crystal+rust+scss+sql+typescript */
var _self = (typeof window !== 'undefined') var _self = (typeof window !== 'undefined')
? window // if in browser ? window // if in browser
: ( : (
@ -568,6 +568,9 @@ Prism.languages.markup = {
'entity': /&#?[\da-z]{1,8};/i 'entity': /&#?[\da-z]{1,8};/i
}; };
Prism.languages.markup['tag'].inside['attr-value'].inside['entity'] =
Prism.languages.markup['entity'];
// Plugin to make entity title show the real entity, idea by Roman Komarov // Plugin to make entity title show the real entity, idea by Roman Komarov
Prism.hooks.add('wrap', function(env) { Prism.hooks.add('wrap', function(env) {
@ -664,7 +667,7 @@ Prism.languages.clike = {
Prism.languages.javascript = Prism.languages.extend('clike', { Prism.languages.javascript = Prism.languages.extend('clike', {
'keyword': /\b(as|async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|from|function|get|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|var|void|while|with|yield)\b/, 'keyword': /\b(as|async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|from|function|get|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|var|void|while|with|yield)\b/,
'number': /\b-?(0x[\dA-Fa-f]+|0b[01]+|0o[0-7]+|\d*\.?\d+([Ee][+-]?\d+)?|NaN|Infinity)\b/, 'number': /\b-?(0[xX][\dA-Fa-f]+|0[bB][01]+|0[oO][0-7]+|\d*\.?\d+([Ee][+-]?\d+)?|NaN|Infinity)\b/,
// Allow for all non-ASCII characters (See http://stackoverflow.com/a/2008444) // Allow for all non-ASCII characters (See http://stackoverflow.com/a/2008444)
'function': /[_$a-zA-Z\xA0-\uFFFF][_$a-zA-Z0-9\xA0-\uFFFF]*(?=\()/i, 'function': /[_$a-zA-Z\xA0-\uFFFF][_$a-zA-Z0-9\xA0-\uFFFF]*(?=\()/i,
'operator': /-[-=]?|\+[+=]?|!=?=?|<<?=?|>>?>?=?|=(?:==?|>)?|&[&=]?|\|[|=]?|\*\*?=?|\/=?|~|\^=?|%=?|\?|\.{3}/ 'operator': /-[-=]?|\+[+=]?|!=?=?|<<?=?|>>?>?=?|=(?:==?|>)?|&[&=]?|\|[|=]?|\*\*?=?|\/=?|~|\^=?|%=?|\?|\.{3}/
@ -672,7 +675,7 @@ Prism.languages.javascript = Prism.languages.extend('clike', {
Prism.languages.insertBefore('javascript', 'keyword', { Prism.languages.insertBefore('javascript', 'keyword', {
'regex': { 'regex': {
pattern: /(^|[^/])\/(?!\/)(\[.+?]|\\.|[^/\\\r\n])+\/[gimyu]{0,5}(?=\s*($|[\r\n,.;})]))/, pattern: /(^|[^/])\/(?!\/)(\[[^\]\r\n]+]|\\.|[^/\\\[\r\n])+\/[gimyu]{0,5}(?=\s*($|[\r\n,.;})]))/,
lookbehind: true, lookbehind: true,
greedy: true greedy: true
} }
@ -710,6 +713,7 @@ if (Prism.languages.markup) {
} }
Prism.languages.js = Prism.languages.javascript; Prism.languages.js = Prism.languages.javascript;
Prism.languages.c = Prism.languages.extend('clike', { Prism.languages.c = Prism.languages.extend('clike', {
'keyword': /\b(asm|typeof|inline|auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|int|long|register|return|short|signed|sizeof|static|struct|switch|typedef|union|unsigned|void|volatile|while)\b/, 'keyword': /\b(asm|typeof|inline|auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|int|long|register|return|short|signed|sizeof|static|struct|switch|typedef|union|unsigned|void|volatile|while)\b/,
'operator': /\-[>-]?|\+\+?|!=?|<<?=?|>>?=?|==?|&&?|\|?\||[~^%?*\/]/, 'operator': /\-[>-]?|\+\+?|!=?|<<?=?|>>?=?|==?|&&?|\|?\||[~^%?*\/]/,
@ -1245,6 +1249,39 @@ Prism.languages.nginx = Prism.languages.extend('clike', {
Prism.languages.insertBefore('nginx', 'keyword', { Prism.languages.insertBefore('nginx', 'keyword', {
'variable': /\$[a-z_]+/i 'variable': /\$[a-z_]+/i
}); });
Prism.languages.nim = {
'comment': /#.*/,
// Double-quoted strings can be prefixed by an identifier (Generalized raw string literals)
// Character literals are handled specifically to prevent issues with numeric type suffixes
'string': {
pattern: /(?:(?:\b(?!\d)(?:\w|\\x[8-9a-fA-F][0-9a-fA-F])+)?(?:"""[\s\S]*?"""(?!")|"(?:\\[\s\S]|""|[^"\\])*")|'(?:\\(?:\d+|x[\da-fA-F]{2}|.)|[^'])')/,
greedy: true
},
// The negative look ahead prevents wrong highlighting of the .. operator
'number': /\b(?:0[xXoObB][\da-fA-F_]+|\d[\d_]*(?:(?!\.\.)\.[\d_]*)?(?:[eE][+-]?\d[\d_]*)?)(?:'?[iuf]\d*)?/,
'keyword': /\b(?:addr|as|asm|atomic|bind|block|break|case|cast|concept|const|continue|converter|defer|discard|distinct|do|elif|else|end|enum|except|export|finally|for|from|func|generic|if|import|include|interface|iterator|let|macro|method|mixin|nil|object|out|proc|ptr|raise|ref|return|static|template|try|tuple|type|using|var|when|while|with|without|yield)\b/,
'function': {
pattern: /(?:(?!\d)(?:\w|\\x[8-9a-fA-F][0-9a-fA-F])+|`[^`\r\n]+`)\*?(?:\[[^\]]+\])?(?=\s*\()/,
inside: {
'operator': /\*$/
}
},
// We don't want to highlight operators inside backticks
'ignore': {
pattern: /`[^`\r\n]+`/,
inside: {
'punctuation': /`/
}
},
'operator': {
// Look behind and look ahead prevent wrong highlighting of punctuations [. .] {. .} (. .)
// but allow the slice operator .. to take precedence over them
// One can define his own operators in Nim so all combination of operators might be an operator.
pattern: /(^|[({\[](?=\.\.)|(?![({\[]\.).)(?:(?:[=+\-*\/<>@$~&%|!?^:\\]|\.\.|\.(?![)}\]]))+|\b(?:and|div|of|or|in|is|isnot|mod|not|notin|shl|shr|xor)\b)/m,
lookbehind: true
},
'punctuation': /[({\[]\.|\.[)}\]]|[`(){}\[\],:]/
};
Prism.languages.perl = { Prism.languages.perl = {
'comment': [ 'comment': [
{ {

@ -67,7 +67,6 @@
'pages/meteor', 'pages/meteor',
'pages/modernizr', 'pages/modernizr',
'pages/moment', 'pages/moment',
'pages/nim',
'pages/nginx', 'pages/nginx',
'pages/node', 'pages/node',
'pages/npm', 'pages/npm',

@ -67,7 +67,6 @@
'pages/meteor', 'pages/meteor',
'pages/modernizr', 'pages/modernizr',
'pages/moment', 'pages/moment',
'pages/nim',
'pages/nginx', 'pages/nginx',
'pages/node', 'pages/node',
'pages/npm', 'pages/npm',

@ -77,6 +77,11 @@
} }
} }
.token.important { .token.important,
.token.bold {
font-weight: $boldFontWeight; font-weight: $boldFontWeight;
} }
.token.italic {
font-style: italic;
}

@ -173,3 +173,4 @@
._icon-electron:before { background-position: -2rem -2rem; @extend %doc-icon-2; } ._icon-electron:before { background-position: -2rem -2rem; @extend %doc-icon-2; }
._icon-falcon:before { background-position: -3rem -2rem; @extend %doc-icon-2; } ._icon-falcon:before { background-position: -3rem -2rem; @extend %doc-icon-2; }
._icon-godot:before { background-position: -4rem -2rem; @extend %doc-icon-2; } ._icon-godot:before { background-position: -4rem -2rem; @extend %doc-icon-2; }
._icon-nim:before { background-position: -5rem -2rem; @extend %doc-icon-2; @extend %darkIconFix !optional; }

@ -1,81 +0,0 @@
._nim {
@extend %simple;
@if $style == 'dark' {
span.DecNumber { color: #AE81FF; }
span.BinNumber { color: #AE81FF; }
span.HexNumber { color: #AE81FF; }
span.OctNumber { color: #AE81FF; }
span.FloatNumber { color: #AE81FF; }
span.Identifier { color: #F8F8F2; }
span.Keyword { font-weight: 600; color: #F92672; }
span.StringLit { color: #E6DB74; }
span.LongStringLit { color: #E6DB74; }
span.CharLit { color: #E6DB74; }
span.EscapeSequence { color: white; }
span.Operator { color: white; }
span.Punctuation {color: white; }
span.Comment, span.LongComment {
font-style: italic;
font-weight: 400;
color: #75715E; }
span.RegularExpression { color: darkviolet; }
span.TagStart { color: #F92672; }
span.TagEnd { color: #F92672; }
span.Key { color: #AE81FF; }
span.Value { color: #AE81FF; }
span.RawData { color: #a4255b; }
span.Assembler { color: #AE81FF; }
span.Preprocessor { color: #AE81FF; }
span.Directive { color: #AE81FF; }
span.Command, span.Rule, span.Hyperlink, span.Label, span.Reference,
span.Other { color: white; }
/* Pop type, const, proc, and iterator defs in nim def blocks */
dt pre > span.Identifier, dt pre > span.Operator { color: #529B2F; font-weight: 700; }
} @else {
span.DecNumber { color: #252dbe; }
span.BinNumber { color: #252dbe; }
span.HexNumber { color: #252dbe; }
span.OctNumber { color: #252dbe; }
span.FloatNumber { color: #252dbe; }
span.Identifier { color: #3b3b3b; }
span.Keyword { font-weight: 600; color: #5e8f60; }
span.StringLit { color: #a4255b; }
span.LongStringLit { color: #a4255b; }
span.CharLit { color: #a4255b; }
span.EscapeSequence { color: black; }
span.Operator { color: black; }
span.Punctuation {color: black; }
span.Comment, span.LongComment {
font-style: italic;
font-weight: 400;
color: #484a86; }
span.RegularExpression { color: darkviolet; }
span.TagStart { color: darkviolet; }
span.TagEnd { color: darkviolet; }
span.Key { color: #252dbe; }
span.Value { color: #252dbe; }
span.RawData { color: #a4255b; }
span.Assembler { color: #252dbe; }
span.Preprocessor { color: #252dbe; }
span.Directive { color: #252dbe; }
span.Command, span.Rule, span.Hyperlink, span.Label, span.Reference,
span.Other { color: black; }
/* Pop type, const, proc, and iterator defs in nim def blocks */
dt pre > span.Identifier, dt pre > span.Operator { color: #155da4; font-weight: 700; }
}
dt pre > span.Identifier ~ span.Identifier, dt pre > span.Operator ~ span.Identifier {
color: inherit;
font-weight: inherit; }
dt pre > span.Operator ~ span.Identifier, dt pre > span.Operator ~ span.Operator {
color: inherit;
font-weight: inherit; }
}

@ -38,6 +38,7 @@
._markdown, ._markdown,
._mocha, ._mocha,
._mongoose, ._mongoose,
._nim,
._redux, ._redux,
._requirejs, ._requirejs,
._typescript, ._typescript,

@ -4,26 +4,60 @@ module Docs
def call def call
@doc = at_css('#documentId .container') @doc = at_css('#documentId .container')
css('.docinfo').remove css('.docinfo', '.footer', 'blockquote > p:empty', '.link-seesrc').remove
content = at_css('#content') css('h1:not(.title), h2, h3, h4').each do |node|
if content != nil node.name = node.name.sub(/\d/) { |i| i.to_i + 1 }
at_css('#content').remove_attribute('class')
@doc.add_child(at_css('#content').inner_html)
end end
css('> div.row').remove if content = at_css('#content')
content.prepend_child at_css('h1.title')
@doc = content
end
css('pre').each do |node| if root_page?
node['data-language'] = 'nim' at_css('h1').content = 'Nim Documentation'
end end
# remove link from headers
css('h1 > a', 'h2 > a', 'h3 > a', 'h4 > a').each do |node| css('h1 > a', 'h2 > a', 'h3 > a', 'h4 > a').each do |node|
node.parent['id'] = node['id'] node.parent['id'] = node['id'] if node['id']
node.parent.content = node.content node.before(node.children).remove
end
css('a[name]').each do |node|
node.next_element['id'] = node['name']
node.remove
end
css('pre').each do |node|
node.content = node.content.strip
node['data-language'] = 'nim' unless node.content =~ /\A[\w\-\_\:\=\ ]+\z/
end end
css('tt').each do |node|
node.name = 'code'
end
css('cite').each do |node|
node.name = 'em'
end
css('.section').each do |node|
node.first_element_child['id'] = node['id'] if node['id']
node.before(node.children).remove
end
css('span.pre').each do |node|
node.before(node.children).remove
end
css('blockquote > pre:only-child', 'blockquote > dl:only-child', 'blockquote > table').each do |node|
node.parent.before(node.parent.children).remove
end
css('a', 'dl', 'table', 'code').remove_attr('class')
css('table').remove_attr('border')
doc doc
end end
end end

@ -1,49 +1,62 @@
module Docs module Docs
class Nim class Nim
class EntriesFilter < Docs::EntriesFilter class EntriesFilter < Docs::EntriesFilter
def get_type def get_name
at_css('h1').content name = at_css('h1').content
name.remove! 'Module '
name.remove! ' User Guide'
name.remove! ' User\'s manual'
name.remove! %r{ \-.*}
name.strip!
name
end end
def get_name def get_type
at_css('h1').content if name.include?('Tutorial')
'Tutorial'
elsif slug == 'manual'
'Manual'
elsif at_css('h1').content.include?('Module ')
name
else
'Reference'
end
end end
def additional_entries def additional_entries
entries = [] entries = []
if get_name.start_with? 'Module '
module_name = get_name[7..-1] if at_css('h1').content.include?('Module ')
css('div .section').map do |node| css('#toc-list > li > .simple-toc-section').each do |node|
section_node = node.at_css('h1 a') type = node.previous_element.content.strip
if section_node != nil
section_name = section_node.content.strip node.css('a.reference:not(.reference-toplevel)').each do |n|
items_node = node.at_css('dl.item') n.css('span').remove
if items_node != nil name = n.content.strip
items_node.css('dt a').map do |item_node| name << '()' if (type == 'Procs' || type == 'Templates') && !name.include?('`')
item_name = item_node['name'] name.remove! '`'
if item_name.include? ',' name.prepend "#{self.name}."
item_name = item_name.sub(',', '(') + ')' id = n['href'].remove('#')
end entries << [name, id] unless entries.any? { |e| e[0] == name }
entries << [module_name + '.' + item_name, item_node.parent['id']]
end
end
end end
end end
else elsif slug == 'manual'
css('h1', 'h2', 'h3').map do |node| css('#toc-list > li > a').each do |node|
id = node['id']
name = node.content.strip name = node.content.strip
if id != nil next if name.start_with?('About')
entries << [name, id] id = node['href'].remove('#')
else entries << [name, id]
a = node.at_css('a') end
if a != nil
id = a['id'] css('#toc-list > ul').each do |node|
entries << [name, id] type = node.previous_element.content.strip
end
node.css('> li > a').each do |n|
entries << [n.content.strip, n['href'].remove('#'), "Manual: #{type}"]
end end
end end
end end
entries entries
end end
end end

@ -2,20 +2,20 @@ module Docs
class Nim < UrlScraper class Nim < UrlScraper
self.type = 'nim' self.type = 'nim'
self.release = '0.17.0' self.release = '0.17.0'
self.base_url = 'https://nim-lang.org/docs/'
self.root_path = 'overview.html'
self.links = { self.links = {
home: 'https://nim-lang.org/', home: 'https://nim-lang.org/',
code: 'https://github.com/nim-lang/Nim' code: 'https://github.com/nim-lang/Nim'
} }
self.base_url = 'https://nim-lang.org/'
self.root_path = 'docs/overview.html'
html_filters.push 'nim/entries', 'nim/clean_html' html_filters.push 'nim/entries', 'nim/clean_html'
options[:skip] = %w(cdn-cgi/l/email-protection docs/theindex.html docs/docgen.txt) options[:skip] = %w(theindex.html docgen.txt)
options[:attribution] = <<-HTML options[:attribution] = <<-HTML
&copy; 2006&ndash;2017 Andreas Rumpf<br> &copy; 2006&ndash;2017 Andreas Rumpf<br>
All rights reserved. Licensed under the MIT License. Licensed under the MIT License.
HTML HTML
end end
end end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 789 B

After

Width:  |  Height:  |  Size: 283 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 531 B

@ -1 +1 @@
https://nim-lang.org/assets/img/logo.svg https://github.com/nim-lang/website/tree/master/jekyll/assets/img
Loading…
Cancel
Save