Finish Nim scraper

pull/669/head
Thibaut Courouble 7 years ago
parent 09eea66d57
commit a4ba32ebca

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

After

Width:  |  Height:  |  Size: 31 KiB

@ -1,5 +1,8 @@
[
[
"2017-09-03",
"New documentation: <a href=\"/nim/\">Nim</a>"
], [
"2017-07-23",
"New documentation: <a href=\"/godot/\">Godot</a>"
], [

@ -423,6 +423,11 @@ credits = [
'2009-2016 Xiaozhe Wang (chaoslawful)<br>&copy; 2009-2017 Yichun "agentzh" Zhang (章亦春), OpenResty Inc.',
'BSD',
'https://github.com/openresty/lua-nginx-module#copyright-and-license'
], [
'Nim',
'2006-2017 Andreas Rumpf',
'MIT',
'https://github.com/nim-lang/Nim#license'
], [
'Node.js',
'Joyent, Inc. and other Node contributors<br>Node.js is a trademark of Joyent, Inc.',

@ -1,4 +1,4 @@
/* http://prismjs.com/download.html?themes=prism&languages=markup+css+clike+javascript+c+cpp+coffeescript+ruby+elixir+erlang+go+java+json+kotlin+lua+nginx+perl+php+python+crystal+rust+scss+sql+typescript */
/* http://prismjs.com/download.html?themes=prism&languages=markup+css+clike+javascript+c+cpp+coffeescript+ruby+elixir+erlang+go+java+json+kotlin+lua+nginx+nim+perl+php+python+crystal+rust+scss+sql+typescript */
var _self = (typeof window !== 'undefined')
? window // if in browser
: (
@ -568,6 +568,9 @@ Prism.languages.markup = {
'entity': /&#?[\da-z]{1,8};/i
};
Prism.languages.markup['tag'].inside['attr-value'].inside['entity'] =
Prism.languages.markup['entity'];
// Plugin to make entity title show the real entity, idea by Roman Komarov
Prism.hooks.add('wrap', function(env) {
@ -664,7 +667,7 @@ Prism.languages.clike = {
Prism.languages.javascript = Prism.languages.extend('clike', {
'keyword': /\b(as|async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|from|function|get|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|var|void|while|with|yield)\b/,
'number': /\b-?(0x[\dA-Fa-f]+|0b[01]+|0o[0-7]+|\d*\.?\d+([Ee][+-]?\d+)?|NaN|Infinity)\b/,
'number': /\b-?(0[xX][\dA-Fa-f]+|0[bB][01]+|0[oO][0-7]+|\d*\.?\d+([Ee][+-]?\d+)?|NaN|Infinity)\b/,
// Allow for all non-ASCII characters (See http://stackoverflow.com/a/2008444)
'function': /[_$a-zA-Z\xA0-\uFFFF][_$a-zA-Z0-9\xA0-\uFFFF]*(?=\()/i,
'operator': /-[-=]?|\+[+=]?|!=?=?|<<?=?|>>?>?=?|=(?:==?|>)?|&[&=]?|\|[|=]?|\*\*?=?|\/=?|~|\^=?|%=?|\?|\.{3}/
@ -672,7 +675,7 @@ Prism.languages.javascript = Prism.languages.extend('clike', {
Prism.languages.insertBefore('javascript', 'keyword', {
'regex': {
pattern: /(^|[^/])\/(?!\/)(\[.+?]|\\.|[^/\\\r\n])+\/[gimyu]{0,5}(?=\s*($|[\r\n,.;})]))/,
pattern: /(^|[^/])\/(?!\/)(\[[^\]\r\n]+]|\\.|[^/\\\[\r\n])+\/[gimyu]{0,5}(?=\s*($|[\r\n,.;})]))/,
lookbehind: true,
greedy: true
}
@ -710,6 +713,7 @@ if (Prism.languages.markup) {
}
Prism.languages.js = Prism.languages.javascript;
Prism.languages.c = Prism.languages.extend('clike', {
'keyword': /\b(asm|typeof|inline|auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|int|long|register|return|short|signed|sizeof|static|struct|switch|typedef|union|unsigned|void|volatile|while)\b/,
'operator': /\-[>-]?|\+\+?|!=?|<<?=?|>>?=?|==?|&&?|\|?\||[~^%?*\/]/,
@ -1245,6 +1249,39 @@ Prism.languages.nginx = Prism.languages.extend('clike', {
Prism.languages.insertBefore('nginx', 'keyword', {
'variable': /\$[a-z_]+/i
});
// Prism grammar for the Nim language: each key is a token name and each value
// is either a regex or an object holding a `pattern` plus matching options.
Prism.languages.nim = {
// A comment runs from `#` to the end of the line.
'comment': /#.*/,
// Double-quoted strings can be prefixed by an identifier (Generalized raw string literals)
// Character literals are handled specifically to prevent issues with numeric type suffixes
'string': {
// Alternatives, in order: an optional identifier prefix followed by either a
// triple-quoted string or a double-quoted string (where `""` and backslash
// escapes are accepted inside), or a single-quoted character literal.
pattern: /(?:(?:\b(?!\d)(?:\w|\\x[8-9a-fA-F][0-9a-fA-F])+)?(?:"""[\s\S]*?"""(?!")|"(?:\\[\s\S]|""|[^"\\])*")|'(?:\\(?:\d+|x[\da-fA-F]{2}|.)|[^'])')/,
greedy: true
},
// The negative look ahead prevents wrong highlighting of the .. operator
// Accepts either a 0x/0o/0b-prefixed literal or a decimal literal with optional
// fraction and exponent (underscores allowed between digits), followed by an
// optional suffix: an optional apostrophe, one of i/u/f, and optional digits.
'number': /\b(?:0[xXoObB][\da-fA-F_]+|\d[\d_]*(?:(?!\.\.)\.[\d_]*)?(?:[eE][+-]?\d[\d_]*)?)(?:'?[iuf]\d*)?/,
// Reserved words, matched only as whole words.
'keyword': /\b(?:addr|as|asm|atomic|bind|block|break|case|cast|concept|const|continue|converter|defer|discard|distinct|do|elif|else|end|enum|except|export|finally|for|from|func|generic|if|import|include|interface|iterator|let|macro|method|mixin|nil|object|out|proc|ptr|raise|ref|return|static|template|try|tuple|type|using|var|when|while|with|without|yield)\b/,
// An identifier or a backtick-quoted name, optionally followed by `*` and a
// bracketed section, that is immediately followed (after optional whitespace)
// by an opening parenthesis.
'function': {
pattern: /(?:(?!\d)(?:\w|\\x[8-9a-fA-F][0-9a-fA-F])+|`[^`\r\n]+`)\*?(?:\[[^\]]+\])?(?=\s*\()/,
inside: {
// A trailing `*` on the matched name is tokenized as an operator
// (presumably Nim's export marker -- confirm against the Nim manual).
'operator': /\*$/
}
},
// We don't want to highlight operators inside backticks
'ignore': {
pattern: /`[^`\r\n]+`/,
inside: {
// Only the backticks themselves are tokenized inside the quoted span.
'punctuation': /`/
}
},
'operator': {
// Look behind and look ahead prevent wrong highlighting of punctuations [. .] {. .} (. .)
// but allow the slice operator .. to take precedence over them
// Users can define their own operators in Nim, so any combination of operator characters may be an operator.
// The second alternative matches the word operators (and, div, of, or, ...).
pattern: /(^|[({\[](?=\.\.)|(?![({\[]\.).)(?:(?:[=+\-*\/<>@$~&%|!?^:\\]|\.\.|\.(?![)}\]]))+|\b(?:and|div|of|or|in|is|isnot|mod|not|notin|shl|shr|xor)\b)/m,
lookbehind: true
},
// Two-character bracket forms `(.` `{.` `[.` and `.)` `.}` `.]` are tried first,
// then the single-character punctuation marks.
'punctuation': /[({\[]\.|\.[)}\]]|[`(){}\[\],:]/
};
Prism.languages.perl = {
'comment': [
{

@ -67,7 +67,6 @@
'pages/meteor',
'pages/modernizr',
'pages/moment',
'pages/nim',
'pages/nginx',
'pages/node',
'pages/npm',

@ -67,7 +67,6 @@
'pages/meteor',
'pages/modernizr',
'pages/moment',
'pages/nim',
'pages/nginx',
'pages/node',
'pages/npm',

@ -77,6 +77,11 @@
}
}
.token.important {
.token.important,
.token.bold {
font-weight: $boldFontWeight;
}
.token.italic {
font-style: italic;
}

@ -173,3 +173,4 @@
._icon-electron:before { background-position: -2rem -2rem; @extend %doc-icon-2; }
._icon-falcon:before { background-position: -3rem -2rem; @extend %doc-icon-2; }
._icon-godot:before { background-position: -4rem -2rem; @extend %doc-icon-2; }
._icon-nim:before { background-position: -5rem -2rem; @extend %doc-icon-2; @extend %darkIconFix !optional; }

@ -1,81 +0,0 @@
// Page styles for the `._nim` container: extends the shared `%simple`
// placeholder and colors the span classes used for highlighted code
// (class names such as DecNumber/Keyword/StringLit are presumably those
// emitted by Nim's documentation generator -- confirm against the scraped HTML).
._nim {
@extend %simple;
// Palette used when the stylesheet is compiled with $style set to 'dark'.
@if $style == 'dark' {
// Every numeric literal class shares one color.
span.DecNumber { color: #AE81FF; }
span.BinNumber { color: #AE81FF; }
span.HexNumber { color: #AE81FF; }
span.OctNumber { color: #AE81FF; }
span.FloatNumber { color: #AE81FF; }
span.Identifier { color: #F8F8F2; }
span.Keyword { font-weight: 600; color: #F92672; }
// String and character literal classes share one color.
span.StringLit { color: #E6DB74; }
span.LongStringLit { color: #E6DB74; }
span.CharLit { color: #E6DB74; }
span.EscapeSequence { color: white; }
span.Operator { color: white; }
span.Punctuation {color: white; }
span.Comment, span.LongComment {
font-style: italic;
font-weight: 400;
color: #75715E; }
span.RegularExpression { color: darkviolet; }
span.TagStart { color: #F92672; }
span.TagEnd { color: #F92672; }
span.Key { color: #AE81FF; }
span.Value { color: #AE81FF; }
// NOTE(review): RawData uses the same #a4255b in both the dark and the
// light branch -- confirm this is intentional.
span.RawData { color: #a4255b; }
span.Assembler { color: #AE81FF; }
span.Preprocessor { color: #AE81FF; }
span.Directive { color: #AE81FF; }
span.Command, span.Rule, span.Hyperlink, span.Label, span.Reference,
span.Other { color: white; }
/* Pop type, const, proc, and iterator defs in nim def blocks */
dt pre > span.Identifier, dt pre > span.Operator { color: #529B2F; font-weight: 700; }
// Palette used for every other value of $style.
} @else {
// Every numeric literal class shares one color.
span.DecNumber { color: #252dbe; }
span.BinNumber { color: #252dbe; }
span.HexNumber { color: #252dbe; }
span.OctNumber { color: #252dbe; }
span.FloatNumber { color: #252dbe; }
span.Identifier { color: #3b3b3b; }
span.Keyword { font-weight: 600; color: #5e8f60; }
// String and character literal classes share one color.
span.StringLit { color: #a4255b; }
span.LongStringLit { color: #a4255b; }
span.CharLit { color: #a4255b; }
span.EscapeSequence { color: black; }
span.Operator { color: black; }
span.Punctuation {color: black; }
span.Comment, span.LongComment {
font-style: italic;
font-weight: 400;
color: #484a86; }
span.RegularExpression { color: darkviolet; }
span.TagStart { color: darkviolet; }
span.TagEnd { color: darkviolet; }
span.Key { color: #252dbe; }
span.Value { color: #252dbe; }
span.RawData { color: #a4255b; }
span.Assembler { color: #252dbe; }
span.Preprocessor { color: #252dbe; }
span.Directive { color: #252dbe; }
span.Command, span.Rule, span.Hyperlink, span.Label, span.Reference,
span.Other { color: black; }
/* Pop type, const, proc, and iterator defs in nim def blocks */
dt pre > span.Identifier, dt pre > span.Operator { color: #155da4; font-weight: 700; }
}
// Undo the `dt pre` highlight for spans that come after an earlier
// Identifier/Operator child of the same `dt pre`: their color and weight
// fall back to the inherited values.
dt pre > span.Identifier ~ span.Identifier, dt pre > span.Operator ~ span.Identifier {
color: inherit;
font-weight: inherit; }
// NOTE(review): `dt pre > span.Operator ~ span.Identifier` is already listed in
// the rule directly above; only the `Operator ~ Operator` selector is new here.
dt pre > span.Operator ~ span.Identifier, dt pre > span.Operator ~ span.Operator {
color: inherit;
font-weight: inherit; }
}

@ -38,6 +38,7 @@
._markdown,
._mocha,
._mongoose,
._nim,
._redux,
._requirejs,
._typescript,

@ -4,26 +4,60 @@ module Docs
def call
@doc = at_css('#documentId .container')
css('.docinfo').remove
css('.docinfo', '.footer', 'blockquote > p:empty', '.link-seesrc').remove
content = at_css('#content')
if content != nil
at_css('#content').remove_attribute('class')
@doc.add_child(at_css('#content').inner_html)
css('h1:not(.title), h2, h3, h4').each do |node|
node.name = node.name.sub(/\d/) { |i| i.to_i + 1 }
end
css('> div.row').remove
if content = at_css('#content')
content.prepend_child at_css('h1.title')
@doc = content
end
css('pre').each do |node|
node['data-language'] = 'nim'
if root_page?
at_css('h1').content = 'Nim Documentation'
end
# remove link from headers
css('h1 > a', 'h2 > a', 'h3 > a', 'h4 > a').each do |node|
node.parent['id'] = node['id']
node.parent.content = node.content
node.parent['id'] = node['id'] if node['id']
node.before(node.children).remove
end
css('a[name]').each do |node|
node.next_element['id'] = node['name']
node.remove
end
css('pre').each do |node|
node.content = node.content.strip
node['data-language'] = 'nim' unless node.content =~ /\A[\w\-\_\:\=\ ]+\z/
end
css('tt').each do |node|
node.name = 'code'
end
css('cite').each do |node|
node.name = 'em'
end
css('.section').each do |node|
node.first_element_child['id'] = node['id'] if node['id']
node.before(node.children).remove
end
css('span.pre').each do |node|
node.before(node.children).remove
end
css('blockquote > pre:only-child', 'blockquote > dl:only-child', 'blockquote > table').each do |node|
node.parent.before(node.parent.children).remove
end
css('a', 'dl', 'table', 'code').remove_attr('class')
css('table').remove_attr('border')
doc
end
end

@ -1,49 +1,62 @@
module Docs
class Nim
class EntriesFilter < Docs::EntriesFilter
def get_type
at_css('h1').content
def get_name
name = at_css('h1').content
name.remove! 'Module '
name.remove! ' User Guide'
name.remove! ' User\'s manual'
name.remove! %r{ \-.*}
name.strip!
name
end
def get_name
at_css('h1').content
def get_type
if name.include?('Tutorial')
'Tutorial'
elsif slug == 'manual'
'Manual'
elsif at_css('h1').content.include?('Module ')
name
else
'Reference'
end
end
def additional_entries
entries = []
if get_name.start_with? 'Module '
module_name = get_name[7..-1]
css('div .section').map do |node|
section_node = node.at_css('h1 a')
if section_node != nil
section_name = section_node.content.strip
items_node = node.at_css('dl.item')
if items_node != nil
items_node.css('dt a').map do |item_node|
item_name = item_node['name']
if item_name.include? ','
item_name = item_name.sub(',', '(') + ')'
end
entries << [module_name + '.' + item_name, item_node.parent['id']]
end
end
if at_css('h1').content.include?('Module ')
css('#toc-list > li > .simple-toc-section').each do |node|
type = node.previous_element.content.strip
node.css('a.reference:not(.reference-toplevel)').each do |n|
n.css('span').remove
name = n.content.strip
name << '()' if (type == 'Procs' || type == 'Templates') && !name.include?('`')
name.remove! '`'
name.prepend "#{self.name}."
id = n['href'].remove('#')
entries << [name, id] unless entries.any? { |e| e[0] == name }
end
end
else
css('h1', 'h2', 'h3').map do |node|
id = node['id']
elsif slug == 'manual'
css('#toc-list > li > a').each do |node|
name = node.content.strip
if id != nil
entries << [name, id]
else
a = node.at_css('a')
if a != nil
id = a['id']
entries << [name, id]
end
next if name.start_with?('About')
id = node['href'].remove('#')
entries << [name, id]
end
css('#toc-list > ul').each do |node|
type = node.previous_element.content.strip
node.css('> li > a').each do |n|
entries << [n.content.strip, n['href'].remove('#'), "Manual: #{type}"]
end
end
end
entries
end
end

@ -2,20 +2,20 @@ module Docs
class Nim < UrlScraper
self.type = 'nim'
self.release = '0.17.0'
self.base_url = 'https://nim-lang.org/docs/'
self.root_path = 'overview.html'
self.links = {
home: 'https://nim-lang.org/',
code: 'https://github.com/nim-lang/Nim'
}
self.base_url = 'https://nim-lang.org/'
self.root_path = 'docs/overview.html'
html_filters.push 'nim/entries', 'nim/clean_html'
options[:skip] = %w(cdn-cgi/l/email-protection docs/theindex.html docs/docgen.txt)
options[:skip] = %w(theindex.html docgen.txt)
options[:attribution] = <<-HTML
&copy; 2006&ndash;2017 Andreas Rumpf<br>
All rights reserved. Licensed under the MIT License.
Licensed under the MIT License.
HTML
end
end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 789 B

After

Width:  |  Height:  |  Size: 283 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 531 B

@ -1 +1 @@
https://nim-lang.org/assets/img/logo.svg
https://github.com/nim-lang/website/tree/master/jekyll/assets/img
Loading…
Cancel
Save