Finish Crystal scraper

pull/447/merge
Thibaut Courouble 9 years ago
parent 0324bdca48
commit 3036c712e9

Binary file not shown.

Before

Width:  |  Height:  |  Size: 45 KiB

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 118 KiB

After

Width:  |  Height:  |  Size: 118 KiB

@ -7,7 +7,7 @@ class app.collections.Types extends app.Collection
(result[@_groupFor(type)] ||= []).push(type)
result.filter (e) -> e.length > 0
GUIDES_RGX = /(^|[\s\(])(guides?|tutorials?|reference|playbooks|getting\ started|manual)($|[\s\):])/i
GUIDES_RGX = /(^|[\s\(])(guides?|tutorials?|reference|book|getting\ started|manual)($|[\s\):])/i
_groupFor: (type) ->
if GUIDES_RGX.test(type.name)

@ -1,5 +1,8 @@
[
[
"2016-07-24",
"New documentation: <a href=\"/crystal/\">Crystal</a>"
], [
"2016-07-03",
"New documentations: <a href=\"/cmake/\">CMake</a> and <a href=\"/matplotlib/\">Matplotlib</a>"
], [

@ -160,6 +160,11 @@ credits = [
'2005-2016 Mozilla Developer Network and individual contributors',
'CC BY-SA',
'https://creativecommons.org/licenses/by-sa/2.5/'
], [
'Crystal',
'2012-2016 Manas Technology Solutions',
'Apache',
'https://raw.githubusercontent.com/crystal-lang/crystal/master/LICENSE'
], [
'D3.js',
'2010-2016 Michael Bostock',

@ -1,6 +0,0 @@
#= require views/pages/base
class app.views.CrystalPage extends app.views.BasePage
prepare: ->
@highlightCode @findAllByTag('pre'), 'ruby'
return

@ -10,6 +10,7 @@ app.views.AngularPage =
app.views.AngularjsPage =
app.views.CakephpPage =
app.views.ChaiPage =
app.views.CrystalPage =
app.views.DrupalPage =
app.views.ElixirPage =
app.views.EmberPage =

@ -39,6 +39,7 @@
'pages/cakephp',
'pages/clojure',
'pages/coffeescript',
'pages/crystal',
'pages/d3',
'pages/dojo',
'pages/drupal',

@ -39,6 +39,7 @@
'pages/cakephp',
'pages/clojure',
'pages/coffeescript',
'pages/crystal',
'pages/d3',
'pages/dojo',
'pages/drupal',

@ -130,6 +130,7 @@
._icon-vue:before { background-position: -3rem -8rem; }
._icon-opentsdb:before { background-position: -4rem -8rem; }
._icon-q:before { background-position: -5rem -8rem; }
._icon-crystal:before { background-position: -6rem -8rem; @extend %darkIconFix !optional; }
._icon-react_native:before { background-position: 0 -9rem; }
._icon-phalcon:before { background-position: -1rem -9rem; }
._icon-matplotlib:before { background-position: -2rem -9rem; }

@ -1,7 +1,26 @@
._crystal {
@extend %simple;
blockquote {
@extend %note;
.signature { @extend %code; }
a.signature, .superclass > a { @extend %label; }
.entry-detail { margin-top: 1em; }
.view-source { float: right; }
.superclass-hierarchy {
list-style: none;
padding: 0;
overflow: hidden;
}
li.superclass {
float: left;
margin: 0 .5em 0 0;
padding: 0;
}
li.superclass + li.superclass:before {
content: '<';
margin-right: .5em;
}
}

@ -2,18 +2,47 @@ module Docs
class Crystal
class CleanHtmlFilter < Filter
def call
  # Pages whose slug starts with 'docs' are cleaned by #book;
  # every other page is cleaned by #api.
  if slug.start_with?('docs')
    book
  else
    api
  end

  # The filter always returns the document, whichever branch ran.
  doc
end
# Remove class attr from div and child nodes
css("div").each do |node|
node.xpath("//@class").remove
def book
  # Narrow the document down to the page's main section.
  @doc = at_css('.page-inner > section')

  css('pre > code').each do |code|
    pre = code.parent
    css_class = code['class']

    # Copy the language out of a "lang-xxx" class onto the enclosing <pre>.
    pre['data-language'] = css_class[/lang-(\w+)/, 1] if css_class

    # Re-assign the <pre>'s own content to itself; presumably this collapses
    # the nested <code> markup into plain text -- confirm against the node API.
    pre.content = pre.content
  end
end
def api
@doc = at_css('#main-content')
at_css('h1 + p').remove if root_page?
css('.method-permalink', '.doc + br', 'hr', 'a > br', 'div + br').remove
# Set id attributes on <h1> instead of an empty <a>
css("h1").each do |node|
node["id"] = node.at_css("a")["id"]
css('pre > code').each do |node|
node.parent['data-language'] = 'crystal'
node.parent.content = node.parent.content
end
doc
css('span').each do |node|
node.before(node.children).remove
end
css('div.signature').each do |node|
node.name = 'h3'
node.inner_html = node.inner_html.strip
end
css('.entry-detail a:contains("View source")').each do |node|
node['class'] = 'view-source'
node.content = 'Source'
parent = node.parent
node.ancestors('.entry-detail').first.at_css('h3') << node
parent.remove
end
end
end
end

@ -1,21 +1,74 @@
module Docs
class Crystal
class EntriesFilter < Docs::EntriesFilter
# Set the name to h1 content
def get_name
node = at_css("h1")
node.content.strip
if slug.start_with?('docs/')
name = at_css('.page-inner h1').content.strip
if slug.start_with?('docs/syntax_and_semantics')
name.prepend "#{slug.split('/')[2].titleize}: " if slug.split('/').length > 3
elsif slug.split('/').length > 1
chapter = slug.split('/')[1].titleize.capitalize
name.prepend "#{chapter}: " unless name == chapter
end
name
else
name = at_css('h1').children.last.content.strip
name.remove! %r{\(.*\)}
name
end
end
# Crystal types from url slug
def get_type
slug["blob/master/"] = ""
object, method = *slug.split("/")
object = object.capitalize
method ? object : "Index"
return if root_page?
if slug.start_with?('docs/syntax_and_semantics')
'Book: Language'
elsif slug.start_with?('docs/')
'Book'
else
hierarchy = at_css('.superclass-hierarchy')
if hierarchy && hierarchy.content.include?('Exception')
'Exceptions'
else
type = at_css('#types-list > ul > .current > a').content
type = 'Float' if type.start_with?('Float')
type = 'Int' if type.start_with?('Int')
type = 'UInt' if type.start_with?('UInt')
type = 'TCP' if type.start_with?('TCP')
type
end
end
end
def additional_entries
  # Only pages whose slug starts with 'api' contribute extra entries.
  return [] unless slug.start_with?('api')

  # On the "toplevel" page entry names are left unprefixed.
  toplevel = slug.end_with?('toplevel')

  # Selector for each group of entries, paired with the separator placed
  # between the page name and the entry name. Order determines output order.
  groups = [
    ['.entry-detail[id$="class-method"]', '.'],
    ['.entry-detail[id$="instance-method"]', '#'],
    ['.entry-detail[id$="macro"]', ' ']
  ]

  groups.each_with_object([]) do |(selector, separator), list|
    css(selector).each do |detail|
      label = detail.at_css('.signature > strong').content.strip
      label.prepend "#{self.name}#{separator}" unless toplevel

      # Strip any <...> segment from the id and write it back to the node,
      # so the stored anchor and the element's id stay in sync.
      anchor = detail['id'] = detail['id'].remove(/<.+?>/)

      # Skip an entry whose name repeats the one just added.
      previous = list.last
      list << [label, anchor] unless previous && previous[0] == label
    end
  end
end
end
end
end

@ -1,22 +1,37 @@
module Docs
class Crystal < UrlScraper
self.name = "Crystal"
self.type = "crystal"
self.base_url = "https://github.com/crystal-lang/crystal-book"
self.initial_paths = %w(/blob/master/SUMMARY.md)
self.type = 'crystal'
self.release = '0.18.7'
self.base_url = 'https://crystal-lang.org/'
self.root_path = 'api/0.18.7/index.html'
self.initial_paths = %w(docs/index.html)
self.links = {
home: "https://crystal-lang.org/",
code: "https://github.com/crystal-lang/crystal"
home: 'https://crystal-lang.org/',
code: 'https://github.com/crystal-lang/crystal'
}
html_filters.push "crystal/clean_html", "crystal/entries"
html_filters.push 'crystal/entries', 'crystal/clean_html'
options[:container] = ".entry-content"
options[:only_patterns] = [/\/blob\/master\/.*\.md/]
options[:skip] = %w(/blob/master/README.md)
options[:only_patterns] = [/\Adocs\//, /\Aapi\/#{release}\//]
options[:attribution] = <<-HTML
<a href="http://creativecommons.org/publicdomain/zero/1.0/">CC0</a>
HTML
options[:replace_paths] = {
"api/#{release}/" => "api/#{release}/index.html",
'docs/' => 'docs/index.html'
}
options[:attribution] = ->(filter) {
if filter.slug.start_with?('docs')
<<-HTML
To the extent possible under law, the persons who contributed to this work
have waived<br>all copyright and related or neighboring rights to this work
by associating CC0 with it.
HTML
else
<<-HTML
&copy; 2012&ndash;2016 Manas Technology Solutions.<br>
Licensed under the Apache License, Version 2.0.
HTML
end
}
end
end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 222 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 734 B

After

Width:  |  Height:  |  Size: 535 B

Loading…
Cancel
Save