Finish Vulkan scraper

pull/669/head
Thibaut Courouble 7 years ago
parent 2f62bca5e4
commit 0df3a77558

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

After

Width:  |  Height:  |  Size: 32 KiB

@ -1,7 +1,7 @@
[ [
[ [
"2017-09-03", "2017-09-03",
"New documentation: <a href=\"/nim/\">Nim</a>" "New documentations: <a href=\"/nim/\">Nim</a> and <a href=\"/vulkan/\">Vulkan</a>"
], [ ], [
"2017-07-23", "2017-07-23",
"New documentation: <a href=\"/godot/\">Godot</a>" "New documentation: <a href=\"/godot/\">Godot</a>"

@ -633,6 +633,11 @@ credits = [
'2013-2017 Evan You, Vue.js contributors', '2013-2017 Evan You, Vue.js contributors',
'MIT', 'MIT',
'https://raw.githubusercontent.com/vuejs/vue/master/LICENSE' 'https://raw.githubusercontent.com/vuejs/vue/master/LICENSE'
], [
'Vulkan',
'2014-2017 Khronos Group Inc.<br>Vulkan and the Vulkan logo are registered trademarks of the Khronos Group Inc.',
'CC BY',
'https://creativecommons.org/licenses/by/4.0/'
], [ ], [
'webpack', 'webpack',
'JS Foundation and other contributors', 'JS Foundation and other contributors',

@ -174,3 +174,4 @@
._icon-falcon:before { background-position: -3rem -2rem; @extend %doc-icon-2; } ._icon-falcon:before { background-position: -3rem -2rem; @extend %doc-icon-2; }
._icon-godot:before { background-position: -4rem -2rem; @extend %doc-icon-2; } ._icon-godot:before { background-position: -4rem -2rem; @extend %doc-icon-2; }
._icon-nim:before { background-position: -5rem -2rem; @extend %doc-icon-2; @extend %darkIconFix !optional; } ._icon-nim:before { background-position: -5rem -2rem; @extend %doc-icon-2; @extend %darkIconFix !optional; }
._icon-vulkan:before { background-position: -6rem -2rem; @extend %doc-icon-2; @extend %darkIconFix !optional; }

@ -43,6 +43,7 @@
._requirejs, ._requirejs,
._typescript, ._typescript,
._vagrant, ._vagrant,
._vulkan,
._yarn { ._yarn {
@extend %simple; @extend %simple;
} }

@ -2,11 +2,51 @@ module Docs
class Vulkan class Vulkan
class CleanHtmlFilter < Filter class CleanHtmlFilter < Filter
def call def call
# Copyright is already added via attribution option at_css('#_copyright').parent.remove
css('#_copyright').map do |node|
node.parent.remove css('.sect1', '.sectionbody', '.sect2', '.sect3', 'div.paragraph', 'li > p:only-child', 'dd > p:only-child', 'span', '.ulist').each do |node|
node.before(node.children).remove
end
css('a[id]:empty').each do |node|
node.parent['id'] ||= node['id']
node.remove
end
css('.listingblock').each do |node|
node['data-language'] = node.at_css('[data-lang]')['data-lang']
node.content = node.content.strip
node.name = 'pre'
node.remove_attribute('class')
end
css('.sidebarblock').each do |node|
node.name = 'blockquote'
node.at_css('.title').name = 'h5'
node.css('div').each { |n| n.before(n.children).remove }
node.remove_attribute('class')
end end
css('.admonitionblock').each do |node|
node.name = 'blockquote'
node.children = node.at_css('.content').children
node.at_css('.title').name = 'h5'
node.remove_attribute('class')
end
css('table').each do |node|
node.before %(<div class="_table"></div>)
node.previous_element << node
end
css('strong', 'dt', 'a').remove_attr('class')
css('h4 + h4').each do |node|
node.previous_element.remove
end
css('p:contains("This page is extracted from the Vulkan Specification. Fixes and changes should be made to the Specification, not directly.")').remove
doc doc
end end
end end

@ -1,39 +1,12 @@
module Docs module Docs
class Vulkan class Vulkan
class EntriesFilter < Docs::EntriesFilter class EntriesFilter < Docs::EntriesFilter
def get_name
name = at_css('h1').content.strip
name
end
def get_type
# As only documentation is single-paged, hardcode type
initial_page? ? 'Vulkan' : 'Specifications'
end
def include_default_entry?
# additional_entries is responsible to extract relevant entries
false
end
def additional_entries def additional_entries
if initial_page? css('.sect1').each_with_object [] do |node, entries|
# We pack each subsections into their corresponding category for apispec.html type = node.at_css('h2').content
subsections = css('.sect2').map do |node|
# Parse '.sect1' parent, to know what is the entry's type node.css('h3').each do |n|
parent_node = node.parent.parent entries << [n.content, n['id'], type]
# Type is the parent's h2 header
type = parent_node.at_css('h2').content.strip
# Entry node is the one under h3
header_node = node.at_css('h3')
[header_node.content, header_node['id'], type]
end
else
# We create a new category for vkspec.html page
main_sections = css('.sect1').map do |node|
# Entry node is the one under h2
header_node = node.at_css('h2')
[header_node.content, header_node['id'], 'Specifications']
end end
end end
end end

@ -1,34 +1,24 @@
module Docs module Docs
# class Vulkan < FileScraper
class Vulkan < UrlScraper class Vulkan < UrlScraper
self.name = 'Vulkan' self.name = 'Vulkan'
self.slug = 'vk'
self.type = 'vulkan' self.type = 'vulkan'
self.release = '1.0.59'
self.base_url = 'https://www.khronos.org/registry/vulkan/specs/1.0/'
self.root_path = 'apispec.html'
self.links = { self.links = {
home: 'https://www.khronos.org/registry/vulkan/specs/', home: 'https://www.khronos.org/vulkan/'
code: 'https://github.com/KhronosGroup/Vulkan-Docs'
} }
self.root_path = 'apispec.html' html_filters.push 'vulkan/entries', 'vulkan/clean_html', 'title'
self.release = '1.0.56'
# self.dir = '/mnt/d/theblackunknown/Documents/GitHub/Vulkan-Docs/out/1.0/'
self.base_url = 'https://www.khronos.org/registry/vulkan/specs/1.0/'
html_filters.push 'vulkan/entries', 'vulkan/clean_html'
# in apispec.html, skip #header and #footer options[:skip_links] = true
options[:container] = '#content' options[:container] = '#content'
options[:root_title] = 'Vulkan API Reference'
# If we only want API, we should skip this one
options[:skip] = %w(
html/vkspec.html
)
options[:attribution] = <<-HTML options[:attribution] = <<-HTML
Copyright &copy; 2014-2017 Khronos Group. <br> &copy; 2014&ndash;2017 Khronos Group Inc.<br>
This work is licensed under a Creative Commons Attribution 4.0 International License Licensed under the Creative Commons Attribution 4.0 International License.<br>
Vulkan and the Vulkan logo are registered trademarks of the Khronos Group Inc.
HTML HTML
end end
end end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 430 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 926 B

Loading…
Cancel
Save