Finish GNU Fortran scraper

pull/384/head
Thibaut Courouble 9 years ago
parent d366e14ea7
commit 6b37efda62

Binary file not shown.

Before

Width:  |  Height:  |  Size: 42 KiB

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 109 KiB

After

Width:  |  Height:  |  Size: 110 KiB

@ -1,13 +1,13 @@
[
[
"2016-04-10",
"New documentation: <a href=\"/browser_support_tables/\">Support tables (caniuse.com)</a>"
"New documentations: <a href=\"/browser_support_tables/\">Support tables (caniuse.com)</a> and <a href=\"/gnu_fortran/\">GNU Fortran</a>"
], [
"2016-03-27",
"New documentation: <a href=\"/typescript/\">TypeScript</a>"
], [
"2016-03-06",
"New documentation: <a href=\"/tensorflow/\">TensorFlow</a>, <a href=\"/haxe/\">Haxe</a> and <a href=\"/ansible/\">Ansible</a>"
"New documentations: <a href=\"/tensorflow/\">TensorFlow</a>, <a href=\"/haxe/\">Haxe</a> and <a href=\"/ansible/\">Ansible</a>"
], [
"2016-02-28",
"New documentations: <a href=\"/codeigniter/\">CodeIgniter</a>, <a href=\"/nginx_lua_module/\">nginx Lua Module</a> and <a href=\"/influxdata/\">InfluxData</a>"

@ -199,6 +199,11 @@ credits = [
'2005-2016 Linus Torvalds and others',
'GPLv2',
'https://raw.githubusercontent.com/git/git/master/COPYING'
], [
'GNU Fortran',
'Free Software Foundation',
'GFDL',
'https://gcc.gnu.org/onlinedocs/gcc-5.3.0/gfortran/GNU-Free-Documentation-License.html'
], [
'Go',
'Google, Inc.',

@ -134,3 +134,4 @@
// Sprite offsets for documentation icons: each rule sets the background-position
// of the icon's :before pseudo-element.
// NOTE(review): offsets appear to advance by 1rem per icon and wrap to the next
// row (-10rem -> -11rem) — presumably a grid sprite sheet; confirm against the sprite image.
._icon-ansible:before { background-position: -8rem -10rem; @extend %darkIconFix !optional; }
._icon-typescript:before { background-position: -9rem -10rem; }
._icon-browser_support_tables:before { background-position: 0rem -11rem; }
._icon-gnu_fortran:before { background-position: -1rem -11rem; }

@ -17,7 +17,7 @@
}
._cordova,
._fortran,
._gnu_fortran,
._grunt,
._haxe,
._influxdata,

@ -1,18 +0,0 @@
module Docs
  class Fortran
    # HTML clean-up pass for the scraped Fortran manual pages.
    class CleanHtmlFilter < Filter
      # Renames every h2/h3/h4 to h1, copies the page anchor name onto the
      # first h1 as its id, strips the `.node` blocks and <br> elements, and
      # returns the document.
      def call
        css('h2', 'h3', 'h4').each { |heading| heading.name = 'h1' }

        # Move page anchor to page title. The title is looked up before the
        # anchor so the lookup order matches the single-expression form.
        title = at_css('h1')
        anchor_name = at_css('.node > a')['name']
        title['id'] = anchor_name

        css('.node', 'br').remove

        doc
      end
    end
  end
end

@ -1,34 +0,0 @@
module Docs
  class Fortran
    # Derives index entries (name and type) from a manual page's <h1>.
    class EntriesFilter < Docs::EntriesFilter
      # Chapter number => type label used for entries in that chapter.
      REPLACE_TYPES = {
        1  => 'Introduction',
        2  => 'GNU Fortran Command Options',
        3  => 'Runtime Environment Variables',
        4  => 'Fortran 2003 and 2008 Status',
        5  => 'Compiler Characteristics',
        6  => 'Extensions',
        7  => 'Mixed Language Programming',
        8  => 'Coarray Programming',
        9  => 'Intrinsic Procedures',
        10 => 'Intrinsic Modules'
      }

      # Leading integer of the <h1> text (0 when the text has no leading number).
      def chapter_number
        at_css('h1').content.to_i
      end

      # A page yields a default entry only when its chapter has a known type
      # and the page contains no `ul.menu` element.
      def include_default_entry?
        REPLACE_TYPES[chapter_number] && !at_css('ul.menu')
      end

      # The <h1> text without its first whitespace-separated token, cut at
      # the first em dash.
      def get_name
        words = at_css('h1').content.split(' ')
        title = words.drop(1).join(' ')
        title.split('—').first
      end

      # Type label looked up by chapter number (nil for unknown chapters).
      def get_type
        REPLACE_TYPES[chapter_number]
      end
    end
  end
end

@ -0,0 +1,39 @@
module Docs
  class GnuFortran
    # HTML clean-up pass for the scraped GNU Fortran manual pages.
    class CleanHtmlFilter < Filter
      # Runs the clean-up steps in a fixed order and returns the document.
      # The anchor steps must run before the `.node` blocks are removed,
      # because they read the anchors inside those blocks.
      def call
        gf_promote_headings
        gf_copy_node_anchors
        gf_mirror_names_as_ids
        gf_convert_samp_to_code
        gf_dedent_pre_blocks
        gf_unwrap_inline_markup

        css('.node', 'br').remove

        doc
      end

      private

      # Shifts h2..h6 up so that the first heading found on the page
      # becomes level 1.
      def gf_promote_headings
        first_heading = at_css('h1, h2, h3, h4, h5')
        shift = first_heading.name[/h(\d)/, 1].to_i - 1

        css('h2, h3, h4, h5, h6').each do |heading|
          heading.name = heading.name.sub(/\d/) { |level| level.to_i - shift }
        end
      end

      # Gives the element following each `.node` block the id of that
      # block's named anchor.
      def gf_copy_node_anchors
        css('.node > a[name]').each do |anchor|
          anchor.parent.next_element['id'] = anchor['name']
        end
      end

      # Copies every anchor's name attribute into its id attribute.
      def gf_mirror_names_as_ids
        css('a[name]').each do |anchor|
          anchor['id'] = anchor['name']
        end
      end

      # Turns <samp> elements whose only element child is a <span> into
      # <code>, replacing the span with its own children.
      def gf_convert_samp_to_code
        css('samp > span:first-child:last-child').each do |span|
          span.parent.name = 'code'
          span.before(span.children).remove
        end
      end

      # Removes common indentation and surrounding whitespace from <pre> content.
      def gf_dedent_pre_blocks
        css('pre').each do |pre|
          pre.inner_html = pre.inner_html.strip_heredoc.strip
        end
      end

      # Replaces `dt > em`, <acronym> and <dfn> elements with their children.
      def gf_unwrap_inline_markup
        css('dt > em', 'acronym', 'dfn').each do |wrapper|
          wrapper.before(wrapper.children).remove
        end
      end
    end
  end
end

@ -0,0 +1,41 @@
module Docs
  class GnuFortran
    # Derives index entries (name and type) from a manual page's <h1>, using
    # chapter titles collected from the root page's table of contents.
    class EntriesFilter < Docs::EntriesFilter
      # Chapter number => chapter title. Filled in by #detect_chapters when a
      # filter instance is created for the root page, then read by #get_type.
      # It is a constant, so the data is shared by every instance.
      TYPE_BY_CHAPTER = { }

      def initialize(*)
        super
        return unless root_page?

        detect_chapters
      end

      # The <h1> text without its first whitespace-separated token, cut at
      # the first em dash.
      def get_name
        words = at_css('h1').content.split(' ')
        title = words.drop(1).join(' ')
        title.split('—').first
      end

      # "<chapter number>. <chapter title>"; the title part is empty when the
      # chapter has not been recorded in TYPE_BY_CHAPTER.
      def get_type
        number = chapter_number
        "#{number}. #{TYPE_BY_CHAPTER[number]}"
      end

      # Only pages without a `ul.menu` element get a default entry.
      def include_default_entry?
        at_css('ul.menu').nil?
      end

      private

      # Reads the top-level table-of-contents links and records the title of
      # every numbered chapter, dropping the 'GNU Fortran ' prefix and
      # anything from the first colon onwards.
      def detect_chapters
        css('.contents > ul > li > a').each do |link|
          label = link.content
          number = label.strip.to_i
          next if number <= 0

          title = label.split(' ').drop(1).join(' ')
          title = title.gsub('GNU Fortran ', '').gsub(%r{:.*}, '')

          TYPE_BY_CHAPTER[number] = title # YOLO
        end
      end

      # Leading integer of the <h1> text (0 when the text has no leading number).
      def chapter_number
        at_css('h1').content.to_i
      end
    end
  end
end

@ -1,32 +0,0 @@
module Docs
# Scraper definition for the GNU Fortran manual, built on FileScraper.
class Fortran < FileScraper
self.name = 'GNU Fortran'
self.slug = 'fortran'
self.type = 'fortran'
# release is assigned before base_url because the URL interpolates it.
self.release = '5.3.0'
self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gfortran/"
self.dir = ''
self.root_path = 'index.html'
self.links = {
home: 'https://gcc.gnu.org/fortran/'
}
# Filters appended to the HTML pipeline, in this order.
html_filters.push 'fortran/clean_html', 'fortran/entries'
# Presumably matched against page paths to exclude those pages from the
# scrape — confirm against the scraper's handling of :skip_patterns.
options[:skip_patterns] = [
/Funding/,
/Projects/,
/Copying/,
/License/,
/Proposed/,
/Contribut/,
/Index/
]
# Attribution HTML stored in the scraper options.
options[:attribution] = <<-HTML
&copy; Free Software Foundation<br>
Licensed under the GNU Free Documentation License version 1.3.
HTML
end
end

@ -0,0 +1,40 @@
module Docs
# Scraper definition for the GNU Fortran manual, built on FileScraper, with
# one `version` block per supported major release.
class GnuFortran < FileScraper
self.name = 'GNU Fortran'
self.slug = 'gnu_fortran'
self.type = 'gnu_fortran'
# NOTE(review): absolute path on a developer machine; the scraper will
# presumably only work where this directory exists — confirm how FileScraper
# resolves `dir`.
# NOTE(review): neither `version` block below sets its own `dir`, so both
# versions appear to read from this single directory — verify that the
# 4.x docs are not built from the 5.x files.
self.dir = '/Users/Thibaut/DevDocs/Docs/gfortran'
self.root_path = 'index.html'
self.links = {
home: 'https://gcc.gnu.org/fortran/'
}
# Filters appended to the HTML pipeline, in this order.
html_filters.push 'gnu_fortran/clean_html', 'gnu_fortran/entries'
# Presumably matched against page paths to exclude those pages from the
# scrape — confirm against the scraper's handling of :skip_patterns.
options[:skip_patterns] = [
/Funding/,
/Projects/,
/Copying/,
/License/,
/Proposed/,
/Contribut/,
/Index/
]
# Attribution HTML stored in the scraper options.
options[:attribution] = <<-HTML
&copy; Free Software Foundation<br>
Licensed under the GNU Free Documentation License, Version 1.3.
HTML
# In each version block, release is assigned before base_url because the
# URL interpolates it.
version '5' do
self.release = '5.3.0'
self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gfortran/"
end
version '4' do
self.release = '4.9.3'
self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gfortran/"
end
end
end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 372 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 656 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Loading…
Cancel
Save