scala: finish scraper and filters

pull/705/head
Jasper van Merle 6 years ago
parent 74323fda3e
commit 6614375671

@ -2,97 +2,107 @@ module Docs
class Scala class Scala
class CleanHtmlFilter < Filter class CleanHtmlFilter < Filter
def call def call
@doc = at_css('#content')
always always
add_title
if slug == 'index' doc
root
else
other
end
end end
def always def always
# remove deprecated sections # Remove deprecated sections
css('.members').each do |members| css('.members').each do |members|
header = members.at_css('h3') header = members.at_css('h3')
members.remove if header.text.downcase.include? 'deprecate' members.remove if header.text.downcase.include? 'deprecate'
end end
# Some of this is just for 2.12
# These are things that provide interactive features, which are not supported yet. css('#mbrsel, #footer').remove
css('#subpackage-spacer, #search, #mbrsel, .diagram-btn').remove
css('#footer').remove css('.diagram-container').remove
css('.toggleContainer').remove css('.toggleContainer > .toggle').each do |node|
title = node.at_css('span')
next if title.nil?
content = node.at_css('.hiddenContent')
next if content.nil?
title.name = 'dt'
content.remove_attribute('class')
content.remove_attribute('style')
content.name = 'dd'
attributes = at_css('.attributes')
unless attributes.nil?
title.parent = attributes
content.parent = attributes
end
end
signature = at_css('#signature') signature = at_css('#signature')
signature.replace %Q| signature.replace "<h2 id=\"signature\">#{signature.inner_html}</h2>"
<h2 id="signature">#{signature.inner_html}</h2>
|
css('div.members > h3').each do |node| css('div.members > h3').each do |node|
change_tag! 'h2', node node.name = 'h2'
end end
css('div.members > ol').each do |list| css('div.members > ol').each do |list|
list.css('li').each do |li| list.css('li').each do |li|
h3 = doc.document.create_element 'h3' h3 = doc.document.create_element 'h3'
h3['id'] = li['name'].rpartition('#').last unless li['name'].nil?
li.prepend_child h3 li.prepend_child h3
li.css('.shortcomment').remove li.css('.shortcomment').remove
modifier = li.at_css('.modifier_kind') modifier = li.at_css('.modifier_kind')
modifier.parent = h3 if modifier modifier.parent = h3 unless modifier.nil?
kind = li.at_css('.modifier_kind .kind')
kind.content = kind.content + ' ' unless kind.nil?
symbol = li.at_css('.symbol') symbol = li.at_css('.symbol')
symbol.parent = h3 if symbol symbol.parent = h3 unless symbol.nil?
li.swap li.children li.swap li.children
end end
list.swap list.children list.swap list.children
end end
pres = css('.fullcomment pre, .fullcommenttop pre') css('.fullcomment pre, .fullcommenttop pre').each do |pre|
pres.each do |pre|
pre['data-language'] = 'scala' pre['data-language'] = 'scala'
pre.content = pre.content
end end
pres.add_class 'language-scala'
doc
end
def root
css('#filter').remove # these are filters to search through the types and packages
css('#library').remove # these are icons at the top
doc
end
def other # Sections of the documentation which do not seem useful
# these are sections of the documentation which do not seem useful
%w(#inheritedMembers #groupedMembers .permalink .hiddenContent .material-icons).each do |selector| %w(#inheritedMembers #groupedMembers .permalink .hiddenContent .material-icons).each do |selector|
css(selector).remove css(selector).remove
end end
# This is the kind of thing we have, class, object, trait # Things that are not shown on the site, like deprecated members
kind = at_css('.modifier_kind .kind').content css('li[visbl=prt]').remove
# this image replacement doesn't do anything on 2.12 docs end
img = at_css('img')
img.replace %Q|<span class="img_kind">#{kind}</span>| unless img.nil? def add_title
class_to_add = kind == 'object' ? 'value': 'type' css('.permalink').remove
# for 2.10, 2.11, the kind class is associated to the body. we have to definition = at_css('#definition')
# add it somewhere, so we do that with the #definition. return if definition.nil?
definition = css('#definition')
definition.css('.big_circle').remove
definition.add_class class_to_add
# this is something that is not shown on the site, such as deprecated members type_full_name = {a: 'Annotation', c: 'Class', t: 'Trait', o: 'Object', p: 'Package'}
css('li[visbl=prt]').remove type = type_full_name[definition.at_css('.big-circle').text.to_sym]
name = CGI.escapeHTML definition.at_css('h1').text
doc package = definition.at_css('#owner').text rescue ''
end package = package + '.' unless name.empty? || package.empty?
private other = definition.at_css('.morelinks').dup
other_content = other ? "<h3>#{other.to_html}</h3>" : ''
def change_tag!(new_tag, node) title_content = root_page? ? 'Package root' : "#{type} #{package}#{name}".strip
node.replace %Q|<#{new_tag}>#{node.inner_html}</#{new_tag}>| title = "<h1>#{title_content}</h1>"
definition.replace title + other_content
end end
end end
end end

@ -1,32 +0,0 @@
require 'cgi/escape'

module Docs
  class Scala
    # Rewrites the `#definition` header of a page into a compact title:
    # `<h1><small>kind package.</small>Name</h1>`, followed by the page's
    # `.morelinks` node wrapped in an `<h3>` when there is one.
    class CleanHtml210Filter < Filter
      # Replaces the `#definition` node (when the page has one) with the
      # generated title markup and returns `doc`.
      def call
        definition = at_css('#definition')
        definition.replace(title_markup(definition)) if definition
        doc
      end

      private

      # Builds the markup that replaces +definition+.
      #
      # Pieces that are missing from the page are rendered as empty strings
      # instead of raising, so one unusual page does not abort the filter.
      def title_markup(definition)
        type = text_of(definition, '.img_kind')
        name = text_of(definition, 'h1').strip
        package = text_of(definition, '#owner')
        # Only add the separator when there is a package in front of the name.
        package += '.' unless name.empty? || package.empty? || name.start_with?('root')

        links = definition.at_css('.morelinks')
        other_content = links ? "<h3>#{links.to_html}</h3>" : ''

        # The values are plain text, so they are escaped before being put back
        # into markup; a name such as `<:<` would otherwise be parsed as a tag.
        heading = "<h1><small>#{CGI.escapeHTML(type)} #{CGI.escapeHTML(package)}</small>" \
                  "#{CGI.escapeHTML(name)}</h1>"
        "\n#{heading}\n#{other_content}\n"
      end

      # Returns the text of the first node under +scope+ matching +selector+,
      # or an empty string when nothing matches.
      def text_of(scope, selector)
        node = scope.at_css(selector)
        node ? node.text : ''
      end

      # Replaces +node+ with a +new_tag+ element that keeps the node's inner HTML.
      def change_tag!(new_tag, node)
        node.replace %Q|<#{new_tag}>#{node.inner_html}</#{new_tag}>|
      end
    end
  end
end

@ -1,36 +0,0 @@
require 'cgi/escape'

module Docs
  class Scala
    # Rewrites the `#definition` header of a page into a compact title:
    # `<h1><small>kind package.</small>Name</h1>`, followed by the page's
    # `.morelinks` node wrapped in an `<h3>` when there is one.
    class CleanHtml212Filter < Filter
      # Maps the letter shown in the `.big-circle` badge to the kind it stands for.
      TYPE_FULL_NAMES = {
        'c' => 'class',
        't' => 'trait',
        'o' => 'object',
        'p' => 'package'
      }.freeze

      # Removes every `.permalink` node, replaces the `#definition` node (when
      # the page has one) with the generated title markup, and returns `doc`.
      def call
        css('.permalink').remove
        definition = at_css('#definition')
        definition.replace(title_markup(definition)) if definition
        doc
      end

      private

      # Builds the markup that replaces +definition+.
      #
      # Pieces that are missing from the page are rendered as empty strings
      # instead of raising; an unknown badge letter yields an empty kind.
      def title_markup(definition)
        type = TYPE_FULL_NAMES.fetch(text_of(definition, '.big-circle'), '')
        name = text_of(definition, 'h1')
        package = text_of(definition, '#owner')
        # Only add the separator when there is a package in front of the name.
        package += '.' unless name.empty? || package.empty?

        links = definition.at_css('.morelinks')
        other_content = links ? "<h3>#{links.to_html}</h3>" : ''

        # The values are plain text, so they are escaped before being put back
        # into markup; a name such as `<:<` would otherwise be parsed as a tag.
        heading = "<h1><small>#{type} #{CGI.escapeHTML(package)}</small>" \
                  "#{CGI.escapeHTML(name)}</h1>"
        "\n#{heading}\n#{other_content}\n"
      end

      # Returns the text of the first node under +scope+ matching +selector+,
      # or an empty string when nothing matches.
      def text_of(scope, selector)
        node = scope.at_css(selector)
        node ? node.text : ''
      end

      # Replaces +node+ with a +new_tag+ element that keeps the node's inner HTML.
      def change_tag!(new_tag, node)
        node.replace %Q|<#{new_tag}>#{node.inner_html}</#{new_tag}>|
      end
    end
  end
end

@ -1,14 +1,30 @@
module Docs module Docs
class Scala class Scala
class EntriesFilter < Docs::EntriesFilter class EntriesFilter < Docs::EntriesFilter
REPLACEMENTS = {
'$eq' => '=',
'$colon' => ':',
'$less' => '<',
}
def get_name def get_name
# this first condition is mainly for scala 212 docs, which
# have their package listing as index.html
if is_package? if is_package?
symbol = at_css('#definition h1') symbol = at_css('#definition h1')
symbol ? symbol.text.gsub(/\W+/, '') : "package" symbol ? symbol.text.gsub(/\W+/, '') : "package"
else else
slug.split('/').last name = slug.split('/').last
# Some objects have inner objects, show ParentObject$.ChildObject$ instead of ParentObject$$ChildObject$
name = name.gsub('$$', '$.')
# If a dollar sign is used as separator between two characters, replace it with a dot
name = name.gsub(/([^$.])\$([^$.])/, '\1.\2')
REPLACEMENTS.each do |key, value|
name = name.gsub(key, value)
end
name
end end
end end
@ -26,6 +42,31 @@ module Docs
true true
end end
def additional_entries
entries = []
full_name = "#{type}.#{name}".remove('$')
css(".members li[name^=\"#{full_name}\"]").each do |node|
# Ignore packages
kind = node.at_css('.modifier_kind > .kind')
next if !kind.nil? && kind.content == 'package'
# Ignore deprecated members
next unless node.at_css('.symbol > .name.deprecated').nil?
id = node['name'].rpartition('#').last
member_name = node.at_css('.name')
# Ignore members only existing of hashtags, we can't link to that
next if member_name.nil? || member_name.content.strip.remove('#').blank?
member = "#{name}.#{member_name.content}()"
entries << [member, id]
end
entries
end
private private
# For the package name, we use the slug rather than parsing the package # For the package name, we use the slug rather than parsing the package
@ -40,7 +81,6 @@ module Docs
end end
def parent_package def parent_package
name = package_name
parent = package_drop_last(package_name.split('.')) parent = package_drop_last(package_name.split('.'))
parent.empty? ? '_root_' : parent parent.empty? ? '_root_' : parent
end end

@ -1,80 +1,60 @@
module Docs module Docs
class Scala < FileScraper class Scala < FileScraper
include FixInternalUrlsBehavior self.name = 'Scala'
self.name = 'scala'
self.type = 'scala' self.type = 'scala'
self.links = { self.links = {
home: 'http://www.scala-lang.org/', home: 'http://www.scala-lang.org/',
code: 'https://github.com/scala/scala' code: 'https://github.com/scala/scala'
} }
version '2.12 Library' do options[:container] = '#content-container'
self.release = '2.12.3' options[:attribution] = <<-HTML
self.dir = '/Users/Thibaut/DevDocs/Docs/Scala212/api/scala-library' # https://downloads.lightbend.com/scala/2.12.3/scala-docs-2.12.3.zip &copy; 2002-2019 EPFL, with contributions from Lightbend.
self.base_url = 'http://www.scala-lang.org/api/2.12.3/' HTML
# https://downloads.lightbend.com/scala/2.13.0/scala-docs-2.13.0.zip
# Extract api/scala-library into docs/scala~2.13_library
version '2.13 Library' do
self.release = '2.13.0'
self.base_url = 'https://www.scala-lang.org/api/2.13.0/'
self.root_path = 'index.html' self.root_path = 'index.html'
options[:attribution] = <<-HTML
Scala programming documentation. Copyright (c) 2003-2017 <a html_filters.push 'scala/entries', 'scala/clean_html'
href="http://www.epfl.ch" target="_blank">EPFL</a>, with contributions from <a
href="http://www.lightbend.com" target="_blank">Lightbend</a>.
HTML
html_filters.push 'scala/entries', 'scala/clean_html', 'scala/clean_html_212'
end end
version '2.12 Reflection' do # https://downloads.lightbend.com/scala/2.13.0/scala-docs-2.13.0.zip
self.release = '2.12.3' # Extract api/scala-reflect into docs/scala~2.13_reflection
self.dir = '/Users/Thibaut/DevDocs/Docs/Scala212/api/scala-reflect' # https://downloads.lightbend.com/scala/2.12.3/scala-docs-2.12.3.zip version '2.13 Reflection' do
self.base_url = 'http://www.scala-lang.org/api/2.12.3/scala-reflect/' self.release = '2.13.0'
self.base_url = 'https://www.scala-lang.org/api/2.13.0/scala-reflect/'
self.root_path = 'index.html' self.root_path = 'index.html'
options[:attribution] = <<-HTML
Scala programming documentation. Copyright (c) 2003-2017 <a html_filters.push 'scala/entries', 'scala/clean_html'
href="http://www.epfl.ch" target="_blank">EPFL</a>, with contributions from <a
href="http://www.lightbend.com" target="_blank">Lightbend</a>.
HTML
html_filters.push 'scala/entries', 'scala/clean_html', 'scala/clean_html_212'
end end
version '2.11 Library' do # https://downloads.lightbend.com/scala/2.12.6/scala-docs-2.12.6.zip
self.release = '2.11.8' # Extract api/scala-library into docs/scala~2.12_library
self.dir = '/Users/Thibaut/DevDocs/Docs/Scala211/api/scala-library' # https://downloads.lightbend.com/scala/2.11.8/scala-docs-2.11.8.zip version '2.12 Library' do
self.base_url = 'http://www.scala-lang.org/api/2.11.8/' self.release = '2.12.6'
self.root_path = 'package.html' self.base_url = 'https://www.scala-lang.org/api/2.12.6/'
options[:skip_patterns] = [/^index.html/, /index\/index-/] self.root_path = 'index.html'
options[:attribution] = <<-HTML
Scala programming documentation. Copyright (c) 2003-2016 <a html_filters.push 'scala/entries', 'scala/clean_html'
href="http://www.epfl.ch" target="_blank">EPFL</a>, with contributions from <a
href="http://www.lightbend.com" target="_blank">Lightbend</a>.
HTML
html_filters.push 'scala/entries', 'scala/clean_html', 'scala/clean_html_210'
end end
version '2.11 Reflection' do # https://downloads.lightbend.com/scala/2.12.6/scala-docs-2.12.6.zip
self.release = '2.11.8' # Extract api/scala-reflect into docs/scala~2.12_reflection
self.dir = '/Users/Thibaut/DevDocs/Docs/Scala211/api/scala-reflect' # https://downloads.lightbend.com/scala/2.11.8/scala-docs-2.11.8.zip version '2.12 Reflection' do
self.base_url = 'http://www.scala-lang.org/api/2.11.8/scala-reflect/' self.release = '2.12.6'
self.root_path = 'package.html' self.base_url = 'https://www.scala-lang.org/api/2.12.6/scala-reflect/'
options[:skip_patterns] = [/^index.html/, /index\/index-/] self.root_path = 'index.html'
options[:attribution] = <<-HTML
Scala programming documentation. Copyright (c) 2003-2016 <a html_filters.push 'scala/entries', 'scala/clean_html'
href="http://www.epfl.ch" target="_blank">EPFL</a>, with contributions from <a
href="http://www.lightbend.com" target="_blank">Lightbend</a>.
HTML
html_filters.push 'scala/entries', 'scala/clean_html', 'scala/clean_html_210'
end end
version '2.10' do def get_latest_version(opts)
self.release = '2.10.6' doc = fetch_doc('https://www.scala-lang.org/api/current/', opts)
self.dir = '/Users/Thibaut/DevDocs/Docs/Scala210' # https://downloads.lightbend.com/scala/2.10.6/scala-docs-2.10.6.zip doc.at_css('#doc-version').content
self.base_url = 'http://www.scala-lang.org/api/2.10.6/'
self.root_path = 'package.html'
options[:skip_patterns] = [/^index.html/, /index\/index-/]
options[:attribution] = <<-HTML
Scala programming documentation. Copyright (c) 2003-2013 <a
href="http://www.epfl.ch" target="_blank">EPFL</a>, with contributions from <a
href="http://typesafe.com" target="_blank">Typesafe</a>.
HTML
html_filters.push 'scala/entries', 'scala/clean_html', 'scala/clean_html_210'
end end
end end
end end

Loading…
Cancel
Save