diff --git a/lib/docs/filters/openjdk/clean_html.rb b/lib/docs/filters/openjdk/clean_html.rb index f0b9e82f..4cd6afe0 100644 --- a/lib/docs/filters/openjdk/clean_html.rb +++ b/lib/docs/filters/openjdk/clean_html.rb @@ -5,14 +5,14 @@ module Docs class Openjdk class CleanHtmlFilter < Filter def call - css('.topNav', '.subNav', '.bottomNav', '.legalCopy', 'noscript', '.subTitle').remove + css('.topNav', '.subNav', '.bottomNav', '.legalCopy', 'noscript', '.subTitle', 'hr').remove # Preserve internal fragment links # Transform text # into text - css('a[name]').each do |node| + css('a[name]','a[id]').each do |node| if node.children.all?(&:blank?) - node.next_element['id'] = node['name'] if node.next_element + node.next_element['id'] = (node['id'] || node['name'])if node.next_element node.remove end end @@ -23,11 +23,9 @@ module Docs node.remove end - # Replace summary tables with their detail content - css('h3[id$=".summary"]').each do |node| - id = node['id'].sub('summary', 'detail') - detail = at_css("h3[id='#{id}']") || at_css("h3[id='#{id.remove('optional.').remove('required.')}']") - node.parent.children = detail.parent.children if detail + # remove captions in tables + css('table caption').each do |node| + node.remove end css('h3[id$=".summary"]', 'h3[id$=".detail"]').each do |node| diff --git a/lib/docs/filters/openjdk/clean_html_new.rb b/lib/docs/filters/openjdk/clean_html_new.rb index 0e16f18d..1b6d4816 100644 --- a/lib/docs/filters/openjdk/clean_html_new.rb +++ b/lib/docs/filters/openjdk/clean_html_new.rb @@ -7,15 +7,7 @@ module Docs at_css('h1').content = "OpenJDK #{release} Documentation" end - css('.header .sub-title').remove - - css('blockquote pre').each do |node| - node.parent.name = 'pre' - node.parent['class'] = 'highlight' - node.parent['data-language'] = 'java' - node.parent.content = node.content - node.remove - end + css('.header .sub-title', 'hr', '.table-tabs').remove # fix ul section that contains summaries or tables css('ul').each do |node| @@ -24,6 +16,13 @@ module Docs end end + css('ul.summary-list').each do |node| + node.css('li').each do |subnode| + subnode.name = 'div' + end + node.name = 'div' + end + # add syntax highlight to code blocks css('pre > code').each do |node| node.parent['class'] = 'lang-java' diff --git a/lib/docs/filters/openjdk/entries.rb b/lib/docs/filters/openjdk/entries.rb index 5880ed9e..753c411e 100644 --- a/lib/docs/filters/openjdk/entries.rb +++ b/lib/docs/filters/openjdk/entries.rb @@ -29,16 +29,31 @@ module Docs def additional_entries # Only keep the first found entry with a unique name, # i.e. overloaded methods are skipped in index - css('a[name$=".summary"]').each_with_object({}) do |summary, entries| - next if summary['name'].include?('nested') || summary['name'].include?('constructor') || - summary['name'].include?('field') || summary['name'].include?('constant') - summary.parent.css('.memberNameLink a').each do |node| - name = node.parent.parent.content.strip - name.sub! %r{\(.+?\)}m, '()' - id = node['href'].remove(%r{.*#}) - entries[name] ||= ["#{self.name}.#{name}", id] - end - end.values + if version == '8' || version == '8 Gui' || version == '8 Web' + css('a[name$=".summary"]').each_with_object({}) do |summary, entries| + next if summary['name'].include?('nested') || summary['name'].include?('constructor') || + summary['name'].include?('field') || summary['name'].include?('constant') + summary.parent.css('.memberNameLink a').each do |node| + name = node.parent.parent.content.strip + name.sub! %r{\(.+?\)}m, '()' + id = node['href'].remove(%r{.*#}) + entries[name] ||= ["#{self.name}.#{name}", id] + end + end.values + + else + css('a[id$=".summary"]').each_with_object({}) do |summary, entries| + next if summary['id'].include?('nested') || summary['id'].include?('constructor') || + summary['id'].include?('field') || summary['id'].include?('constant') + summary.parent.css('.memberNameLink a').each do |node| + name = node.parent.parent.content.strip + name.sub! %r{\(.+?\)}m, '()' + id = node['href'].remove(%r{.*#}) + entries[name] ||= ["#{self.name}.#{name}", id] + end + end.values + end + end end end diff --git a/lib/docs/filters/openjdk/entries_new.rb b/lib/docs/filters/openjdk/entries_new.rb index 8fcb2b27..1a5f6cb2 100644 --- a/lib/docs/filters/openjdk/entries_new.rb +++ b/lib/docs/filters/openjdk/entries_new.rb @@ -28,16 +28,14 @@ module Docs end def additional_entries - css('a[name$=".summary"]').each_with_object({}) do |summary, entries| - next if summary['name'].include?('nested') || summary['name'].include?('constructor') || - summary['name'].include?('field') || summary['name'].include?('constant') - summary.parent.css('.memberNameLink a').each do |node| - name = node.parent.parent.content.strip - name.sub! %r{\(.+?\)}m, '()' - id = node['href'].remove(%r{.*#}) - entries[name] ||= ["#{self.name}.#{name}", id] - end - end.values + entries = [] + + css('section[id]').each do |node| + next if !(node['id'].match?(/\(/)) + entries << [self.name+ '.' +node.at_css('h3').content + '()', node['id']] + end + + entries end end diff --git a/lib/docs/scrapers/openjdk.rb b/lib/docs/scrapers/openjdk.rb index 89cc377e..7b2a1d26 100644 --- a/lib/docs/scrapers/openjdk.rb +++ b/lib/docs/scrapers/openjdk.rb @@ -22,7 +22,10 @@ module Docs /\.png/ ] - options[:only_patterns] = [/\Ajava\./] + options[:only_patterns] = [ + /\Ajava\./, + /\Ajdk\./ + ] options[:attribution] = <<-HTML © 1993, 2020, Oracle and/or its affiliates. All rights reserved.
@@ -37,6 +40,7 @@ module Docs version '15' do self.release = '15.0.1' self.root_path = 'index.html' + self.base_url = 'https://docs.oracle.com/en/java/javase/15/docs/api/' html_filters.push NEWFILTERS