diff --git a/docs/file-scrapers.md b/docs/file-scrapers.md index 21c7ac5a..81451dce 100644 --- a/docs/file-scrapers.md +++ b/docs/file-scrapers.md @@ -133,23 +133,16 @@ tar xf ocaml-4.10-refman-html.tar.gz --transform 's/htmlman/ocaml/' ``` ## OpenJDK +Search for 'OpenJDK' on https://www.debian.org/distrib/packages, find the `openjdk-$VERSION-doc` package, +download it, extract it with `dpkg -x $PACKAGE ./` and move `./usr/share/doc/openjdk-$VERSION-jre-headless/api/` +to `path/to/devdocs/docs/openjdk~$VERSION` -https://packages.debian.org/sid/openjdk-11-doc - -```sh -mkdir docs/openjdk~11 -curl --remote-name http://ftp.debian.org/debian/pool/main/o/openjdk-11/openjdk-11-doc_11.0.9.1+1-1_all.deb -bsdtar --extract --to-stdout --file openjdk-11-doc_11.0.9.1+1-1_all.deb data.tar.xz | \ -bsdtar --extract --xz --file - --strip-components=6 --directory=docs/openjdk\~11/ ./usr/share/doc/openjdk-11-jre-headless/api/ -``` - -https://packages.debian.org/sid/openjdk-8-doc - +If you use or have access to a Debian-based GNU/Linux distribution you can run the following commands: ```sh -mkdir docs/openjdk~8 -curl --remote-name http://ftp.debian.org/debian/pool/main/o/openjdk-8/openjdk-8-doc_8u272-b10-1_all.deb -bsdtar --extract --to-stdout --file openjdk-8-doc_8u272-b10-1_all.deb data.tar.xz | \ -bsdtar --extract --xz --file - --strip-components=6 --directory=docs/openjdk\~8/ ./usr/share/doc/openjdk-8-jre-headless/api/ +apt download openjdk-$VERSION-doc +dpkg -x $PACKAGE ./ +# previous command makes a directory called 'usr' in the current directory +mv ./usr/share/doc/openjdk-$VERSION-jre-headless/api/ path/to/devdocs/docs/openjdk~$VERSION ``` ## PHP diff --git a/lib/docs/filters/openjdk/clean_html.rb b/lib/docs/filters/openjdk/clean_html.rb index 44b4d9e1..f0b9e82f 100644 --- a/lib/docs/filters/openjdk/clean_html.rb +++ b/lib/docs/filters/openjdk/clean_html.rb @@ -1,3 +1,4 @@ +# coding: utf-8 # frozen_string_literal: true module Docs @@ -130,6 +131,13 @@ module Docs 
node.remove_attribute('class') unless node['class'] == 'inheritance' end + # fix ul section that contains summaries or tables + css('ul').each do |node| + node.css('section').each do |subnode| + node.add_previous_sibling(subnode) + end + end + doc end end diff --git a/lib/docs/filters/openjdk/clean_html_new.rb b/lib/docs/filters/openjdk/clean_html_new.rb new file mode 100644 index 00000000..0e16f18d --- /dev/null +++ b/lib/docs/filters/openjdk/clean_html_new.rb @@ -0,0 +1,49 @@ +module Docs + class Openjdk + class CleanHtmlNewFilter < Filter + def call + + if root_page? + at_css('h1').content = "OpenJDK #{release} Documentation" + end + + css('.header .sub-title').remove + + css('blockquote pre').each do |node| + node.parent.name = 'pre' + node.parent['class'] = 'highlight' + node.parent['data-language'] = 'java' + node.parent.content = node.content + node.remove + end + + # fix ul section that contains summaries or tables + css('ul').each do |node| + node.css('section').each do |subnode| + node.add_previous_sibling(subnode) + end + end + + # add syntax highlight to code blocks + css('pre > code').each do |node| + node.parent['class'] = 'lang-java' + node.parent['data-language'] = 'java' + end + + # add syntax highlight to each method + css('.member-signature').each do |node| + node.name = 'pre' + node['class'] = 'lang-java' + node['data-language'] = 'java' + + node.css('span').each do |subnode| + subnode.name = 'code' + end + + end + + doc + end + end + end +end diff --git a/lib/docs/filters/openjdk/entries_new.rb b/lib/docs/filters/openjdk/entries_new.rb new file mode 100644 index 00000000..8fcb2b27 --- /dev/null +++ b/lib/docs/filters/openjdk/entries_new.rb @@ -0,0 +1,45 @@ +module Docs + class Openjdk + class EntriesNewFilter < Docs::EntriesFilter + + def get_name + name = at_css('.header > .title').content.strip + name.remove! 'Package ' + name.remove! 'Class ' + name.remove! 'Interface ' + name.remove! 'Annotation Type ' + name.remove! 'Enum ' + name.remove! 
%r{<.*} + name + end + + def get_type + return 'Packages' if slug.end_with?('package-summary') + return 'Modules' if slug.end_with?('module-summary') + + if subtitle = at_css('.header > .sub-title:last-of-type') + type = subtitle.content.strip + else + type = at_css('.header > .title').content.strip.remove 'Package ' + type.remove!('Module ') + end + type = type.split('.')[0..2].join('.') + type + end + + def additional_entries + css('a[name$=".summary"]').each_with_object({}) do |summary, entries| + next if summary['name'].include?('nested') || summary['name'].include?('constructor') || + summary['name'].include?('field') || summary['name'].include?('constant') + summary.parent.css('.memberNameLink a').each do |node| + name = node.parent.parent.content.strip + name.sub! %r{\(.+?\)}m, '()' + id = node['href'].remove(%r{.*#}) + entries[name] ||= ["#{self.name}.#{name}", id] + end + end.values + end + + end + end +end diff --git a/lib/docs/scrapers/openjdk.rb b/lib/docs/scrapers/openjdk.rb index 3524973d..89cc377e 100644 --- a/lib/docs/scrapers/openjdk.rb +++ b/lib/docs/scrapers/openjdk.rb @@ -1,7 +1,6 @@ module Docs class Openjdk < FileScraper - # Downloaded from packages.debian.org/sid/openjdk-8-doc - # Extracting subdirectory /usr/share/doc/openjdk-8-jre-headless/api + self.name = 'OpenJDK' self.type = 'openjdk' self.root_path = 'overview-summary.html' @@ -11,7 +10,6 @@ module Docs } html_filters.insert_after 'internal_urls', 'openjdk/clean_urls' - html_filters.push 'openjdk/entries', 'openjdk/clean_html' options[:skip_patterns] = [ /compact[123]-/, @@ -19,7 +17,12 @@ module Docs /package-tree\.html/, /package-use\.html/, /class-use\//, - /doc-files\//] + /doc-files\//, + /\.svg/, + /\.png/ + ] + + options[:only_patterns] = [/\Ajava\./] options[:attribution] = <<-HTML © 1993, 2020, Oracle and/or its affiliates. All rights reserved.
@@ -29,16 +32,32 @@ module Docs Java and OpenJDK are trademarks or registered trademarks of Oracle and/or its affiliates. HTML + NEWFILTERS = ['openjdk/entries_new', 'openjdk/clean_html_new'] + + version '15' do + self.release = '15.0.1' + self.root_path = 'index.html' + + html_filters.push NEWFILTERS + + options[:container] = 'main' + end + + OLDFILTERS = ['openjdk/entries', 'openjdk/clean_html'] + version '11' do self.release = '11.0.9' self.root_path = 'index.html' self.base_url = 'https://docs.oracle.com/en/java/javase/11/docs/api/' - options[:only_patterns] = [/\Ajava\./] + + html_filters.push OLDFILTERS end version '8' do self.release = '8' + html_filters.push OLDFILTERS + options[:only_patterns] = [ /\Ajava\/beans\//, /\Ajava\/io\//, @@ -61,20 +80,27 @@ module Docs /\Ajavax\/script\//, /\Ajavax\/security\//, /\Ajavax\/sound\//, - /\Ajavax\/tools\//] + /\Ajavax\/tools\// + ] end version '8 GUI' do self.release = '8' + html_filters.push OLDFILTERS + options[:only_patterns] = [ /\Ajava\/awt\//, - /\Ajavax\/swing\//] + /\Ajavax\/swing\// + ] + end version '8 Web' do self.release = '8' + html_filters.push OLDFILTERS + options[:only_patterns] = [ /\Ajava\/applet\//, /\Ajava\/rmi\//,