From 00c643a2b727eb538a41dafc9338554962e89372 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Sun, 31 Jul 2022 08:44:31 +0200 Subject: [PATCH] Update OCaml documentation (newline in code snippets) Fixes #1783. --- docs/file-scrapers.md | 4 ++-- lib/docs/filters/ocaml/clean_html.rb | 15 ++++++++++----- lib/docs/scrapers/ocaml.rb | 12 +++++++++--- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/docs/file-scrapers.md b/docs/file-scrapers.md index 56025456..80b5032b 100644 --- a/docs/file-scrapers.md +++ b/docs/file-scrapers.md @@ -151,11 +151,11 @@ bsdtar --extract --file=- --directory=docs/numpy~$VERSION/ ## OCaml Download from https://www.ocaml.org/docs/ the HTML reference: -https://ocaml.org/releases/4.11/ocaml-4.11-refman-html.tar.gz +https://v2.ocaml.org/releases/4.14/ocaml-4.14-refman-html.tar.gz and extract it as `/path/to/devdocs/docs/ocaml`: ```sh -curl https://ocaml.org/releases/$VERSION/ocaml-$VERSION-refman-html.tar.gz | \ +curl https://v2.ocaml.org/releases/$VERSION/ocaml-$VERSION-refman-html.tar.gz | \ tar xz --transform 's/htmlman/ocaml/' --directory docs/ ``` diff --git a/lib/docs/filters/ocaml/clean_html.rb b/lib/docs/filters/ocaml/clean_html.rb index a68284a5..70bac845 100644 --- a/lib/docs/filters/ocaml/clean_html.rb +++ b/lib/docs/filters/ocaml/clean_html.rb @@ -3,17 +3,19 @@ module Docs class CleanHtmlFilter < Filter def call - css('pre, .caml-example').each do |node| + css('pre').each do |node| span = node.at_css('span[id]') node['id'] = span['id'] if span node['data-type'] = "#{span.content} [#{at_css('h1').content}]" if span node['data-language'] = 'ocaml' - node.name = 'pre' node.content = node.content end - css('.caml-input').each do |node| - node.content = '# ' + node.content.strip + css('.caml-input ~ .caml-output').each do |node| + node.previous_element << "\n\n" + node.previous_element << node.content + node.previous_element.remove_class('caml-input') + node.remove end css('.maintitle *[style]').each do |node| @@ -26,7 
+28,10 @@ module Docs table.first.before(node).remove if table.present? end - css('.navbar').remove + css('.navbar', '#sidebar-button', 'hr').remove + css('img[alt="Previous"]', 'img[alt="Up"]', 'img[alt="Next"]').each do |node| + node.parent.remove + end doc end diff --git a/lib/docs/scrapers/ocaml.rb b/lib/docs/scrapers/ocaml.rb index 72de98f8..0ab64a90 100644 --- a/lib/docs/scrapers/ocaml.rb +++ b/lib/docs/scrapers/ocaml.rb @@ -4,7 +4,7 @@ module Docs self.type = 'ocaml' self.root_path = 'index.html' self.release = '4.14' - self.base_url = "https://www.ocaml.org/releases/#{self.release}/htmlman/" + self.base_url = "https://v2.ocaml.org/releases/#{self.release}/htmlman/" self.links = { home: 'https://ocaml.org/', code: 'https://github.com/ocaml/ocaml' @@ -27,8 +27,14 @@ module Docs HTML def get_latest_version(opts) - doc = fetch_doc('https://www.ocaml.org/releases/', opts) - doc.css('#main-contents li > a').first.content + get_latest_github_release('ocaml', 'ocaml', opts) + end + + private + + def parse(response) # Hook here because Nokogiri removes whitespace from code fragments + response.body.gsub! %r{<div\ class="pre([^"]*)"[^>]*>([\W\w]+?)</div>}, '<pre class="\1">\2</pre>' + super end end