From eaec6ec43ff3136df6ba83a7abcb55ad28da492f Mon Sep 17 00:00:00 2001 From: Scott Goley Date: Fri, 8 Nov 2024 23:05:14 -0500 Subject: [PATCH 01/25] duckdb docs (v1.1) - scrape v1 --- lib/docs/filters/duckdb/clean_html.rb | 41 ++++++++++++++++ lib/docs/filters/duckdb/entries.rb | 45 +++++++++++++++++ lib/docs/scrapers/duckdb.rb | 69 +++++++++++++++++++++++++++ 3 files changed, 155 insertions(+) create mode 100644 lib/docs/filters/duckdb/clean_html.rb create mode 100644 lib/docs/filters/duckdb/entries.rb create mode 100644 lib/docs/scrapers/duckdb.rb diff --git a/lib/docs/filters/duckdb/clean_html.rb b/lib/docs/filters/duckdb/clean_html.rb new file mode 100644 index 00000000..ae518c7b --- /dev/null +++ b/lib/docs/filters/duckdb/clean_html.rb @@ -0,0 +1,41 @@ +module Docs + class Duckdb + class CleanHtmlFilter < Filter + def call + # First extract the main content + @doc = at_css('main') + return doc if @doc.nil? + + # Remove navigation and header elements + css('.headerline', '.landingmenu', '.search_icon', '#sidebar', '.pagemeta', '.toc_menu', '.section-nav').remove + + # Clean up code blocks + css('pre').each do |node| + # Detect language from class or parent div + if node['class']&.include?('sql') || node.at_css('code.sql') + node['data-language'] = 'sql' + elsif node['class']&.include?('language-sql') + node['data-language'] = 'sql' + end + node.content = node.content.strip + end + + # Remove unnecessary attributes but keep essential ones + css('div, span, p').each do |node| + node.remove_attribute('style') + node.remove_attribute('class') unless node['class'] =~ /highlight/ + end + + # Remove empty elements + css('div, span').each do |node| + node.remove if node.content.strip.empty? + end + + # Remove script tags + css('script').remove + + doc + end + end + end +end \ No newline at end of file diff --git a/lib/docs/filters/duckdb/entries.rb b/lib/docs/filters/duckdb/entries.rb new file mode 100644 index 00000000..ea929022 --- /dev/null +++ b/lib/docs/filters/duckdb/entries.rb @@ -0,0 +1,45 @@ +module Docs + class Duckdb + class EntriesFilter < Docs::EntriesFilter + def get_name + at_css('h1')&.content || 'DuckDB' + end + + def get_type + case subpath + when /\Asql\// + 'SQL Reference' + when /\Aapi\// + 'Client APIs' + when /\Aguides\// + 'How-to Guides' + when /\Adata\// + 'Data Import' + when /\Aoperations_manual\// + 'Operations Manual' + when /\Adev\// + 'Development' + when /\Ainternals\// + 'Internals' + when /\Aextensions\// + 'Extensions' + when /\Aarchive\// + 'Archive' + else + 'Documentation' + end + end + + def additional_entries + entries = [] + css('h2[id]', 'h3[id]').each do |node| + name = node.content.strip + # Clean up the name + name = name.gsub(/[\r\n\t]/, ' ').squeeze(' ') + entries << [name, node['id'], get_type] + end + entries + end + end + end +end \ No newline at end of file diff --git a/lib/docs/scrapers/duckdb.rb b/lib/docs/scrapers/duckdb.rb new file mode 100644 index 00000000..a160b3ef --- /dev/null +++ b/lib/docs/scrapers/duckdb.rb @@ -0,0 +1,69 @@ +module Docs + class Duckdb < UrlScraper + self.name = 'DuckDB' + self.type = 'duckdb' + self.root_path = 'index.html' + self.links = { + home: 'https://duckdb.org/', + code: 'https://github.com/duckdb/duckdb' + } + + html_filters.push 'duckdb/entries', 'duckdb/clean_html' + + options[:container] = '.documentation' + + options[:skip_patterns] = [ + /installation/, + /archive/, + /reference/, + ] + + options[:skip] = %w( + docs/archive/ + docs/installation/ + docs/api/ + ) + + options[:attribution] = <<-HTML + © Copyright 2018–2024 Stichting DuckDB Foundation
+ Licensed under the MIT License. + HTML + + version '1.1' do + self.release = '1.1.x' + self.base_url = 'http://localhost:8000/docs/' + end + + # version '1.0' do + # self.release = '1.0.x' + # self.base_url = "https://duckdb.org/docs/archive/#{self.version}/" + + # html_filters.push 'duckdb/clean_html' + # end + + # version '0.9' do + # self.release = '0.9.x' + # self.base_url = "https://duckdb.org/docs/archive/#{self.version}/" + + # html_filters.push 'duckdb/clean_html' + # end + + # version '0.8' do + # self.release = '0.8.x' + # self.base_url = "https://duckdb.org/docs/archive/#{self.version}/" + + # html_filters.push 'duckdb/clean_html' + # end + + # version '0.7' do + # self.release = '0.7.x' + # self.base_url = "https://duckdb.org/docs/archive/#{self.version}/" + + # html_filters.push 'duckdb/clean_html' + # end + + def get_latest_version(opts) + get_github_tags('duckdb', 'duckdb', opts) + end + end +end From a5af2487ec1298fa3ed14167d9df9e60e3b3e108 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Mon, 18 Nov 2024 18:00:04 +0100 Subject: [PATCH 02/25] Update JavaScript documentation --- lib/docs/scrapers/mdn/javascript.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/docs/scrapers/mdn/javascript.rb b/lib/docs/scrapers/mdn/javascript.rb index 48b0d1cd..e6aabdb1 100644 --- a/lib/docs/scrapers/mdn/javascript.rb +++ b/lib/docs/scrapers/mdn/javascript.rb @@ -3,7 +3,7 @@ module Docs prepend FixInternalUrlsBehavior prepend FixRedirectionsBehavior - # release = '2024-08-20' + # release = '2024-11-18' self.name = 'JavaScript' self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference' self.links = { From 2a5bbe8bb9c9cba026910208be472750be32e5cd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 17:02:38 +0000 Subject: [PATCH 03/25] Bump rexml from 3.2.9 to 3.3.9 Bumps [rexml](https://github.com/ruby/rexml) from 3.2.9 to 3.3.9. - [Release notes](https://github.com/ruby/rexml/releases) - [Changelog](https://github.com/ruby/rexml/blob/master/NEWS.md) - [Commits](https://github.com/ruby/rexml/compare/v3.2.9...v3.3.9) --- updated-dependencies: - dependency-name: rexml dependency-type: indirect ... Signed-off-by: dependabot[bot] --- Gemfile.lock | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 0ee3e99a..61162896 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -88,8 +88,7 @@ GEM rb-inotify (0.10.1) ffi (~> 1.0) redcarpet (3.6.0) - rexml (3.2.9) - strscan + rexml (3.3.9) rouge (1.11.1) rr (3.1.1) rss (0.3.1) @@ -125,7 +124,6 @@ GEM unicode-display_width (>= 1.5, < 3.0) unicode_utils (~> 1.4) strings-ansi (0.2.0) - strscan (1.0.3) terminal-table (3.0.2) unicode-display_width (>= 1.1.1, < 3) terser (1.2.4) From 7e8c19d2c7b3502376173fc110f9130a59ac265a Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Mon, 18 Nov 2024 18:44:23 +0100 Subject: [PATCH 04/25] Update WordPress documentation (6.7) --- lib/docs/filters/wordpress/clean_html.rb | 32 +++++++++++++++++------- lib/docs/filters/wordpress/entries.rb | 6 +++-- lib/docs/scrapers/wordpress.rb | 6 ++--- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/lib/docs/filters/wordpress/clean_html.rb b/lib/docs/filters/wordpress/clean_html.rb index a7aeb472..fba2c4c3 100644 --- a/lib/docs/filters/wordpress/clean_html.rb +++ b/lib/docs/filters/wordpress/clean_html.rb @@ -8,15 +8,29 @@ module Docs end article = at_css('article[id^="post-"]') - @doc = at_css('article[id^="post-"]') unless article.nil? - - css('hr', '.screen-reader-text', '.table-of-contents', - '.anchor', '.toc-jump', '.source-code-links', '.user-notes', - '.show-more', '.hide-more').remove - - header = at_css('h1') - header.content = header.content.strip - doc.prepend_child header + @doc = article unless article.nil? + + css( + 'hr', + '.screen-reader-text', + '.table-of-contents', + '.anchor', + '.toc-jump', + '.source-code-links', + '.user-notes', + '.show-more', + '.hide-more', + '.wp-block-wporg-sidebar-container', + 'section[data-nosnippet="true"]', + # 'section:contains("before being able to contribute a note or feedback")', + ).remove + + if at_css('.entry-content') + header = at_css('h1') + header.remove_attribute('style') + @doc = at_css('.entry-content') + doc.prepend_child header + end # Remove permalink css('h2 > a, h3 > a').each do |node| diff --git a/lib/docs/filters/wordpress/entries.rb b/lib/docs/filters/wordpress/entries.rb index ba539d67..8acca62a 100644 --- a/lib/docs/filters/wordpress/entries.rb +++ b/lib/docs/filters/wordpress/entries.rb @@ -2,11 +2,13 @@ module Docs class Wordpress class EntriesFilter < Docs::EntriesFilter def get_name - at_css('.breadcrumbs .trail-end').content + at_css('h1').content end def get_type - if subpath.starts_with?('classes') + if subpath.starts_with?('classes') and subpath.count('/') == 3 + 'Methods' + elsif subpath.starts_with?('classes') 'Classes' elsif subpath.starts_with?('hooks') 'Hooks' diff --git a/lib/docs/scrapers/wordpress.rb b/lib/docs/scrapers/wordpress.rb index b15a4fd0..beb23cee 100644 --- a/lib/docs/scrapers/wordpress.rb +++ b/lib/docs/scrapers/wordpress.rb @@ -2,7 +2,7 @@ module Docs class Wordpress < UrlScraper self.name = 'WordPress' self.type = 'wordpress' - self.release = '6.1' + self.release = '6.7' self.base_url = 'https://developer.wordpress.org/reference/' self.initial_paths = %w( functions/ @@ -17,7 +17,7 @@ module Docs html_filters.push 'wordpress/entries', 'wordpress/clean_html' - options[:container] = '#content-area' + options[:container] = 'main' options[:trailing_slash] = false options[:only_patterns] = [ /\Afunctions\//, @@ -32,7 +32,7 @@ module Docs ] options[:attribution] = <<-HTML - © 2003–2022 WordPress Foundation
+ © 2003–2024 WordPress Foundation
Licensed under the GNU GPLv2+ License. HTML From 1b968af7834c20d089d7f3022f9aae1718e7f188 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 22:13:16 +0000 Subject: [PATCH 05/25] Update dependency sinatra to v4 [SECURITY] --- Gemfile.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 61162896..a3acf6fa 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -58,7 +58,7 @@ GEM mini_portile2 (2.8.7) minitest (5.25.1) multi_json (1.15.0) - mustermann (3.0.0) + mustermann (3.0.3) ruby2_keywords (~> 0.0.1) newrelic_rpm (8.16.0) nokogiri (1.16.7) @@ -133,7 +133,7 @@ GEM eventmachine (~> 1.0, >= 1.0.4) rack (>= 1, < 3) thor (1.3.2) - tilt (2.3.0) + tilt (2.4.0) tty-pager (0.14.0) strings (~> 0.2.0) tty-screen (~> 0.8) From 2bb4ed8fb885a0c99e30219785b59c89df9777e9 Mon Sep 17 00:00:00 2001 From: Rui Jiang Date: Mon, 18 Nov 2024 18:54:09 -0600 Subject: [PATCH 06/25] updated svelte version from 4 to 5 --- Gemfile.lock | 4 +--- lib/docs/filters/svelte/clean_html.rb | 14 ++++++++++++-- lib/docs/filters/svelte/entries.rb | 27 ++++++--------------------- lib/docs/scrapers/svelte.rb | 14 ++++++++------ 4 files changed, 27 insertions(+), 32 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 0ee3e99a..61162896 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -88,8 +88,7 @@ GEM rb-inotify (0.10.1) ffi (~> 1.0) redcarpet (3.6.0) - rexml (3.2.9) - strscan + rexml (3.3.9) rouge (1.11.1) rr (3.1.1) rss (0.3.1) @@ -125,7 +124,6 @@ GEM unicode-display_width (>= 1.5, < 3.0) unicode_utils (~> 1.4) strings-ansi (0.2.0) - strscan (1.0.3) terminal-table (3.0.2) unicode-display_width (>= 1.1.1, < 3) terser (1.2.4) diff --git a/lib/docs/filters/svelte/clean_html.rb b/lib/docs/filters/svelte/clean_html.rb index 693825f7..07f0dd51 100644 --- a/lib/docs/filters/svelte/clean_html.rb +++ b/lib/docs/filters/svelte/clean_html.rb @@ -2,11 +2,21 @@ module Docs class Svelte class CleanHtmlFilter < Filter def call - @doc = at_css('main .page.content') + @doc = at_css('main .page.content #docs-content') + + # Remove title header + at_css('> header > div.breadcrumbs').remove() + # Remove extra input toggle + at_css('> aside.on-this-page input').remove() + # Remove "edit this page" link + at_css('> p.edit').remove() + # Remove footer navigation + at_css('> div.controls').remove() + at_css('h1').content = 'Svelte' if root_page? css('pre').each do |node| node.content = node.css('.line').map(&:content).join("\n") - node['data-language'] = 'javascript' + node['data-language'] = 'typescript' end doc end diff --git a/lib/docs/filters/svelte/entries.rb b/lib/docs/filters/svelte/entries.rb index dcd66cc2..c349898a 100644 --- a/lib/docs/filters/svelte/entries.rb +++ b/lib/docs/filters/svelte/entries.rb @@ -2,29 +2,14 @@ module Docs class Svelte class EntriesFilter < Docs::EntriesFilter def get_type - at_css('ul.sidebar > li:has(.active) > span.section').content + page = at_css("main nav ul.sidebar li ul li a[href$='#{result[:path]}']") + category = page.ancestors('li')[1] + return category.css('h3').inner_text end - def additional_entries - subtype = nil - css('aside').remove - css('.category').remove - css('.controls').remove - css('.edit').remove - css('.permalink').remove - css('h2, h3, h4').each_with_object [] do |node, entries| - if node.name == 'h2' - subtype = nil - elsif node.name == 'h3' - subtype = node.content.strip - subtype = nil unless subtype[/Component directives|Element directives/] - end - next if type == 'Before we begin' - name = node.content.strip - name.concat " (#{subtype})" if subtype && node.name == 'h4' - next if name.starts_with?('Example') - entries << [name, node['id'], get_type] - end + def get_name + page = at_css("main nav ul.sidebar li ul li a[href$='#{result[:path]}']") + return page.inner_text end end end diff --git a/lib/docs/scrapers/svelte.rb b/lib/docs/scrapers/svelte.rb index c5900a0b..aac87389 100644 --- a/lib/docs/scrapers/svelte.rb +++ b/lib/docs/scrapers/svelte.rb @@ -3,26 +3,28 @@ module Docs self.name = 'Svelte' self.slug = 'svelte' self.type = 'simple' + self.root_path = '/' self.links = { home: 'https://svelte.dev/', code: 'https://github.com/sveltejs/svelte' } - self.root_path = 'introduction' options[:root_title] = 'Svelte' # https://github.com/sveltejs/svelte/blob/master/LICENSE.md options[:attribution] = <<-HTML - © 2016–2023 Rich Harris and contributors
+ © 2016–2024 Rich Harris and contributors
Licensed under the MIT License. HTML - options[:skip] = %w(team.html plugins/) - - self.base_url = 'https://svelte.dev/docs/' + self.base_url = 'https://svelte.dev/docs/svelte/' html_filters.push 'svelte/entries', 'svelte/clean_html' - + version do + self.release = '5.2.3' + end + + version '4' do self.release = '4.2.1' end From ea00d3bfd13083fcfd435e18de3b0adeae9aa910 Mon Sep 17 00:00:00 2001 From: Rui Jiang Date: Mon, 18 Nov 2024 19:00:02 -0600 Subject: [PATCH 07/25] fixed gem lock --- Gemfile.lock | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 61162896..0ee3e99a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -88,7 +88,8 @@ GEM rb-inotify (0.10.1) ffi (~> 1.0) redcarpet (3.6.0) - rexml (3.3.9) + rexml (3.2.9) + strscan rouge (1.11.1) rr (3.1.1) rss (0.3.1) @@ -124,6 +125,7 @@ GEM unicode-display_width (>= 1.5, < 3.0) unicode_utils (~> 1.4) strings-ansi (0.2.0) + strscan (1.0.3) terminal-table (3.0.2) unicode-display_width (>= 1.1.1, < 3) terser (1.2.4) From 9102197db09598dbf4fa9001bc3ec604b922c065 Mon Sep 17 00:00:00 2001 From: Rui Jiang Date: Mon, 18 Nov 2024 19:13:42 -0600 Subject: [PATCH 08/25] removed hover popup --- lib/docs/filters/svelte/clean_html.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/docs/filters/svelte/clean_html.rb b/lib/docs/filters/svelte/clean_html.rb index 07f0dd51..cdc929ea 100644 --- a/lib/docs/filters/svelte/clean_html.rb +++ b/lib/docs/filters/svelte/clean_html.rb @@ -15,6 +15,8 @@ module Docs at_css('h1').content = 'Svelte' if root_page? css('pre').each do |node| + # Remove hover popup + node.css('.twoslash-popup-container').remove() node.content = node.css('.line').map(&:content).join("\n") node['data-language'] = 'typescript' end From 7c7ddaeead951684d00ee477866c3ffa0922e4d6 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Sun, 17 Nov 2024 16:34:27 +0100 Subject: [PATCH 09/25] Update Sequelize documentation (6.37.5) --- lib/docs/filters/sequelize/clean_html.rb | 2 +- lib/docs/scrapers/sequelize.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/docs/filters/sequelize/clean_html.rb b/lib/docs/filters/sequelize/clean_html.rb index a7d9bc05..62e89b00 100644 --- a/lib/docs/filters/sequelize/clean_html.rb +++ b/lib/docs/filters/sequelize/clean_html.rb @@ -2,7 +2,7 @@ module Docs class Sequelize class CleanHtmlFilter < Filter def call - @doc = at_css('article', '.content') + @doc = at_css('article', '.content .self-detail', '.content') if at_css('header > h1') # Pull the header out of its container diff --git a/lib/docs/scrapers/sequelize.rb b/lib/docs/scrapers/sequelize.rb index f170abfe..8410c7ef 100644 --- a/lib/docs/scrapers/sequelize.rb +++ b/lib/docs/scrapers/sequelize.rb @@ -30,7 +30,7 @@ module Docs end version '6' do - self.release = '6.23.2' + self.release = '6.37.5' self.base_url = "https://sequelize.org/docs/v6/" self.base_urls = [ "https://sequelize.org/docs/v6/", From 2edbd614a0682ac84ab435cc882254e576eee2be Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Sun, 17 Nov 2024 16:36:29 +0100 Subject: [PATCH 10/25] Update Matplotlib documentation (3.9.2) --- lib/docs/scrapers/matplotlib.rb | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/lib/docs/scrapers/matplotlib.rb b/lib/docs/scrapers/matplotlib.rb index 1486642c..5e30998c 100644 --- a/lib/docs/scrapers/matplotlib.rb +++ b/lib/docs/scrapers/matplotlib.rb @@ -20,8 +20,8 @@ module Docs Licensed under the Matplotlib License Agreement. HTML - version '3.7' do - self.release = '3.7.1' + version do + self.release = '3.9.2' self.base_urls = [ "https://matplotlib.org/stable/api/", "https://matplotlib.org/stable/mpl_toolkits/mplot3d/", @@ -29,6 +29,24 @@ module Docs ] end + version '3.8' do + self.release = '3.8.4' + self.base_urls = [ + "https://matplotlib.org/#{release}/api/", + "https://matplotlib.org/#{release}/mpl_toolkits/mplot3d/", + "https://matplotlib.org/#{release}/mpl_toolkits/axes_grid/api/" + ] + end + + version '3.7' do + self.release = '3.7.5' + self.base_urls = [ + "https://matplotlib.org/#{release}/api/", + "https://matplotlib.org/#{release}/mpl_toolkits/mplot3d/", + "https://matplotlib.org/#{release}/mpl_toolkits/axes_grid/api/" + ] + end + version '3.6' do self.release = '3.6.0' self.base_urls = [ From e18ddad165c65160de75d3284bebbd2a1795c9c9 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Tue, 19 Nov 2024 07:17:35 +0100 Subject: [PATCH 11/25] Update Support Tables documentation (1.0.30001680) --- lib/docs/scrapers/support_tables.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/docs/scrapers/support_tables.rb b/lib/docs/scrapers/support_tables.rb index 4318edf3..e181fd8f 100644 --- a/lib/docs/scrapers/support_tables.rb +++ b/lib/docs/scrapers/support_tables.rb @@ -7,7 +7,7 @@ module Docs self.name = 'Support Tables' self.slug = 'browser_support_tables' self.type = 'support_tables' - self.release = '1.0.30001642' + self.release = '1.0.30001680' self.base_url = 'https://github.com/Fyrd/caniuse/raw/main/' # https://github.com/Fyrd/caniuse/blob/main/LICENSE From 811b9d13d70e038b2dc29bdcbe713f91fae4da8b Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Tue, 19 Nov 2024 13:09:36 +0100 Subject: [PATCH 12/25] Update RxJS documentation (7.8.1) --- lib/docs/scrapers/rxjs.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/docs/scrapers/rxjs.rb b/lib/docs/scrapers/rxjs.rb index 69d2fffc..83827f97 100644 --- a/lib/docs/scrapers/rxjs.rb +++ b/lib/docs/scrapers/rxjs.rb @@ -4,7 +4,7 @@ module Docs class Rxjs < UrlScraper self.name = 'RxJS' self.type = 'rxjs' - self.release = '7.5.5' + self.release = '7.8.1' self.base_url = 'https://rxjs.dev/' self.root_path = 'guide/overview' self.links = { @@ -16,7 +16,7 @@ module Docs options[:follow_links] = false options[:only_patterns] = [/guide\//, /api\//] - options[:skip_patterns] = [/api\/([^\/]+)\.json/] + options[:skip_patterns] = [/api\/([^\/]+)\.json/, /api\/index/] options[:fix_urls_before_parse] = ->(url) do url.sub! %r{\A(\.\/)?guide/}, '/guide/' url.sub! %r{\Aapi/}, '/api/' From 8b8ebda2071ca713c13854f5a2d63647f6658ce0 Mon Sep 17 00:00:00 2001 From: Oliver Eyton-Williams Date: Tue, 19 Nov 2024 18:43:48 +0100 Subject: [PATCH 13/25] fix: use aliases from config --- assets/javascripts/templates/pages/help_tmpl.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/javascripts/templates/pages/help_tmpl.js b/assets/javascripts/templates/pages/help_tmpl.js index 2fa186c8..e155d829 100644 --- a/assets/javascripts/templates/pages/help_tmpl.js +++ b/assets/javascripts/templates/pages/help_tmpl.js @@ -3,7 +3,7 @@ app.templates.helpPage = function () { const navKey = $.isMac() ? "cmd" : "alt"; const arrowScroll = app.settings.get("arrowScroll"); - const aliases = Object.entries(app.models.Entry.ALIASES); + const aliases = Object.entries(app.config.docs_aliases); const middle = Math.ceil(aliases.length / 2); const aliases_one = aliases.slice(0, middle); const aliases_two = aliases.slice(middle); From d4034eea2d0098e64bfe037e4b7212d43ec96d42 Mon Sep 17 00:00:00 2001 From: Rui Jiang Date: Tue, 19 Nov 2024 21:23:53 -0600 Subject: [PATCH 14/25] updated eslint scraper --- lib/docs/filters/eslint/entries.rb | 7 ++++++- lib/docs/scrapers/eslint.rb | 21 ++++++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/lib/docs/filters/eslint/entries.rb b/lib/docs/filters/eslint/entries.rb index f5f0f345..e40cd7a5 100644 --- a/lib/docs/filters/eslint/entries.rb +++ b/lib/docs/filters/eslint/entries.rb @@ -10,7 +10,12 @@ module Docs if subpath.start_with?('rules') return 'Rules' else - at_css('nav.docs-index [aria-current="true"]').ancestors('li')[-1].at_css('a').content + type = at_css('nav.docs-index [aria-current="true"]').ancestors('li')[-1].at_css('a').content + # This specific entry is mispelled with a lowercase 'i' + if type.start_with?('integrate') + type = type.sub('integrate', 'Integrate') + end + return type end end end diff --git a/lib/docs/scrapers/eslint.rb b/lib/docs/scrapers/eslint.rb index 6d069839..e3243171 100644 --- a/lib/docs/scrapers/eslint.rb +++ b/lib/docs/scrapers/eslint.rb @@ -2,9 +2,9 @@ module Docs class Eslint < UrlScraper self.name = 'ESLint' self.type = 'simple' - self.release = '8.56.0' + self.release = '9.15.0' self.base_url = 'https://eslint.org/docs/latest/' - self.root_path = 'user-guide/getting-started' + self.root_path = '/' self.links = { home: 'https://eslint.org/', code: 'https://github.com/eslint/eslint' @@ -14,7 +14,22 @@ module Docs options[:skip_patterns] = [/maintain/, /migrating/, /migrate/, /\Aversions/, /rule-deprecation/] options[:skip] = %w(about about/ versions) - options[:replace_paths] = { 'user-guide' => 'user-guide/' } + # A number of paths have a trailing slash, causing them to be suffixed by "index" during the NormalizePathsFilter + options[:replace_paths] = { + 'configure/' => 'configure', + 'contribute/' => 'contribute', + 'contribute/architecture/' => 'contribute/architecture', + 'extend/' => 'extend', + 'flags/' => 'flags', + 'integrate/' => 'integrate', + 'rules/' => 'rules', + 'use/' => 'use', + 'use/formatters/' => 'use/formatters', + 'use/configure/' => 'use/configure', + 'use/configure/rules/' => 'use/configure/rules', + 'use/core-concepts/' => 'use/core-concepts', + 'use/troubleshooting/' => 'use/troubleshooting', + } options[:attribution] = <<-HTML © OpenJS Foundation and other contributors
From dfe89bb46011d1c21f54942c695fd80d65a6eceb Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Wed, 20 Nov 2024 22:12:06 +0100 Subject: [PATCH 15/25] Update ESLint documentation (9.15.0) --- lib/docs/filters/eslint/clean_html.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/docs/filters/eslint/clean_html.rb b/lib/docs/filters/eslint/clean_html.rb index 0737acef..f347cef9 100644 --- a/lib/docs/filters/eslint/clean_html.rb +++ b/lib/docs/filters/eslint/clean_html.rb @@ -5,6 +5,7 @@ module Docs @doc = at_css('#main') if at_css('#main') @doc = at_css('.docs-main__content') if at_css('.docs-main__content') + css('.docs-toc').remove css('.eslint-ad').remove css('.glyphicon').remove css('hr', 'colgroup', 'td:empty').remove From 4baf915b99038bcd56e0b11be7a9f865506cec12 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Wed, 20 Nov 2024 22:20:16 +0100 Subject: [PATCH 16/25] Update Playwright documentation (1.49.0) --- lib/docs/scrapers/playwright.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/docs/scrapers/playwright.rb b/lib/docs/scrapers/playwright.rb index 2d4aa891..c8e2c70e 100644 --- a/lib/docs/scrapers/playwright.rb +++ b/lib/docs/scrapers/playwright.rb @@ -2,7 +2,7 @@ module Docs class Playwright < UrlScraper self.name = 'Playwright' self.type = 'simple' - self.release = '1.46.1' + self.release = '1.49.0' self.base_url = 'https://playwright.dev/docs/' self.root_path = 'intro' self.links = { From 2216cb46cf6ba6dbf1ab6d104ca38e011be4e3ca Mon Sep 17 00:00:00 2001 From: Rui Jiang Date: Wed, 20 Nov 2024 17:26:35 -0600 Subject: [PATCH 17/25] updated yarn version (3.1.1 > 4.5.1) --- lib/docs/filters/yarn/clean_html_berry.rb | 41 ++++------------------- lib/docs/filters/yarn/entries_berry.rb | 19 ++--------- lib/docs/scrapers/yarn.rb | 17 +++++++--- 3 files changed, 22 insertions(+), 55 deletions(-) diff --git a/lib/docs/filters/yarn/clean_html_berry.rb b/lib/docs/filters/yarn/clean_html_berry.rb index 96b3ee53..8a28ce25 100644 --- a/lib/docs/filters/yarn/clean_html_berry.rb +++ b/lib/docs/filters/yarn/clean_html_berry.rb @@ -2,45 +2,18 @@ module Docs class Yarn class CleanHtmlBerryFilter < Filter def call - if slug.empty? - @doc = at_css('main') - css( - (['div:first-child'] * 3).join('>'), # Tagline - 'img', - 'hr', # Footer - 'hr + div', # Footer - ).remove - - css('a').each do |link| - link.name = 'div' - link.css('h3').each do |node| - node.replace("

#{node.content}

") - end - end - - return doc - end - - @doc = at_css('article') - # Heading & edit link - css('h1', 'h1 + a').remove unless slug.start_with?('configuration') - - if slug.start_with?('cli') - css('.header-code').each do |node| - node.name = 'span' - end - end - - if slug.start_with?('configuration') - css('h1', 'h2').each do |node| - node.name = node.name.sub(/\d/) { |i| i.to_i + 1 } - end - end + @doc = at_css('main .container div.theme-doc-markdown.markdown') css('*').each do |node| node.remove_attribute('style') end + css('pre').each do |node| + lang = node['class'][/language-(\w+)/, 1] + node['data-language'] = lang if lang + node.content = node.css('.token-line').map(&:content).join("\n") + end + doc end end diff --git a/lib/docs/filters/yarn/entries_berry.rb b/lib/docs/filters/yarn/entries_berry.rb index 44c1e18e..6b99bfa6 100644 --- a/lib/docs/filters/yarn/entries_berry.rb +++ b/lib/docs/filters/yarn/entries_berry.rb @@ -2,26 +2,11 @@ module Docs class Yarn class EntriesBerryFilter < Docs::EntriesFilter def get_name - if slug.start_with?('configuration') - filename = at_css('main .active code') - content = filename.content - return filename.parent.content.sub content, " (#{content})" - end - - name = at_css('h1').content - - if slug.start_with?('getting-started') - active_link = at_css('main .active') - links = active_link.parent.children.to_a - name.prepend "#{links.index(active_link) + 1}. " - end - - name + at_css('main header h1').content end def get_type - return 'CLI' if slug.start_with?('sdks', 'pnpify') - at_css('header .active').content + at_css('nav.navbar a.navbar__item.navbar__link.navbar__link--active').content end end end diff --git a/lib/docs/scrapers/yarn.rb b/lib/docs/scrapers/yarn.rb index 8cc49260..9d20bc8c 100644 --- a/lib/docs/scrapers/yarn.rb +++ b/lib/docs/scrapers/yarn.rb @@ -13,15 +13,16 @@ module Docs HTML version 'Berry' do - self.release = '3.1.1' + self.release = '4.5.1' self.base_url = 'https://yarnpkg.com/' self.links = { home: 'https://yarnpkg.com/', code: 'https://github.com/yarnpkg/berry' } - html_filters.push 'yarn/entries_berry', 'yarn/clean_html_berry', 'title' - options[:skip] = ['features', 'cli', 'configuration', 'advanced'] - options[:skip_patterns] = [/\Aapi/, /\Apackage/] + self.root_path = 'getting-started' + html_filters.push 'yarn/entries_berry', 'yarn/clean_html_berry' + options[:skip] = ['cli', 'cli/builder', 'cli/pnpify', 'cli/sdks', 'protocols'] + options[:skip_patterns] = [/\Aapi/, /\Ablog/, /\Apackage/, /\Aassets/] end version 'Classic' do @@ -38,5 +39,13 @@ module Docs def get_latest_version(opts) get_latest_github_release('yarnpkg', 'berry', opts)[/[\d.]+/] end + + private + + # Some pages contain null bytes and cause the parser to fail + def parse(response) + response.body.gsub!(/[\x00\u0000\0]/, '') + super + end end end From ae7246ccc20c0b3f4d6544f50d1a740b5573bcea Mon Sep 17 00:00:00 2001 From: Esmaeil Vakili Date: Wed, 20 Nov 2024 11:21:30 +0330 Subject: [PATCH 18/25] update cmake to v3.31 and add latest version docs to the list --- lib/docs/scrapers/cmake.rb | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/lib/docs/scrapers/cmake.rb b/lib/docs/scrapers/cmake.rb index cf14b562..bb2e4aae 100644 --- a/lib/docs/scrapers/cmake.rb +++ b/lib/docs/scrapers/cmake.rb @@ -16,10 +16,39 @@ module Docs options[:skip_patterns] = [/\Agenerator/, /\Acpack_gen/, /\Ainclude/, /\Arelease/, /tutorial\/(\w*%20)+/] options[:attribution] = <<-HTML - © 2000–2023 Kitware, Inc. and Contributors
+ © 2000–2024 Kitware, Inc. and Contributors
Licensed under the BSD 3-clause License. HTML + version do + self.base_url = "https://cmake.org/cmake/help/latest/" + end + + version '3.31' do + self.release = '3.31' + self.base_url = "https://cmake.org/cmake/help/v#{self.version}/" + end + + version '3.30' do + self.release = '3.30' + self.base_url = "https://cmake.org/cmake/help/v#{self.version}/" + end + + version '3.29' do + self.release = '3.29' + self.base_url = "https://cmake.org/cmake/help/v#{self.version}/" + end + + version '3.28' do + self.release = '3.28' + self.base_url = "https://cmake.org/cmake/help/v#{self.version}/" + end + + version '3.27' do + self.release = '3.27' + self.base_url = "https://cmake.org/cmake/help/v#{self.version}/" + end + version '3.26' do self.release = '3.26' self.base_url = "https://cmake.org/cmake/help/v#{self.version}/" From 3caa196ea31ce2ad6f4c6a0f88abe3b48909c23f Mon Sep 17 00:00:00 2001 From: Esmaeil Vakili Date: Wed, 20 Nov 2024 11:47:23 +0330 Subject: [PATCH 19/25] update cmake icons --- public/icons/docs/cmake/16.png | Bin 231 -> 718 bytes public/icons/docs/cmake/16@2x.png | Bin 392 -> 1632 bytes public/icons/docs/cmake/SOURCE | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) diff --git a/public/icons/docs/cmake/16.png b/public/icons/docs/cmake/16.png index fe82b4bc2ebea007babde4ba1414ea4635346054..b0591445f183c50a2ccb2b704b03da4f76f462a2 100644 GIT binary patch delta 694 zcmV;n0!jVn0nP=GB!2;OQb$4nuFf3k00004XF*Lt006O%3;baP00009a7bBm000id z000id0mpBsWB>pHV@X6oR5(w4QC~=tQ53)D&V5_WbgSkpM`J`tN-Gf`5|#wfLl7l8 z=#M2)3iY6GQPA2$FGU~-74!#!9t!FqgC3GVrqM&tD2ielL4S~9xZK>9*|vM{>D+Ba z)2;JeIOjX(cYeR${XX`;SabN=zEPjwTYN1WyqH&nr}wTk^GjalOR2$7C{z(wVSo@z zCyq-c8`CDk>%Tv802zbguq4V7xVwj}La6hASQRsDiy%bsQ6NwjTc(LHkI+knhcjf~ zjM^MS)l&C%vVW|L6$I|?hgK=@90Nd&p0PRB>`UFGVL^aJ2W-K)( zx(Xfih2KOwm%At-*wBA#m6R;6Ucdn(0)r}jkO>(KcXXHv=tBHMfUpo-JW1KDYicM` zO?o%lUr^T9AO^J!X5u9-{uZvDtF}ZMqEt4}KJDPwp?^cZC+%D8o0FYK+L+nwq?z9R z2p(t4?(uP<_lgb>e*83@qH%s17`}fz*gm$iaorg-3_Ahhq(sYFP&M<}MAsh|c>-tP zx-BQwS!zjcmd|etcyGHMtM{P33UCJik82!l=z}UJf|1tND&3=BP3j`sNBs5H>dXVz z-m&`7r+=GQx*`|ejn2Kg!#STsi;*Nn?V#{2P&K7f*>3FuTs`;b?y(Ow4f9WJJ0klD z<|g61J<>y=#8mSfd)YS}>(ygJ5oh#fz}2s?-%k2<}%v5G#n3n?$L$o8Rt?UsX_2DQ+&kykaRzLJ<}8%~tRl^mM}R$-@8u002ovPDHLk FV1oO+T_OMg diff --git a/public/icons/docs/cmake/16@2x.png b/public/icons/docs/cmake/16@2x.png index 4dcfc24c973cfdea47735f40cb43cd632288bd92..78df82f15b5d0feef6959ed03e9edfd280f1a42f 100644 GIT binary patch delta 1626 zcmV-g2BrCk1KLShf*IZsJEhypY3F}t#3x6yeCgZ=$F?VS2Ps4R1 zVhMZ3z(f-zU(P#PVBD?sj@_penA%?7G+}Y0e{Nrov{wz{uWsVA2GIgyo-FPhJhYH$ zg|e($4gIGr=!C}*_Qw6<8BP7TFB)S2|0u~I_K0C6bPHLQnZre)Pe_x0zV|ADd`=@D z4r=1Vk<|0!A%C{#jL`zR`Mnb?JCtkPa@apZX$SM>R?yALjza@EEL&)y#R&X= z0j9yJ#^IV^)<3?PMBO13bXXu;I(_gc^jize*#`^1`q z{TqrsUVqC7fS`EsF}rp%b4UOPmys=8E_L!${(XJV)e8Gqs(m#8J_CVqS2wllH9n7N zjR7hFKwhhbvA@-VR%n&IUPP=V*uSOPE7na2fS9K%ItPv}WWaL_;TWJ+3s*|no+`g? z@3Bg8@8c=2+bOMYp{R!4SPvb z|42l0T3)nZOM%CY7`|(kvBhf~NB}VFv~abghwCr2;9hN?NZosKmcW-0qOn(OIi6kY zaqiri=f(RDP0ZG&U&%4ELpYe)0=$kBe$)ch-UjU7lK_fvSrdSPfoQ#!ue@>b?7)hX z<$rGkA~6z|2_|Q2n+PF50e~oH;aYJwXVstB`_+n9FIizvhF3zc97E7tR*iXhpVB$fh%L?jfU@~c<**_y_iNPhq#@GC9cDDJ{m`{CZhy-%&S_u)#2a8Le4 zx8b=h?_55AxI`d8LXk)+f^e}200cr*e*OA7I0G?YM+KnXh{@CzsP^_?|FH^y^Qn>V z8r0My{up!R0PogYJO=H}%Nly?%(b+$C(pbB2qeHD0s#Qr*q38uz63(30Mr(47Jqkj zQvasCQUEWV4F3QFHFZx+u}kw0W2Lw}qbIl_&A4tCG-A6hov?RV$dywU=fbEc7XZkW z%cWU1EvDfR2$N*jvj0j8Q*jpjpQ;u#LZa*AStl&Uk=T1*jtr_t%AU#d>^HhjU?#cB z7>RCjW|EuKG|64gN^(br3G5H*v436mv`7ygc0B+1=(~^JEH7@?b9ffr3Y{>stt6jh zDH%*{;Z{ix7Y&>#aQCZM*B&Zw>>c?=BOqb0%|!+tv-qw|_qdPCf@U z^5Wk!9K!LlTF>6WNFc%`ObBB@5T41QSW97460HCnJ;nW_)@GmGoua%)?Qhq9bcMd{ z-Xas5_gSne7_t=!*}@%o7c^;#PqRSI_C_Ajmn;@cbv4g)5QOropI zG~QKCPj*8(>%A@*ZS-3tmYc ze7h>#l?=xrKOPV+CqX2zma-Dy&k3%`Al>~9w6eWB&u942*6l}~*0bGLY;5xmw~!4t zqO6Cj7ER4Fy_;ox&~#R+%R4e0g7N%^1AYevsnyD5;|+eRrVnEeFH^K=5m~^rd!B38 znoqM?=RSg&?D`tQpFsFs2!Fs#`tt!zDAspjeHTDpLn|`<43Dd-JN?)6U*O{+?(97D YGcap%G!U4TzW@LL07*qoM6N<$f;|rj*#H0l delta 376 zcmV-;0f+wJ42T1e8Gi!+002a!ipBr{0CG@FR7C&)00000QX^ASO~8d9QzjR{g&T*y z9w$;Bhx-5rz5pkM08?uKZN5#1qobq0hKGlR!Lzfo{1_)FQim|cCs)N&iNl8|R8tzp z1S^VDzfmU=iziiz1HK;wD~ke#Qv)YXz#7IUFvbMGP*W?5Cx02X1fh{gEC2ui0d!JM zQvg8b*k%9#0K`c|K~#7FmCRKV!T=;#*)=I4;&JzU69fYS&+|-1i1QyJ_z)jp{=Epq5{Q#D3qu4TK0sddOHwaH0AdEp z;UAOA{16|YEpQmL$(H5X7Ac$9n@> W(H2Ll-7R$h0000 Date: Thu, 21 Nov 2024 19:26:35 +0100 Subject: [PATCH 20/25] Update Yarn documentation (4.5.1) --- lib/docs/scrapers/yarn.rb | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/docs/scrapers/yarn.rb b/lib/docs/scrapers/yarn.rb index 9d20bc8c..6539c7d5 100644 --- a/lib/docs/scrapers/yarn.rb +++ b/lib/docs/scrapers/yarn.rb @@ -12,7 +12,7 @@ module Docs Licensed under the BSD License. HTML - version 'Berry' do + version do self.release = '4.5.1' self.base_url = 'https://yarnpkg.com/' self.links = { @@ -25,6 +25,18 @@ module Docs options[:skip_patterns] = [/\Aapi/, /\Ablog/, /\Apackage/, /\Aassets/] end + version '3' do + self.release = '3.1.1' + self.base_url = 'https://v3.yarnpkg.com/' + self.links = { + home: 'https://v3.yarnpkg.com/', + code: 'https://github.com/yarnpkg/berry' + } + self.root_path = 'getting-started' + html_filters.push 'yarn/entries_berry', 'yarn/clean_html_berry', 'title' + options[:skip] = ['features', 'cli', 'configuration', 'advanced'] + options[:skip_patterns] = [/\Aapi/, /\Apackage/] end + version 'Classic' do self.release = '1.22.17' self.base_url = 'https://classic.yarnpkg.com/en/docs/' From 919425b8cf16003bb9632937d404ceab7f54fcf9 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Thu, 21 Nov 2024 22:09:37 +0100 Subject: [PATCH 21/25] Update PHP documentation (8.4) --- lib/docs/scrapers/php.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/docs/scrapers/php.rb b/lib/docs/scrapers/php.rb index 6d38c70c..b9900e82 100644 --- a/lib/docs/scrapers/php.rb +++ b/lib/docs/scrapers/php.rb @@ -5,7 +5,7 @@ module Docs self.name = 'PHP' self.type = 'php' - self.release = '8.3' + self.release = '8.4' self.base_url = 'https://www.php.net/manual/en/' self.root_path = 'index.html' self.initial_paths = %w( @@ -62,7 +62,7 @@ module Docs options[:skip_patterns] = [/mysqlnd/, /xdevapi/i] options[:attribution] = <<-HTML - © 1997–2023 The PHP Documentation Group
+ © 1997–2024 The PHP Documentation Group
Licensed under the Creative Commons Attribution License v3.0 or later. HTML From 18c6b7caeef09b706f2bb01ce6b487ebdeea8016 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 21 Nov 2024 21:43:41 +0000 Subject: [PATCH 22/25] chore(deps): update dependency minitest to v5.25.2 --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index a3acf6fa..a708f12b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -56,7 +56,7 @@ GEM logger (1.6.1) method_source (1.0.0) mini_portile2 (2.8.7) - minitest (5.25.1) + minitest (5.25.2) multi_json (1.15.0) mustermann (3.0.3) ruby2_keywords (~> 0.0.1) From 5800216f1b3f8fe94a399373c91ef5938e939c4d Mon Sep 17 00:00:00 2001 From: Scott Goley Date: Thu, 21 Nov 2024 21:49:50 -0500 Subject: [PATCH 23/25] +duckdb icons & source --- public/icons/docs/duckdb/16.png | Bin 0 -> 902 bytes public/icons/docs/duckdb/16@2x.png | Bin 0 -> 1566 bytes public/icons/docs/duckdb/SOURCE | 1 + 3 files changed, 1 insertion(+) create mode 100644 public/icons/docs/duckdb/16.png create mode 100644 public/icons/docs/duckdb/16@2x.png create mode 100644 public/icons/docs/duckdb/SOURCE diff --git a/public/icons/docs/duckdb/16.png b/public/icons/docs/duckdb/16.png new file mode 100644 index 0000000000000000000000000000000000000000..855df72e07579463f4069f526928fcecf748a151 GIT binary patch literal 902 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`jKx9jP7LeL$-D$|SkfJR9T^xl z_H+M9WCij$3p^r=85sBugD~Uq{1qucL9r6oh?3y^w370~qEv=}#LT=BJwMkF1yeo4 z@6F#Q166EEjqptK^weVD0CHFvq!?Kl7=bJ=AeM%*L2l7tWCn{f0ojI(ObmQLItqw0 z+gZTk89+7&Bmgl;Ka57Pl7X3lVFEh?3sBy`$k>2!0mMv@de#LHb0z`VAixAPg$b-O z$kGDHg6c9fFaXIWFHt)Z&px?XxX*I=yK25|t6ceG z?w|R+_x!`!dEfsr{n^-YL;GBZ_oI-~+}XnNN4O26VxJ$G=IT0qhqhg(=tfPh&_DOz z+j0HAaosSrICcLH{U9ZMulj@4-*&3Y2y?!iDAK&hULv79Soz?!zgE@UN0PPH-l}X5 z)zNsAH2=eGPW2a}%?|}~-K{v|_H&u9bM!ZtdDqf%C;Xbi=ioTe&v&NGYMD{i6y3=1 z{1(5^>;0U}kJcPpk+Ma;X`1bZkKEIyFRWC*qOB9JFyr*}$!n|+zTL%flKXB8+ph;t z3Nuz*Z!uXUyK3Dujf@_?H+z|u`>)vJbUD-MCT~HLbtlVL@$NBFx2hEt-dNY63vgE5~-L4Ur z8I~`5_%&Lhil@Kr!A5*q#W&)P>an6gt-4d{@+Rf0sz0 zTn{V#F((_;Vv_`?NPe%R9JT<}&n~R7u{i6`*5y*jTQ9*^;FAb8u2f=E-s$jDr*Pct<@KoQmE zmHJSf0k{4ENea3g#!eU?ni+T<9sk%cFngIjnpfb_5ES&Pcz2nzte4!Hue8zSP#=^R z%xRBW3i&;(tb=XJ0sy-5c7J2@T92>8n+eV1Khng%nIMz<2ri6mN+OD3e^tecJ{ zm#^&D+);stG4)qH#<2r^zdmNl-(mXc%Kl{Dido7+S5D%Q74 zYnLY?Rru0&>^ziaVpKXB8Z)14?$q3s991H-5m$S`V*Z@v*`#9%?i)-h1(P@#zS3#0 z;2e*eeU9m`fOtQJd#BvL%{IkUUCVd=gzu+6<8Ns)Gu1^7=Fl;8j-Awr< z143WL~iXso%!iju!kfy#A`)baZ+#f6B|(Y9O}==OZ1z z`_u6umt*Z;bzHQ2{r$3Z9IAwI-02#`XY1~XG1k#GV%P@OPjP0R^DvF^`;!a<{9d=Z z6Yy#vs=1#PH5uxsaLXp0e0NLExj2)y358H!^dXin5l)X4H->Qf7O$6OnpV6|F&XY1 z72)9vjyy(V7F#KIshH*8ldU_L7#c*6c`I$VR2-|GE=nMB8JffkO;`cjj*8=}%a_H{ z+t(Iw5rS77^{em4hbI@GR9fl-1AAvOKN^ZCSi(pznYq!#lvBXZ@A<=%b3o(!G&ig z4^Ji(n11@>ZpSJ?J!LdEPlY3snz5Ds>}Id)(b&XwM*kTS!|KdPYzTK1{S;*XAGPbSpb`dt1Of`y*V literal 0 HcmV?d00001 diff --git a/public/icons/docs/duckdb/SOURCE b/public/icons/docs/duckdb/SOURCE new file mode 100644 index 00000000..286d1738 --- /dev/null +++ b/public/icons/docs/duckdb/SOURCE @@ -0,0 +1 @@ +https://github.com/duckdb/duckdb/tree/main/logo \ No newline at end of file From deedda316149dbde221788c8142499cdb43933f2 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Sat, 23 Nov 2024 14:44:32 +0100 Subject: [PATCH 24/25] Update DuckDB documentation (1.1.3) --- assets/javascripts/news.json | 4 +++ lib/docs/filters/duckdb/attribution.rb | 12 ++++++++ lib/docs/filters/duckdb/clean_html.rb | 21 +++++++------ lib/docs/filters/duckdb/entries.rb | 2 +- lib/docs/scrapers/duckdb.rb | 41 +++++--------------------- 5 files changed, 34 insertions(+), 46 deletions(-) create mode 100644 lib/docs/filters/duckdb/attribution.rb diff --git a/assets/javascripts/news.json b/assets/javascripts/news.json index e56bd8dd..052f4918 100644 --- a/assets/javascripts/news.json +++ b/assets/javascripts/news.json @@ -1,4 +1,8 @@ [ + [ + "2024-11-23", + "New documentation: DuckDB" + ], [ "2024-08-20", "New documentation: Linux man pages" diff --git a/lib/docs/filters/duckdb/attribution.rb b/lib/docs/filters/duckdb/attribution.rb new file mode 100644 index 00000000..7591fdb8 --- /dev/null +++ b/lib/docs/filters/duckdb/attribution.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +module Docs + class Duckdb + class AttributionFilter < Docs::AttributionFilter + def attribution_link + url = current_url.to_s.sub! 'http://localhost:8000', 'https://duckdb.org' + %(#{url}) + end + end + end +end diff --git a/lib/docs/filters/duckdb/clean_html.rb b/lib/docs/filters/duckdb/clean_html.rb index ae518c7b..d739275e 100644 --- a/lib/docs/filters/duckdb/clean_html.rb +++ b/lib/docs/filters/duckdb/clean_html.rb @@ -3,27 +3,26 @@ module Docs class CleanHtmlFilter < Filter def call # First extract the main content - @doc = at_css('main') + @doc = at_css('#main_content_wrap', 'main') return doc if @doc.nil? + doc.prepend_child at_css('.title').remove + at_css('.title').name = 'h1' + # Remove navigation and header elements - css('.headerline', '.landingmenu', '.search_icon', '#sidebar', '.pagemeta', '.toc_menu', '.section-nav').remove + css('.headerline', '.headlinebar', '.landingmenu', '.search_icon', '#sidebar', '.pagemeta', '.toc_menu', '.section-nav').remove # Clean up code blocks - css('pre').each do |node| - # Detect language from class or parent div - if node['class']&.include?('sql') || node.at_css('code.sql') - node['data-language'] = 'sql' - elsif node['class']&.include?('language-sql') - node['data-language'] = 'sql' - end + css('div.highlighter-rouge').each do |node| + node['data-language'] = node['class'][/language-(\w+)/, 1] if node['class'] node.content = node.content.strip + node.name = 'pre' end - # Remove unnecessary attributes but keep essential ones + # Remove unnecessary attributes css('div, span, p').each do |node| node.remove_attribute('style') - node.remove_attribute('class') unless node['class'] =~ /highlight/ + node.remove_attribute('class') end # Remove empty elements diff --git a/lib/docs/filters/duckdb/entries.rb b/lib/docs/filters/duckdb/entries.rb index ea929022..cb98768a 100644 --- a/lib/docs/filters/duckdb/entries.rb +++ b/lib/docs/filters/duckdb/entries.rb @@ -2,7 +2,7 @@ module Docs class Duckdb class EntriesFilter < Docs::EntriesFilter def get_name - at_css('h1')&.content || 'DuckDB' + at_css('h1', '.title').content end def get_type diff --git a/lib/docs/scrapers/duckdb.rb b/lib/docs/scrapers/duckdb.rb index a160b3ef..98fb16ed 100644 --- a/lib/docs/scrapers/duckdb.rb +++ b/lib/docs/scrapers/duckdb.rb @@ -8,7 +8,13 @@ module Docs code: 'https://github.com/duckdb/duckdb' } + # https://duckdb.org/docs/guides/offline-copy.html + # curl -O https://duckdb.org/duckdb-docs.zip; bsdtar xf duckdb-docs.zip; cd duckdb-docs; python -m http.server + self.release = '1.1.3' + self.base_url = 'http://localhost:8000/docs/' + html_filters.push 'duckdb/entries', 'duckdb/clean_html' + text_filters.replace 'attribution', 'duckdb/attribution' options[:container] = '.documentation' @@ -29,41 +35,8 @@ module Docs Licensed under the MIT License. HTML - version '1.1' do - self.release = '1.1.x' - self.base_url = 'http://localhost:8000/docs/' - end - - # version '1.0' do - # self.release = '1.0.x' - # self.base_url = "https://duckdb.org/docs/archive/#{self.version}/" - - # html_filters.push 'duckdb/clean_html' - # end - - # version '0.9' do - # self.release = '0.9.x' - # self.base_url = "https://duckdb.org/docs/archive/#{self.version}/" - - # html_filters.push 'duckdb/clean_html' - # end - - # version '0.8' do - # self.release = '0.8.x' - # self.base_url = "https://duckdb.org/docs/archive/#{self.version}/" - - # html_filters.push 'duckdb/clean_html' - # end - - # version '0.7' do - # self.release = '0.7.x' - # self.base_url = "https://duckdb.org/docs/archive/#{self.version}/" - - # html_filters.push 'duckdb/clean_html' - # end - def get_latest_version(opts) - get_github_tags('duckdb', 'duckdb', opts) + get_github_tags('duckdb', 'duckdb', opts)[0]['name'] end end end From 1ee7402a63458e2f39625234e3de6165346651d6 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Wed, 27 Nov 2024 15:18:00 +0100 Subject: [PATCH 25/25] Update Vite documentation (6.0.1) --- lib/docs/scrapers/vite.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/docs/scrapers/vite.rb b/lib/docs/scrapers/vite.rb index e6ed5203..937d0916 100644 --- a/lib/docs/scrapers/vite.rb +++ b/lib/docs/scrapers/vite.rb @@ -22,10 +22,15 @@ module Docs html_filters.push 'vite/entries', 'vite/clean_html' version do - self.release = '5.4.11' + self.release = '6.0.1' self.base_url = 'https://vite.dev/' end + version '5' do + self.release = '5.4.11' + self.base_url = 'https://v5.vite.dev/' + end + version '4' do self.release = '4.5.5' self.base_url = 'https://v4.vite.dev/'