From deedda316149dbde221788c8142499cdb43933f2 Mon Sep 17 00:00:00 2001
From: Simon Legner
Date: Sat, 23 Nov 2024 14:44:32 +0100
Subject: [PATCH] Update DuckDB documentation (1.1.3)

---
Review notes (free-form text in this section is ignored by `git am`):

* attribution.rb: use `String#sub` rather than `sub!`; `sub!` returns nil
  when the URL does not contain the localhost prefix, which left the
  attribution link empty.
* clean_html.rb: only promote `.title` to a leading h1 when the element
  exists, instead of calling methods on nil.
* entries.rb: keep the nil-safe lookup and the 'DuckDB' fallback name.
* duckdb.rb: strip the leading "v" from the newest GitHub tag name so it
  can be compared with `release`.
* NOTE(review): `%(#{url})` only echoes the URL; confirm no markup was lost.
* The post-image blob ids on the `index` lines predate these tweaks.

 assets/javascripts/news.json           |  4 +++
 lib/docs/filters/duckdb/attribution.rb | 15 +++++++++++
 lib/docs/filters/duckdb/clean_html.rb  | 24 ++++++++++--------
 lib/docs/filters/duckdb/entries.rb     |  3 ++-
 lib/docs/scrapers/duckdb.rb            | 42 ++++++--------------------
 5 files changed, 42 insertions(+), 46 deletions(-)
 create mode 100644 lib/docs/filters/duckdb/attribution.rb

diff --git a/assets/javascripts/news.json b/assets/javascripts/news.json
index e56bd8dd..052f4918 100644
--- a/assets/javascripts/news.json
+++ b/assets/javascripts/news.json
@@ -1,4 +1,8 @@
 [
+  [
+    "2024-11-23",
+    "New documentation: DuckDB"
+  ],
   [
     "2024-08-20",
     "New documentation: Linux man pages"
diff --git a/lib/docs/filters/duckdb/attribution.rb b/lib/docs/filters/duckdb/attribution.rb
new file mode 100644
index 00000000..7591fdb8
--- /dev/null
+++ b/lib/docs/filters/duckdb/attribution.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module Docs
+  class Duckdb
+    # Rewrites the attribution URL from the local mirror (localhost:8000)
+    # to the public duckdb.org site.
+    class AttributionFilter < Docs::AttributionFilter
+      def attribution_link
+        # Non-bang `sub`: `sub!` returns nil when nothing was replaced.
+        url = current_url.to_s.sub('http://localhost:8000', 'https://duckdb.org')
+        %(#{url})
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/duckdb/clean_html.rb b/lib/docs/filters/duckdb/clean_html.rb
index ae518c7b..d739275e 100644
--- a/lib/docs/filters/duckdb/clean_html.rb
+++ b/lib/docs/filters/duckdb/clean_html.rb
@@ -3,27 +3,29 @@ module Docs
     class CleanHtmlFilter < Filter
       def call
         # First extract the main content
-        @doc = at_css('main')
+        @doc = at_css('#main_content_wrap', 'main')
         return doc if @doc.nil?
 
+        # Promote the page title to a leading <h1>, if the page has one
+        if (title = at_css('.title'))
+          title.name = 'h1'
+          doc.prepend_child title.remove
+        end
+
         # Remove navigation and header elements
-        css('.headerline', '.landingmenu', '.search_icon', '#sidebar', '.pagemeta', '.toc_menu', '.section-nav').remove
+        css('.headerline', '.headlinebar', '.landingmenu', '.search_icon', '#sidebar', '.pagemeta', '.toc_menu', '.section-nav').remove
 
         # Clean up code blocks
-        css('pre').each do |node|
-          # Detect language from class or parent div
-          if node['class']&.include?('sql') || node.at_css('code.sql')
-            node['data-language'] = 'sql'
-          elsif node['class']&.include?('language-sql')
-            node['data-language'] = 'sql'
-          end
+        css('div.highlighter-rouge').each do |node|
+          node['data-language'] = node['class'][/language-(\w+)/, 1] if node['class']
           node.content = node.content.strip
+          node.name = 'pre'
         end
 
-        # Remove unnecessary attributes but keep essential ones
+        # Remove unnecessary attributes
         css('div, span, p').each do |node|
           node.remove_attribute('style')
-          node.remove_attribute('class') unless node['class'] =~ /highlight/
+          node.remove_attribute('class')
         end
 
         # Remove empty elements
diff --git a/lib/docs/filters/duckdb/entries.rb b/lib/docs/filters/duckdb/entries.rb
index ea929022..cb98768a 100644
--- a/lib/docs/filters/duckdb/entries.rb
+++ b/lib/docs/filters/duckdb/entries.rb
@@ -2,7 +2,8 @@ module Docs
   class Duckdb
     class EntriesFilter < Docs::EntriesFilter
       def get_name
-        at_css('h1')&.content || 'DuckDB'
+        # Fall back to a generic name when the page has no heading or title
+        at_css('h1', '.title')&.content || 'DuckDB'
       end
 
       def get_type
diff --git a/lib/docs/scrapers/duckdb.rb b/lib/docs/scrapers/duckdb.rb
index a160b3ef..98fb16ed 100644
--- a/lib/docs/scrapers/duckdb.rb
+++ b/lib/docs/scrapers/duckdb.rb
@@ -8,7 +8,13 @@ module Docs
       code: 'https://github.com/duckdb/duckdb'
     }
 
+    # https://duckdb.org/docs/guides/offline-copy.html
+    # curl -O https://duckdb.org/duckdb-docs.zip; bsdtar xf duckdb-docs.zip; cd duckdb-docs; python -m http.server
+    self.release = '1.1.3'
+    self.base_url = 'http://localhost:8000/docs/'
+
     html_filters.push 'duckdb/entries', 'duckdb/clean_html'
+    text_filters.replace 'attribution', 'duckdb/attribution'
 
     options[:container] = '.documentation'
 
@@ -29,41 +35,9 @@ module Docs
       Licensed under the MIT License.
     HTML
 
-    version '1.1' do
-      self.release = '1.1.x'
-      self.base_url = 'http://localhost:8000/docs/'
-    end
-
-    # version '1.0' do
-    #   self.release = '1.0.x'
-    #   self.base_url = "https://duckdb.org/docs/archive/#{self.version}/"
-
-    #   html_filters.push 'duckdb/clean_html'
-    # end
-
-    # version '0.9' do
-    #   self.release = '0.9.x'
-    #   self.base_url = "https://duckdb.org/docs/archive/#{self.version}/"
-
-    #   html_filters.push 'duckdb/clean_html'
-    # end
-
-    # version '0.8' do
-    #   self.release = '0.8.x'
-    #   self.base_url = "https://duckdb.org/docs/archive/#{self.version}/"
-
-    #   html_filters.push 'duckdb/clean_html'
-    # end
-
-    # version '0.7' do
-    #   self.release = '0.7.x'
-    #   self.base_url = "https://duckdb.org/docs/archive/#{self.version}/"
-
-    #   html_filters.push 'duckdb/clean_html'
-    # end
-
     def get_latest_version(opts)
-      get_github_tags('duckdb', 'duckdb', opts)
+      # Tag names look like "v1.1.3"; drop the "v" to match `release`
+      get_github_tags('duckdb', 'duckdb', opts).first['name'].sub(/\Av/, '')
     end
   end
 end