diff --git a/assets/javascripts/news.json b/assets/javascripts/news.json
index e56bd8dd..052f4918 100644
--- a/assets/javascripts/news.json
+++ b/assets/javascripts/news.json
@@ -1,4 +1,8 @@
 [
+  [
+    "2024-11-23",
+    "New documentation: DuckDB"
+  ],
   [
     "2024-08-20",
     "New documentation: Linux man pages"
diff --git a/lib/docs/filters/duckdb/attribution.rb b/lib/docs/filters/duckdb/attribution.rb
new file mode 100644
index 00000000..7591fdb8
--- /dev/null
+++ b/lib/docs/filters/duckdb/attribution.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+module Docs
+  class Duckdb
+    # Attribution filter for DuckDB pages. The scraper reads the docs from a
+    # local server, so the link is rewritten to the public duckdb.org origin.
+    class AttributionFilter < Docs::AttributionFilter
+      # Returns the current page URL as a string with the local origin
+      # replaced by https://duckdb.org.
+      #
+      # String#sub is used instead of String#sub!: sub! returns nil when
+      # nothing is replaced, which would leave the link empty for any URL
+      # that does not contain the local origin.
+      def attribution_link
+        url = current_url.to_s.sub('http://localhost:8000', 'https://duckdb.org')
+        # NOTE(review): this yields the bare URL only - confirm whether the
+        # parent filter expects link markup here.
+        %(#{url})
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/duckdb/clean_html.rb b/lib/docs/filters/duckdb/clean_html.rb
new file mode 100644
index 00000000..d739275e
--- /dev/null
+++ b/lib/docs/filters/duckdb/clean_html.rb
@@ -0,0 +1,52 @@
+module Docs
+  class Duckdb
+    # Reduces a DuckDB documentation page to its main content and strips
+    # navigation, styling attributes, empty wrappers and scripts.
+    class CleanHtmlFilter < Filter
+      # Cleans the document in place and returns it.
+      def call
+        # Narrow the document to the main content. If no main container is
+        # found, hand back the document untouched instead of nil.
+        main = at_css('#main_content_wrap', 'main')
+        return doc if main.nil?
+
+        @doc = main
+
+        # Move the page title to the top and turn it into an <h1>. Pages
+        # without a .title element are left alone instead of raising.
+        if (title = at_css('.title'))
+          heading = doc.prepend_child(title.remove)
+          heading.name = 'h1'
+        end
+
+        # Remove navigation and header elements
+        css('.headerline', '.headlinebar', '.landingmenu', '.search_icon', '#sidebar', '.pagemeta', '.toc_menu', '.section-nav').remove
+
+        # Turn highlighted code wrappers into plain <pre> blocks
+        css('div.highlighter-rouge').each do |node|
+          # Only tag the block when the class actually names a language.
+          language = node['class'].to_s[/language-(\w+)/, 1]
+          node['data-language'] = language if language
+          node.content = node.content.strip
+          node.name = 'pre'
+        end
+
+        # Remove unnecessary attributes
+        css('div, span, p').each do |node|
+          node.remove_attribute('style')
+          node.remove_attribute('class')
+        end
+
+        # Remove elements with no text content
+        css('div, span').each do |node|
+          node.remove if node.content.strip.empty?
+        end
+
+        # Remove script tags
+        css('script').remove
+
+        doc
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/duckdb/entries.rb b/lib/docs/filters/duckdb/entries.rb
new file mode 100644
index 00000000..cb98768a
--- /dev/null
+++ b/lib/docs/filters/duckdb/entries.rb
@@ -0,0 +1,49 @@
+module Docs
+  class Duckdb
+    # Builds the index entries (page name, type and in-page headings) for
+    # DuckDB documentation pages.
+    class EntriesFilter < Docs::EntriesFilter
+      # Page name: text of the first element matching h1 or .title, trimmed.
+      def get_name
+        at_css('h1', '.title').content.strip
+      end
+
+      # Entry type, chosen from the leading directory of the page's subpath;
+      # anything unrecognised falls under 'Documentation'.
+      def get_type
+        case subpath
+        when /\Asql\//
+          'SQL Reference'
+        when /\Aapi\//
+          'Client APIs'
+        when /\Aguides\//
+          'How-to Guides'
+        when /\Adata\//
+          'Data Import'
+        when /\Aoperations_manual\//
+          'Operations Manual'
+        when /\Adev\//
+          'Development'
+        when /\Ainternals\//
+          'Internals'
+        when /\Aextensions\//
+          'Extensions'
+        when /\Aarchive\//
+          'Archive'
+        else
+          'Documentation'
+        end
+      end
+
+      # One extra entry per <h2>/<h3> that has an id, as
+      # [name, fragment id, type], with whitespace in the name collapsed.
+      def additional_entries
+        type = get_type # same for every heading on the page
+        css('h2[id]', 'h3[id]').map do |node|
+          name = node.content.strip.gsub(/[\r\n\t]/, ' ').squeeze(' ')
+          [name, node['id'], type]
+        end
+      end
+    end
+  end
+end
diff --git a/lib/docs/scrapers/duckdb.rb b/lib/docs/scrapers/duckdb.rb
new file mode 100644
index 00000000..98fb16ed
--- /dev/null
+++ b/lib/docs/scrapers/duckdb.rb
@@ -0,0 +1,45 @@
+module Docs
+  # Scraper definition for the DuckDB documentation.
+  class Duckdb < UrlScraper
+    self.name = 'DuckDB'
+    self.type = 'duckdb'
+    self.root_path = 'index.html'
+    self.links = {
+      home: 'https://duckdb.org/',
+      code: 'https://github.com/duckdb/duckdb'
+    }
+
+    # The docs are scraped from a local copy of the offline archive:
+    # https://duckdb.org/docs/guides/offline-copy.html
+    # curl -O https://duckdb.org/duckdb-docs.zip; bsdtar xf duckdb-docs.zip; cd duckdb-docs; python -m http.server
+    self.release = '1.1.3'
+    self.base_url = 'http://localhost:8000/docs/'
+
+    html_filters.push 'duckdb/entries', 'duckdb/clean_html'
+    text_filters.replace 'attribution', 'duckdb/attribution'
+
+    options[:container] = '.documentation'
+
+    options[:skip_patterns] = [
+      /installation/,
+      /archive/,
+      /reference/,
+    ]
+
+    options[:skip] = %w(
+      docs/archive/
+      docs/installation/
+      docs/api/
+    )
+
+    options[:attribution] = <<-HTML
+      © Copyright 2018–2024 Stichting DuckDB Foundation
+      Licensed under the MIT License.
+    HTML
+
+    # Name of the first tag returned by get_github_tags for duckdb/duckdb.
+    def get_latest_version(opts)
+      get_github_tags('duckdb', 'duckdb', opts)[0]['name']
+    end
+  end
+end
diff --git a/public/icons/docs/duckdb/16.png b/public/icons/docs/duckdb/16.png
new file mode 100644
index 00000000..855df72e
Binary files /dev/null and b/public/icons/docs/duckdb/16.png differ
diff --git a/public/icons/docs/duckdb/16@2x.png b/public/icons/docs/duckdb/16@2x.png
new file mode 100644
index 00000000..f128c8ca
Binary files /dev/null and b/public/icons/docs/duckdb/16@2x.png differ
diff --git a/public/icons/docs/duckdb/SOURCE b/public/icons/docs/duckdb/SOURCE
new file mode 100644
index 00000000..286d1738
--- /dev/null
+++ b/public/icons/docs/duckdb/SOURCE
@@ -0,0 +1 @@
+https://github.com/duckdb/duckdb/tree/main/logo
\ No newline at end of file