diff --git a/assets/javascripts/news.json b/assets/javascripts/news.json
index e56bd8dd..052f4918 100644
--- a/assets/javascripts/news.json
+++ b/assets/javascripts/news.json
@@ -1,4 +1,8 @@
[
+ [
+ "2024-11-23",
+ "New documentation: DuckDB"
+ ],
[
"2024-08-20",
"New documentation: Linux man pages"
diff --git a/lib/docs/filters/duckdb/attribution.rb b/lib/docs/filters/duckdb/attribution.rb
new file mode 100644
index 00000000..7591fdb8
--- /dev/null
+++ b/lib/docs/filters/duckdb/attribution.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Docs
+  class Duckdb
+    # Attribution filter for DuckDB pages: rewrites the page URL so the
+    # attribution points at duckdb.org rather than a localhost origin.
+    class AttributionFilter < Docs::AttributionFilter
+      # Origin that is replaced in the page URL.
+      LOCAL_ORIGIN = 'http://localhost:8000'
+      # Origin that is substituted in its place.
+      PUBLIC_ORIGIN = 'https://duckdb.org'
+
+      # Returns the current page URL as a string with LOCAL_ORIGIN swapped
+      # for PUBLIC_ORIGIN.
+      #
+      # The non-destructive +sub+ is used on purpose: +sub!+ returns nil when
+      # nothing was replaced, which turned the link into an empty string for
+      # any URL that does not contain LOCAL_ORIGIN.
+      def attribution_link
+        url = current_url.to_s.sub(LOCAL_ORIGIN, PUBLIC_ORIGIN)
+        %(#{url})
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/duckdb/clean_html.rb b/lib/docs/filters/duckdb/clean_html.rb
new file mode 100644
index 00000000..d739275e
--- /dev/null
+++ b/lib/docs/filters/duckdb/clean_html.rb
@@ -0,0 +1,75 @@
+module Docs
+  class Duckdb
+    # Reduces a DuckDB documentation page to its main content and tidies the
+    # markup that remains.
+    class CleanHtmlFilter < Filter
+      # Navigation and header elements that are dropped from every page.
+      CHROME_SELECTORS = [
+        '.headerline', '.headlinebar', '.landingmenu', '.search_icon',
+        '#sidebar', '.pagemeta', '.toc_menu', '.section-nav'
+      ].freeze
+
+      # Runs the clean-up steps and returns the resulting document. If no
+      # main-content wrapper is found the document is returned unchanged.
+      def call
+        # Look the wrapper up before assigning it: storing nil in @doc would
+        # discard the document instead of leaving it untouched.
+        main = at_css('#main_content_wrap', 'main')
+        return doc unless main
+
+        @doc = main
+
+        promote_title
+        css(*CHROME_SELECTORS).remove
+        # Scripts go first so a wrapper holding only a script counts as empty.
+        css('script').remove
+        convert_code_blocks
+        strip_presentational_attributes
+        remove_empty_wrappers
+
+        doc
+      end
+
+      private
+
+      # Moves the `.title` element to the top of the document as an <h1>.
+      # Pages without a `.title` element are left alone.
+      def promote_title
+        title = at_css('.title')
+        return unless title
+
+        title.name = 'h1'
+        doc.prepend_child(title.remove)
+      end
+
+      # Flattens each `div.highlighter-rouge` to its stripped text, renames it
+      # to <pre>, and records the `language-*` class (if any) as data-language.
+      def convert_code_blocks
+        css('div.highlighter-rouge').each do |node|
+          language = node['class'].to_s[/language-(\w+)/, 1]
+          node['data-language'] = language if language
+          node.content = node.content.strip
+          node.name = 'pre'
+        end
+      end
+
+      # Drops `style` and `class` attributes from div, span and p elements.
+      def strip_presentational_attributes
+        css('div, span, p').each do |node|
+          node.remove_attribute('style')
+          node.remove_attribute('class')
+        end
+      end
+
+      # Removes div and span elements with no text, keeping any that wrap an
+      # image so text-free figures are not lost.
+      def remove_empty_wrappers
+        css('div, span').each do |node|
+          next if node.at_css('img, svg')
+
+          node.remove if node.content.strip.empty?
+        end
+      end
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/docs/filters/duckdb/entries.rb b/lib/docs/filters/duckdb/entries.rb
new file mode 100644
index 00000000..cb98768a
--- /dev/null
+++ b/lib/docs/filters/duckdb/entries.rb
@@ -0,0 +1,52 @@
+module Docs
+  class Duckdb
+    # Builds the index entries for DuckDB pages: the page name, its type, and
+    # one extra entry per anchored h2/h3 heading.
+    class EntriesFilter < Docs::EntriesFilter
+      # First subpath segment => type assigned to pages below it.
+      TYPE_BY_SECTION = {
+        'sql' => 'SQL Reference',
+        'api' => 'Client APIs',
+        'guides' => 'How-to Guides',
+        'data' => 'Data Import',
+        'operations_manual' => 'Operations Manual',
+        'dev' => 'Development',
+        'internals' => 'Internals',
+        'extensions' => 'Extensions',
+        'archive' => 'Archive'
+      }.freeze
+
+      # Type used when the subpath matches none of the sections above.
+      DEFAULT_TYPE = 'Documentation'
+
+      # Returns the stripped text of the first h1 / `.title` element.
+      # NOTE(review): when the page has neither, this defers to the parent
+      # class via `super` instead of failing on nil -- confirm that
+      # Docs::EntriesFilter provides a usable get_name.
+      def get_name
+        heading = at_css('h1', '.title')
+        heading ? heading.content.strip : super
+      end
+
+      # Returns the type for the current page, chosen by the first segment of
+      # `subpath` (the text before the first "/"); DEFAULT_TYPE otherwise.
+      def get_type
+        section = subpath.to_s[%r{\A([^/]+)/}, 1]
+        TYPE_BY_SECTION.fetch(section, DEFAULT_TYPE)
+      end
+
+      # Returns one [name, fragment id, type] triple per h2/h3 element that
+      # has an id. Whitespace runs in the heading text are collapsed to single
+      # spaces and headings with no text are skipped.
+      def additional_entries
+        page_type = get_type # same for every heading, so resolve it once
+
+        entries = css('h2[id]', 'h3[id]').map do |node|
+          name = node.content.strip.gsub(/[\r\n\t]/, ' ').squeeze(' ')
+          [name, node['id'], page_type]
+        end
+        entries.reject { |name, _id, _type| name.empty? }
+      end
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/docs/scrapers/duckdb.rb b/lib/docs/scrapers/duckdb.rb
new file mode 100644
index 00000000..98fb16ed
--- /dev/null
+++ b/lib/docs/scrapers/duckdb.rb
@@ -0,0 +1,49 @@
+module Docs
+  # Scraper definition for the DuckDB documentation.
+  class Duckdb < UrlScraper
+    self.name = 'DuckDB'
+    self.type = 'duckdb'
+    self.root_path = 'index.html'
+    self.links = {
+      home: 'https://duckdb.org/',
+      code: 'https://github.com/duckdb/duckdb'
+    }
+
+    # The docs are scraped from a locally served offline copy:
+    # https://duckdb.org/docs/guides/offline-copy.html
+    # curl -O https://duckdb.org/duckdb-docs.zip; bsdtar xf duckdb-docs.zip; cd duckdb-docs; python -m http.server
+    self.release = '1.1.3'
+    self.base_url = 'http://localhost:8000/docs/'
+
+    html_filters.push 'duckdb/entries', 'duckdb/clean_html'
+    text_filters.replace 'attribution', 'duckdb/attribution'
+
+    options[:container] = '.documentation'
+
+    options[:skip_patterns] = [
+      /installation/,
+      /archive/,
+      /reference/,
+    ]
+
+    # NOTE(review): these entries start with "docs/" although base_url already
+    # ends in "/docs/" -- confirm they match the paths they are meant to skip.
+    options[:skip] = %w(
+      docs/archive/
+      docs/installation/
+      docs/api/
+    )
+
+    options[:attribution] = <<-HTML
+      © Copyright 2018–2024 Stichting DuckDB Foundation
+      Licensed under the MIT License.
+    HTML
+
+    # Returns the name of the first tag GitHub lists for duckdb/duckdb, without
+    # a leading "v", so it has the same form as `release` above ('1.1.3').
+    # NOTE(review): assumes upstream tags look like "v1.1.3" -- confirm.
+    def get_latest_version(opts)
+      get_github_tags('duckdb', 'duckdb', opts)[0]['name'].sub(/\Av/, '')
+    end
+  end
+end
diff --git a/public/icons/docs/duckdb/16.png b/public/icons/docs/duckdb/16.png
new file mode 100644
index 00000000..855df72e
Binary files /dev/null and b/public/icons/docs/duckdb/16.png differ
diff --git a/public/icons/docs/duckdb/16@2x.png b/public/icons/docs/duckdb/16@2x.png
new file mode 100644
index 00000000..f128c8ca
Binary files /dev/null and b/public/icons/docs/duckdb/16@2x.png differ
diff --git a/public/icons/docs/duckdb/SOURCE b/public/icons/docs/duckdb/SOURCE
new file mode 100644
index 00000000..286d1738
--- /dev/null
+++ b/public/icons/docs/duckdb/SOURCE
@@ -0,0 +1 @@
+https://github.com/duckdb/duckdb/tree/main/logo
\ No newline at end of file