diff --git a/assets/javascripts/news.json b/assets/javascripts/news.json
index e56bd8dd..052f4918 100644
--- a/assets/javascripts/news.json
+++ b/assets/javascripts/news.json
@@ -1,4 +1,8 @@
[
+ [
+ "2024-11-23",
+ "New documentation: DuckDB"
+ ],
[
"2024-08-20",
"New documentation: Linux man pages"
diff --git a/lib/docs/filters/duckdb/attribution.rb b/lib/docs/filters/duckdb/attribution.rb
new file mode 100644
index 00000000..7591fdb8
--- /dev/null
+++ b/lib/docs/filters/duckdb/attribution.rb
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+module Docs
+  class Duckdb
+    class AttributionFilter < Docs::AttributionFilter
+      # Rewrites the local-mirror URL to duckdb.org; `sub` (not `sub!`) leaves other URLs intact instead of yielding nil.
+      def attribution_link
+        current_url.to_s.sub('http://localhost:8000', 'https://duckdb.org')
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/duckdb/clean_html.rb b/lib/docs/filters/duckdb/clean_html.rb
index ae518c7b..d739275e 100644
--- a/lib/docs/filters/duckdb/clean_html.rb
+++ b/lib/docs/filters/duckdb/clean_html.rb
@@ -3,27 +3,26 @@ module Docs
class CleanHtmlFilter < Filter
def call
# First extract the main content
- @doc = at_css('main')
+ @doc = at_css('#main_content_wrap', 'main')
return doc if @doc.nil?
+ doc.prepend_child at_css('.title').remove
+ at_css('.title').name = 'h1'
+
# Remove navigation and header elements
- css('.headerline', '.landingmenu', '.search_icon', '#sidebar', '.pagemeta', '.toc_menu', '.section-nav').remove
+ css('.headerline', '.headlinebar', '.landingmenu', '.search_icon', '#sidebar', '.pagemeta', '.toc_menu', '.section-nav').remove
# Clean up code blocks
- css('pre').each do |node|
- # Detect language from class or parent div
- if node['class']&.include?('sql') || node.at_css('code.sql')
- node['data-language'] = 'sql'
- elsif node['class']&.include?('language-sql')
- node['data-language'] = 'sql'
- end
+ css('div.highlighter-rouge').each do |node|
+ node['data-language'] = node['class'][/language-(\w+)/, 1] if node['class']
node.content = node.content.strip
+ node.name = 'pre'
end
- # Remove unnecessary attributes but keep essential ones
+ # Remove unnecessary attributes
css('div, span, p').each do |node|
node.remove_attribute('style')
- node.remove_attribute('class') unless node['class'] =~ /highlight/
+ node.remove_attribute('class')
end
# Remove empty elements
diff --git a/lib/docs/filters/duckdb/entries.rb b/lib/docs/filters/duckdb/entries.rb
index ea929022..cb98768a 100644
--- a/lib/docs/filters/duckdb/entries.rb
+++ b/lib/docs/filters/duckdb/entries.rb
@@ -2,7 +2,7 @@ module Docs
class Duckdb
class EntriesFilter < Docs::EntriesFilter
def get_name
- at_css('h1')&.content || 'DuckDB'
+ at_css('h1', '.title').content
end
def get_type
diff --git a/lib/docs/scrapers/duckdb.rb b/lib/docs/scrapers/duckdb.rb
index a160b3ef..98fb16ed 100644
--- a/lib/docs/scrapers/duckdb.rb
+++ b/lib/docs/scrapers/duckdb.rb
@@ -8,7 +8,13 @@ module Docs
code: 'https://github.com/duckdb/duckdb'
}
+    # Scraped from a local copy of the docs (https://duckdb.org/docs/guides/offline-copy.html) served on localhost:8000:
+    # curl -O https://duckdb.org/duckdb-docs.zip; bsdtar xf duckdb-docs.zip; cd duckdb-docs; python -m http.server
+ self.release = '1.1.3'
+ self.base_url = 'http://localhost:8000/docs/'
+
html_filters.push 'duckdb/entries', 'duckdb/clean_html'
+ text_filters.replace 'attribution', 'duckdb/attribution'
options[:container] = '.documentation'
@@ -29,41 +35,8 @@ module Docs
Licensed under the MIT License.
HTML
- version '1.1' do
- self.release = '1.1.x'
- self.base_url = 'http://localhost:8000/docs/'
- end
-
- # version '1.0' do
- # self.release = '1.0.x'
- # self.base_url = "https://duckdb.org/docs/archive/#{self.version}/"
-
- # html_filters.push 'duckdb/clean_html'
- # end
-
- # version '0.9' do
- # self.release = '0.9.x'
- # self.base_url = "https://duckdb.org/docs/archive/#{self.version}/"
-
- # html_filters.push 'duckdb/clean_html'
- # end
-
- # version '0.8' do
- # self.release = '0.8.x'
- # self.base_url = "https://duckdb.org/docs/archive/#{self.version}/"
-
- # html_filters.push 'duckdb/clean_html'
- # end
-
- # version '0.7' do
- # self.release = '0.7.x'
- # self.base_url = "https://duckdb.org/docs/archive/#{self.version}/"
-
- # html_filters.push 'duckdb/clean_html'
- # end
-
def get_latest_version(opts)
- get_github_tags('duckdb', 'duckdb', opts)
+ get_github_tags('duckdb', 'duckdb', opts)[0]['name']
end
end
end