cleanup the code

pull/2392/head
Chaitanya Rahalkar 1 month ago
parent b7393f4d55
commit b218836a82

@ -1,243 +1,238 @@
module Docs module Docs
class Threejs class Threejs
class CleanHtmlFilter < Filter class CleanHtmlFilter < Filter
# Regular expressions for the square-bracket markup this filter rewrites,
# keyed by the construct each one recognises. Capture groups are listed in
# the order they appear in each pattern.
PATTERNS = {
# `[method:this name]( params )` -- captures: method name, raw parameter list.
method_this: /\[method:this\s+([^\]]+)\]\s*\((.*?)\)/,
# `[method:Type name]( params )` -- captures: return type, method name, raw parameter list.
method_return: /\[method:([^\s\]]+)\s+([^\]]+)\]\s*\((.*?)\)/,
# `[method:Type name]` not immediately followed by `(` -- captures: return type, method name.
method_no_params: /\[method:([^\s\]]+)\s+([^\]]+)\](?!\()/,
# `[property:Type name]` -- captures: type, property name.
property: /\[property:([^\]]+?)\s+([^\]]+?)\]/,
# `[example:tag Title]` -- captures: example tag, link title.
example_link: /\[example:([^\s\]]+)\s+([^\]]+)\]/,
# `[link:url text]` -- captures: url, link text.
external_link_text: /\[link:([^\s\]]+)\s+([^\]]+)\]/,
# `[link:url]` -- captures: url. This also matches the `[link:url text]` form,
# so it has to be applied after :external_link_text.
external_link: /\[link:([^\]]+)\]/,
# `[page:Path text]` -- captures: path, link text.
page_link_text: /\[page:([^\]]+?)\s+([^\]]+?)\]/,
# `[page:Path]` -- captures: path. This also matches the `[page:Path text]` form,
# so it has to be applied after :page_link_text.
page_link: /\[page:([^\]]+?)\]/,
# Text wrapped in single backticks -- captures: the text between them.
inline_code: /`([^`]+)`/,
# The literal `[name]` placeholder; no captures.
name_placeholder: /\[name\]/,
# `[param:Type name]` -- captures: type, parameter name.
constructor_param: /\[param:([^\]]+?)\s+([^\]]+?)\]/
}.freeze
def call def call
# Remove unnecessary elements remove_unnecessary_elements
wrap_code_blocks
process_sections
format_links
add_section_structure
format_notes
add_heading_attributes
doc
end
private
def remove_unnecessary_elements
css('head, script, style').remove css('head, script, style').remove
end
# Wrap code blocks with pre tags and add syntax highlighting
def wrap_code_blocks
css('code').each do |node| css('code').each do |node|
unless node.parent.name == 'pre' next if node.parent.name == 'pre'
pre = node.wrap('<pre>') pre = node.wrap('<pre>')
pre['data-language'] = 'javascript' pre['data-language'] = pre['class'] = 'language-javascript'
pre['class'] = 'language-javascript'
end
end end
end
def process_sections
# Handle source links # Handle source links
css('h2').each do |node| css('h2').each do |node|
if node.content.strip == 'Source' next unless node.content.strip == 'Source'
content = node.next_element&.inner_html handle_source_link(node)
if content
# Clean up any existing formatting
content = content.gsub(/<[^>]+>/, '')
# Extract the path from the content
if content =~ /src\/(.*?)\.js/
path = "/#{$1}.js"
formatted_link = %Q(<a class="reference external" href="https://github.com/mrdoob/three.js/blob/master/src#{path}">src#{path}</a>)
node.next_element.inner_html = formatted_link if node.next_element
end
end
end
end end
# Handle method signatures # Handle method signatures and properties
css('h3').each do |node| css('h3').each do |node|
content = node.inner_html content = node.inner_html
content = handle_method_signatures(content)
# Handle [method:this methodName]( param1, param2, ... ) format content = handle_properties(content)
content = content.gsub(/\[method:this\s+([^\]]+)\]\s*\((.*?)\)/) do |match|
method_name, params_str = $1, $2
# Format parameters
params = params_str.split(',').map do |param|
param = param.strip
if param.include?(' ')
type, name = param.split(' ', 2).map(&:strip)
"<span class='sig-param'><span class='sig-type'>#{type}</span> <span class='sig-name'>#{name}</span></span>"
else
"<span class='sig-param'>#{param}</span>"
end
end.join("<span class='sig-paren'>, </span>")
"<dt class='sig sig-object js' id='#{method_name}'>" \
"<span class='property'><span class='pre'>this</span></span>." \
"<span class='sig-name descname'>#{method_name}</span>" \
"<span class='sig-paren'>(</span>" \
"#{params}" \
"<span class='sig-paren'>)</span></dt>"
end
# Handle [method:returnType methodName]( param1, param2, ... ) format
content = content.gsub(/\[method:([^\s\]]+)\s+([^\]]+)\]\s*\((.*?)\)/) do |match|
return_type, method_name, params_str = $1, $2, $3
next if method_name.start_with?('this') # Skip if already handled above
# Format parameters
params = params_str.split(',').map do |param|
param = param.strip
if param.include?(' ')
type, name = param.split(' ', 2).map(&:strip)
"<span class='sig-param'><span class='sig-type'>#{type}</span> <span class='sig-name'>#{name}</span></span>"
else
"<span class='sig-param'>#{param}</span>"
end
end.join("<span class='sig-paren'>, </span>")
"<dt class='sig sig-object js' id='#{method_name}'>" \
"<span class='sig-name descname'>#{method_name}</span>" \
"<span class='sig-paren'>(</span>" \
"#{params}" \
"<span class='sig-paren'>)</span>" \
"<span class='sig-returns'><span class='sig-colon'>:</span> " \
"<span class='sig-type'>#{return_type}</span></span></dt>"
end
# Handle [method:returnType methodName] format (no parameters)
content = content.gsub(/\[method:([^\s\]]+)\s+([^\]]+)\](?!\()/) do |match|
return_type, method_name = $1, $2
"<dt class='sig sig-object js' id='#{method_name}'>" \
"<span class='sig-name descname'>#{method_name}</span>" \
"<span class='sig-paren'>(</span>" \
"<span class='sig-paren'>)</span>" \
"<span class='sig-returns'><span class='sig-colon'>:</span> " \
"<span class='sig-type'>#{return_type}</span></span></dt>"
end
node.inner_html = content node.inner_html = content
end end
# Handle [name] placeholders in headers and constructor # Handle name placeholders and constructor params
css('h1, h3').each do |node| css('h1, h3').each do |node|
content = node.inner_html content = node.inner_html
content = handle_name_placeholders(content)
# Replace [name] with class name content = format_constructor_params(content)
content = content.gsub(/\[name\]/) do
name = slug.split('/').last.gsub('.html', '')
"<span class='descname'>#{name}</span>"
end
# Format constructor parameters
content = content.gsub(/\[param:([^\]]+?)\s+([^\]]+?)\]/) do |match|
type, name = $1, $2
"<span class='sig-param'><span class='sig-type'>#{type}</span> <code class='sig-name'>#{name}</code></span>"
end
node.inner_html = content node.inner_html = content
end end
end
# Clean up property formatting def handle_source_link(node)
css('h3').each do |node| content = node.next_element&.inner_html
node.inner_html = node.inner_html.gsub(/\[property:([^\]]+?)\s+([^\]]+?)\]/) do |match| return unless content
type, name = $1, $2 content = content.gsub(/<[^>]+>/, '')
"<dt class='sig sig-object js'>" \ if content =~ /src\/(.*?)\.js/
"<span class='sig-name descname'>#{name}</span>" \ path = "/#{$1}.js"
"<span class='sig-colon'>:</span> " \ formatted_link = %Q(<a class="reference external" href="https://github.com/mrdoob/three.js/blob/master/src#{path}">src#{path}</a>)
"<span class='sig-type'>#{type}</span></dt>" node.next_element.inner_html = formatted_link if node.next_element
end
end
# Rewrites every `[method:...]` marker in +content+ into signature markup.
#
# Three passes run in order, each delegating to format_method_signature:
#   1. `[method:this name]( params )`  -- chainable methods
#   2. `[method:Type name]( params )`  -- methods with a return type
#   3. `[method:Type name]`            -- methods written without a parameter list
#
# content - HTML string (the inner HTML of a heading).
#
# Returns a new string; +content+ itself is not modified.
def handle_method_signatures(content)
  chainable_done = content.gsub(PATTERNS[:method_this]) do
    format_method_signature('this', $1, $2)
  end

  # No "skip if already handled" guard here: pass 1 has already consumed every
  # `[method:this ...](...)` marker, and bailing out of a gsub block with
  # `next` yields nil, which would replace the whole match with an empty
  # string (erasing any method whose name happens to start with "this").
  with_params_done = chainable_done.gsub(PATTERNS[:method_return]) do
    format_method_signature($1, $2, $3, true)
  end

  with_params_done.gsub(PATTERNS[:method_no_params]) do
    format_method_signature($1, $2, nil, true)
  end
end
# Builds the `<dt>` signature markup for a single method.
#
# type_or_this - the string 'this' for a chainable method (rendered with a
#                leading "this." prefix), otherwise the return type.
# name         - method name; shown in the signature and used as the element id.
# params_str   - raw comma-separated parameter list, or nil when the marker
#                had no parameter list.
# with_return  - when true, appends ": <type_or_this>" as the return type.
#
# Returns the markup as a String.
def format_method_signature(type_or_this, name, params_str, with_return = false)
  # Blank entries (from a whitespace-only list such as "( )" or a trailing
  # comma) are dropped so they do not render as empty parameter spans.
  params = params_str.to_s.split(',').map(&:strip).reject(&:empty?)
                     .map { |param| format_parameter(param) }
                     .join("<span class='sig-paren'>, </span>")

  # Collected as parts and joined once, so no string is mutated in place.
  parts = ["<dt class='sig sig-object js' id='#{name}'>"]
  parts << "<span class='property'><span class='pre'>this</span></span>." if type_or_this == 'this'
  parts << "<span class='sig-name descname'>#{name}</span>" \
           "<span class='sig-paren'>(</span>" \
           "#{params}" \
           "<span class='sig-paren'>)</span>"
  if with_return
    parts << "<span class='sig-returns'><span class='sig-colon'>:</span> " \
             "<span class='sig-type'>#{type_or_this}</span></span>"
  end
  parts << '</dt>'
  parts.join
end
# Wraps one parameter in signature markup.
#
# A parameter containing a space is treated as "Type name" and split on the
# first run of whitespace; anything else is emitted as a bare parameter.
#
# param - a single parameter string.
#
# Returns the markup as a String.
def format_parameter(param)
  return "<span class='sig-param'>#{param}</span>" unless param.include?(' ')

  param_type, param_name = param.split(' ', 2).map(&:strip)
  "<span class='sig-param'><span class='sig-type'>#{param_type}</span> <span class='sig-name'>#{param_name}</span></span>"
end
# Replaces every `[property:Type name]` marker in +content+ with `<dt>`
# markup showing the property name followed by ": Type".
#
# content - HTML string to scan.
#
# Returns a new string with the markers rewritten.
def handle_properties(content)
  content.gsub(PATTERNS[:property]) do
    prop_type = Regexp.last_match(1)
    prop_name = Regexp.last_match(2)
    [
      "<dt class='sig sig-object js'>",
      "<span class='sig-name descname'>#{prop_name}</span>",
      "<span class='sig-colon'>:</span> ",
      "<span class='sig-type'>#{prop_type}</span></dt>"
    ].join
  end
end
# Replaces each `[name]` placeholder in +content+ with a span holding the
# last path segment of `slug`, with any ".html" removed.
#
# content - HTML string to scan.
#
# Returns a new string; `slug` is only consulted when a placeholder is found.
def handle_name_placeholders(content)
  content.gsub(PATTERNS[:name_placeholder]) { "<span class='descname'>#{slug.split('/').last.gsub('.html', '')}</span>" }
end
# Replaces every `[param:Type name]` marker in +content+ with parameter
# markup: the type in a span and the name in a `<code>` element.
#
# content - HTML string to scan.
#
# Returns a new string with the markers rewritten.
def format_constructor_params(content)
  content.gsub(PATTERNS[:constructor_param]) do
    captures = Regexp.last_match
    "<span class='sig-param'><span class='sig-type'>#{captures[1]}</span> <code class='sig-name'>#{captures[2]}</code></span>"
  end
end
def format_links
css('*').each do |node| css('*').each do |node|
next if node.text? next if node.text?
# Handle example links [example:tag Title] content = node.inner_html
node.inner_html = node.inner_html.gsub(/\[example:([^\s\]]+)\s+([^\]]+)\]/) do |match| .gsub(PATTERNS[:example_link]) { create_external_link("https://threejs.org/examples/##{$1}", $2) }
tag, title = $1, $2 .gsub(PATTERNS[:external_link_text]) { create_external_link($1, $2) }
"<a class='reference external' href='https://threejs.org/examples/##{tag}'>#{title}</a>" .gsub(PATTERNS[:external_link]) { create_external_link($1, $1) }
end .gsub(PATTERNS[:page_link_text]) { create_internal_link($1, $2) }
.gsub(PATTERNS[:page_link]) { create_internal_link($1, $1) }
# Handle external links with [link:url text] format
node.inner_html = node.inner_html.gsub(/\[link:([^\s\]]+)\s+([^\]]+)\]/) do |match| node.inner_html = content
url, text = $1, $2 end
"<a class='reference external' href='#{url}'>#{text}</a>"
end
# Handle external links with [link:url] format normalize_href_attributes
node.inner_html = node.inner_html.gsub(/\[link:([^\]]+)\]/) do |match| end
url = $1
"<a class='reference external' href='#{url}'>#{url}</a>"
end
# Handle internal page links with text def create_external_link(url, text)
node.inner_html = node.inner_html.gsub(/\[page:([^\]]+?)\s+([^\]]+?)\]/) do %Q(<a class='reference external' href='#{url}'>#{text}</a>)
path, text = $1, $2 end
"<a class='reference internal' href='#{path.downcase}'><code class='xref js js-#{path.downcase}'>#{text}</code></a>"
end
# Handle internal page links without text def create_internal_link(path, text)
node.inner_html = node.inner_html.gsub(/\[page:([^\]]+?)\]/) do |match| %Q(<a class='reference internal' href='#{path.downcase}'><code class='xref js js-#{path.downcase}'>#{text}</code></a>)
path = $1 end
"<a class='reference internal' href='#{path.downcase}'><code class='xref js js-#{path.downcase}'>#{path}</code></a>"
end
end
# Fix all href attributes to be lowercase and remove .html def normalize_href_attributes
css('a[href]').each do |link| css('a[href]').each do |link|
next if link['href'].start_with?('http') next if link['href'].start_with?('http')
link['href'] = link['href'].remove('../').downcase.sub(/\.html$/, '') link['href'] = link['href'].remove('../').downcase.sub(/\.html$/, '')
link['class'] = 'reference internal' link['class'] = 'reference internal'
end end
end
# Add section classes def add_section_structure
css('h2').each do |node| css('h2').each do |node|
node['class'] = 'section-title' node['class'] = 'section-title'
section = node.next_element section = node.next_element
if section next unless section
wrapper = doc.document.create_element('div')
wrapper['class'] = 'section' wrapper = doc.document.create_element('div')
node.after(wrapper) wrapper['class'] = 'section'
wrapper.add_child(node) node.after(wrapper)
current = section wrapper.add_child(node)
while current && current.name != 'h2'
next_el = current.next current = section
wrapper.add_child(current) while current && current.name != 'h2'
current = next_el next_el = current.next
end wrapper.add_child(current)
current = next_el
end end
end end
# Format description paragraphs css('p.desc').each { |node| node['class'] = 'section-desc' }
css('p.desc').each do |node| end
node['class'] = 'section-desc'
end
# Handle inline code/backticks in text def format_notes
css('p, li, dt, dd').each do |node| css('p').each do |node|
next if node.at_css('pre') # Skip if contains a code block next unless node.content.start_with?('Note:')
# Replace backticks with proper code formatting wrapper = doc.document.create_element('div')
node.inner_html = node.inner_html.gsub(/`([^`]+)`/) do |match| wrapper['class'] = 'admonition note'
code = $1
"<code class='docutils literal notranslate'><span class='pre'>#{code}</span></code>" title = doc.document.create_element('p')
end title['class'] = 'first admonition-title'
title.content = 'Note'
content = doc.document.create_element('p')
content['class'] = 'last'
content.inner_html = node.inner_html.sub('Note:', '').strip
wrapper.add_child(title)
wrapper.add_child(content)
node.replace(wrapper)
end end
end
# Handle inline code in property descriptions def add_heading_attributes
css('.property-type').each do |node|
node.inner_html = node.inner_html.gsub(/`([^`]+)`/) do |match|
code = $1
"<code class='docutils literal notranslate'><span class='pre'>#{code}</span></code>"
end
end
# Add proper heading IDs and classes
css('h1, h2, h3, h4').each do |node| css('h1, h2, h3, h4').each do |node|
node['id'] ||= node.content.strip.downcase.gsub(/[^\w]+/, '-') node['id'] ||= node.content.strip.downcase.gsub(/[^\w]+/, '-')
existing_class = node['class'].to_s existing_class = node['class'].to_s
node['class'] = "#{existing_class} section-header" node['class'] = "#{existing_class} section-header"
end end
# Add note styling format_inline_code
css('p').each do |node| end
if node.content.start_with?('Note:')
wrapper = doc.document.create_element('div') def format_inline_code
wrapper['class'] = 'admonition note' selectors = ['p', 'li', 'dt', 'dd', '.property-type'].join(', ')
css(selectors).each do |node|
title = doc.document.create_element('p') next if node.at_css('pre')
title['class'] = 'first admonition-title' node.inner_html = node.inner_html.gsub(PATTERNS[:inline_code]) do |match|
title.content = 'Note' "<code class='docutils literal notranslate'><span class='pre'>#{$1}</span></code>"
content = doc.document.create_element('p')
content['class'] = 'last'
content.inner_html = node.inner_html.sub('Note:', '').strip
wrapper.add_child(title)
wrapper.add_child(content)
node.replace(wrapper)
end end
end end
doc
end end
end end
end end

Loading…
Cancel
Save