diff --git a/lib/docs/filters/yarn/clean_html_berry.rb b/lib/docs/filters/yarn/clean_html_berry.rb
index 96b3ee53..8a28ce25 100644
--- a/lib/docs/filters/yarn/clean_html_berry.rb
+++ b/lib/docs/filters/yarn/clean_html_berry.rb
@@ -2,45 +2,18 @@ module Docs
class Yarn
class CleanHtmlBerryFilter < Filter
def call
- if slug.empty?
- @doc = at_css('main')
- css(
- (['div:first-child'] * 3).join('>'), # Tagline
- 'img',
- 'hr', # Footer
- 'hr + div', # Footer
- ).remove
-
- css('a').each do |link|
- link.name = 'div'
- link.css('h3').each do |node|
- node.replace("
")
- end
- end
-
- return doc
- end
-
- @doc = at_css('article')
- # Heading & edit link
- css('h1', 'h1 + a').remove unless slug.start_with?('configuration')
-
- if slug.start_with?('cli')
- css('.header-code').each do |node|
- node.name = 'span'
- end
- end
-
- if slug.start_with?('configuration')
- css('h1', 'h2').each do |node|
- node.name = node.name.sub(/\d/) { |i| i.to_i + 1 }
- end
- end
+ @doc = at_css('main .container div.theme-doc-markdown.markdown')
css('*').each do |node|
node.remove_attribute('style')
end
+ css('pre').each do |node| # swap each <pre>'s markup for the plain text of its .token-line elements
+ lang = node['class'].to_s[/language-(\w+)/, 1] # class attribute may be missing; nil.to_s avoids NoMethodError
+ node['data-language'] = lang if lang
+ node.content = node.css('.token-line').map(&:content).join("\n") if node.at_css('.token-line') # else keep as is
+ end
+
doc
end
end
diff --git a/lib/docs/filters/yarn/entries_berry.rb b/lib/docs/filters/yarn/entries_berry.rb
index 44c1e18e..6b99bfa6 100644
--- a/lib/docs/filters/yarn/entries_berry.rb
+++ b/lib/docs/filters/yarn/entries_berry.rb
@@ -2,26 +2,11 @@ module Docs
class Yarn
class EntriesBerryFilter < Docs::EntriesFilter
def get_name
- if slug.start_with?('configuration')
- filename = at_css('main .active code')
- content = filename.content
- return filename.parent.content.sub content, " (#{content})"
- end
-
- name = at_css('h1').content
-
- if slug.start_with?('getting-started')
- active_link = at_css('main .active')
- links = active_link.parent.children.to_a
- name.prepend "#{links.index(active_link) + 1}. "
- end
-
- name
+ at_css('main header h1').content
end
def get_type
- return 'CLI' if slug.start_with?('sdks', 'pnpify')
- at_css('header .active').content
+ at_css('nav.navbar a.navbar__item.navbar__link.navbar__link--active').content
end
end
end
diff --git a/lib/docs/scrapers/yarn.rb b/lib/docs/scrapers/yarn.rb
index 8cc49260..9d20bc8c 100644
--- a/lib/docs/scrapers/yarn.rb
+++ b/lib/docs/scrapers/yarn.rb
@@ -13,15 +13,16 @@ module Docs
HTML
version 'Berry' do
- self.release = '3.1.1'
+ self.release = '4.5.1'
self.base_url = 'https://yarnpkg.com/'
self.links = {
home: 'https://yarnpkg.com/',
code: 'https://github.com/yarnpkg/berry'
}
- html_filters.push 'yarn/entries_berry', 'yarn/clean_html_berry', 'title'
- options[:skip] = ['features', 'cli', 'configuration', 'advanced']
- options[:skip_patterns] = [/\Aapi/, /\Apackage/]
+ self.root_path = 'getting-started'
+ html_filters.push 'yarn/entries_berry', 'yarn/clean_html_berry'
+ options[:skip] = ['cli', 'cli/builder', 'cli/pnpify', 'cli/sdks', 'protocols']
+ options[:skip_patterns] = [/\Aapi/, /\Ablog/, /\Apackage/, /\Aassets/]
end
version 'Classic' do
@@ -38,5 +39,13 @@ module Docs
def get_latest_version(opts)
get_latest_github_release('yarnpkg', 'berry', opts)[/[\d.]+/]
end
+
+ private
+
+ # Strip NUL bytes from the response body before parsing: some pages contain them and make the parser fail.
+ def parse(response)
+ response.body.gsub!(/\0/, '') # mutates the body String in place; bare `super` re-passes the same response
+ super
+ end
end
end