From 2216cb46cf6ba6dbf1ab6d104ca38e011be4e3ca Mon Sep 17 00:00:00 2001 From: Rui Jiang Date: Wed, 20 Nov 2024 17:26:35 -0600 Subject: [PATCH 1/2] updated yarn version (3.1.1 > 4.5.1) --- lib/docs/filters/yarn/clean_html_berry.rb | 41 ++++------------------- lib/docs/filters/yarn/entries_berry.rb | 19 ++--------- lib/docs/scrapers/yarn.rb | 17 +++++++--- 3 files changed, 22 insertions(+), 55 deletions(-) diff --git a/lib/docs/filters/yarn/clean_html_berry.rb b/lib/docs/filters/yarn/clean_html_berry.rb index 96b3ee53..8a28ce25 100644 --- a/lib/docs/filters/yarn/clean_html_berry.rb +++ b/lib/docs/filters/yarn/clean_html_berry.rb @@ -2,45 +2,18 @@ module Docs class Yarn class CleanHtmlBerryFilter < Filter def call - if slug.empty? - @doc = at_css('main') - css( - (['div:first-child'] * 3).join('>'), # Tagline - 'img', - 'hr', # Footer - 'hr + div', # Footer - ).remove - - css('a').each do |link| - link.name = 'div' - link.css('h3').each do |node| - node.replace("

#{node.content}

") - end - end - - return doc - end - - @doc = at_css('article') - # Heading & edit link - css('h1', 'h1 + a').remove unless slug.start_with?('configuration') - - if slug.start_with?('cli') - css('.header-code').each do |node| - node.name = 'span' - end - end - - if slug.start_with?('configuration') - css('h1', 'h2').each do |node| - node.name = node.name.sub(/\d/) { |i| i.to_i + 1 } - end - end + @doc = at_css('main .container div.theme-doc-markdown.markdown') css('*').each do |node| node.remove_attribute('style') end + css('pre').each do |node| + lang = node['class'][/language-(\w+)/, 1] + node['data-language'] = lang if lang + node.content = node.css('.token-line').map(&:content).join("\n") + end + doc end end diff --git a/lib/docs/filters/yarn/entries_berry.rb b/lib/docs/filters/yarn/entries_berry.rb index 44c1e18e..6b99bfa6 100644 --- a/lib/docs/filters/yarn/entries_berry.rb +++ b/lib/docs/filters/yarn/entries_berry.rb @@ -2,26 +2,11 @@ module Docs class Yarn class EntriesBerryFilter < Docs::EntriesFilter def get_name - if slug.start_with?('configuration') - filename = at_css('main .active code') - content = filename.content - return filename.parent.content.sub content, " (#{content})" - end - - name = at_css('h1').content - - if slug.start_with?('getting-started') - active_link = at_css('main .active') - links = active_link.parent.children.to_a - name.prepend "#{links.index(active_link) + 1}. " - end - - name + at_css('main header h1').content end def get_type - return 'CLI' if slug.start_with?('sdks', 'pnpify') - at_css('header .active').content + at_css('nav.navbar a.navbar__item.navbar__link.navbar__link--active').content end end end diff --git a/lib/docs/scrapers/yarn.rb b/lib/docs/scrapers/yarn.rb index 8cc49260..9d20bc8c 100644 --- a/lib/docs/scrapers/yarn.rb +++ b/lib/docs/scrapers/yarn.rb @@ -13,15 +13,16 @@ module Docs HTML version 'Berry' do - self.release = '3.1.1' + self.release = '4.5.1' self.base_url = 'https://yarnpkg.com/' self.links = { home: 'https://yarnpkg.com/', code: 'https://github.com/yarnpkg/berry' } - html_filters.push 'yarn/entries_berry', 'yarn/clean_html_berry', 'title' - options[:skip] = ['features', 'cli', 'configuration', 'advanced'] - options[:skip_patterns] = [/\Aapi/, /\Apackage/] + self.root_path = 'getting-started' + html_filters.push 'yarn/entries_berry', 'yarn/clean_html_berry' + options[:skip] = ['cli', 'cli/builder', 'cli/pnpify', 'cli/sdks', 'protocols'] + options[:skip_patterns] = [/\Aapi/, /\Ablog/, /\Apackage/, /\Aassets/] end version 'Classic' do @@ -38,5 +39,13 @@ module Docs def get_latest_version(opts) get_latest_github_release('yarnpkg', 'berry', opts)[/[\d.]+/] end + + private + + # Some pages contain null bytes and cause the parser to fail + def parse(response) + response.body.gsub!(/[\x00\u0000\0]/, '') + super + end end end From d45d1435eb6c2dbdab32474df19fb9432cc92810 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Thu, 21 Nov 2024 19:26:35 +0100 Subject: [PATCH 2/2] Update Yarn documentation (4.5.1) --- lib/docs/scrapers/yarn.rb | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/docs/scrapers/yarn.rb b/lib/docs/scrapers/yarn.rb index 9d20bc8c..6539c7d5 100644 --- a/lib/docs/scrapers/yarn.rb +++ b/lib/docs/scrapers/yarn.rb @@ -12,7 +12,7 @@ module Docs Licensed under the BSD License. HTML - version 'Berry' do + version do self.release = '4.5.1' self.base_url = 'https://yarnpkg.com/' self.links = { @@ -25,6 +25,18 @@ module Docs options[:skip_patterns] = [/\Aapi/, /\Ablog/, /\Apackage/, /\Aassets/] end + version '3' do + self.release = '3.1.1' + self.base_url = 'https://v3.yarnpkg.com/' + self.links = { + home: 'https://v3.yarnpkg.com/', + code: 'https://github.com/yarnpkg/berry' + } + self.root_path = 'getting-started' + html_filters.push 'yarn/entries_berry', 'yarn/clean_html_berry', 'title' + options[:skip] = ['features', 'cli', 'configuration', 'advanced'] + options[:skip_patterns] = [/\Aapi/, /\Apackage/] end + version 'Classic' do self.release = '1.22.17' self.base_url = 'https://classic.yarnpkg.com/en/docs/'