From 75cce72b3f51307ae2f7ae642e06b333a039c1de Mon Sep 17 00:00:00 2001
From: Gergely Gombos
Date: Sun, 1 Dec 2024 13:11:35 +0100
Subject: [PATCH] basic scraping for Reference

---
Review notes (free text in this position is ignored by `git am`):

* Line structure restored: the patch had been collapsed onto two physical
  lines. Leading whitespace was reconstructed from the hunk line counts and
  two-space Ruby indentation; confirm the context lines against
  lib/docs/scrapers/react.rb before applying, since they must match exactly.
* EntriesReactDevFilter#get_type returns the placeholder string
  'TODO add types' for every page, so all entries share a single type until
  real types are derived. The explicit `return` there is redundant.
* EntriesReactDevFilter#get_name calls `.content` directly on
  `at_css('article h1')`, and CleanHtmlReactDevFilter#call assigns
  `at_css('article')` to @doc unguarded. Presumably at_css yields nil when
  nothing matches (as Nokogiri's does), in which case a page without these
  elements would fail; verify and guard if such pages exist.
* The entries filter is pushed before the clean_html filter, which is
  presumably why get_name selects 'article h1' rather than 'h1'; confirm
  that push order is execution order.

 lib/docs/filters/react/clean_html_react_dev.rb | 11 +++++++++++
 lib/docs/filters/react/entries_react_dev.rb    | 13 +++++++++++++
 lib/docs/scrapers/react.rb                     | 14 +++++++++++++-
 3 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 lib/docs/filters/react/clean_html_react_dev.rb
 create mode 100644 lib/docs/filters/react/entries_react_dev.rb

diff --git a/lib/docs/filters/react/clean_html_react_dev.rb b/lib/docs/filters/react/clean_html_react_dev.rb
new file mode 100644
index 00000000..6e1e9f5c
--- /dev/null
+++ b/lib/docs/filters/react/clean_html_react_dev.rb
@@ -0,0 +1,11 @@
+module Docs
+  class React
+    class CleanHtmlReactDevFilter < Filter
+      def call
+        @doc = at_css('article')
+
+        doc
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/react/entries_react_dev.rb b/lib/docs/filters/react/entries_react_dev.rb
new file mode 100644
index 00000000..5807985d
--- /dev/null
+++ b/lib/docs/filters/react/entries_react_dev.rb
@@ -0,0 +1,13 @@
+module Docs
+  class React
+    class EntriesReactDevFilter < Docs::EntriesFilter
+      def get_name
+        at_css('article h1').content
+      end
+
+      def get_type
+        return 'TODO add types'
+      end
+    end
+  end
+end
diff --git a/lib/docs/scrapers/react.rb b/lib/docs/scrapers/react.rb
index 7ef60cc1..02133b17 100644
--- a/lib/docs/scrapers/react.rb
+++ b/lib/docs/scrapers/react.rb
@@ -7,6 +7,19 @@ module Docs
       code: 'https://github.com/facebook/react'
     }
 
+    version do
+      self.release = '18.3.1'
+      # TODO add /learn
+      self.base_url = 'https://react.dev/reference'
+
+      html_filters.push 'react/entries_react_dev', 'react/clean_html_react_dev'
+
+      options[:attribution] = <<-HTML
+        © 2013–present Facebook Inc.
+        Licensed under the Creative Commons Attribution 4.0 International Public License.
+      HTML
+    end
+
     version '17' do
       self.release = '17.0.2'
       self.base_url = 'https://17.reactjs.org/docs/'
@@ -33,7 +46,6 @@ module Docs
       HTML
     end
 
-
     def get_latest_version(opts)
       doc = fetch_doc('https://react.dev/', opts)
       doc.at_css('a[href="/versions"]').content.strip[1..-1]