diff --git a/.gitignore b/.gitignore index 8b222826..6e09c63b 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ public/fonts public/docs/**/* !public/docs/docs.json !public/docs/**/index.json +/docs/ diff --git a/lib/docs.rb b/lib/docs.rb index d12e4570..3f07374d 100644 --- a/lib/docs.rb +++ b/lib/docs.rb @@ -29,6 +29,7 @@ module Docs self.rescue_errors = false class DocNotFound < NameError; end + class SetupError < StandardError; end def self.all Dir["#{root_path}/docs/scrapers/**/*.rb"]. diff --git a/lib/docs/core/doc.rb b/lib/docs/core/doc.rb index d39ddfcf..cb1cd209 100644 --- a/lib/docs/core/doc.rb +++ b/lib/docs/core/doc.rb @@ -95,6 +95,9 @@ module Docs false end end + rescue Docs::SetupError => error + puts "ERROR: #{error.message}" + false end def store_pages(store) @@ -118,6 +121,9 @@ module Docs false end end + rescue Docs::SetupError => error + puts "ERROR: #{error.message}" + false end private diff --git a/lib/docs/core/scrapers/file_scraper.rb b/lib/docs/core/scrapers/file_scraper.rb index 4ec6a72f..9d354631 100644 --- a/lib/docs/core/scrapers/file_scraper.rb +++ b/lib/docs/core/scrapers/file_scraper.rb @@ -1,14 +1,13 @@ module Docs class FileScraper < Scraper + SOURCE_DIRECTORY = File.expand_path '../../../../../docs', __FILE__ + Response = Struct.new :body, :url class << self - attr_accessor :dir - def inherited(subclass) super subclass.base_url = base_url - subclass.dir = dir end end @@ -16,13 +15,25 @@ module Docs html_filters.push 'clean_local_urls' + def source_directory + @source_directory ||= File.join(SOURCE_DIRECTORY, self.class.path) + end + private + def assert_source_directory_exists + unless Dir.exist?(source_directory) + raise SetupError, "The #{self.class.name} scraper requires the original documentation files to be stored in the \"#{source_directory}\" directory." 
+ end + end + def request_one(url) - Response.new read_file(file_path_for(url)), URL.parse(url) + assert_source_directory_exists + Response.new read_file(url_to_path(url)), URL.parse(url) end def request_all(urls) + assert_source_directory_exists queue = [urls].flatten until queue.empty? result = yield request_one(queue.shift) @@ -34,12 +45,12 @@ module Docs response.body.present? end - def file_path_for(url) - File.join self.class.dir, url.remove(base_url.to_s) + def url_to_path(url) + url.remove(base_url.to_s) end def read_file(path) - File.read(path) + File.read(File.join(source_directory, path)) rescue instrument 'warn.doc', msg: "Failed to open file: #{path}" nil diff --git a/lib/docs/scrapers/c.rb b/lib/docs/scrapers/c.rb index e90b3679..f9289617 100644 --- a/lib/docs/scrapers/c.rb +++ b/lib/docs/scrapers/c.rb @@ -1,7 +1,6 @@ module Docs class C < FileScraper self.type = 'c' - self.dir = '/Users/Thibaut/DevDocs/Docs/c' self.base_url = 'http://en.cppreference.com/w/c/' self.root_path = 'header.html' diff --git a/lib/docs/scrapers/cpp.rb b/lib/docs/scrapers/cpp.rb index 819fd626..374f6883 100644 --- a/lib/docs/scrapers/cpp.rb +++ b/lib/docs/scrapers/cpp.rb @@ -3,7 +3,6 @@ module Docs self.name = 'C++' self.slug = 'cpp' self.type = 'c' - self.dir = '/Users/Thibaut/DevDocs/Docs/cpp' self.base_url = 'http://en.cppreference.com/w/cpp/' self.root_path = 'header.html' diff --git a/lib/docs/scrapers/dart.rb b/lib/docs/scrapers/dart.rb index 83ccc1b5..c345c22f 100644 --- a/lib/docs/scrapers/dart.rb +++ b/lib/docs/scrapers/dart.rb @@ -24,13 +24,11 @@ module Docs version '2' do self.release = '2.0.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/Dart2' self.base_url = "https://api.dartlang.org/stable/#{release}/" end version '1' do self.release = '1.24.3' - self.dir = '/Users/Thibaut/DevDocs/Docs/Dart1' self.base_url = "https://api.dartlang.org/stable/#{release}/" end end diff --git a/lib/docs/scrapers/django.rb b/lib/docs/scrapers/django.rb index c03c19af..45273540 100644 
--- a/lib/docs/scrapers/django.rb +++ b/lib/docs/scrapers/django.rb @@ -36,37 +36,31 @@ module Docs version '2.1' do self.release = '2.1.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/Django21' self.base_url = 'https://docs.djangoproject.com/en/2.1/' end version '2.0' do self.release = '2.0.7' - self.dir = '/Users/Thibaut/DevDocs/Docs/Django20' self.base_url = 'https://docs.djangoproject.com/en/2.0/' end version '1.11' do self.release = '1.11.9' - self.dir = '/Users/Thibaut/DevDocs/Docs/Django111' self.base_url = 'https://docs.djangoproject.com/en/1.11/' end version '1.10' do self.release = '1.10.8' - self.dir = '/Users/Thibaut/DevDocs/Docs/Django110' self.base_url = 'https://docs.djangoproject.com/en/1.10/' end version '1.9' do self.release = '1.9.13' - self.dir = '/Users/Thibaut/DevDocs/Docs/Django19' self.base_url = 'https://docs.djangoproject.com/en/1.9/' end version '1.8' do self.release = '1.8.18' - self.dir = '/Users/Thibaut/DevDocs/Docs/Django18' self.base_url = 'https://docs.djangoproject.com/en/1.8/' end end diff --git a/lib/docs/scrapers/erlang.rb b/lib/docs/scrapers/erlang.rb index 9c94aed5..d6aa2a0b 100644 --- a/lib/docs/scrapers/erlang.rb +++ b/lib/docs/scrapers/erlang.rb @@ -42,22 +42,18 @@ module Docs version '21' do self.release = '21.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/Erlang21' end version '20' do self.release = '20.3' - self.dir = '/Users/Thibaut/DevDocs/Docs/Erlang20' end version '19' do self.release = '19.3' - self.dir = '/Users/Thibaut/DevDocs/Docs/Erlang19' end version '18' do self.release = '18.3' - self.dir = '/Users/Thibaut/DevDocs/Docs/Erlang18' end end end diff --git a/lib/docs/scrapers/gnu/gcc.rb b/lib/docs/scrapers/gnu/gcc.rb index d3a77493..be3bb54e 100644 --- a/lib/docs/scrapers/gnu/gcc.rb +++ b/lib/docs/scrapers/gnu/gcc.rb @@ -48,13 +48,11 @@ module Docs version '7' do self.release = '7.3.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/gcc7' self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gcc/" end version '7 
CPP' do self.release = '7.3.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/gcpp7' self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/cpp/" options[:replace_paths] = CPP_PATHS @@ -62,7 +60,6 @@ module Docs version '6' do self.release = '6.4.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/gcc6' self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gcc/" options[:root_title] = 'Using the GNU Compiler Collection (GCC)' @@ -70,7 +67,6 @@ module Docs version '6 CPP' do self.release = '6.4.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/gcpp6' self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/cpp/" options[:replace_paths] = CPP_PATHS @@ -78,7 +74,6 @@ module Docs version '5' do self.release = '5.4.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/gcc5' self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gcc/" options[:root_title] = 'Using the GNU Compiler Collection (GCC)' @@ -86,7 +81,6 @@ module Docs version '5 CPP' do self.release = '5.4.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/gcpp5' self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/cpp/" options[:replace_paths] = CPP_PATHS @@ -94,7 +88,6 @@ module Docs version '4' do self.release = '4.9.3' - self.dir = '/Users/Thibaut/DevDocs/Docs/gcc4' self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gcc/" options[:root_title] = 'Using the GNU Compiler Collection (GCC)' @@ -102,7 +95,6 @@ module Docs version '4 CPP' do self.release = '4.9.3' - self.dir = '/Users/Thibaut/DevDocs/Docs/gcpp4' self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/cpp/" options[:replace_paths] = CPP_PATHS diff --git a/lib/docs/scrapers/gnu/gnu_fortran.rb b/lib/docs/scrapers/gnu/gnu_fortran.rb index c373a0bf..2610178e 100644 --- a/lib/docs/scrapers/gnu/gnu_fortran.rb +++ b/lib/docs/scrapers/gnu/gnu_fortran.rb @@ -8,25 +8,21 @@ module Docs version '7' do self.release = '7.3.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/gfortran7' self.base_url = 
"https://gcc.gnu.org/onlinedocs/gcc-#{release}/gfortran/" end version '6' do self.release = '6.4.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/gfortran6' self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gfortran/" end version '5' do self.release = '5.4.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/gfortran5' self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gfortran/" end version '4' do self.release = '4.9.3' - self.dir = '/Users/Thibaut/DevDocs/Docs/gfortran4' self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gfortran/" end end diff --git a/lib/docs/scrapers/nokogiri2.rb b/lib/docs/scrapers/nokogiri2.rb index f7fed163..04cdf96c 100644 --- a/lib/docs/scrapers/nokogiri2.rb +++ b/lib/docs/scrapers/nokogiri2.rb @@ -3,7 +3,6 @@ module Docs self.name = 'Nokogiri' self.slug = 'nokogiri' self.release = '1.8.1' - self.dir = '/Users/Thibaut/DevDocs/Docs/RDoc/Nokogiri' html_filters.replace 'rdoc/entries', 'nokogiri2/entries' diff --git a/lib/docs/scrapers/numpy.rb b/lib/docs/scrapers/numpy.rb index 0756e062..1327fc02 100644 --- a/lib/docs/scrapers/numpy.rb +++ b/lib/docs/scrapers/numpy.rb @@ -2,7 +2,6 @@ module Docs class Numpy < FileScraper self.name = 'NumPy' self.type = 'sphinx' - self.dir = '/Users/Thibaut/DevDocs/Docs/numpy/reference/' self.root_path = 'index.html' self.links = { home: 'http://www.numpy.org/', diff --git a/lib/docs/scrapers/openjdk.rb b/lib/docs/scrapers/openjdk.rb index deff67d6..944ac416 100644 --- a/lib/docs/scrapers/openjdk.rb +++ b/lib/docs/scrapers/openjdk.rb @@ -1,11 +1,10 @@ module Docs class Openjdk < FileScraper + # Downloaded from packages.debian.org/sid/openjdk-8-doc + # Extracting subdirectory /usr/share/doc/openjdk-8-jre-headless/api self.name = 'OpenJDK' self.type = 'openjdk' self.root_path = 'overview-summary.html' - # Downloaded from packages.debian.org/sid/openjdk-8-doc - # Extracting subdirectory /usr/share/doc/openjdk-8-jre-headless/api - self.dir = '/Users/Thibaut/DevDocs/Docs/OpenJDK' 
html_filters.insert_after 'internal_urls', 'openjdk/clean_urls' html_filters.push 'openjdk/entries', 'openjdk/clean_html' diff --git a/lib/docs/scrapers/perl.rb b/lib/docs/scrapers/perl.rb index d794a4d6..142ceaa5 100644 --- a/lib/docs/scrapers/perl.rb +++ b/lib/docs/scrapers/perl.rb @@ -2,7 +2,6 @@ module Docs class Perl < FileScraper self.name = 'Perl' self.type = 'perl' - self.dir = '/Users/Thibaut/DevDocs/Docs/Perl' self.root_path = 'index.html' self.links = { home: 'https://www.perl.org/' diff --git a/lib/docs/scrapers/php.rb b/lib/docs/scrapers/php.rb index a99bb34c..d4a66b5b 100644 --- a/lib/docs/scrapers/php.rb +++ b/lib/docs/scrapers/php.rb @@ -1,5 +1,7 @@ module Docs class Php < FileScraper + # Downloaded from php.net/download-docs.php + include FixInternalUrlsBehavior self.name = 'PHP' @@ -23,9 +25,6 @@ module Docs code: 'https://git.php.net/?p=php-src.git;a=summary' } - # Downloaded from php.net/download-docs.php - self.dir = '/Users/Thibaut/DevDocs/Docs/PHP' - html_filters.push 'php/internal_urls', 'php/entries', 'php/clean_html', 'title' text_filters.push 'php/fix_urls' diff --git a/lib/docs/scrapers/python.rb b/lib/docs/scrapers/python.rb index 9b7126e6..aa4336d9 100644 --- a/lib/docs/scrapers/python.rb +++ b/lib/docs/scrapers/python.rb @@ -23,33 +23,29 @@ module Docs Licensed under the PSF License. 
HTML - version '3.7' do + version '3.7' do # docs.python.org/3.7/download.html self.release = '3.7.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/Python37' # docs.python.org/3.7/download.html self.base_url = 'https://docs.python.org/3.7/' html_filters.push 'python/entries_v3', 'sphinx/clean_html', 'python/clean_html' end - version '3.6' do + version '3.6' do # docs.python.org/3.6/download.html self.release = '3.6.6' - self.dir = '/Users/Thibaut/DevDocs/Docs/Python36' # docs.python.org/3.6/download.html self.base_url = 'https://docs.python.org/3.6/' html_filters.push 'python/entries_v3', 'sphinx/clean_html', 'python/clean_html' end - version '3.5' do + version '3.5' do # docs.python.org/3.5/download.html self.release = '3.5.3' - self.dir = '/Users/Thibaut/DevDocs/Docs/Python35' # docs.python.org/3.5/download.html self.base_url = 'https://docs.python.org/3.5/' html_filters.push 'python/entries_v3', 'sphinx/clean_html', 'python/clean_html' end - version '2.7' do + version '2.7' do # docs.python.org/2.7/download.html self.release = '2.7.13' - self.dir = '/Users/Thibaut/DevDocs/Docs/Python27' # docs.python.org/2.7/download.html self.base_url = 'https://docs.python.org/2.7/' html_filters.push 'python/entries_v2', 'sphinx/clean_html', 'python/clean_html' diff --git a/lib/docs/scrapers/rdoc/minitest.rb b/lib/docs/scrapers/rdoc/minitest.rb index fa880199..0d6345f9 100644 --- a/lib/docs/scrapers/rdoc/minitest.rb +++ b/lib/docs/scrapers/rdoc/minitest.rb @@ -1,9 +1,9 @@ module Docs class Minitest < Rdoc + # Run "rake docs" in the gem directory self.name = 'Ruby / Minitest' self.slug = 'minitest' self.release = '5.10.3' - self.dir = '/Users/Thibaut/DevDocs/Docs/RDoc/Minitest' # rake docs self.links = { code: 'https://github.com/seattlerb/minitest' } diff --git a/lib/docs/scrapers/rdoc/rails.rb b/lib/docs/scrapers/rdoc/rails.rb index d0365b50..b7709d78 100644 --- a/lib/docs/scrapers/rdoc/rails.rb +++ b/lib/docs/scrapers/rdoc/rails.rb @@ -4,7 +4,6 @@ module Docs self.name = 'Ruby 
on Rails' self.slug = 'rails' - self.dir = '/Users/Thibaut/DevDocs/Docs/RDoc/Rails' self.initial_paths = %w(guides/index.html) self.links = { home: 'http://rubyonrails.org/', diff --git a/lib/docs/scrapers/rdoc/ruby.rb b/lib/docs/scrapers/rdoc/ruby.rb index 35f34462..0335de60 100644 --- a/lib/docs/scrapers/rdoc/ruby.rb +++ b/lib/docs/scrapers/rdoc/ruby.rb @@ -78,22 +78,18 @@ module Docs version '2.5' do self.release = '2.5.0' - self.dir = '/Users/Thibaut/DevDocs/Docs/RDoc/Ruby25' end version '2.4' do self.release = '2.4.3' - self.dir = '/Users/Thibaut/DevDocs/Docs/RDoc/Ruby24' end version '2.3' do self.release = '2.3.6' - self.dir = '/Users/Thibaut/DevDocs/Docs/RDoc/Ruby23' end version '2.2' do self.release = '2.2.9' - self.dir = '/Users/Thibaut/DevDocs/Docs/RDoc/Ruby22' end end end diff --git a/lib/docs/scrapers/sqlite.rb b/lib/docs/scrapers/sqlite.rb index 6cff8ed7..790acf83 100644 --- a/lib/docs/scrapers/sqlite.rb +++ b/lib/docs/scrapers/sqlite.rb @@ -3,7 +3,6 @@ module Docs self.name = 'SQLite' self.type = 'sqlite' self.release = '3.25.2' - self.dir = '/Users/Thibaut/DevDocs/Docs/sqlite/' self.base_url = 'https://sqlite.org/' self.root_path = 'docs.html' self.initial_paths = %w(keyword_index.html) diff --git a/test/lib/docs/core/scrapers/file_scraper_test.rb b/test/lib/docs/core/scrapers/file_scraper_test.rb index a017b1dc..7d90d262 100644 --- a/test/lib/docs/core/scrapers/file_scraper_test.rb +++ b/test/lib/docs/core/scrapers/file_scraper_test.rb @@ -2,16 +2,23 @@ require 'test_helper' require 'docs' class FileScraperTest < MiniTest::Spec + ROOT_PATH = File.expand_path('../../../../../../', __FILE__) + class Scraper < Docs::FileScraper - self.dir = '/' self.html_filters = Docs::FilterStack.new self.text_filters = Docs::FilterStack.new + + version 'version' do; end end let :scraper do Scraper.new end + let :versioned_scraper do + Scraper.versions.first.new + end + let :response do OpenStruct.new body: 'body', url: Docs::URL.parse(Scraper.base_url) end @@ -22,9 
+29,16 @@ class FileScraperTest < MiniTest::Spec end end + describe "#source_directory" do + it "returns the directory at docs/[slug]" do + assert_equal File.join(ROOT_PATH, 'docs', 'scraper'), scraper.source_directory + assert_equal File.join(ROOT_PATH, 'docs', 'scraper~version'), versioned_scraper.source_directory + end + end + describe "#request_one" do let :path do - File.join(Scraper.dir, 'path') + 'path' end let :result do @@ -35,20 +49,34 @@ class FileScraperTest < MiniTest::Spec stub(scraper).read_file end - it "reads a file" do - mock(scraper).read_file(path) - result + context "when the source directory doesn't exist" do + it "raises an error" do + assert_raises Docs::SetupError do + result + end + end end - describe "the returned response object" do - it "has a #body" do - stub(scraper).read_file { 'body' } - assert_equal 'body', result.body + context "when the source directory exists" do + before do + stub(scraper).assert_source_directory_exists + end + + it "reads a file" do + mock(scraper).read_file(path) + result end - it "has a #url" do - assert_equal path, result.url.to_s - assert_instance_of Docs::URL, result.url + describe "the returned response object" do + it "has a #body" do + stub(scraper).read_file { 'body' } + assert_equal 'body', result.body + end + + it "has a #url" do + assert_equal path, result.url.to_s + assert_instance_of Docs::URL, result.url + end end end end @@ -58,49 +86,63 @@ class FileScraperTest < MiniTest::Spec %w(one two) end - it "requests the given url" do - mock(scraper).request_one('url') - scraper.send(:request_all, 'url') {} - end - - it "requests the given urls" do - requests = [] - stub(scraper).request_one { |url| requests << url; nil } - scraper.send(:request_all, urls) {} - assert_equal urls, requests - end - - it "yields the responses" do - responses = [] - stub(scraper).request_one { |url| urls.index(url) } - scraper.send(:request_all, urls) { |response| responses << response; nil } - assert_equal 
(0...urls.length).to_a, responses + context "when the source directory doesn't exist" do + it "raises an error" do + assert_raises Docs::SetupError do + scraper.send(:request_all, urls) {} + end + end end - context "when the block returns an array" do - let :next_urls do - %w(three four) + context "when the source directory exists" do + before do + stub(scraper).assert_source_directory_exists end - let :all_urls do - urls + %w(three four) + it "requests the given url" do + mock(scraper).request_one('url') + scraper.send(:request_all, 'url') {} end - it "requests the returned urls" do + it "requests the given urls" do requests = [] - stub(scraper).request_one { |url| requests << url; url } - scraper.send(:request_all, urls) { [next_urls.shift].compact } - assert_equal all_urls, requests + stub(scraper).request_one { |url| requests << url; nil } + scraper.send(:request_all, urls) {} + assert_equal urls, requests end - it "yields their responses" do + it "yields the responses" do responses = [] - stub(scraper).request_one { |url| all_urls.index(url) } - scraper.send :request_all, urls do |response| - responses << response - [next_urls.shift].compact + stub(scraper).request_one { |url| urls.index(url) } + scraper.send(:request_all, urls) { |response| responses << response; nil } + assert_equal (0...urls.length).to_a, responses + end + + context "when the block returns an array" do + let :next_urls do + %w(three four) + end + + let :all_urls do + urls + %w(three four) + end + + it "requests the returned urls" do + requests = [] + stub(scraper).request_one { |url| requests << url; url } + scraper.send(:request_all, urls) { [next_urls.shift].compact } + assert_equal all_urls, requests + end + + it "yields their responses" do + responses = [] + stub(scraper).request_one { |url| all_urls.index(url) } + scraper.send :request_all, urls do |response| + responses << response + [next_urls.shift].compact + end + assert_equal (0...all_urls.length).to_a, responses end - 
assert_equal (0...all_urls.length).to_a, responses end end end @@ -126,13 +168,13 @@ class FileScraperTest < MiniTest::Spec scraper.send :read_file, 'file' end - it "returns the file's content when the file exists" do - stub(File).read('file') { 'content' } + it "returns the file's content when the file exists in the source directory" do + stub(File).read(File.join(ROOT_PATH, 'docs', 'scraper', 'file')) { 'content' } assert_equal 'content', result end it "returns nil when the file doesn't exist" do - stub(File).read('file') { raise } + stub(File).read(File.join(ROOT_PATH, 'docs', 'scraper', 'file')) { raise } assert_nil result end end