From 3eb5ccb7eac7191be39c9b411bf05dad1a4a1032 Mon Sep 17 00:00:00 2001
From: Thibaut
Date: Sun, 13 Dec 2015 15:39:00 -0500
Subject: [PATCH] Raise error and stop scraping on 4xx/5xx status code

---
 lib/docs/core/response.rb                     |  4 ++++
 lib/docs/core/scrapers/url_scraper.rb         |  4 ++++
 test/lib/docs/core/response_test.rb           | 22 +++++++++++++++++++
 .../docs/core/scrapers/url_scraper_test.rb    |  7 +++++-
 4 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/lib/docs/core/response.rb b/lib/docs/core/response.rb
index 2908c101..c9477f27 100644
--- a/lib/docs/core/response.rb
+++ b/lib/docs/core/response.rb
@@ -4,6 +4,10 @@ module Docs
       code == 200
     end
 
+    def error?
+      code != 404 && code >= 400 && code <= 599
+    end
+
     def empty?
       body.empty?
     end
diff --git a/lib/docs/core/scrapers/url_scraper.rb b/lib/docs/core/scrapers/url_scraper.rb
index 46eba810..7092de01 100644
--- a/lib/docs/core/scrapers/url_scraper.rb
+++ b/lib/docs/core/scrapers/url_scraper.rb
@@ -29,6 +29,10 @@ module Docs
     end
 
     def process_response?(response)
+      if response.error?
+        raise "Error status code (#{response.code}): #{response.url}"
+      end
+
       response.success? && response.html? && base_url.contains?(response.effective_url)
     end
 
diff --git a/test/lib/docs/core/response_test.rb b/test/lib/docs/core/response_test.rb
index 3cbf2dea..744020fc 100644
--- a/test/lib/docs/core/response_test.rb
+++ b/test/lib/docs/core/response_test.rb
@@ -29,6 +29,28 @@ class DocsResponseTest < MiniTest::Spec
     end
   end
 
+  describe "#error?" do
+    it "returns false when the code is 200" do
+      options.code = 200
+      refute response.error?
+    end
+
+    it "returns false when the code is 404" do
+      options.code = 404
+      refute response.error?
+    end
+
+    it "returns true when the code is 400" do
+      options.code = 400
+      assert response.error?
+    end
+
+    it "returns true when the code is 500" do
+      options.code = 500
+      assert response.error?
+    end
+  end
+
   describe "#empty?" do
     it "returns true when the body is empty" do
       options.body = ''
diff --git a/test/lib/docs/core/scrapers/url_scraper_test.rb b/test/lib/docs/core/scrapers/url_scraper_test.rb
index 1e599edf..5ff72231 100644
--- a/test/lib/docs/core/scrapers/url_scraper_test.rb
+++ b/test/lib/docs/core/scrapers/url_scraper_test.rb
@@ -89,13 +89,18 @@ class DocsUrlScraperTest < MiniTest::Spec
 
   describe "#process_response?" do
     let :response do
-      OpenStruct.new success?: true, html?: true, effective_url: scraper.root_url
+      OpenStruct.new success?: true, html?: true, effective_url: scraper.root_url, error?: false
     end
 
     let :result do
       scraper.send :process_response?, response
     end
 
+    it "raises when the response is an error" do
+      response.send 'error?=', true
+      assert_raises(RuntimeError) { result }
+    end
+
     it "returns false when the response isn't successful" do
       response.send 'success?=', false
       refute result