diff --git a/.image_optim.yml b/.image_optim.yml
new file mode 100644
index 00000000..4709cace
--- /dev/null
+++ b/.image_optim.yml
@@ -0,0 +1,24 @@
+verbose: false
+skip_missing_workers: true
+allow_lossy: true
+advpng: false
+gifsicle:
+  interlace: false
+  level: 3
+  careful: true
+jhead: false
+jpegoptim:
+  strip: all
+  max_quality: 100
+jpegrecompress: false
+jpegtran: false
+optipng:
+  level: 3
+  interlace: false
+  strip: true
+pngcrush: false
+pngout: false
+pngquant:
+  quality: !ruby/range 80..99
+  speed: 3
+svgo: false
diff --git a/Gemfile b/Gemfile
index baa850ed..931014b4 100644
--- a/Gemfile
+++ b/Gemfile
@@ -32,6 +32,8 @@ group :docs do
   gem 'typhoeus'
   gem 'nokogiri'
   gem 'html-pipeline'
+  gem 'image_optim'
+  gem 'image_optim_pack', platforms: :ruby
   gem 'progress_bar', require: false
   gem 'unix_utils', require: false
   gem 'tty-pager', require: false
diff --git a/Gemfile.lock b/Gemfile.lock
index c6d795d3..9035cf28 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -25,12 +25,25 @@ GEM
       ffi (>= 1.3.0)
     eventmachine (1.2.3)
     execjs (2.7.0)
+    exifr (1.3.1)
     ffi (1.9.18)
+    fspath (3.1.0)
     highline (1.7.8)
     html-pipeline (2.6.0)
       activesupport (>= 2)
       nokogiri (>= 1.4)
     i18n (0.8.4)
+    image_optim (0.25.0)
+      exifr (~> 1.2, >= 1.2.2)
+      fspath (~> 3.0)
+      image_size (~> 1.5)
+      in_threads (~> 1.3)
+      progress (~> 3.0, >= 3.0.1)
+    image_optim_pack (0.5.0.20170712)
+      fspath (>= 2.1, < 4)
+      image_optim (~> 0.19)
+    image_size (1.5.0)
+    in_threads (1.4.0)
     method_source (0.8.2)
     mini_portile2 (2.2.0)
     minitest (5.10.2)
@@ -39,6 +52,7 @@ GEM
     nokogiri (1.8.0)
       mini_portile2 (~> 2.2.0)
     options (2.3.2)
+    progress (3.3.1)
     progress_bar (1.1.0)
       highline (~> 1.6)
       options (~> 2.3.0)
@@ -109,6 +123,8 @@ DEPENDENCIES
   coffee-script
   erubi
   html-pipeline
+  image_optim
+  image_optim_pack
   minitest
   nokogiri
   progress_bar
diff --git a/lib/docs/core/requester.rb b/lib/docs/core/requester.rb
index 1806795c..1b4198d1 100644
--- a/lib/docs/core/requester.rb
+++ b/lib/docs/core/requester.rb
@@ -20,6 +20,7 @@ module Docs
     def initialize(options = {})
       @request_options = options.extract!(:request_options)[:request_options].try(:dup) || {}
       options[:max_concurrency] ||= 20
+      options[:pipelining] = 0
       super
     end
 
diff --git a/lib/docs/filters/core/images.rb b/lib/docs/filters/core/images.rb
new file mode 100644
index 00000000..9c449639
--- /dev/null
+++ b/lib/docs/filters/core/images.rb
@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+
+module Docs
+  # Inlines remote images by replacing img src attributes with base64 data: URIs.
+  class ImagesFilter < Filter
+    include Instrumentable
+
+    # Passes raw image bytes to a memoized ImageOptim instance.
+    def self.optimize_image_data(data)
+      @image_optim ||= ImageOptim.new
+      @image_optim.optimize_image_data(data)
+    end
+
+    def call
+      # Keyed by original src; false marks a src already seen but not inlined.
+      @@cache ||= {}
+
+      doc.css('img[src]').each do |node|
+        src = node['src']
+
+        if @@cache.key?(src)
+          node['src'] = @@cache[src] unless @@cache[src] == false
+          next
+        end
+
+        @@cache[src] = false
+
+        url = Docs::URL.parse(src)
+        url.scheme = 'https' if url.scheme.nil?
+        next unless url.scheme == 'http' || url.scheme == 'https'
+
+        begin
+          Request.run(url) do |response|
+            unless response.success?
+              instrument 'broken.image', url: url, status: response.code
+              next
+            end
+
+            unless response.mime_type.start_with?('image/')
+              instrument 'invalid.image', url: url, content_type: response.mime_type
+              next
+            end
+
+            image = response.body
+
+            unless context[:optimize_images] == false
+              image = self.class.optimize_image_data(image) || image
+            end
+
+            size = image.bytesize
+
+            if size > max_size
+              instrument 'too_big.image', url: url, size: size
+              next
+            end
+
+            image = Base64.strict_encode64(image)
+            image.prepend "data:#{response.mime_type};base64,"
+            node['src'] = @@cache[src] = image
+          end
+        rescue => exception
+          instrument 'error.image', url: url, exception: exception
+        end
+      end
+
+      doc
+    end
+
+    private
+
+    # Largest image, in bytes, that gets inlined; context[:max_image_size] overrides 100 KB.
+    def max_size
+      @max_size ||= context[:max_image_size] || 100.kilobytes
+    end
+  end
+end
diff --git a/lib/docs/subscribers/image_subscriber.rb b/lib/docs/subscribers/image_subscriber.rb
new file mode 100644
index 00000000..918c8880
--- /dev/null
+++ b/lib/docs/subscribers/image_subscriber.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Docs
+  # Logs the '*.image' events instrumented by ImagesFilter.
+  class ImageSubscriber < Subscriber
+    self.namespace = 'image'
+
+    # ImagesFilter publishes the HTTP response code under the :status key.
+    def broken(event)
+      log "Skipped broken image (#{event.payload[:status]}): #{event.payload[:url]}"
+    end
+
+    def invalid(event)
+      log "Skipped invalid image (#{event.payload[:content_type]}): #{event.payload[:url]}"
+    end
+
+    def too_big(event)
+      log "Skipped large image (#{(event.payload[:size] / 1.kilobyte.to_f).round} KB): #{event.payload[:url]}"
+    end
+
+    # Prints the exception and only the backtrace frames located under Docs.root_path.
+    def error(event)
+      exception = event.payload[:exception]
+      log "ERROR: #{event.payload[:url]}"
+      puts " #{exception.class}: #{exception.message.gsub("\n", "\n ")}"
+      puts exception.backtrace.select { |line| line.start_with?(Docs.root_path) }.join("\n ").prepend("\n ")
+      puts "\n"
+    end
+  end
+end
diff --git a/lib/tasks/docs.thor b/lib/tasks/docs.thor
index 7bff5521..3cdd127b 100644
--- a/lib/tasks/docs.thor
+++ b/lib/tasks/docs.thor
@@ -35,6 +35,7 @@ class DocsCLI < Thor
       return puts 'ERROR: [path] must be an absolute path.'
     end
 
+    Docs.install_report :image
     Docs.install_report :store if options[:verbose]
     if options[:debug]
       GC.disable
@@ -61,7 +62,7 @@ class DocsCLI < Thor
     Docs.rescue_errors = true
     Docs.install_report :store if options[:verbose]
     Docs.install_report :scraper if options[:debug]
-    Docs.install_report :progress_bar, :doc if $stdout.tty?
+    Docs.install_report :progress_bar, :doc, :image if $stdout.tty?
 
     require 'unix_utils' if options[:package]
 