فهرست منبع

Add a `user_agent` option to WebsiteAgent.

Akinori MUSHA 11 سال پیش
والد
کامیت
e8751af629
2 فایل تغییر یافته به همراه 41 افزوده شده و 0 حذف شده
  1. 10 0
      app/models/agents/website_agent.rb
  2. 31 0
      spec/models/agents/website_agent_spec.rb

+ 10 - 0
app/models/agents/website_agent.rb

@@ -47,6 +47,8 @@ module Agents
 
       Set `force_encoding` to an encoding name if the website does not return a Content-Type header with a proper charset.
 
+      Set `user_agent` to a custom User-Agent name if the website does not like the default value ("Faraday v#{Faraday::VERSION}").
+
       The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload.
     MD
 
@@ -105,6 +107,10 @@ module Agents
         end
       end
 
+      if options['user_agent'].present?
+        errors.add(:base, "user_agent must be a string") unless options['user_agent'].is_a?(String)
+      end
+
       begin
         basic_auth_credentials()
       rescue => e
@@ -281,6 +287,10 @@ module Agents
 
     def faraday
       @faraday ||= Faraday.new { |builder|
+        if (user_agent = options['user_agent']).present?
+          builder.headers[:user_agent] = user_agent
+        end
+
         builder.use FaradayMiddleware::FollowRedirects
         builder.request :url_encoded
         if userinfo = basic_auth_credentials()

+ 31 - 0
spec/models/agents/website_agent_spec.rb

@@ -376,4 +376,35 @@ describe Agents::WebsiteAgent do
       end
     end
   end
+
+  describe "checking with User-Agent" do
+    before do
+      stub_request(:any, /example/).
+        with(headers: { 'User-Agent' => 'Sushi' }).
+        to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
+      @site = {
+        'name' => "XKCD",
+        'expected_update_period_in_days' => 2,
+        'type' => "html",
+        'url' => "http://www.example.com",
+        'mode' => 'on_change',
+        'extract' => {
+          'url' => { 'css' => "#comic img", 'attr' => "src" },
+          'title' => { 'css' => "#comic img", 'attr' => "alt" },
+          'hovertext' => { 'css' => "#comic img", 'attr' => "title" }
+        },
+        'user_agent' => "Sushi"
+      }
+      @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @site)
+      @checker.user = users(:bob)
+      @checker.save!
+    end
+
+    describe "#check" do
+      it "should check for changes" do
+        lambda { @checker.check }.should change { Event.count }.by(1)
+        lambda { @checker.check }.should_not change { Event.count }
+      end
+    end
+  end
 end