소스 검색

Rename the `array` option of WebsiteAgent to `single_array` for clarity

Akinori MUSHA 4 달 전
부모
커밋
4518d0e8db

+ 11 - 2
app/models/agents/website_agent.rb

@@ -62,7 +62,7 @@ module Agents
 
       Beware that when parsing an XML document (i.e. `type` is `xml`) using `xpath` expressions, all namespaces are stripped from the document unless the top-level option `use_namespaces` is set to `true`.
 
-      For extraction with `array` set to true, all matches will be extracted into an array. This is useful when extracting list elements or multiple parts of a website that can only be matched with the same selector.
+      For extraction with `single_array` set to true, all matches will be extracted into an array. This is useful when extracting list elements or multiple parts of a website that can only be matched with the same selector.
 
       # Scraping JSON
 
@@ -295,6 +295,15 @@ module Agents
           case extraction_type
           when 'html', 'xml'
             extract.each do |name, details|
+              details.each do |name,|
+                case name
+                when 'css', 'xpath', 'value', 'repeat', 'hidden', 'single_array'
+                  # ok
+                else
+                  errors.add(:base, "Unknown key #{name.inspect} in extraction details")
+                end
+              end
+
               case details['css']
               when String
                 # ok
@@ -639,7 +648,7 @@ module Agents
             end
             value.to_s
           end
-          if boolify(extraction_details['array'])
+          if boolify(extraction_details['single_array'])
             values << stringified_nodes
           else
             stringified_nodes.each { |n| values << n }

+ 29 - 0
db/migrate/20241027081918_website_agent_rename_array_to_single_array.rb

@@ -0,0 +1,29 @@
+# Renames the `array` extraction option of WebsiteAgent to `single_array` (reversible).
+class WebsiteAgentRenameArrayToSingleArray < ActiveRecord::Migration[6.1]
+  def up
+    rename_extraction_key('array', 'single_array')
+  end
+
+  def down
+    rename_extraction_key('single_array', 'array')
+  end
+
+  private
+
+  # Moves the value stored under `from` to `to` in each extraction detail hash.
+  def rename_extraction_key(from, to)
+    Agents::WebsiteAgent.find_each do |agent|
+      extract = agent.options['extract']
+      next unless extract.is_a?(Hash)
+
+      renamed = false
+      extract.each_value do |details|
+        next unless details.is_a?(Hash) && details.key?(from)
+        details[to] = details.delete(from)
+        renamed = true
+      end
+      # Persist only agents that actually changed; validations are skipped as before.
+      agent.save(validate: false) if renamed
+    end
+  end
+end

+ 23 - 2
spec/models/agents/website_agent_spec.rb

@@ -122,6 +122,27 @@ describe Agents::WebsiteAgent do
           expect(@checker).to be_valid
         end
       end
+
+      context "in 'html' type" do
+        it "should ensure that all extractions have either 'xpath' or 'css'" do
+          @checker.options['type'] = 'html'
+          # Only the old `array` key: no selector is given and the key is unknown.
+          @checker.options['extract'] = { 'url' => { 'array' => true } }
+          expect(@checker).not_to be_valid
+          base_errors = @checker.errors_on(:base)
+          expect(base_errors).to include(/When type is html or xml, all extractions must have a css or xpath attribute/)
+          expect(base_errors).to include(/Unknown key "array"/)
+
+          # A single selector is sufficient, with or without `single_array`.
+          [
+            { 'xpath' => '//bar', 'single_array' => true },
+            { 'css' => 'bar' },
+          ].each do |details|
+            @checker.options['extract'] = { 'url' => details }
+            expect(@checker).to be_valid
+          end
+        end
+      end
     end
 
     describe "#check" do
@@ -781,7 +802,7 @@ describe Agents::WebsiteAgent do
               'title' => { 'xpath' => '/feed/entry', 'value' => 'normalize-space(./title)' },
               'url' => { 'xpath' => '/feed/entry', 'value' => './link[1]/@href' },
               'thumbnail' => { 'xpath' => '/feed/entry', 'value' => './thumbnail/@url' },
-              'page_title': { 'xpath': '/feed/title', 'value': 'string(.)', 'repeat' => true }
+              'page_title' => { 'xpath' => '/feed/title', 'value' => 'string(.)', 'repeat' => true }
             }
           }, keep_events_for: 2.days)
           @checker.user = users(:bob)
@@ -1173,7 +1194,7 @@ fire: hot
         it 'returns an array of found nodes when the array extract_option is true' do
           stub_request(:any, /foo/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)
 
-          @checker.options['extract']['nav_links'] = {'css' => '#topLeft li', 'value' => 'normalize-space(.)', 'array' => 'true'}
+          @checker.options['extract']['nav_links'] = {'css' => '#topLeft li', 'value' => 'normalize-space(.)', 'single_array' => 'true'}
           expect {
             @checker.receive([@event])
           }.to change { Event.count }.by(1)