4 달 전 · 4518d0e8db
--- a/app/models/agents/website_agent.rb
+++ b/app/models/agents/website_agent.rb
@@ -62,7 +62,7 @@ module Agents
 
				 
			
 
				       Beware that when parsing an XML document (i.e. `type` is `xml`) using `xpath` expressions, all namespaces are stripped from the document unless the top-level option `use_namespaces` is set to `true`.
			
 
				 
			
 
				-      For extraction with `array` set to true, all matches will be extracted into an array. This is useful when extracting list elements or multiple parts of a website that can only be matched with the same selector.
			
 
				+      For extraction with `single_array` set to true, all matches will be extracted into an array. This is useful when extracting list elements or multiple parts of a website that can only be matched with the same selector.
			
 
				 
			
 
				       # Scraping JSON
			
 
				 
			
@@ -295,6 +295,15 @@ module Agents
 
				           case extraction_type
			
 
				           when 'html', 'xml'
			
 
				             extract.each do |name, details|
			
 
				+              details.each do |name,|
			
 
				+                case name
			
 
				+                when 'css', 'xpath', 'value', 'repeat', 'hidden', 'single_array'
			
 
				+                  # ok
			
 
				+                else
			
 
				+                  errors.add(:base, "Unknown key #{name.inspect} in extraction details")
			
 
				+                end
			
 
				+              end
			
 
				+
			
 
				               case details['css']
			
 
				               when String
			
 
				                 # ok
			
@@ -639,7 +648,7 @@ module Agents
 
				             end
			
 
				             value.to_s
			
 
				           end
			
 
				-          if boolify(extraction_details['array'])
			
 
				+          if boolify(extraction_details['single_array'])
			
 
				             values << stringified_nodes
			
 
				           else
			
 
				             stringified_nodes.each { |n| values << n }
			
--- a/db/migrate/20241027081918_website_agent_rename_array_to_single_array.rb
+++ b/db/migrate/20241027081918_website_agent_rename_array_to_single_array.rb
@@ -0,0 +1,40 @@
				+# Data migration for WebsiteAgent: renames the per-extraction
				+# option `array` to `single_array`, and back again on rollback.
				+class WebsiteAgentRenameArrayToSingleArray < ActiveRecord::Migration[6.1]
				+  # Renames `array` to `single_array` in the extractions of every WebsiteAgent.
				+  def up
				+    rename_extraction_key('array', 'single_array')
				+  end
				+
				+  # Reverts #up by renaming `single_array` back to `array`.
				+  def down
				+    rename_extraction_key('single_array', 'array')
				+  end
				+
				+  private
				+
				+  # Renames the key +from+ to +to+ inside each extraction details hash of
				+  # every WebsiteAgent's options['extract'].
				+  #
				+  # Agents whose `extract` option is not a Hash and extraction entries that
				+  # are not Hashes are left untouched. Only agents that actually carried
				+  # the old key are saved.
				+  #
				+  # @param from [String] key to remove from each extraction details hash
				+  # @param to [String] key that receives the value stored under +from+
				+  # @return [void]
				+  def rename_extraction_key(from, to)
				+    Agents::WebsiteAgent.find_each do |agent|
				+      extract = agent.options['extract']
				+      next unless extract.is_a?(Hash)
				+
				+      stale = extract.values.select { |details| details.is_a?(Hash) && details.key?(from) }
				+      next if stale.empty?
				+
				+      stale.each { |details| details[to] = details.delete(from) }
				+      # validate: false skips model validations, so the rename is persisted
				+      # whether or not the agent currently passes them.
				+      agent.save(validate: false)
				+    end
				+  end
				+end
			
--- a/spec/models/agents/website_agent_spec.rb
+++ b/spec/models/agents/website_agent_spec.rb
@@ -122,6 +122,27 @@ describe Agents::WebsiteAgent do
 
				           expect(@checker).to be_valid
			
 
				         end
			
 
				       end
			
 
				+
			
 
				+      # Validation of `extract` entries when the agent's type is 'html'.
				+      context "in 'html' type" do
				+        it "should ensure that all extractions have either 'xpath' or 'css'" do
				+          @checker.options['type'] = 'html'
				+
				+          # `array` is not an accepted key and no selector is given: both errors are expected.
				+          @checker.options['extract'] = { 'url' => { 'array' => true } }
				+          expect(@checker).not_to be_valid
				+          expect(@checker.errors_on(:base)).to include(
				+            /When type is html or xml, all extractions must have a css or xpath attribute/
				+          ).and include(/Unknown key "array"/)
				+
				+          # An xpath selector combined with `single_array`, and a css selector
				+          # on its own, must each leave the agent valid.
				+          [{ 'xpath' => '//bar', 'single_array' => true }, { 'css' => 'bar' }].each do |details|
				+            @checker.options['extract'] = { 'url' => details }
				+            expect(@checker).to be_valid
				+          end
				+        end
				+      end
			
 
				     end
			
 
				 
			
 
				     describe "#check" do
			
@@ -781,7 +802,7 @@ describe Agents::WebsiteAgent do
 
				               'title' => { 'xpath' => '/feed/entry', 'value' => 'normalize-space(./title)' },
			
 
				               'url' => { 'xpath' => '/feed/entry', 'value' => './link[1]/@href' },
			
 
				               'thumbnail' => { 'xpath' => '/feed/entry', 'value' => './thumbnail/@url' },
			
 
				-              'page_title': { 'xpath': '/feed/title', 'value': 'string(.)', 'repeat' => true }
			
 
				+              'page_title' => { 'xpath' => '/feed/title', 'value' => 'string(.)', 'repeat' => true }
			
 
				             }
			
 
				           }, keep_events_for: 2.days)
			
 
				           @checker.user = users(:bob)
			
@@ -1173,7 +1194,7 @@ fire: hot
 
				         it 'returns an array of found nodes when the array extract_option is true' do
			
 
				           stub_request(:any, /foo/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)
			
 
				 
			
 
				-          @checker.options['extract']['nav_links'] = {'css' => '#topLeft li', 'value' => 'normalize-space(.)', 'array' => 'true'}
			
 
				+          @checker.options['extract']['nav_links'] = {'css' => '#topLeft li', 'value' => 'normalize-space(.)', 'single_array' => 'true'}
			
 
				           expect {
			
 
				             @checker.receive([@event])
			
 
				           }.to change { Event.count }.by(1)