csv_agent_spec.rb 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. require 'rails_helper'
  # Specs for Agents::CsvAgent: option validation, the working? check, parsing
  # CSV input into events, serializing event payloads into CSV, and the
  # generated event description.
  describe Agents::CsvAgent do
    before(:each) do
      @valid_params = {
        'mode' => 'parse',
        'separator' => ',',
        'use_fields' => '',
        'output' => 'event_per_row',
        'with_header' => 'true',
        'data_path' => '$.data',
        'data_key' => 'data'
      }

      @checker = Agents::CsvAgent.new(name: 'somename', options: @valid_params)
      @checker.user = users(:jane)
      @checker.save!

      # Its id is what the 'file_pointer' payloads built by event_with_contents refer to.
      @lfa = Agents::LocalFileAgent.new(name: 'local', options: {path: '{{}}', watch: 'false', append: 'false', mode: 'read'})
      @lfa.user = users(:jane)
      @lfa.save!
    end

    it_behaves_like 'FileHandlingConsumer'

    context '#validate_options' do
      it 'is valid with the given options' do
        expect(@checker).to be_valid
      end

      it "requires with_header to be either 'true' or 'false'" do
        @checker.options['with_header'] = 'true'
        expect(@checker).to be_valid
        @checker.options['with_header'] = 'false'
        expect(@checker).to be_valid
        @checker.options['with_header'] = 'test'
        expect(@checker).not_to be_valid
      end

      it "data_path has to be set in serialize mode" do
        @checker.options['mode'] = 'serialize'
        @checker.options['data_path'] = ''
        expect(@checker).not_to be_valid
      end
    end

    context '#working' do
      it 'is not working without having received an event' do
        expect(@checker).not_to be_working
      end

      it 'is working after receiving an event without error' do
        @checker.last_receive_at = Time.now
        expect(@checker).to be_working
      end
    end

    context '#receive' do
      # Location of the scratch CSV file shared by every example that goes
      # through event_with_contents.
      def csv_fixture_path
        File.join(Rails.root, 'tmp', 'csv')
      end

      # Writes +contents+ to the scratch CSV file and returns an unsaved Event
      # whose payload carries a 'file_pointer' to that file via @lfa.
      def event_with_contents(contents)
        path = csv_fixture_path
        File.write(path, contents)
        Event.new(payload: { 'file_pointer' => {'agent_id' => @lfa.id, 'file' => path } }, user_id: @checker.user_id)
      end

      # Builds an unsaved Event carrying +data+ under the 'data' payload key,
      # which is where the '$.data' data_path used in these examples points.
      def data_event(data)
        Event.new(payload: { 'data' => data })
      end

      after(:all) do
        # rm_f instead of rm: the scratch file only exists if an example called
        # event_with_contents. A filtered run of just the data_path or
        # serializing examples never creates it, and rm would raise
        # Errno::ENOENT during cleanup.
        FileUtils.rm_f(csv_fixture_path)
      end

      context "agent options" do
        let(:with_headers) { event_with_contents("one,two\n1,2\n2,3") }
        let(:without_headers) { event_with_contents("1,2\n2,3") }

        context "output" do
          it "creates one event per row" do
            @checker.options['output'] = 'event_per_row'
            expect { @checker.receive([with_headers]) }.to change(Event, :count).by(2)
            expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'})
          end

          it "creates one event per file" do
            @checker.options['output'] = 'event_per_file'
            expect { @checker.receive([with_headers]) }.to change(Event, :count).by(1)
            expect(Event.last.payload).to eq(@checker.options['data_key'] => [{"one"=>"1", "two"=>"2"}, {"one"=>"2", "two"=>"3"}])
          end
        end

        context "with_header" do
          it "works without headers" do
            @checker.options['with_header'] = 'false'
            expect { @checker.receive([without_headers]) }.to change(Event, :count).by(2)
            expect(Event.last.payload).to eq({@checker.options['data_key']=>["2", "3"]})
          end

          it "works without headers and event_per_file" do
            @checker.options['with_header'] = 'false'
            @checker.options['output'] = 'event_per_file'
            expect { @checker.receive([without_headers]) }.to change(Event, :count).by(1)
            expect(Event.last.payload).to eq({@checker.options['data_key']=>[['1', '2'], ["2", "3"]]})
          end
        end

        context "use_fields" do
          it "extracts the specified columns" do
            @checker.options['use_fields'] = 'one'
            expect { @checker.receive([with_headers]) }.to change(Event, :count).by(2)
            expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2'})
          end
        end

        context "data_path" do
          it "can receive the CSV via a regular event" do
            @checker.options['data_path'] = '$.data'
            event = data_event("one,two\r\n1,2\r\n2,3")
            expect { @checker.receive([event]) }.to change(Event, :count).by(2)
            expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'})
          end
        end
      end

      context "handling different CSV formats" do
        it "works with windows line endings" do
          event = event_with_contents("one,two\r\n1,2\r\n2,3")
          expect { @checker.receive([event]) }.to change(Event, :count).by(2)
          expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'})
        end

        it "works with OSX line endings" do
          event = event_with_contents("one,two\r1,2\r2,3")
          expect { @checker.receive([event]) }.to change(Event, :count).by(2)
          expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'})
        end

        it "handles quotes correctly" do
          event = event_with_contents("\"one\",\"two\"\n1,2\n\"\"\"2, two\",3")
          expect { @checker.receive([event]) }.to change(Event, :count).by(2)
          expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '"2, two', 'two' => '3'})
        end

        it "works with tab seperated csv" do
          event = event_with_contents("one\ttwo\r\n1\t2\r\n2\t3")
          # Single-quoted on purpose: the option holds a backslash followed by
          # 't', not a literal tab character.
          @checker.options['separator'] = '\\t'
          expect { @checker.receive([event]) }.to change(Event, :count).by(2)
          expect(Event.last.payload).to eq(@checker.options['data_key'] => {'one' => '2', 'two' => '3'})
        end
      end

      context "serializing" do
        before(:each) do
          @checker.options['mode'] = 'serialize'
          @checker.options['data_path'] = '$.data'
          @checker.options['data_key'] = 'data'
        end

        it "writes headers when with_header is true" do
          event = data_event({'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'})
          expect { @checker.receive([event]) }.to change(Event, :count).by(1)
          expect(Event.last.payload).to eq('data' => "\"key\",\"key2\",\"key3\"\n\"value\",\"value2\",\"value3\"\n")
        end

        it "writes one row per received event" do
          event = data_event({'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'})
          event2 = data_event({'key' => '2value', 'key2' => '2value2', 'key3' => '2value3'})
          expect { @checker.receive([event, event2]) }.to change(Event, :count).by(1)
          expect(Event.last.payload).to eq('data' => "\"key\",\"key2\",\"key3\"\n\"value\",\"value2\",\"value3\"\n\"2value\",\"2value2\",\"2value3\"\n")
        end

        it "accepts multiple rows per event" do
          event = data_event([{'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'}, {'key' => '2value', 'key2' => '2value2', 'key3' => '2value3'}])
          expect { @checker.receive([event]) }.to change(Event, :count).by(1)
          expect(Event.last.payload).to eq('data' => "\"key\",\"key2\",\"key3\"\n\"value\",\"value2\",\"value3\"\n\"2value\",\"2value2\",\"2value3\"\n")
        end

        it "does not write the headers when with_header is false" do
          @checker.options['with_header'] = 'false'
          event = data_event({'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'})
          expect { @checker.receive([event]) }.to change(Event, :count).by(1)
          expect(Event.last.payload).to eq('data' => "\"value\",\"value2\",\"value3\"\n")
        end

        it "only serialize the keys specified in use_fields" do
          @checker.options['use_fields'] = 'key2, key3'
          event = data_event({'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'})
          expect { @checker.receive([event]) }.to change(Event, :count).by(1)
          expect(Event.last.payload).to eq('data' => "\"key2\",\"key3\"\n\"value2\",\"value3\"\n")
        end

        it "respects the order of use_fields" do
          @checker.options['use_fields'] = 'key3, key'
          event = data_event({'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'})
          expect { @checker.receive([event]) }.to change(Event, :count).by(1)
          expect(Event.last.payload).to eq('data' => "\"key3\",\"key\"\n\"value3\",\"value\"\n")
        end

        it "respects use_fields and writes no header" do
          @checker.options['with_header'] = 'false'
          @checker.options['use_fields'] = 'key2, key3'
          event = data_event({'key' => 'value', 'key2' => 'value2', 'key3' => 'value3'})
          expect { @checker.receive([event]) }.to change(Event, :count).by(1)
          expect(Event.last.payload).to eq('data' => "\"value2\",\"value3\"\n")
        end

        context "arrays" do
          it "does not write a header" do
            @checker.options['with_header'] = 'false'
            event = data_event(['value1', 'value2'])
            event2 = data_event(['value3', 'value4'])
            expect { @checker.receive([event, event2]) }.to change(Event, :count).by(1)
            expect(Event.last.payload).to eq('data' => "\"value1\",\"value2\"\n\"value3\",\"value4\"\n")
          end

          it "handles nested arrays" do
            event = data_event([['value1', 'value2'], ['value3', 'value4']])
            expect { @checker.receive([event]) }.to change(Event, :count).by(1)
            expect(Event.last.payload).to eq('data' => "\"value1\",\"value2\"\n\"value3\",\"value4\"\n")
          end
        end
      end
    end

    context '#event_description' do
      it "works with event_per_row and headers" do
        @checker.options['output'] = 'event_per_row'
        @checker.options['with_header'] = 'true'
        description = @checker.event_description
        expect(description).not_to match(/\n\s+\[\n/)
        expect(description).to include(": {\n")
      end

      it "works with event_per_file and without headers" do
        @checker.options['output'] = 'event_per_file'
        @checker.options['with_header'] = 'false'
        description = @checker.event_description
        expect(description).to match(/\n\s+\[\n/)
        expect(description).not_to include(": {\n")
      end

      it "shows dummy CSV when in serialize mode" do
        @checker.options['mode'] = 'serialize'
        description = @checker.event_description
        # Single-quoted on purpose: the expected substring contains literal
        # backslashes before the inner double quotes.
        expect(description).to include('"generated\",\"csv')
      end
    end
  end