website_agent_spec.rb

require 'rails_helper'

describe Agents::WebsiteAgent do
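  # Most examples in this block run against stubbed xkcd.com responses
  # served from spec/data_fixtures/xkcd.html.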
  describe "checking without basic auth" do
    before do
      stub_request(:any, /xkcd/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")),
                                           status: 200,
                                           headers: {
                                             'X-Status-Message' => 'OK'
                                           })
      stub_request(:any, /xkcd\.com\/index$/).to_return(status: 301,
                                                        headers: {
                                                          'Location' => 'http://xkcd.com/'
                                                        })
      @valid_options = {
        'name' => "XKCD",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://xkcd.com",
        'mode' => 'on_change',
        'extract' => {
          'url' => { 'css' => "#comic img", 'value' => "@src" },
          'title' => { 'css' => "#comic img", 'value' => "@alt" },
          'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
        }
      }
      @checker = Agents::WebsiteAgent.new(name: "xkcd", options: @valid_options, keep_events_for: 2.days)
      @checker.user = users(:bob)
      @checker.save!
    end

    it_behaves_like WebRequestConcern
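
    # Option validation: integer fields, http_success_codes, uniqueness_look_back,
    # mode, force_encoding, and the shape of the extract option.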
    describe "validations" do
      before do
        expect(@checker).to be_valid
      end

      it "should validate the integer fields" do
        @checker.options['expected_update_period_in_days'] = "2"
        expect(@checker).to be_valid

        @checker.options['expected_update_period_in_days'] = "nonsense"
        expect(@checker).not_to be_valid
      end

      it 'should validate the http_success_codes fields' do
        @checker.options['http_success_codes'] = [404]
        expect(@checker).to be_valid

        @checker.options['http_success_codes'] = [404, 404]
        expect(@checker).not_to be_valid

        @checker.options['http_success_codes'] = [404, "422"]
        expect(@checker).to be_valid

        @checker.options['http_success_codes'] = [404.0]
        expect(@checker).not_to be_valid

        @checker.options['http_success_codes'] = ["not_a_code"]
        expect(@checker).not_to be_valid

        @checker.options['http_success_codes'] = []
        expect(@checker).to be_valid

        @checker.options['http_success_codes'] = ''
        expect(@checker).to be_valid

        @checker.options['http_success_codes'] = false
        expect(@checker).to be_valid
      end

      it "should validate uniqueness_look_back" do
        @checker.options['uniqueness_look_back'] = "nonsense"
        expect(@checker).not_to be_valid

        @checker.options['uniqueness_look_back'] = "2"
        expect(@checker).to be_valid
      end

      it "should validate mode" do
        @checker.options['mode'] = "nonsense"
        expect(@checker).not_to be_valid

        @checker.options['mode'] = "on_change"
        expect(@checker).to be_valid

        @checker.options['mode'] = "all"
        expect(@checker).to be_valid

        @checker.options['mode'] = ""
        expect(@checker).to be_valid
      end

      it "should validate the force_encoding option" do
        @checker.options['force_encoding'] = ''
        expect(@checker).to be_valid

        @checker.options['force_encoding'] = 'UTF-8'
        expect(@checker).to be_valid

        @checker.options['force_encoding'] = ['UTF-8']
        expect(@checker).not_to be_valid

        @checker.options['force_encoding'] = 'UTF-42'
        expect(@checker).not_to be_valid
      end

      context "in 'json' type" do
        it "should ensure that all extractions have a 'path'" do
          @checker.options['type'] = 'json'
          @checker.options['extract'] = {
            'url' => { 'foo' => 'bar' },
          }
          expect(@checker).to_not be_valid
          expect(@checker.errors_on(:base)).to include(/When type is json, all extractions must have a path attribute/)

          @checker.options['type'] = 'json'
          @checker.options['extract'] = {
            'url' => { 'path' => 'bar' },
          }
          expect(@checker).to be_valid
        end
      end

      context "in 'html' type" do
        it "should ensure that all extractions have either 'xpath' or 'css'" do
          @checker.options['type'] = 'html'
          @checker.options['extract'] = {
            'url' => { 'array' => true },
          }
          expect(@checker).to_not be_valid
          expect(@checker.errors_on(:base)).to include(/When type is html or xml, all extractions must have a css or xpath attribute/) & include(/Unknown key "array"/)

          @checker.options['extract'] = {
            'url' => { 'xpath' => '//bar', 'single_array' => true },
          }
          expect(@checker).to be_valid

          @checker.options['extract'] = {
            'url' => { 'css' => 'bar' },
          }
          expect(@checker).to be_valid
        end
      end
    end
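
    # #check: event creation and deduplication (on_change vs. all mode,
    # uniqueness_look_back), plus handling of an array of URLs.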
    describe "#check" do
      it "should check for changes (and update Event.expires_at)" do
        travel(-2.seconds) do
          expect { @checker.check }.to change { Event.count }.by(1)
        end
        event = Event.last
        expect { @checker.check }.not_to(change { Event.count })
        update_event = Event.last
        expect(update_event.expires_at).not_to eq(event.expires_at)
      end

      it "should always save events when in :all mode" do
        expect {
          @valid_options['mode'] = 'all'
          @checker.options = @valid_options
          @checker.check
          @checker.check
        }.to change { Event.count }.by(2)
      end

      it "should take uniqueness_look_back into account during deduplication" do
        @valid_options['mode'] = 'all'
        @checker.options = @valid_options
        @checker.check
        @checker.check
        event = Event.last
        event.payload = "{}"
        event.save

        expect {
          @valid_options['mode'] = 'on_change'
          @valid_options['uniqueness_look_back'] = 2
          @checker.options = @valid_options
          @checker.check
        }.not_to(change { Event.count })

        expect {
          @valid_options['mode'] = 'on_change'
          @valid_options['uniqueness_look_back'] = 1
          @checker.options = @valid_options
          @checker.check
        }.to change { Event.count }.by(1)
      end

      it "should log an error if the number of results for a set of extraction patterns differs" do
        @valid_options['extract']['url']['css'] = "div"
        @checker.options = @valid_options
        @checker.check
        expect(@checker.logs.first.message).to match(/Got an uneven number of matches/)
      end

      it "should accept an array for url" do
        @valid_options['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
        @checker.options = @valid_options
        expect { @checker.save! }.not_to raise_error
        expect { @checker.check }.not_to raise_error
      end

      it "should parse events from all urls in array" do
        expect {
          @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
          @valid_options['mode'] = 'all'
          @checker.options = @valid_options
          @checker.check
        }.to change { Event.count }.by(2)
      end

      it "should follow unique rules when parsing array of urls" do
        expect {
          @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
          @checker.options = @valid_options
          @checker.check
        }.to change { Event.count }.by(1)
      end
    end
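
    # http_success_codes treats the listed statuses (here 404) as successful responses.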
    describe 'http_success_codes' do
      it 'should allow scraping from a 404 result' do
        json = {
          'response' => {
            'version' => 2,
            'title' => "hello!"
          }
        }
        zipped = ActiveSupport::Gzip.compress(json.to_json)
        stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 404)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://gzip.com",
          'mode' => 'on_change',
          'http_success_codes' => [404],
          'extract' => {
            'version' => { 'path' => 'response.version' },
          },
          # no unzip option
        }
        checker = Agents::WebsiteAgent.new(name: "Weather Site", options: site)
        checker.user = users(:bob)
        checker.save!

        checker.check
        event = Event.last
        expect(event.payload['version']).to eq(2)
      end
    end
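
    # Gzip and deflate handling: automatic when Content-Encoding is set,
    # or forced via the unzip option.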
    describe 'unzipping' do
      it 'should unzip automatically if the response has Content-Encoding: gzip' do
        json = {
          'response' => {
            'version' => 2,
            'title' => "hello!"
          }
        }
        zipped = ActiveSupport::Gzip.compress(json.to_json)
        stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 200)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://gzip.com",
          'mode' => 'on_change',
          'extract' => {
            'version' => { 'path' => 'response.version' },
          },
          # no unzip option
        }
        checker = Agents::WebsiteAgent.new(name: "Weather Site", options: site)
        checker.user = users(:bob)
        checker.save!

        checker.check
        event = Event.last
        expect(event.payload['version']).to eq(2)
      end

      it 'should unzip with unzip option' do
        json = {
          'response' => {
            'version' => 2,
            'title' => "hello!"
          }
        }
        zipped = ActiveSupport::Gzip.compress(json.to_json)
        stub_request(:any, /gzip/).to_return(body: zipped, status: 200)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://gzip.com",
          'mode' => 'on_change',
          'extract' => {
            'version' => { 'path' => 'response.version' },
          },
          'unzip' => 'gzip',
        }
        checker = Agents::WebsiteAgent.new(name: "Weather Site", options: site)
        checker.user = users(:bob)
        checker.save!

        checker.check
        event = Event.last
        expect(event.payload['version']).to eq(2)
      end

      it 'should either avoid or support a raw deflate stream (#1018)' do
        stub_request(:any, /deflate/).with(headers: { 'Accept-Encoding' => /\A(?!.*deflate)/ })
                                     .to_return(body: 'hello',
                                                status: 200)
        stub_request(:any, /deflate/).with(headers: { 'Accept-Encoding' => /deflate/ })
                                     .to_return(body: "\xcb\x48\xcd\xc9\xc9\x07\x00\x06\x2c".b,
                                                headers: { 'Content-Encoding' => 'deflate' },
                                                status: 200)
        site = {
          'name' => 'Some Response',
          'expected_update_period_in_days' => '2',
          'type' => 'text',
          'url' => 'http://deflate',
          'mode' => 'on_change',
          'extract' => {
            'content' => { 'regexp' => '.+', 'index' => 0 }
          }
        }
        checker = Agents::WebsiteAgent.new(name: "Deflate Test", options: site)
        checker.user = users(:bob)
        checker.save!

        expect {
          checker.check
        }.to change { Event.count }.by(1)
        event = Event.last
        expect(event.payload['content']).to eq('hello')
      end
    end
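
    # Character-encoding detection: from the Content-Type header, HTML meta tags,
    # the XML declaration, a BOM, or the force_encoding option.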
    describe 'encoding' do
      let :huginn do
        "\u{601d}\u{8003}"
      end

      let :odin do
        "\u{d3}\u{f0}inn"
      end

      let :url do
        'http://encoding-test.example.com/'
      end

      let :content_type do
        raise 'define me'
      end

      let :body do
        raise 'define me'
      end

      before do
        stub_request(:any, url).to_return(
          headers: {
            'Content-Type' => content_type,
          },
          body: body.b,
          status: 200
        )
      end

      let :options do
        {
          'name' => 'Some agent',
          'expected_update_period_in_days' => '2',
          'url' => url,
          'mode' => 'on_change',
        }
      end

      let :checker do
        Agents::WebsiteAgent.create!(name: 'Encoding Checker', options:) { |agent|
          agent.user = users(:bob)
        }
      end

      context 'with no encoding information' do
        context 'for a JSON file' do
          let :content_type do
            'application/json'
          end

          let :body do
            {
              value: huginn,
            }.to_json
          end

          let :options do
            super().merge(
              'type' => 'json',
              'extract' => {
                'value' => { 'path' => 'value' }
              }
            )
          end

          it 'should be assumed to be UTF-8' do
            expect { checker.check }.to change { Event.count }.by(1)
            event = Event.last
            expect(event.payload['value']).to eq(huginn)
          end
        end

        context 'for an HTML file' do
          let :content_type do
            'text/html'
          end

          let :options do
            super().merge(
              'type' => 'html',
              'extract' => {
                'value' => { 'css' => 'title', 'value' => 'string(.)' }
              }
            )
          end

          context 'with a charset in the header' do
            let :content_type do
              super() + '; charset=iso-8859-1'
            end

            let :body do
              <<~HTML.encode(Encoding::ISO_8859_1)
                <!DOCTYPE html>
                <title>#{odin}</title>
                <p>Hello, world.
              HTML
            end

            it 'should be detected from it' do
              expect { checker.check }.to change { Event.count }.by(1)
              event = Event.last
              expect(event.payload['value']).to eq(odin)
            end
          end

          context 'with no charset in the header' do
            let :body do
              <<~HTML.encode(Encoding::ISO_8859_1)
                <!DOCTYPE html>
                <meta charset="iso-8859-1">
                <title>#{odin}</title>
                <p>Hello, world.
              HTML
            end

            it 'should be detected from a meta tag' do
              expect { checker.check }.to change { Event.count }.by(1)
              event = Event.last
              expect(event.payload['value']).to eq(odin)
            end
          end

          context 'with charset declarations both in the header and in the content' do
            let :content_type do
              super() + '; charset=iso-8859-1'
            end

            let :body do
              <<~HTML.encode(Encoding::ISO_8859_1)
                <!DOCTYPE html>
                <meta charset="UTF-8">
                <title>#{odin}</title>
                <p>Hello, world.
              HTML
            end

            it 'should be detected as that of the header' do
              expect { checker.check }.to change { Event.count }.by(1)
              event = Event.last
              expect(event.payload['value']).to eq(odin)
            end
          end
        end

        context 'for an XML file' do
          let :content_type do
            'application/xml'
          end

          let :options do
            super().merge(
              'type' => 'xml',
              'extract' => {
                'value' => { 'xpath' => '/root/message', 'value' => 'string(.)' }
              }
            )
          end

          context 'with a charset in the header' do
            let :content_type do
              super() + '; charset=euc-jp'
            end

            let :body do
              <<~XML.encode(Encoding::EUC_JP)
                <?xml version="1.0"?>
                <root>
                  <message>#{huginn}</message>
                </root>
              XML
            end

            it 'should be detected from it' do
              expect { checker.check }.to change { Event.count }.by(1)
              event = Event.last
              expect(event.payload['value']).to eq(huginn)
            end
          end

          context 'with no charset in the header' do
            context 'but in XML declaration' do
              let :body do
                <<~XML.encode(Encoding::EUC_JP)
                  <?xml version="1.0" encoding="euc-jp"?>
                  <root>
                    <message>#{huginn}</message>
                  </root>
                XML
              end

              it 'should be detected' do
                expect { checker.check }.to change { Event.count }.by(1)
                event = Event.last
                expect(event.payload['value']).to eq(huginn)
              end
            end

            context 'but having a BOM' do
              let :body do
                <<~XML.encode(Encoding::UTF_16LE)
                  \u{feff}<?xml version="1.0"?>
                  <root>
                    <message>#{huginn}</message>
                  </root>
                XML
              end

              it 'should be detected' do
                expect { checker.check }.to change { Event.count }.by(1)
                event = Event.last
                expect(event.payload['value']).to eq(huginn)
              end
            end
          end
        end
      end

      context 'when force_encoding option is specified' do
        let :options do
          super().merge(
            'force_encoding' => 'EUC-JP'
          )
        end

        context 'for a JSON file' do
          let :content_type do
            'application/json'
          end

          let :body do
            {
              value: huginn,
            }.to_json.encode(Encoding::EUC_JP)
          end

          let :options do
            super().merge(
              'type' => 'json',
              'extract' => {
                'value' => { 'path' => 'value' }
              }
            )
          end

          it 'should be forced' do
            expect { checker.check }.to change { Event.count }.by(1)
            event = Event.last
            expect(event.payload['value']).to eq(huginn)
          end
        end

        context 'for an HTML file' do
          let :content_type do
            'text/html'
          end

          context 'with charset specified in the header and the content' do
            let :content_type do
              super() + '; charset=UTF-8'
            end

            let :body do
              <<~HTML.encode(Encoding::EUC_JP)
                <!DOCTYPE html>
                <meta charset="UTF-8"/>
                <title>#{huginn}</title>
                <p>Hello, world.
              HTML
            end

            let :options do
              super().merge(
                'type' => 'html',
                'extract' => {
                  'value' => { 'css' => 'title', 'value' => 'string(.)' }
                }
              )
            end

            it 'should still be forced' do
              expect { checker.check }.to change { Event.count }.by(1)
              event = Event.last
              expect(event.payload['value']).to eq(huginn)
            end
          end
        end
      end
    end
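
    # #working? reflects recently created events, recent errors, and the
    # expected_update_period_in_days window.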
    describe '#working?' do
      it 'checks if events have been received within the expected receive period' do
        stubbed_time = Time.now
        allow(Time).to receive(:now) { stubbed_time }

        expect(@checker).not_to be_working # No events created
        @checker.check
        expect(@checker.reload).to be_working # Just created events

        @checker.error "oh no!"
        expect(@checker.reload).not_to be_working # There is a recent error

        stubbed_time = 20.minutes.from_now
        @checker.events.delete_all
        @checker.check
        expect(@checker.reload).to be_working # There is a newer event now

        stubbed_time = 2.days.from_now
        expect(@checker.reload).not_to be_working # Two days have passed without a new event having been created
      end
    end
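
    # Extraction behaviour for CSS selectors, XPath expressions, JSON paths,
    # regexps, and post-extraction templates.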
    describe "parsing" do
      it "parses CSS" do
        @checker.check
        event = Event.last
        expect(event.payload['url']).to eq("http://imgs.xkcd.com/comics/evolving.png")
        expect(event.payload['title']).to eq("Evolving")
        expect(event.payload['hovertext']).to match(/^Biologists play reverse/)
      end

      it "parses XPath" do
        @valid_options['extract'].each { |key, value|
          value.delete('css')
          value['xpath'] = "//*[@id='comic']//img"
        }
        @checker.options = @valid_options
        @checker.check
        event = Event.last
        expect(event.payload).to match(
          'url' => 'http://imgs.xkcd.com/comics/evolving.png',
          'title' => 'Evolving',
          'hovertext' => /^Biologists play reverse/
        )
      end

      it "should exclude hidden keys" do
        @valid_options['extract']['hovertext']['hidden'] = true
        @checker.options = @valid_options
        @checker.check
        event = Event.last
        expect(event.payload).to match(
          'url' => 'http://imgs.xkcd.com/comics/evolving.png',
          'title' => 'Evolving'
        )
      end

      it "should return an integer value if XPath evaluates to one" do
        rel_site = {
          'name' => "XKCD",
          'expected_update_period_in_days' => 2,
          'type' => "html",
          'url' => "http://xkcd.com",
          'mode' => "on_change",
          'extract' => {
            'num_links' => { 'css' => "#comicLinks", 'value' => "count(./a)" }
          }
        }
        rel = Agents::WebsiteAgent.new(name: "xkcd", options: rel_site)
        rel.user = users(:bob)
        rel.save!
        rel.check
        event = Event.last
        expect(event.payload['num_links']).to eq("9")
      end

      it "should return everything concatenated if XPath returns many nodes" do
        rel_site = {
          'name' => "XKCD",
          'expected_update_period_in_days' => 2,
          'type' => "html",
          'url' => "http://xkcd.com",
          'mode' => "on_change",
          'extract' => {
            'slogan' => { 'css' => "#slogan", 'value' => ".//text()" }
          }
        }
        rel = Agents::WebsiteAgent.new(name: "xkcd", options: rel_site)
        rel.user = users(:bob)
        rel.save!
        rel.check
        event = Event.last
        expect(event.payload['slogan']).to eq("A webcomic of romance, sarcasm, math, &amp; language.")
      end

      it "should return an array if XPath returns many nodes and the raw option is specified" do
        rel_site = {
          'name' => "XKCD",
          'expected_update_period_in_days' => 2,
          'type' => "html",
          'url' => "http://xkcd.com",
          'mode' => "on_change",
          'extract' => {
            'slogan' => { 'css' => "#slogan", 'value' => ".//text()", 'raw' => true },
            'slogan_length' => { 'css' => "#slogan", 'value' => "string-length(.)", 'raw' => true },
          }
        }
        rel = Agents::WebsiteAgent.new(name: "xkcd", options: rel_site)
        rel.user = users(:bob)
        rel.save!
        rel.check
        event = Event.last
        expect(event.payload['slogan']).to eq(["A webcomic of romance,", " sarcasm, math, &amp; language."])
        expect(event.payload['slogan_length']).to eq(49)
      end

      it "should return a string value returned by XPath" do
        rel_site = {
          'name' => "XKCD",
          'expected_update_period_in_days' => 2,
          'type' => "html",
          'url' => "http://xkcd.com",
          'mode' => "on_change",
          'extract' => {
            'slogan' => { 'css' => "#slogan", 'value' => "string(.)" },
            'slogan_length' => { 'css' => "#slogan", 'value' => "string-length(.)" },
          }
        }
        rel = Agents::WebsiteAgent.new(name: "xkcd", options: rel_site)
        rel.user = users(:bob)
        rel.save!
        rel.check
        event = Event.last
        expect(event.payload['slogan']).to eq("A webcomic of romance, sarcasm, math, & language.")
        expect(event.payload['slogan_length']).to eq("49")
      end

      it "should interpolate _response_" do
        @valid_options['url'] = 'http://xkcd.com/index'
        @valid_options['extract']['response_info'] =
          @valid_options['extract']['url'].merge(
            'value' => '{{ "The response from " | append:_response_.url | append:" was " | append:_response_.status | append:" " | append:_response_.headers.X-Status-Message | append:"." | to_xpath }}'
          )
        @valid_options['extract']['original_url'] =
          @valid_options['extract']['url'].merge(
            'value' => '{{ _url_ | to_xpath }}'
          )
        @checker.options = @valid_options
        @checker.check
        event = Event.last
        expect(event.payload['response_info']).to eq('The response from http://xkcd.com/ was 200 OK.')
        expect(event.payload['original_url']).to eq('http://xkcd.com/index')
      end

      it "should format and merge values in template after extraction" do
        @valid_options['extract']['hovertext']['hidden'] = true
        @valid_options['template'] = {
          'title' => '{{title | upcase}}',
          'summary' => '{{title}}: {{hovertext | truncate: 20}}',
        }
        @checker.options = @valid_options
        @checker.check
        expect(@checker.event_keys).to contain_exactly('url', 'title', 'summary')
        expect(@checker.event_description.scan(/"(\w+)": "\.\.\."/).flatten).to contain_exactly('url', 'title', 'summary')
        event = Event.last
        expect(event.payload).to eq({
          'title' => 'EVOLVING',
          'url' => 'http://imgs.xkcd.com/comics/evolving.png',
          'summary' => 'Evolving: Biologists play r...',
        })
      end
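
      # XML/Atom extraction against fixture feeds, using XPath and CSS selectors
      # with and without namespace handling (use_namespaces).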
      describe "XML" do
        before do
          stub_request(:any, /github_rss/).to_return(
            body: File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")),
            status: 200
          )

          @checker = Agents::WebsiteAgent.new(name: 'github', options: {
            'name' => 'GitHub',
            'expected_update_period_in_days' => '2',
            'type' => 'xml',
            'url' => 'http://example.com/github_rss.atom',
            'mode' => 'on_change',
            'extract' => {
              'title' => { 'xpath' => '/feed/entry', 'value' => 'normalize-space(./title)' },
              'url' => { 'xpath' => '/feed/entry', 'value' => './link[1]/@href' },
              'thumbnail' => { 'xpath' => '/feed/entry', 'value' => './thumbnail/@url' },
              'page_title' => { 'xpath' => '/feed/title', 'value' => 'string(.)', 'repeat' => true }
            }
          }, keep_events_for: 2.days)
          @checker.user = users(:bob)
          @checker.save!
        end

        it "works with XPath" do
          expect {
            @checker.check
          }.to change { Event.count }.by(20)
          events = Event.last(20)
          expect(events.size).to eq(20)
          expect(events.map { |event| event.payload['page_title'] }.uniq).to eq(['Recent Commits to huginn:master'])
          event = events.last
          expect(event.payload['title']).to eq('Shift to dev group')
          expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
          expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
        end

        it "works with XPath with namespaces unstripped" do
          @checker.options['use_namespaces'] = 'true'
          @checker.save!
          expect {
            @checker.check
          }.to change { Event.count }.by(0)

          @checker.options['extract'] = {
            'title' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => 'normalize-space(./xmlns:title)' },
            'url' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './xmlns:link[1]/@href' },
            'thumbnail' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './media:thumbnail/@url' },
          }
          @checker.save!
          expect {
            @checker.check
          }.to change { Event.count }.by(20)
          event = Event.last
          expect(event.payload['title']).to eq('Shift to dev group')
          expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
          expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
        end

        it "works with CSS selectors" do
          @checker.options['extract'] = {
            'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
            'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
            'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' },
          }
          @checker.save!
          expect {
            @checker.check
          }.to change { Event.count }.by(20)
          event = Event.last
          expect(event.payload['title']).to be_empty
          expect(event.payload['thumbnail']).to be_empty

          @checker.options['extract'] = {
            'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./xmlns:title)' },
            'url' => { 'css' => 'feed > entry', 'value' => './xmlns:link[1]/@href' },
            'thumbnail' => { 'css' => 'feed > entry', 'value' => './media:thumbnail/@url' },
          }
          @checker.save!
          expect {
            @checker.check
          }.to change { Event.count }.by(20)
          event = Event.last
          expect(event.payload['title']).to eq('Shift to dev group')
          expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
          expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
        end

        it "works with CSS selectors with namespaces stripped" do
          @checker.options['extract'] = {
            'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
            'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
            'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' },
          }
          @checker.options['use_namespaces'] = 'false'
          @checker.save!
          expect {
            @checker.check
          }.to change { Event.count }.by(20)
          event = Event.last
          expect(event.payload['title']).to eq('Shift to dev group')
          expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
          expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
        end
      end

      describe "XML with cdata" do
        before do
          stub_request(:any, /cdata_rss/).to_return(
            body: File.read(Rails.root.join("spec/data_fixtures/cdata_rss.atom")),
            status: 200
          )

          @checker = Agents::WebsiteAgent.new(name: 'cdata', options: {
            'name' => 'CDATA',
            'expected_update_period_in_days' => '2',
            'type' => 'xml',
            'url' => 'http://example.com/cdata_rss.atom',
            'mode' => 'on_change',
            'extract' => {
              'author' => { 'xpath' => '/feed/entry/author/name', 'value' => 'string(.)' },
              'title' => { 'xpath' => '/feed/entry/title', 'value' => 'string(.)' },
              'content' => { 'xpath' => '/feed/entry/content', 'value' => 'string(.)' },
            }
          }, keep_events_for: 2.days)
          @checker.user = users(:bob)
          @checker.save!
        end

        it "works with XPath" do
          expect {
            @checker.check
          }.to change { Event.count }.by(10)
          event = Event.last
          expect(event.payload['author']).to eq('bill98')
          expect(event.payload['title']).to eq('Help: Rainmeter Skins • Test if Today is Between 2 Dates')
          expect(event.payload['content']).to start_with('Can I ')
        end
      end

      describe "JSON" do
        it "works with paths" do
          json = {
            'response' => {
              'version' => 2,
              'title' => "hello!"
            }
          }
          stub_request(:any, /json-site/).to_return(body: json.to_json, status: 200)
          site = {
            'name' => "Some JSON Response",
            'expected_update_period_in_days' => "2",
            'type' => "json",
            'url' => "http://json-site.com",
            'mode' => 'on_change',
            'extract' => {
              'version' => { 'path' => "response.version" },
              'title' => { 'path' => "response.title" }
            }
          }
          checker = Agents::WebsiteAgent.new(name: "Weather Site", options: site)
          checker.user = users(:bob)
          checker.save!
          expect(checker.event_keys).to contain_exactly('version', 'title')
          expect(checker.event_description.scan(/"(\w+)": "\.\.\."/).flatten).to contain_exactly('version', 'title')

          checker.check
          event = Event.last
          expect(event.payload['version']).to eq(2)
          expect(event.payload['title']).to eq("hello!")
        end

        it "can handle arrays" do
          json = {
            'response' => {
              'status' => 'ok',
              'data' => [
                { 'title' => "first", 'version' => 2 },
                { 'title' => "second", 'version' => 2.5 }
              ]
            }
          }
          stub_request(:any, /json-site/).to_return(body: json.to_json, status: 200)
          site = {
            'name' => "Some JSON Response",
            'expected_update_period_in_days' => "2",
            'type' => "json",
            'url' => "http://json-site.com",
            'mode' => 'on_change',
            'extract' => {
              'title' => { 'path' => "response.data[*].title" },
              'version' => { 'path' => "response.data[*].version" },
              'status' => { 'path' => "response.status", 'repeat' => true },
            }
          }
          checker = Agents::WebsiteAgent.new(name: "Weather Site", options: site)
          checker.user = users(:bob)
          checker.save!

          expect {
            checker.check
          }.to change { Event.count }.by(2)

          (event2, event1) = Event.last(2)

          expect(event1.payload['status']).to eq('ok')
          expect(event1.payload['version']).to eq(2.5)
          expect(event1.payload['title']).to eq("second")

          expect(event2.payload['status']).to eq('ok')
          expect(event2.payload['version']).to eq(2)
          expect(event2.payload['title']).to eq("first")
        end

        it "stores the whole object if :extract is not specified" do
          json = {
            'response' => {
              'version' => 2,
              'title' => "hello!"
            }
          }
          stub_request(:any, /json-site/).to_return(body: json.to_json, status: 200)
          site = {
            'name' => "Some JSON Response",
            'expected_update_period_in_days' => "2",
            'type' => "json",
            'url' => "http://json-site.com",
            'mode' => 'on_change'
          }
          checker = Agents::WebsiteAgent.new(name: "Weather Site", options: site)
          checker.user = users(:bob)
          checker.save!
          expect(checker.event_keys).to be_nil
          expect(checker.event_description).to match(/Events will be the raw JSON returned by the URL/)

          checker.check
          event = Event.last
          expect(event.payload['response']['version']).to eq(2)
          expect(event.payload['response']['title']).to eq("hello!")
        end
      end

      describe "text parsing" do
        before do
          stub_request(:any, /text-site/).to_return(body: <<~EOF, status: 200)
            VERSION 1
            water: wet
            fire: hot
          EOF
          site = {
            'name' => 'Some Text Response',
            'expected_update_period_in_days' => '2',
            'type' => 'text',
            'url' => 'http://text-site.com',
            'mode' => 'on_change',
            'extract' => {
              'version' => { 'regexp' => '^VERSION (.+)$', index: 1, repeat: true },
              'word' => { 'regexp' => '^(.+?): (.+)$', index: 1 },
              'property' => { 'regexp' => '^(.+?): (.+)$', index: '2' },
            }
          }
          @checker = Agents::WebsiteAgent.new(name: 'Text Site', options: site)
          @checker.user = users(:bob)
          @checker.save!
        end

        it "works with regexp with named capture" do
          @checker.options = @checker.options.deep_merge('extract' => {
            'word' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'word' },
            'property' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'property' },
          })

          expect {
            @checker.check
          }.to change { Event.count }.by(2)

          event1, event2 = Event.last(2)
          expect(event1.payload['version']).to eq('1')
          expect(event1.payload['word']).to eq('water')
          expect(event1.payload['property']).to eq('wet')
          expect(event2.payload['version']).to eq('1')
          expect(event2.payload['word']).to eq('fire')
          expect(event2.payload['property']).to eq('hot')
        end

        it "works with regexp" do
          expect {
            @checker.check
          }.to change { Event.count }.by(2)

          event1, event2 = Event.last(2)
          expect(event1.payload['version']).to eq('1')
          expect(event1.payload['word']).to eq('water')
          expect(event1.payload['property']).to eq('wet')
          expect(event2.payload['version']).to eq('1')
          expect(event2.payload['word']).to eq('fire')
          expect(event2.payload['property']).to eq('hot')
        end
      end
    end
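
    # #receive: fetching a page whose URL (or raw data) comes from incoming events,
    # including url_from_event, data_from_event, and merge mode.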
    describe "#receive" do
      describe "with a url or url_from_event" do
        before do
          @event = Event.new
          @event.agent = agents(:bob_rain_notifier_agent)
          @event.payload = {
            'url' => 'http://foo.com',
            'link' => 'Random'
          }
          @event.save!
        end

        it "should use url_from_event as the url to scrape" do
          stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Ffoo.com')

          @checker.options = @valid_options.merge(
            'url_from_event' => 'http://example.org/?url={{url | uri_escape}}'
          )
          @checker.receive([@event])

          expect(stub).to have_been_requested
        end

        it "should use the Agent's `url` option if url_from_event is not set" do
          expect {
            @checker.options = @valid_options
            @checker.receive([@event])
          }.to change { Event.count }.by(1)
        end

        it "should allow url_from_event to be an array of urls" do
          stub1 = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Ffoo.com')
          stub2 = stub_request(:any, 'http://google.org/?url=http%3A%2F%2Ffoo.com')

          @checker.options = @valid_options.merge(
            'url_from_event' => ['http://example.org/?url={{url | uri_escape}}',
                                 'http://google.org/?url={{url | uri_escape}}']
          )
          @checker.receive([@event])

          expect(stub1).to have_been_requested
          expect(stub2).to have_been_requested
        end

        it "should interpolate values from incoming event payload" do
          stub_request(:any, /foo/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")),
                                              status: 200)

          expect {
            @valid_options['url_from_event'] = '{{ url }}'
            @valid_options['extract'] = {
              'from' => {
                'xpath' => '*[1]',
                'value' => '{{url | to_xpath}}'
              },
              'to' => {
                'xpath' => '(//a[@href and text()={{link | to_xpath}}])[1]',
                'value' => '@href'
              },
            }
            @checker.options = @valid_options
            @checker.receive([@event])
          }.to change { Event.count }.by(1)

          expect(Event.last.payload).to eq({
            'from' => 'http://foo.com',
            'to' => 'http://dynamic.xkcd.com/random/comic/',
          })
        end

        it "should use the options url if no url is in the event payload, and `url_from_event` is not provided" do
          @checker.options['mode'] = 'merge'
          @event.payload.delete('url')
          expect {
            @checker.receive([@event])
          }.to change { Event.count }.by(1)
          expect(Event.last.payload['title']).to eq('Evolving')
          expect(Event.last.payload['link']).to eq('Random')
        end

        it "should interpolate values from incoming event payload and _response_" do
          @event.payload['title'] = 'XKCD'

          expect {
            @valid_options['extract'] = {
              'response_info' => @valid_options['extract']['url'].merge(
                'value' => '{% capture sentence %}The response from {{title}} was {{_response_.status}} {{_response_.headers.X-Status-Message}}.{% endcapture %}{{sentence | to_xpath}}'
              )
            }
            @checker.options = @valid_options
            @checker.receive([@event])
          }.to change { Event.count }.by(1)

          expect(Event.last.payload['response_info']).to eq('The response from XKCD was 200 OK.')
        end

        it "should support merging of events" do
          expect {
            @checker.options = @valid_options
            @checker.options[:mode] = "merge"
            @checker.receive([@event])
          }.to change { Event.count }.by(1)
          last_payload = Event.last.payload
          expect(last_payload['link']).to eq('Random')
        end

        it 'returns an array of found nodes when the array extract_option is true' do
          stub_request(:any, /foo/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")),
                                              status: 200)

          @checker.options['extract']['nav_links'] =
            { 'css' => '#topLeft li', 'value' => 'normalize-space(.)', 'single_array' => 'true' }
          expect {
            @checker.receive([@event])
          }.to change { Event.count }.by(1)
          expect(Event.last.payload['nav_links']).to eq(["Archive", "What If?", "Blag", "Store", "About"])
        end

        it "should set the inbound_event when logging errors" do
          stub_request(:any, /foo/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")),
                                              status: 200)

          @valid_options['extract'] = {
            'url' => { 'css' => "div", 'value' => "@src" },
            'title' => { 'css' => "#comic img", 'value' => "@alt" },
          }
          @checker.options = @valid_options
          @checker.receive([@event])

          log = @checker.logs.first
          expect(log.message).to match(/Got an uneven number of matches/)
          expect(log.inbound_event).to eq(@event)
        end
      end

      describe "with a data_from_event" do
        describe "with json data" do
          before do
            @event = Event.new
            @event.agent = agents(:bob_rain_notifier_agent)
            @event.payload = {
              'something' => 'some value',
              'some_object' => {
                'some_data' => { hello: 'world', href: '/world' }.to_json
              },
              url: 'http://example.com/',
              'headers' => {
                'Content-Type' => 'application/json'
              },
              'status' => 200
            }
            @event.save!

            @checker.options = @valid_options.merge(
              'type' => 'json',
              'data_from_event' => '{{ some_object.some_data }}',
              'extract' => {
                'value' => { 'path' => 'hello' },
                'url' => { 'path' => 'href' },
              },
              'template' => {
                'value' => '{{ value }}',
                'url' => '{{ url | to_uri: _response_.url }}',
                'type' => '{{ _response_.headers.content_type }}',
                'status' => '{{ _response_.status | as_object }}'
              }
            )
          end

          it "should extract from the event data in the incoming event payload" do
            expect {
              @checker.receive([@event])
            }.to change { Event.count }.by(1)
            expect(@checker.events.last.payload).to eq({ 'value' => 'world', 'url' => 'http://example.com/world',
                                                         'type' => 'application/json', 'status' => 200 })
          end

          it "should support merge mode" do
            @checker.options['mode'] = "merge"

            expect {
              @checker.receive([@event])
            }.to change { Event.count }.by(1)
            expect(@checker.events.last.payload).to eq(@event.payload.merge('value' => 'world',
                                                                            'url' => 'http://example.com/world',
                                                                            'type' => 'application/json',
                                                                            'status' => 200))
          end

          it "should convert headers and status in the event data properly" do
            @event.payload[:status] = '201'
            @event.payload[:headers] = [['Content-Type', 'application/rss+xml']]
            expect {
              @checker.receive([@event])
            }.to change { Event.count }.by(1)
            expect(@checker.events.last.payload).to eq({ 'value' => 'world', 'url' => 'http://example.com/world',
                                                         'type' => 'application/rss+xml', 'status' => 201 })
          end

          it "should ignore inconvertible headers and status in the event data" do
            @event.payload[:status] = 'ok'
            @event.payload[:headers] = ['Content-Type', 'Content-Length']
            expect {
              @checker.receive([@event])
            }.to change { Event.count }.by(1)
            expect(@checker.events.last.payload).to eq({ 'value' => 'world', 'url' => 'http://example.com/world',
                                                         'type' => '', 'status' => nil })
          end

          it "should output an error when nothing can be found at the path" do
            @checker.options = @checker.options.merge(
              'data_from_event' => '{{ some_object.mistake }}'
            )

            expect {
              @checker.receive([@event])
            }.to_not(change { Event.count })

            expect(@checker.logs.last.message).to match(/No data was found in the Event payload using the template {{ some_object\.mistake }}/)
          end

          it "should output an error when the data cannot be parsed" do
            @event.update_attribute :payload, @event.payload.merge('some_object' => { 'some_data' => '{invalid json' })

            expect {
              @checker.receive([@event])
            }.to_not(change { Event.count })

            expect(@checker.logs.last.message).to match(/Error when handling event data:/)
          end
        end

        describe "with HTML data" do
          before do
            @event = Event.new
            @event.agent = agents(:bob_rain_notifier_agent)
            @event.payload = {
              'url' => 'http://xkcd.com',
              'some_object' => {
                'some_data' => "<div><span class='title'>Title!</span><span class='body'>Body!</span></div>"
              }
            }
            @event.save!

            @checker.options = @valid_options.merge(
              'type' => 'html',
              'data_from_event' => '{{ some_object.some_data }}',
              'extract' => {
                'title' => { 'css' => ".title", 'value' => "string(.)" },
                'body' => { 'css' => "div span.body", 'value' => "string(.)" }
              }
            )
          end

          it "should extract from the event data in the incoming event payload" do
            expect {
              @checker.receive([@event])
            }.to change { Event.count }.by(1)
            expect(@checker.events.last.payload).to eq({ 'title' => 'Title!', 'body' => 'Body!' })
          end
        end
      end
    end
  end
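
  # Request-level options: HTTP basic auth credentials and custom request headers.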
  describe "checking with http basic auth" do
    before do
      @valid_options = {
        'name' => "XKCD",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://www.example.com",
        'mode' => 'on_change',
        'extract' => {
          'url' => { 'css' => "#comic img", 'value' => "@src" },
          'title' => { 'css' => "#comic img", 'value' => "@alt" },
          'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
        },
        'basic_auth' => "user:pass"
      }
      @checker = Agents::WebsiteAgent.new(name: "auth", options: @valid_options)
      @checker.user = users(:bob)
      @checker.save!

      stub_request(:any, "www.example.com")
        .with(basic_auth: ['user', 'pass'])
        .to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)
    end

    describe "#check" do
      it "should check for changes" do
        expect { @checker.check }.to change { Event.count }.by(1)
        expect { @checker.check }.not_to(change { Event.count })
      end
    end
  end

  describe "checking with headers" do
    before do
      stub_request(:any, /example/)
        .with(headers: { 'foo' => 'bar' })
        .to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)

      @valid_options = {
        'name' => "XKCD",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://www.example.com",
        'mode' => 'on_change',
        'headers' => { 'foo' => 'bar' },
        'extract' => {
          'url' => { 'css' => "#comic img", 'value' => "@src" },
        }
      }
      @checker = Agents::WebsiteAgent.new(name: "ua", options: @valid_options)
      @checker.user = users(:bob)
      @checker.save!
    end

    describe "#check" do
      it "should check for changes" do
        expect { @checker.check }.to change { Event.count }.by(1)
      end
    end
  end
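
  # URL normalization through the to_uri template filter, checked against the
  # links in spec/data_fixtures/urlTest.html.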
  describe "checking urls" do
    before do
      stub_request(:any, /example/)
        .to_return(body: File.read(Rails.root.join("spec/data_fixtures/urlTest.html")), status: 200)

      @valid_options = {
        'name' => "Url Test",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://www.example.com",
        'mode' => 'all',
        'extract' => {
          'url' => { 'css' => "a", 'value' => "@href" },
        },
        'template' => {
          'url' => '{{ url | to_uri }}',
        }
      }
      @checker = Agents::WebsiteAgent.new(name: "ua", options: @valid_options)
      @checker.user = users(:bob)
      @checker.save!
    end

    describe "#check" do
      before do
        expect { @checker.check }.to change { Event.count }.by(8)
        @events = Event.last(8)
      end

      it "should check hostname" do
        event = @events[0]
        expect(event.payload['url']).to eq("http://google.com")
      end

      it "should check unescaped query" do
        event = @events[1]
        expect(event.payload['url']).to eq("https://www.google.ca/search?q=some%20query")
      end

      it "should check properly escaped query" do
        event = @events[2]
        expect(event.payload['url']).to eq("https://www.google.ca/search?q=some%20query")
      end

      it "should check unescaped unicode url" do
        event = @events[3]
        expect(event.payload['url']).to eq("http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
      end

      it "should check unescaped unicode query" do
        event = @events[4]
        expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
      end

      it "should check properly escaped unicode url" do
        event = @events[5]
        expect(event.payload['url']).to eq("http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
      end

      it "should check properly escaped unicode query" do
        event = @events[6]
        expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
      end

      it "should check url with unescaped brackets in the path component" do
        event = @events[7]
        expect(event.payload['url']).to eq("http://[::1]/path%5B%5D?query[]=foo")
      end
    end
  end
end