# website_agent_spec.rb
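# Specs for Agents::WebsiteAgent: option validation, #check scraping and de-duplication,
# gzip/deflate handling, character-encoding detection, CSS/XPath/JSON/regexp extraction,
# #receive with url_from_event / data_from_event, plus basic-auth, header, and URL
# normalization checks.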

require 'rails_helper'
describe Agents::WebsiteAgent do
  describe "checking without basic auth" do
    before do
      stub_request(:any, /xkcd/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")),
        status: 200,
        headers: {
          'X-Status-Message' => 'OK'
        })
      stub_request(:any, /xkcd\.com\/index$/).to_return(status: 301,
        headers: {
          'Location' => 'http://xkcd.com/'
        })
      @valid_options = {
        'name' => "XKCD",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://xkcd.com",
        'mode' => 'on_change',
        'extract' => {
          'url' => { 'css' => "#comic img", 'value' => "@src" },
          'title' => { 'css' => "#comic img", 'value' => "@alt" },
          'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
        }
      }
      @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @valid_options, :keep_events_for => 2.days)
      @checker.user = users(:bob)
      @checker.save!
    end
    it_behaves_like WebRequestConcern
    describe "validations" do
      before do
        expect(@checker).to be_valid
      end
      it "should validate the integer fields" do
        @checker.options['expected_update_period_in_days'] = "2"
        expect(@checker).to be_valid
        @checker.options['expected_update_period_in_days'] = "nonsense"
        expect(@checker).not_to be_valid
      end
      it 'should validate the http_success_codes fields' do
        @checker.options['http_success_codes'] = [404]
        expect(@checker).to be_valid
        @checker.options['http_success_codes'] = [404, 404]
        expect(@checker).not_to be_valid
        @checker.options['http_success_codes'] = [404, "422"]
        expect(@checker).to be_valid
        @checker.options['http_success_codes'] = [404.0]
        expect(@checker).not_to be_valid
        @checker.options['http_success_codes'] = ["not_a_code"]
        expect(@checker).not_to be_valid
        @checker.options['http_success_codes'] = []
        expect(@checker).to be_valid
        @checker.options['http_success_codes'] = ''
        expect(@checker).to be_valid
        @checker.options['http_success_codes'] = false
        expect(@checker).to be_valid
      end
      it "should validate uniqueness_look_back" do
        @checker.options['uniqueness_look_back'] = "nonsense"
        expect(@checker).not_to be_valid
        @checker.options['uniqueness_look_back'] = "2"
        expect(@checker).to be_valid
      end
      it "should validate mode" do
        @checker.options['mode'] = "nonsense"
        expect(@checker).not_to be_valid
        @checker.options['mode'] = "on_change"
        expect(@checker).to be_valid
        @checker.options['mode'] = "all"
        expect(@checker).to be_valid
        @checker.options['mode'] = ""
        expect(@checker).to be_valid
      end
      it "should validate the force_encoding option" do
        @checker.options['force_encoding'] = ''
        expect(@checker).to be_valid
        @checker.options['force_encoding'] = 'UTF-8'
        expect(@checker).to be_valid
        @checker.options['force_encoding'] = ['UTF-8']
        expect(@checker).not_to be_valid
        @checker.options['force_encoding'] = 'UTF-42'
        expect(@checker).not_to be_valid
      end
      context "in 'json' type" do
        it "should ensure that all extractions have a 'path'" do
          @checker.options['type'] = 'json'
          @checker.options['extract'] = {
            'url' => { 'foo' => 'bar' },
          }
          expect(@checker).to_not be_valid
          expect(@checker.errors_on(:base)).to include(/When type is json, all extractions must have a path attribute/)
          @checker.options['type'] = 'json'
          @checker.options['extract'] = {
            'url' => { 'path' => 'bar' },
          }
          expect(@checker).to be_valid
        end
      end
    end
    describe "#check" do
      it "should check for changes (and update Event.expires_at)" do
        travel(-2.seconds) do
          expect { @checker.check }.to change { Event.count }.by(1)
        end
        event = Event.last
        expect { @checker.check }.not_to change { Event.count }
        update_event = Event.last
        expect(update_event.expires_at).not_to eq(event.expires_at)
      end
      it "should always save events when in :all mode" do
        expect {
          @valid_options['mode'] = 'all'
          @checker.options = @valid_options
          @checker.check
          @checker.check
        }.to change { Event.count }.by(2)
      end
      it "should take uniqueness_look_back into account during deduplication" do
        @valid_options['mode'] = 'all'
        @checker.options = @valid_options
        @checker.check
        @checker.check
        event = Event.last
        event.payload = "{}"
        event.save
        expect {
          @valid_options['mode'] = 'on_change'
          @valid_options['uniqueness_look_back'] = 2
          @checker.options = @valid_options
          @checker.check
        }.not_to change { Event.count }
        expect {
          @valid_options['mode'] = 'on_change'
          @valid_options['uniqueness_look_back'] = 1
          @checker.options = @valid_options
          @checker.check
        }.to change { Event.count }.by(1)
      end
      it "should log an error if the number of results for a set of extraction patterns differs" do
        @valid_options['extract']['url']['css'] = "div"
        @checker.options = @valid_options
        @checker.check
        expect(@checker.logs.first.message).to match(/Got an uneven number of matches/)
      end
      it "should accept an array for url" do
        @valid_options['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
        @checker.options = @valid_options
        expect { @checker.save! }.not_to raise_error
        expect { @checker.check }.not_to raise_error
      end
      it "should parse events from all urls in array" do
        expect {
          @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
          @valid_options['mode'] = 'all'
          @checker.options = @valid_options
          @checker.check
        }.to change { Event.count }.by(2)
      end
      it "should follow unique rules when parsing array of urls" do
        expect {
          @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
          @checker.options = @valid_options
          @checker.check
        }.to change { Event.count }.by(1)
      end
    end
    describe 'http_success_codes' do
      it 'should allow scraping from a 404 result' do
        json = {
          'response' => {
            'version' => 2,
            'title' => "hello!"
          }
        }
        zipped = ActiveSupport::Gzip.compress(json.to_json)
        stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 404)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://gzip.com",
          'mode' => 'on_change',
          'http_success_codes' => [404],
          'extract' => {
            'version' => { 'path' => 'response.version' },
          },
          # no unzip option
        }
        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
        checker.user = users(:bob)
        checker.save!
        checker.check
        event = Event.last
        expect(event.payload['version']).to eq(2)
      end
    end
    describe 'unzipping' do
      it 'should unzip automatically if the response has Content-Encoding: gzip' do
        json = {
          'response' => {
            'version' => 2,
            'title' => "hello!"
          }
        }
        zipped = ActiveSupport::Gzip.compress(json.to_json)
        stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 200)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://gzip.com",
          'mode' => 'on_change',
          'extract' => {
            'version' => { 'path' => 'response.version' },
          },
          # no unzip option
        }
        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
        checker.user = users(:bob)
        checker.save!
        checker.check
        event = Event.last
        expect(event.payload['version']).to eq(2)
      end
      it 'should unzip with unzip option' do
        json = {
          'response' => {
            'version' => 2,
            'title' => "hello!"
          }
        }
        zipped = ActiveSupport::Gzip.compress(json.to_json)
        stub_request(:any, /gzip/).to_return(body: zipped, status: 200)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://gzip.com",
          'mode' => 'on_change',
          'extract' => {
            'version' => { 'path' => 'response.version' },
          },
          'unzip' => 'gzip',
        }
        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
        checker.user = users(:bob)
        checker.save!
        checker.check
        event = Event.last
        expect(event.payload['version']).to eq(2)
      end
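      # Regression spec for #1018: the agent must either avoid requesting `deflate`,
      # or be able to inflate a raw (zlib-header-less) deflate stream when the server
      # sends one; in both cases the extracted content should come out as plain text.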
      it 'should either avoid or support a raw deflate stream (#1018)' do
        stub_request(:any, /deflate/).with(headers: { 'Accept-Encoding' => /\A(?!.*deflate)/ }).
          to_return(body: 'hello',
            status: 200)
        stub_request(:any, /deflate/).with(headers: { 'Accept-Encoding' => /deflate/ }).
          to_return(body: "\xcb\x48\xcd\xc9\xc9\x07\x00\x06\x2c".b,
            headers: { 'Content-Encoding' => 'deflate' },
            status: 200)
        site = {
          'name' => 'Some Response',
          'expected_update_period_in_days' => '2',
          'type' => 'text',
          'url' => 'http://deflate',
          'mode' => 'on_change',
          'extract' => {
            'content' => { 'regexp' => '.+', 'index' => 0 }
          }
        }
        checker = Agents::WebsiteAgent.new(name: "Deflate Test", options: site)
        checker.user = users(:bob)
        checker.save!
        expect {
          checker.check
        }.to change { Event.count }.by(1)
        event = Event.last
        expect(event.payload['content']).to eq('hello')
      end
    end
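    # In the encoding specs, content_type and body are overridden per context; the base
    # definitions raise so that a context which forgets to define them fails loudly.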
    describe 'encoding' do
      let :huginn do
        "\u{601d}\u{8003}"
      end
      let :odin do
        "\u{d3}\u{f0}inn"
      end
      let :url do
        'http://encoding-test.example.com/'
      end
      let :content_type do
        raise 'define me'
      end
      let :body do
        raise 'define me'
      end
      before do
        stub_request(:any, url).to_return(
          headers: {
            'Content-Type' => content_type,
          },
          body: body.b,
          status: 200)
      end
      let :options do
        {
          'name' => 'Some agent',
          'expected_update_period_in_days' => '2',
          'url' => url,
          'mode' => 'on_change',
        }
      end
      let :checker do
        Agents::WebsiteAgent.create!(name: 'Encoding Checker', options: options) { |agent|
          agent.user = users(:bob)
        }
      end
      context 'with no encoding information' do
        context 'for a JSON file' do
          let :content_type do
            'application/json'
          end
          let :body do
            {
              value: huginn,
            }.to_json
          end
          let :options do
            super().merge(
              'type' => 'json',
              'extract' => {
                'value' => { 'path' => 'value' }
              }
            )
          end
          it 'should be assumed to be UTF-8' do
            expect { checker.check }.to change { Event.count }.by(1)
            event = Event.last
            expect(event.payload['value']).to eq(huginn)
          end
        end
        context 'for an HTML file' do
          let :content_type do
            'text/html'
          end
          let :options do
            super().merge(
              'type' => 'html',
              'extract' => {
                'value' => { 'css' => 'title', 'value' => 'string(.)' }
              }
            )
          end
          context 'with a charset in the header' do
            let :content_type do
              super() + '; charset=iso-8859-1'
            end
            let :body do
              <<-HTML.encode(Encoding::ISO_8859_1)
<!DOCTYPE html>
<title>#{odin}</title>
<p>Hello, world.
              HTML
            end
            it 'should be detected from it' do
              expect { checker.check }.to change { Event.count }.by(1)
              event = Event.last
              expect(event.payload['value']).to eq(odin)
            end
          end
          context 'with no charset in the header' do
            let :body do
              <<-HTML.encode(Encoding::ISO_8859_1)
<!DOCTYPE html>
<meta charset="iso-8859-1">
<title>#{odin}</title>
<p>Hello, world.
              HTML
            end
            it 'should be detected from a meta tag' do
              expect { checker.check }.to change { Event.count }.by(1)
              event = Event.last
              expect(event.payload['value']).to eq(odin)
            end
          end
          context 'with charset declarations both in the header and in the content' do
            let :content_type do
              super() + '; charset=iso-8859-1'
            end
            let :body do
              <<-HTML.encode(Encoding::ISO_8859_1)
<!DOCTYPE html>
<meta charset="UTF-8">
<title>#{odin}</title>
<p>Hello, world.
              HTML
            end
            it 'should be detected as that of the header' do
              expect { checker.check }.to change { Event.count }.by(1)
              event = Event.last
              expect(event.payload['value']).to eq(odin)
            end
          end
        end
        context 'for an XML file' do
          let :content_type do
            'application/xml'
          end
          let :options do
            super().merge(
              'type' => 'xml',
              'extract' => {
                'value' => { 'xpath' => '/root/message', 'value' => 'string(.)' }
              }
            )
          end
          context 'with a charset in the header' do
            let :content_type do
              super() + '; charset=euc-jp'
            end
            let :body do
              <<-XML.encode(Encoding::EUC_JP)
<?xml version="1.0"?>
<root>
<message>#{huginn}</message>
</root>
              XML
            end
            it 'should be detected from it' do
              expect { checker.check }.to change { Event.count }.by(1)
              event = Event.last
              expect(event.payload['value']).to eq(huginn)
            end
          end
          context 'with no charset in the header' do
            context 'but in XML declaration' do
              let :body do
                <<-XML.encode(Encoding::EUC_JP)
<?xml version="1.0" encoding="euc-jp"?>
<root>
<message>#{huginn}</message>
</root>
                XML
              end
              it 'should be detected' do
                expect { checker.check }.to change { Event.count }.by(1)
                event = Event.last
                expect(event.payload['value']).to eq(huginn)
              end
            end
            context 'but having a BOM' do
              let :body do
                <<-XML.encode(Encoding::UTF_16LE)
\u{feff}<?xml version="1.0"?>
<root>
<message>#{huginn}</message>
</root>
                XML
              end
              it 'should be detected' do
                expect { checker.check }.to change { Event.count }.by(1)
                event = Event.last
                expect(event.payload['value']).to eq(huginn)
              end
            end
          end
        end
      end
      context 'when force_encoding option is specified' do
        let :options do
          super().merge(
            'force_encoding' => 'EUC-JP'
          )
        end
        context 'for a JSON file' do
          let :content_type do
            'application/json'
          end
          let :body do
            {
              value: huginn,
            }.to_json.encode(Encoding::EUC_JP)
          end
          let :options do
            super().merge(
              'type' => 'json',
              'extract' => {
                'value' => { 'path' => 'value' }
              }
            )
          end
          it 'should be forced' do
            expect { checker.check }.to change { Event.count }.by(1)
            event = Event.last
            expect(event.payload['value']).to eq(huginn)
          end
        end
        context 'for an HTML file' do
          let :content_type do
            'text/html'
          end
          context 'with charset specified in the header and the content' do
            let :content_type do
              super() + '; charset=UTF-8'
            end
            let :body do
              <<-HTML.encode(Encoding::EUC_JP)
<!DOCTYPE html>
<meta charset="UTF-8"/>
<title>#{huginn}</title>
<p>Hello, world.
              HTML
            end
            let :options do
              super().merge(
                'type' => 'html',
                'extract' => {
                  'value' => { 'css' => 'title', 'value' => 'string(.)' }
                }
              )
            end
            it 'should still be forced' do
              expect { checker.check }.to change { Event.count }.by(1)
              event = Event.last
              expect(event.payload['value']).to eq(huginn)
            end
          end
        end
      end
    end
    describe '#working?' do
      it 'checks if events have been received within the expected receive period' do
        stubbed_time = Time.now
        stub(Time).now { stubbed_time }
        expect(@checker).not_to be_working # No events created
        @checker.check
        expect(@checker.reload).to be_working # Just created events
        @checker.error "oh no!"
        expect(@checker.reload).not_to be_working # There is a recent error
        stubbed_time = 20.minutes.from_now
        @checker.events.delete_all
        @checker.check
        expect(@checker.reload).to be_working # There is a newer event now
        stubbed_time = 2.days.from_now
        expect(@checker.reload).not_to be_working # Two days have passed without a new event having been created
      end
    end
    describe "parsing" do
      it "parses CSS" do
        @checker.check
        event = Event.last
        expect(event.payload['url']).to eq("http://imgs.xkcd.com/comics/evolving.png")
        expect(event.payload['title']).to eq("Evolving")
        expect(event.payload['hovertext']).to match(/^Biologists play reverse/)
      end
      it "parses XPath" do
        @valid_options['extract'].each { |key, value|
          value.delete('css')
          value['xpath'] = "//*[@id='comic']//img"
        }
        @checker.options = @valid_options
        @checker.check
        event = Event.last
        expect(event.payload).to match(
          'url' => 'http://imgs.xkcd.com/comics/evolving.png',
          'title' => 'Evolving',
          'hovertext' => /^Biologists play reverse/
        )
      end
      it "should exclude hidden keys" do
        @valid_options['extract']['hovertext']['hidden'] = true
        @checker.options = @valid_options
        @checker.check
        event = Event.last
        expect(event.payload).to match(
          'url' => 'http://imgs.xkcd.com/comics/evolving.png',
          'title' => 'Evolving'
        )
      end
      it "should return an integer value if XPath evaluates to one" do
        rel_site = {
          'name' => "XKCD",
          'expected_update_period_in_days' => 2,
          'type' => "html",
          'url' => "http://xkcd.com",
          'mode' => "on_change",
          'extract' => {
            'num_links' => {'css' => "#comicLinks", 'value' => "count(./a)"}
          }
        }
        rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
        rel.user = users(:bob)
        rel.save!
        rel.check
        event = Event.last
        expect(event.payload['num_links']).to eq("9")
      end
      it "should return everything concatenated if XPath returns many nodes" do
        rel_site = {
          'name' => "XKCD",
          'expected_update_period_in_days' => 2,
          'type' => "html",
          'url' => "http://xkcd.com",
          'mode' => "on_change",
          'extract' => {
            'slogan' => {'css' => "#slogan", 'value' => ".//text()"}
          }
        }
        rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
        rel.user = users(:bob)
        rel.save!
        rel.check
        event = Event.last
        expect(event.payload['slogan']).to eq("A webcomic of romance, sarcasm, math, &amp; language.")
      end
      it "should return a string value returned by XPath" do
        rel_site = {
          'name' => "XKCD",
          'expected_update_period_in_days' => 2,
          'type' => "html",
          'url' => "http://xkcd.com",
          'mode' => "on_change",
          'extract' => {
            'slogan' => {'css' => "#slogan", 'value' => "string(.)"}
          }
        }
        rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
        rel.user = users(:bob)
        rel.save!
        rel.check
        event = Event.last
        expect(event.payload['slogan']).to eq("A webcomic of romance, sarcasm, math, & language.")
      end
  634. it "should interpolate _response_" do
  635. @valid_options['url'] = 'http://xkcd.com/index'
  636. @valid_options['extract']['response_info'] =
  637. @valid_options['extract']['url'].merge(
  638. 'value' => '{{ "The reponse from " | append:_response_.url | append:" was " | append:_response_.status | append:" " | append:_response_.headers.X-Status-Message | append:"." | to_xpath }}'
  639. )
  640. @valid_options['extract']['original_url'] =
  641. @valid_options['extract']['url'].merge(
  642. 'value' => '{{ _url_ | to_xpath }}'
  643. )
  644. @checker.options = @valid_options
  645. @checker.check
  646. event = Event.last
  647. expect(event.payload['response_info']).to eq('The reponse from http://xkcd.com/ was 200 OK.')
  648. expect(event.payload['original_url']).to eq('http://xkcd.com/index')
  649. end
  650. it "should format and merge values in template after extraction" do
  651. @valid_options['extract']['hovertext']['hidden'] = true
  652. @valid_options['template'] = {
  653. 'title' => '{{title | upcase}}',
  654. 'summary' => '{{title}}: {{hovertext | truncate: 20}}',
  655. }
  656. @checker.options = @valid_options
  657. @checker.check
  658. expect(@checker.event_keys).to contain_exactly('url', 'title', 'summary')
  659. expect(@checker.event_description.scan(/"(\w+)": "\.\.\."/).flatten).to contain_exactly('url', 'title', 'summary')
  660. event = Event.last
  661. expect(event.payload).to eq({
  662. 'title' => 'EVOLVING',
  663. 'url' => 'http://imgs.xkcd.com/comics/evolving.png',
  664. 'summary' => 'Evolving: Biologists play r...',
  665. })
  666. end
  667. describe "XML" do
  668. before do
  669. stub_request(:any, /github_rss/).to_return(
  670. body: File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")),
  671. status: 200
  672. )
  673. @checker = Agents::WebsiteAgent.new(name: 'github', options: {
  674. 'name' => 'GitHub',
  675. 'expected_update_period_in_days' => '2',
  676. 'type' => 'xml',
  677. 'url' => 'http://example.com/github_rss.atom',
  678. 'mode' => 'on_change',
  679. 'extract' => {
  680. 'title' => { 'xpath' => '/feed/entry', 'value' => 'normalize-space(./title)' },
  681. 'url' => { 'xpath' => '/feed/entry', 'value' => './link[1]/@href' },
  682. 'thumbnail' => { 'xpath' => '/feed/entry', 'value' => './thumbnail/@url' },
  683. 'page_title': { 'xpath': '/feed/title', 'value': 'string(.)', 'repeat' => true }
  684. }
  685. }, keep_events_for: 2.days)
  686. @checker.user = users(:bob)
  687. @checker.save!
  688. end
  689. it "works with XPath" do
  690. expect {
  691. @checker.check
  692. }.to change { Event.count }.by(20)
  693. events = Event.last(20)
  694. expect(events.size).to eq(20)
  695. expect(events.map { |event| event.payload['page_title'] }.uniq).to eq(['Recent Commits to huginn:master'])
  696. event = events.last
  697. expect(event.payload['title']).to eq('Shift to dev group')
  698. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  699. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  700. end
  701. it "works with XPath with namespaces unstripped" do
  702. @checker.options['use_namespaces'] = 'true'
  703. @checker.save!
  704. expect {
  705. @checker.check
  706. }.to change { Event.count }.by(0)
  707. @checker.options['extract'] = {
  708. 'title' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => 'normalize-space(./xmlns:title)' },
  709. 'url' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './xmlns:link[1]/@href' },
  710. 'thumbnail' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './media:thumbnail/@url' },
  711. }
  712. @checker.save!
  713. expect {
  714. @checker.check
  715. }.to change { Event.count }.by(20)
  716. event = Event.last
  717. expect(event.payload['title']).to eq('Shift to dev group')
  718. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  719. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  720. end
  721. it "works with CSS selectors" do
  722. @checker.options['extract'] = {
  723. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
  724. 'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
  725. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' },
  726. }
  727. @checker.save!
  728. expect {
  729. @checker.check
  730. }.to change { Event.count }.by(20)
  731. event = Event.last
  732. expect(event.payload['title']).to be_empty
  733. expect(event.payload['thumbnail']).to be_empty
  734. @checker.options['extract'] = {
  735. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./xmlns:title)' },
  736. 'url' => { 'css' => 'feed > entry', 'value' => './xmlns:link[1]/@href' },
  737. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './media:thumbnail/@url' },
  738. }
  739. @checker.save!
  740. expect {
  741. @checker.check
  742. }.to change { Event.count }.by(20)
  743. event = Event.last
  744. expect(event.payload['title']).to eq('Shift to dev group')
  745. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  746. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  747. end
  748. it "works with CSS selectors with namespaces stripped" do
  749. @checker.options['extract'] = {
  750. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
  751. 'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
  752. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' },
  753. }
  754. @checker.options['use_namespaces'] = 'false'
  755. @checker.save!
  756. expect {
  757. @checker.check
  758. }.to change { Event.count }.by(20)
  759. event = Event.last
  760. expect(event.payload['title']).to eq('Shift to dev group')
  761. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  762. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  763. end
  764. end
  765. describe "XML with cdata" do
  766. before do
  767. stub_request(:any, /cdata_rss/).to_return(
  768. body: File.read(Rails.root.join("spec/data_fixtures/cdata_rss.atom")),
  769. status: 200
  770. )
  771. @checker = Agents::WebsiteAgent.new(name: 'cdata', options: {
  772. 'name' => 'CDATA',
  773. 'expected_update_period_in_days' => '2',
  774. 'type' => 'xml',
  775. 'url' => 'http://example.com/cdata_rss.atom',
  776. 'mode' => 'on_change',
  777. 'extract' => {
  778. 'author' => { 'xpath' => '/feed/entry/author/name', 'value' => 'string(.)'},
  779. 'title' => { 'xpath' => '/feed/entry/title', 'value' => 'string(.)' },
  780. 'content' => { 'xpath' => '/feed/entry/content', 'value' => 'string(.)' },
  781. }
  782. }, keep_events_for: 2.days)
  783. @checker.user = users(:bob)
  784. @checker.save!
  785. end
  786. it "works with XPath" do
  787. expect {
  788. @checker.check
  789. }.to change { Event.count }.by(10)
  790. event = Event.last
  791. expect(event.payload['author']).to eq('bill98')
  792. expect(event.payload['title']).to eq('Help: Rainmeter Skins • Test if Today is Between 2 Dates')
  793. expect(event.payload['content']).to start_with('Can I ')
  794. end
  795. end
  796. describe "JSON" do
  797. it "works with paths" do
  798. json = {
  799. 'response' => {
  800. 'version' => 2,
  801. 'title' => "hello!"
  802. }
  803. }
  804. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  805. site = {
  806. 'name' => "Some JSON Response",
  807. 'expected_update_period_in_days' => "2",
  808. 'type' => "json",
  809. 'url' => "http://json-site.com",
  810. 'mode' => 'on_change',
  811. 'extract' => {
  812. 'version' => {'path' => "response.version"},
  813. 'title' => {'path' => "response.title"}
  814. }
  815. }
  816. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  817. checker.user = users(:bob)
  818. checker.save!
  819. expect(checker.event_keys).to contain_exactly('version', 'title')
  820. expect(checker.event_description.scan(/"(\w+)": "\.\.\."/).flatten).to contain_exactly('version', 'title')
  821. checker.check
  822. event = Event.last
  823. expect(event.payload['version']).to eq(2)
  824. expect(event.payload['title']).to eq("hello!")
  825. end
  826. it "can handle arrays" do
  827. json = {
  828. 'response' => {
  829. 'status' => 'ok',
  830. 'data' => [
  831. {'title' => "first", 'version' => 2},
  832. {'title' => "second", 'version' => 2.5}
  833. ]
  834. }
  835. }
  836. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  837. site = {
  838. 'name' => "Some JSON Response",
  839. 'expected_update_period_in_days' => "2",
  840. 'type' => "json",
  841. 'url' => "http://json-site.com",
  842. 'mode' => 'on_change',
  843. 'extract' => {
  844. 'title' => { 'path' => "response.data[*].title" },
  845. 'version' => { 'path' => "response.data[*].version" },
  846. 'status' => { 'path' => "response.status", 'repeat' => true },
  847. }
  848. }
  849. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  850. checker.user = users(:bob)
  851. checker.save!
  852. expect {
  853. checker.check
  854. }.to change { Event.count }.by(2)
  855. (event2, event1) = Event.last(2)
  856. expect(event1.payload['status']).to eq('ok')
  857. expect(event1.payload['version']).to eq(2.5)
  858. expect(event1.payload['title']).to eq("second")
  859. expect(event2.payload['status']).to eq('ok')
  860. expect(event2.payload['version']).to eq(2)
  861. expect(event2.payload['title']).to eq("first")
  862. end
  863. it "stores the whole object if :extract is not specified" do
  864. json = {
  865. 'response' => {
  866. 'version' => 2,
  867. 'title' => "hello!"
  868. }
  869. }
  870. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  871. site = {
  872. 'name' => "Some JSON Response",
  873. 'expected_update_period_in_days' => "2",
  874. 'type' => "json",
  875. 'url' => "http://json-site.com",
  876. 'mode' => 'on_change'
  877. }
  878. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  879. checker.user = users(:bob)
  880. checker.save!
  881. expect(checker.event_keys).to be_nil
  882. expect(checker.event_description).to match(/Events will be the raw JSON returned by the URL/)
  883. checker.check
  884. event = Event.last
  885. expect(event.payload['response']['version']).to eq(2)
  886. expect(event.payload['response']['title']).to eq("hello!")
  887. end
  888. end
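      # Note: the heredoc body in the stub below is intentionally left unindented so that
      # the ^-anchored regexps in the extract options match each line of the fixture.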
  889. describe "text parsing" do
  890. before do
  891. stub_request(:any, /text-site/).to_return(body: <<-EOF, status: 200)
  892. VERSION 1
  893. water: wet
  894. fire: hot
  895. EOF
  896. site = {
  897. 'name' => 'Some Text Response',
  898. 'expected_update_period_in_days' => '2',
  899. 'type' => 'text',
  900. 'url' => 'http://text-site.com',
  901. 'mode' => 'on_change',
  902. 'extract' => {
  903. 'version' => { 'regexp' => '^VERSION (.+)$', index: 1, repeat: true },
  904. 'word' => { 'regexp' => '^(.+?): (.+)$', index: 1 },
  905. 'property' => { 'regexp' => '^(.+?): (.+)$', index: '2' },
  906. }
  907. }
  908. @checker = Agents::WebsiteAgent.new(name: 'Text Site', options: site)
  909. @checker.user = users(:bob)
  910. @checker.save!
  911. end
  912. it "works with regexp with named capture" do
  913. @checker.options = @checker.options.deep_merge('extract' => {
  914. 'word' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'word' },
  915. 'property' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'property' },
  916. })
  917. expect {
  918. @checker.check
  919. }.to change { Event.count }.by(2)
  920. event1, event2 = Event.last(2)
  921. expect(event1.payload['version']).to eq('1')
  922. expect(event1.payload['word']).to eq('water')
  923. expect(event1.payload['property']).to eq('wet')
  924. expect(event2.payload['version']).to eq('1')
  925. expect(event2.payload['word']).to eq('fire')
  926. expect(event2.payload['property']).to eq('hot')
  927. end
  928. it "works with regexp" do
  929. expect {
  930. @checker.check
  931. }.to change { Event.count }.by(2)
  932. event1, event2 = Event.last(2)
  933. expect(event1.payload['version']).to eq('1')
  934. expect(event1.payload['word']).to eq('water')
  935. expect(event1.payload['property']).to eq('wet')
  936. expect(event2.payload['version']).to eq('1')
  937. expect(event2.payload['word']).to eq('fire')
  938. expect(event2.payload['property']).to eq('hot')
  939. end
  940. end
  941. end
  942. describe "#receive" do
  943. describe "with a url or url_from_event" do
  944. before do
  945. @event = Event.new
  946. @event.agent = agents(:bob_rain_notifier_agent)
  947. @event.payload = {
  948. 'url' => 'http://foo.com',
  949. 'link' => 'Random'
  950. }
  951. @event.save!
  952. end
  953. it "should use url_from_event as the url to scrape" do
  954. stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Ffoo.com')
  955. @checker.options = @valid_options.merge(
  956. 'url_from_event' => 'http://example.org/?url={{url | uri_escape}}'
  957. )
  958. @checker.receive([@event])
  959. expect(stub).to have_been_requested
  960. end
  961. it "should use the Agent's `url` option if url_from_event is not set" do
  962. expect {
  963. @checker.options = @valid_options
  964. @checker.receive([@event])
  965. }.to change { Event.count }.by(1)
  966. end
  967. it "should allow url_from_event to be an array of urls" do
  968. stub1 = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Ffoo.com')
  969. stub2 = stub_request(:any, 'http://google.org/?url=http%3A%2F%2Ffoo.com')
  970. @checker.options = @valid_options.merge(
  971. 'url_from_event' => ['http://example.org/?url={{url | uri_escape}}', 'http://google.org/?url={{url | uri_escape}}']
  972. )
  973. @checker.receive([@event])
  974. expect(stub1).to have_been_requested
  975. expect(stub2).to have_been_requested
  976. end
  977. it "should interpolate values from incoming event payload" do
  978. stub_request(:any, /foo/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)
  979. expect {
  980. @valid_options['url_from_event'] = '{{ url }}'
  981. @valid_options['extract'] = {
  982. 'from' => {
  983. 'xpath' => '*[1]',
  984. 'value' => '{{url | to_xpath}}'
  985. },
  986. 'to' => {
  987. 'xpath' => '(//a[@href and text()={{link | to_xpath}}])[1]',
  988. 'value' => '@href'
  989. },
  990. }
  991. @checker.options = @valid_options
  992. @checker.receive([@event])
  993. }.to change { Event.count }.by(1)
  994. expect(Event.last.payload).to eq({
  995. 'from' => 'http://foo.com',
  996. 'to' => 'http://dynamic.xkcd.com/random/comic/',
  997. })
  998. end
  999. it "should use the options url if no url is in the event payload, and `url_from_event` is not provided" do
  1000. @checker.options['mode'] = 'merge'
  1001. @event.payload.delete('url')
  1002. expect {
  1003. @checker.receive([@event])
  1004. }.to change { Event.count }.by(1)
  1005. expect(Event.last.payload['title']).to eq('Evolving')
  1006. expect(Event.last.payload['link']).to eq('Random')
  1007. end
  1008. it "should interpolate values from incoming event payload and _response_" do
  1009. @event.payload['title'] = 'XKCD'
  1010. expect {
  1011. @valid_options['extract'] = {
  1012. 'response_info' => @valid_options['extract']['url'].merge(
  1013. 'value' => '{% capture sentence %}The reponse from {{title}} was {{_response_.status}} {{_response_.headers.X-Status-Message}}.{% endcapture %}{{sentence | to_xpath}}'
  1014. )
  1015. }
  1016. @checker.options = @valid_options
  1017. @checker.receive([@event])
  1018. }.to change { Event.count }.by(1)
  1019. expect(Event.last.payload['response_info']).to eq('The reponse from XKCD was 200 OK.')
  1020. end
  1021. it "should support merging of events" do
  1022. expect {
  1023. @checker.options = @valid_options
  1024. @checker.options[:mode] = "merge"
  1025. @checker.receive([@event])
  1026. }.to change { Event.count }.by(1)
  1027. last_payload = Event.last.payload
  1028. expect(last_payload['link']).to eq('Random')
  1029. end
  1030. it 'returns an array of found nodes when the array extract_option is true' do
  1031. stub_request(:any, /foo/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)
  1032. @checker.options['extract']['nav_links'] = {'css' => '#topLeft li', 'value' => 'normalize-space(.)', 'array' => 'true'}
  1033. expect {
  1034. @checker.receive([@event])
  1035. }.to change { Event.count }.by(1)
  1036. expect(Event.last.payload['nav_links']).to eq(["Archive", "What If?", "Blag", "Store", "About"])
  1037. end
  1038. it "should set the inbound_event when logging errors" do
  1039. stub_request(:any, /foo/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)
  1040. @valid_options['extract'] = {
  1041. 'url' => { 'css' => "div", 'value' => "@src" },
  1042. 'title' => { 'css' => "#comic img", 'value' => "@alt" },
  1043. }
  1044. @checker.options = @valid_options
  1045. @checker.receive([@event])
  1046. log = @checker.logs.first
  1047. expect(log.message).to match(/Got an uneven number of matches/)
  1048. expect(log.inbound_event).to eq(@event)
  1049. end
  1050. end
  1051. describe "with a data_from_event" do
  1052. describe "with json data" do
  1053. before do
  1054. @event = Event.new
  1055. @event.agent = agents(:bob_rain_notifier_agent)
  1056. @event.payload = {
  1057. 'something' => 'some value',
  1058. 'some_object' => {
  1059. 'some_data' => { hello: 'world', href: '/world' }.to_json
  1060. },
  1061. url: 'http://example.com/',
  1062. 'headers' => {
  1063. 'Content-Type' => 'application/json'
  1064. },
  1065. 'status' => 200
  1066. }
  1067. @event.save!
  1068. @checker.options = @valid_options.merge(
  1069. 'type' => 'json',
  1070. 'data_from_event' => '{{ some_object.some_data }}',
  1071. 'extract' => {
  1072. 'value' => { 'path' => 'hello' },
  1073. 'url' => { 'path' => 'href' },
  1074. },
  1075. 'template' => {
  1076. 'value' => '{{ value }}',
  1077. 'url' => '{{ url | to_uri: _response_.url }}',
  1078. 'type' => '{{ _response_.headers.content_type }}',
  1079. 'status' => '{{ _response_.status | as_object }}'
  1080. }
  1081. )
  1082. end
  1083. it "should extract from the event data in the incoming event payload" do
  1084. expect {
  1085. @checker.receive([@event])
  1086. }.to change { Event.count }.by(1)
  1087. expect(@checker.events.last.payload).to eq({ 'value' => 'world', 'url' => 'http://example.com/world', 'type' => 'application/json', 'status' => 200 })
  1088. end
  1089. it "should support merge mode" do
  1090. @checker.options['mode'] = "merge"
  1091. expect {
  1092. @checker.receive([@event])
  1093. }.to change { Event.count }.by(1)
  1094. expect(@checker.events.last.payload).to eq(@event.payload.merge('value' => 'world', 'url' => 'http://example.com/world', 'type' => 'application/json', 'status' => 200))
  1095. end
  1096. it "should convert headers and status in the event data properly" do
  1097. @event.payload[:status] = '201'
  1098. @event.payload[:headers] = [['Content-Type', 'application/rss+xml']]
  1099. expect {
  1100. @checker.receive([@event])
  1101. }.to change { Event.count }.by(1)
  1102. expect(@checker.events.last.payload).to eq({ 'value' => 'world', 'url' => 'http://example.com/world', 'type' => 'application/rss+xml', 'status' => 201 })
  1103. end
  1104. it "should ignore inconvertible headers and status in the event data" do
  1105. @event.payload[:status] = 'ok'
  1106. @event.payload[:headers] = ['Content-Type', 'Content-Length']
  1107. expect {
  1108. @checker.receive([@event])
  1109. }.to change { Event.count }.by(1)
  1110. expect(@checker.events.last.payload).to eq({ 'value' => 'world', 'url' => 'http://example.com/world', 'type' => '', 'status' => nil })
  1111. end
  1112. it "should output an error when nothing can be found at the path" do
  1113. @checker.options = @checker.options.merge(
  1114. 'data_from_event' => '{{ some_object.mistake }}'
  1115. )
  1116. expect {
  1117. @checker.receive([@event])
  1118. }.to_not change { Event.count }
  1119. expect(@checker.logs.last.message).to match(/No data was found in the Event payload using the template {{ some_object\.mistake }}/)
  1120. end
  1121. it "should output an error when the data cannot be parsed" do
  1122. @event.update_attribute :payload, @event.payload.merge('some_object' => { 'some_data' => '{invalid json' })
  1123. expect {
  1124. @checker.receive([@event])
  1125. }.to_not change { Event.count }
  1126. expect(@checker.logs.last.message).to match(/Error when handling event data:/)
  1127. end
  1128. end
  1129. describe "with HTML data" do
  1130. before do
  1131. @event = Event.new
  1132. @event.agent = agents(:bob_rain_notifier_agent)
  1133. @event.payload = {
  1134. 'url' => 'http://xkcd.com',
  1135. 'some_object' => {
  1136. 'some_data' => "<div><span class='title'>Title!</span><span class='body'>Body!</span></div>"
  1137. }
  1138. }
  1139. @event.save!
  1140. @checker.options = @valid_options.merge(
  1141. 'type' => 'html',
  1142. 'data_from_event' => '{{ some_object.some_data }}',
  1143. 'extract' => {
  1144. 'title' => { 'css' => ".title", 'value' => "string(.)" },
  1145. 'body' => { 'css' => "div span.body", 'value' => "string(.)" }
  1146. }
  1147. )
  1148. end
  1149. it "should extract from the event data in the incoming event payload" do
  1150. expect {
  1151. @checker.receive([@event])
  1152. }.to change { Event.count }.by(1)
  1153. expect(@checker.events.last.payload).to eq({ 'title' => 'Title!', 'body' => 'Body!' })
  1154. end
  1155. end
  1156. end
  1157. end
  1158. end
  1159. describe "checking with http basic auth" do
  1160. before do
  1161. @valid_options = {
  1162. 'name' => "XKCD",
  1163. 'expected_update_period_in_days' => "2",
  1164. 'type' => "html",
  1165. 'url' => "http://www.example.com",
  1166. 'mode' => 'on_change',
  1167. 'extract' => {
  1168. 'url' => { 'css' => "#comic img", 'value' => "@src" },
  1169. 'title' => { 'css' => "#comic img", 'value' => "@alt" },
  1170. 'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
  1171. },
  1172. 'basic_auth' => "user:pass"
  1173. }
  1174. @checker = Agents::WebsiteAgent.new(:name => "auth", :options => @valid_options)
  1175. @checker.user = users(:bob)
  1176. @checker.save!
  1177. stub_request(:any, "www.example.com").
  1178. with(basic_auth: ['user', 'pass']).
  1179. to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)
  1180. end
  1181. describe "#check" do
  1182. it "should check for changes" do
  1183. expect { @checker.check }.to change { Event.count }.by(1)
  1184. expect { @checker.check }.not_to change { Event.count }
  1185. end
  1186. end
  1187. end
  1188. describe "checking with headers" do
  1189. before do
  1190. stub_request(:any, /example/).
  1191. with(headers: { 'foo' => 'bar' }).
  1192. to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
  1193. @valid_options = {
  1194. 'name' => "XKCD",
  1195. 'expected_update_period_in_days' => "2",
  1196. 'type' => "html",
  1197. 'url' => "http://www.example.com",
  1198. 'mode' => 'on_change',
  1199. 'headers' => { 'foo' => 'bar' },
  1200. 'extract' => {
  1201. 'url' => { 'css' => "#comic img", 'value' => "@src" },
  1202. }
  1203. }
  1204. @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @valid_options)
  1205. @checker.user = users(:bob)
  1206. @checker.save!
  1207. end
  1208. describe "#check" do
  1209. it "should check for changes" do
  1210. expect { @checker.check }.to change { Event.count }.by(1)
  1211. end
  1212. end
  1213. end
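  # The specs below run urlTest.html through the `{{ url | to_uri }}` template to verify
  # how extracted URLs are resolved and percent-escaped.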
  1214. describe "checking urls" do
  1215. before do
  1216. stub_request(:any, /example/).
  1217. to_return(:body => File.read(Rails.root.join("spec/data_fixtures/urlTest.html")), :status => 200)
  1218. @valid_options = {
  1219. 'name' => "Url Test",
  1220. 'expected_update_period_in_days' => "2",
  1221. 'type' => "html",
  1222. 'url' => "http://www.example.com",
  1223. 'mode' => 'all',
  1224. 'extract' => {
  1225. 'url' => { 'css' => "a", 'value' => "@href" },
  1226. },
  1227. 'template' => {
  1228. 'url' => '{{ url | to_uri }}',
  1229. }
  1230. }
  1231. @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @valid_options)
  1232. @checker.user = users(:bob)
  1233. @checker.save!
  1234. end
  1235. describe "#check" do
  1236. before do
  1237. expect { @checker.check }.to change { Event.count }.by(8)
  1238. @events = Event.last(8)
  1239. end
  1240. it "should check hostname" do
  1241. event = @events[0]
  1242. expect(event.payload['url']).to eq("http://google.com")
  1243. end
  1244. it "should check unescaped query" do
  1245. event = @events[1]
  1246. expect(event.payload['url']).to eq("https://www.google.ca/search?q=some%20query")
  1247. end
  1248. it "should check properly escaped query" do
  1249. event = @events[2]
  1250. expect(event.payload['url']).to eq("https://www.google.ca/search?q=some%20query")
  1251. end
  1252. it "should check unescaped unicode url" do
  1253. event = @events[3]
  1254. expect(event.payload['url']).to eq("http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  1255. end
  1256. it "should check unescaped unicode query" do
  1257. event = @events[4]
  1258. expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  1259. end
  1260. it "should check properly escaped unicode url" do
  1261. event = @events[5]
  1262. expect(event.payload['url']).to eq("http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  1263. end
  1264. it "should check properly escaped unicode query" do
  1265. event = @events[6]
  1266. expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  1267. end
  1268. it "should check url with unescaped brackets in the path component" do
  1269. event = @events[7]
  1270. expect(event.payload['url']).to eq("http://[::1]/path%5B%5D?query[]=foo")
  1271. end
  1272. end
  1273. end
  1274. end