1
0

pdf_info_agent.rb 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. require 'open-uri'
  2. require 'hypdf'
  module Agents
    # Agent that reacts to incoming events carrying a `url` key in their
    # payload, fetches the PDF at that URL and emits a new event containing
    # the metadata reported by HyPDF's `pdfinfo`, merged with the incoming
    # payload.
    class PdfInfoAgent < Agent
      # Only absolute http(s) URLs are fetched. `\A` anchors at the start of the
      # whole string; `^` would also match after an embedded newline and let
      # values such as "|cmd\nhttp://" through.
      HTTP_URL_PATTERN = %r{\Ahttps?://}i.freeze

      gem_dependency_check { defined?(HyPDF) }

      cannot_be_scheduled!
      no_bulk_receive!

      description <<~MD
        The PDF Info Agent returns the metadata contained within a given PDF file, using HyPDF.
        #{'## Include the `hypdf` gem in your `Gemfile` to use PDFInfo Agents.' if dependencies_missing?}
        In order for this agent to work, you need to have [HyPDF](https://devcenter.heroku.com/articles/hypdf) running and configured.
        It works by acting on events that contain a key `url` in their payload, and runs the [pdfinfo](https://devcenter.heroku.com/articles/hypdf#pdfinfo) command on them.
      MD

      # Sample of an emitted payload; the actual keys depend on the metadata
      # present in the PDF.
      event_description do
        "This will change based on the metadata in the pdf.\n\n " +
          Utils.pretty_print({
            "Title" => "Everyday Rails Testing with RSpec",
            "Author" => "Aaron Sumner",
            "Creator" => "LaTeX with hyperref package",
            "Producer" => "xdvipdfmx (0.7.8)",
            "CreationDate" => "Fri Aug 2 05:32:50 2013",
            "Tagged" => "no",
            "Pages" => "150",
            "Encrypted" => "no",
            "Page size" => "612 x 792 pts (letter)",
            "Optimized" => "no",
            "PDF version" => "1.5",
            "url" => "your url"
          })
      end

      # The agent is considered healthy as long as it has no recent error logs.
      def working?
        !recent_error_logs?
      end

      # This agent has no configurable options.
      def default_options
        {}
      end

      # Processes each incoming event by handing its `url` payload value
      # (a single URL or a list of URLs) to #check_url.
      #
      # @param incoming_events [Enumerable] events whose payload may contain `url`
      def receive(incoming_events)
        incoming_events.each do |event|
          interpolate_with(event) do
            check_url(event.payload['url'], event.payload)
          end
        end
      end

      # Fetches every http(s) URL in +in_url+ and creates one event per URL
      # whose payload is the pdfinfo result merged with +payload+ (keys of
      # +payload+ win on conflict). Values that are not http(s) URL strings
      # are skipped.
      #
      # @param in_url [String, Array<String>, nil] URL or list of URLs to inspect
      # @param payload [Hash] incoming payload merged into each emitted event
      def check_url(in_url, payload)
        return unless in_url.present?

        Array(in_url).each do |url|
          next unless http_url?(url)

          log "Fetching #{url}"
          # URI.open instead of Kernel#open: the latter can spawn a subprocess
          # for strings starting with "|" and no longer fetches URLs on Ruby 3.
          # The block form closes the downloaded IO once pdfinfo has returned.
          info = URI.open(url) { |pdf| HyPDF.pdfinfo(pdf) }
          create_event payload: info.merge(payload)
        end
      end

      private

      # True when +url+ is a String starting with http:// or https://.
      def http_url?(url)
        url.is_a?(String) && url.match?(HTTP_URL_PATTERN)
      end
    end
  end