imap_folder_agent.rb 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583
  1. require 'base64'
  2. require 'delegate'
  3. require 'net/imap'
  4. require 'mail'
  5. module Agents
  6. class ImapFolderAgent < Agent
  7. include EventHeadersConcern
  8. cannot_receive_events!
  9. can_dry_run!
  10. default_schedule "every_30m"
  11. description <<-MD
  12. The Imap Folder Agent checks an IMAP server in specified folders and creates Events based on new mails found since the last run. In the first visit to a folder, this agent only checks for the initial status and does not create events.
  13. Specify an IMAP server to connect with `host`, and set `ssl` to true if the server supports IMAP over SSL. Specify `port` if you need to connect to a port other than standard (143 or 993 depending on the `ssl` value).
  14. Specify login credentials in `username` and `password`.
  15. List the names of folders to check in `folders`.
  16. To narrow mails by conditions, build a `conditions` hash with the following keys:
  17. - `subject`
  18. - `body`
  19. Specify a regular expression to match against the decoded subject/body of each mail.
  20. Use the `(?i)` directive for case-insensitive search. For example, a pattern `(?i)alert` will match "alert", "Alert"or "ALERT". You can also make only a part of a pattern to work case-insensitively: `Re: (?i:alert)` will match either "Re: Alert" or "Re: alert", but not "RE: alert".
  21. When a mail has multiple non-attachment text parts, they are prioritized according to the `mime_types` option (which see below) and the first part that matches a "body" pattern, if specified, will be chosen as the "body" value in a created event.
  22. Named captures will appear in the "matches" hash in a created event.
  23. - `from`, `to`, `cc`
  24. Specify a shell glob pattern string that is matched against mail addresses extracted from the corresponding header values of each mail.
  25. Patterns match addresses in case insensitive manner.
  26. Multiple pattern strings can be specified in an array, in which case a mail is selected if any of the patterns matches. (i.e. patterns are OR'd)
  27. - `mime_types`
  28. Specify an array of MIME types to tell which non-attachment part of a mail among its text/* parts should be used as mail body. The default value is `['text/plain', 'text/enriched', 'text/html']`.
  29. - `is_unread`
  30. Setting this to true or false means only mails that is marked as unread or read respectively, are selected.
  31. If this key is unspecified or set to null, it is ignored.
  32. - `has_attachment`
  33. Setting this to true or false means only mails that does or does not have an attachment are selected.
  34. If this key is unspecified or set to null, it is ignored.
  35. Set `mark_as_read` to true to mark found mails as read.
  36. Set `event_headers` to a list of header names you want to include in a `headers` hash in each created event, either in an array of string or in a comma-separated string.
  37. Set `event_headers_style` to one of the following values to normalize the keys of "headers" for downstream agents' convenience:
  38. * `capitalized` (default) - Header names are capitalized; e.g. "Content-Type"
  39. * `downcased` - Header names are downcased; e.g. "content-type"
  40. * `snakecased` - Header names are snakecased; e.g. "content_type"
  41. Set `include_raw_mail` to true to add a `raw_mail` value to each created event, which contains a *Base64-encoded* blob in the "RFC822" format defined in [the IMAP4 standard](https://tools.ietf.org/html/rfc3501). Note that while the result of Base64 encoding will be LF-terminated, its raw content will often be CRLF-terminated because of the nature of the e-mail protocols and formats. The primary use case for a raw mail blob is to pass to a Shell Command Agent with a command like `openssl enc -d -base64 | tr -d '\r' | procmail -Yf-`.
  42. Each agent instance memorizes the highest UID of mails that are found in the last run for each watched folder, so even if you change a set of conditions so that it matches mails that are missed previously, or if you alter the flag status of already found mails, they will not show up as new events.
  43. Also, in order to avoid duplicated notification it keeps a list of Message-Id's of 100 most recent mails, so if multiple mails of the same Message-Id are found, you will only see one event out of them.
  44. MD
  45. event_description <<-MD
  46. Events look like this:
  47. {
  48. "message_id": "...(Message-Id without angle brackets)...",
  49. "folder": "INBOX",
  50. "subject": "...",
  51. "from": "Nanashi <nanashi.gombeh@example.jp>",
  52. "to": ["Jane <jane.doe@example.com>"],
  53. "cc": [],
  54. "date": "2014-05-10T03:47:20+0900",
  55. "mime_type": "text/plain",
  56. "body": "Hello,\n\n...",
  57. "matches": {
  58. }
  59. }
  60. Additionally, "headers" will be included if the `event_headers` option is set, and "raw_mail" if the `include_raw_mail` option is set.
  61. MD
  # Number of Message-Ids remembered for duplicate suppression.
  IDCACHE_SIZE = 100

  # Flags handed to File.fnmatch? when matching address globs. Only the flags
  # that this Ruby's File class actually defines are OR'd together.
  FNM_FLAGS = %i[FNM_CASEFOLD FNM_EXTGLOB]
    .select { |name| File.const_defined?(name) }
    .map { |name| File.const_get(name) }
    .reduce(0) { |flags, bit| flags | bit }
  # The agent counts as working when it created an event within the
  # configured `expected_update_period_in_days` and has no recent error logs.
  def working?
    period = interpolated['expected_update_period_in_days']
    fresh = event_created_within?(period)
    # Mirror `a && !b`: a falsy first operand is returned as-is.
    return fresh unless fresh

    !recent_error_logs?
  end
  # Option values used to pre-fill a newly created agent.
  def default_options
    connection = {
      'host' => 'imap.gmail.com',
      'ssl' => true,
      'username' => 'your.account',
      'password' => 'your.password'
    }

    # Merge in this order so the keys keep their original ordering.
    { 'expected_update_period_in_days' => "1" }
      .merge(connection)
      .merge('folders' => ['INBOX'], 'conditions' => {})
  end
  # Validates the agent options, adding a message to `errors` for every
  # problem found: missing credentials, a bad port, non-boolean flags,
  # malformed `mime_types` / `folders` lists, invalid `conditions` entries
  # and a bad `expected_update_period_in_days`.
  def validate_options
    %w[host username password].each do |key|
      next if options[key].is_a?(String)

      errors.add(:base, '%s is required and must be a string' % key)
    end

    if options['port'].present? && !is_positive_integer?(options['port'])
      errors.add(:base, "port must be a positive integer")
    end

    %w[ssl mark_as_read include_raw_mail].each do |key|
      next unless options[key].present?
      next unless boolify(options[key]).nil?

      errors.add(:base, '%s must be a boolean value' % key)
    end

    # `mime_types` is optional, but when given it must be a non-empty array
    # of "text/*" strings.
    types = options['mime_types']
    if types.is_a?(Array)
      unless types.all? { |type| type.is_a?(String) && type.start_with?('text/') }
        errors.add(:base, 'mime_types may only contain strings that match "text/*".')
      end
      errors.add(:base, 'mime_types should not be empty') if types.empty?
    elsif !types.nil?
      errors.add(:base, 'mime_types must be an array')
    end

    # `folders` follows the same rule: optional, otherwise a non-empty array
    # of strings.
    folders = options['folders']
    if folders.is_a?(Array)
      unless folders.all? { |folder| folder.is_a?(String) }
        errors.add(:base, 'folders may only contain strings')
      end
      errors.add(:base, 'folders should not be empty') if folders.empty?
    elsif !folders.nil?
      errors.add(:base, 'folders must be an array')
    end

    conditions = options['conditions']
    if conditions.is_a?(Hash)
      conditions.each do |key, value|
        # Blank values mean "no constraint" and are not validated.
        next unless value.present?

        case key
        when 'subject', 'body'
          if value.is_a?(String)
            begin
              Regexp.new(value)
            rescue
              errors.add(:base, 'conditions.%s contains an invalid regexp' % key)
            end
          else
            errors.add(:base, 'conditions.%s contains a non-string object' % key)
          end
        when 'from', 'to', 'cc'
          Array(value).each do |pattern|
            if pattern.is_a?(String)
              begin
                # Trial match against an empty string just to see whether
                # the pattern raises.
                glob_match?(pattern, '')
              rescue
                errors.add(:base, 'conditions.%s contains an invalid glob pattern' % key)
              end
            else
              errors.add(:base, 'conditions.%s contains a non-string object' % key)
            end
          end
        when 'is_unread', 'has_attachment'
          unless [true, false].include?(boolify(value))
            errors.add(:base, 'conditions.%s must be a boolean value or null' % key)
          end
        end
      end
    else
      errors.add(:base, 'conditions must be a hash')
    end

    period = options['expected_update_period_in_days']
    if period.present? && !is_positive_integer?(period)
      errors.add(:base, "Invalid expected_update_period_in_days format")
    end
  end
  # Walks every mail yielded by each_unread_mail, applies the configured
  # `conditions`, and creates one event per matching mail whose Message-Id
  # has not been notified yet. Every matching mail, whether newly notified
  # or not, is flagged as read when `mark_as_read` is set (skipped during a
  # dry run).
  #
  # A `has_attachment` condition that boolifies to nil (unspecified or null)
  # is ignored, as the option description promises. Previously it was
  # compared against the real true/false value and silently rejected every
  # mail.
  def check
    each_unread_mail do |mail, notified|
      message_id = mail.message_id
      body_parts = mail.body_parts(mime_types)
      matched_part = nil
      matches = {}

      # Every condition has to hold; named captures from the subject/body
      # patterns are collected into `matches` along the way.
      interpolated['conditions'].all? { |key, value|
        case key
        when 'subject'
          next true unless value.present?

          if (m = Regexp.new(value).match(mail.scrubbed(:subject)))
            m.names.each { |name| matches[name] = m[name] }
            true
          else
            false
          end
        when 'body'
          next true unless value.present?

          re = Regexp.new(value)
          # The first body part matching the pattern becomes the event body.
          matched_part = body_parts.find { |part|
            if (m = re.match(part.scrubbed(:decoded)))
              m.names.each { |name| matches[name] = m[name] }
              true
            else
              false
            end
          }
        when 'from', 'to', 'cc'
          next true unless value.present?

          begin
            # Mail::Field really needs to define respond_to_missing?
            # so we could use try(:addresses) here.
            addresses = mail.header[key].addresses
          rescue NoMethodError
            next false
          end
          addresses.any? { |address|
            Array(value).any? { |pattern| glob_match?(pattern, address) }
          }
        when 'has_attachment'
          wanted = boolify(value)
          # nil means "don't care"; this also avoids the BODYSTRUCTURE fetch.
          wanted.nil? || wanted == mail.has_attachment?
        when 'is_unread'
          true # already filtered out by each_unread_mail
        else
          log 'Unknown condition key ignored: %s' % key
          true
        end
      } or next

      if notified.include?(message_id)
        log 'Ignoring mail: %s (already notified)' % message_id
      else
        matched_part ||= body_parts.first

        if matched_part
          mime_type = matched_part.mime_type
          body = matched_part.scrubbed(:decoded)
        else
          mime_type = 'text/plain'
          body = ''
        end

        log 'Emitting an event for mail: %s' % message_id

        payload = {
          'message_id' => message_id,
          'folder' => mail.folder,
          'subject' => mail.scrubbed(:subject),
          'from' => mail.from_addrs.first,
          'to' => mail.to_addrs,
          'cc' => mail.cc_addrs,
          # A missing or unparsable Date header yields nil rather than an error.
          'date' => (mail.date.iso8601 rescue nil),
          'mime_type' => mime_type,
          'body' => body,
          'matches' => matches,
          'has_attachment' => mail.has_attachment?,
        }

        if boolify(interpolated['include_raw_mail'])
          payload['raw_mail'] = Base64.encode64(mail.raw_mail)
        end

        if interpolated['event_headers'].present?
          # Repeated header fields are joined with newlines.
          headers = mail.header.each_with_object({}) { |field, hash|
            name = field.name
            hash[name] = (v = hash[name]) ? "#{v}\n#{field.value.to_s}" : field.value.to_s
          }
          payload.update(event_headers_payload(headers))
        end

        create_event payload: payload

        notified << message_id if message_id
      end

      if boolify(interpolated['mark_as_read'])
        log 'Marking as read'
        mail.mark_as_read unless dry_run?
      end
    end
  end
  # Connects to the configured IMAP server, visits each folder in `folders`,
  # and yields `(mail, notified)` for every mail whose UID is above the last
  # one recorded for that folder's UIDVALIDITY, filtered by the `is_unread`
  # condition when it is set. On the first visit to a folder only the
  # current highest UID is recorded and nothing is yielded. After all
  # folders are processed the notified list and the new last-seen table are
  # stored and the agent is saved.
  def each_unread_mail
    host, port, ssl, username = interpolated.values_at(:host, :port, :ssl, :username)
    ssl = boolify(ssl)
    port = port.present? ? Integer(port) : nil

    log "Connecting to #{host}#{':%d' % port if port}#{' via SSL' if ssl}"
    Client.open(host, port: port, ssl: ssl) do |imap|
      log "Logging in as #{username}"
      imap.login(username, interpolated[:password])

      # 'lastseen' keeps a hash of { uidvalidity => lastseenuid, ... }
      lastseen = self.lastseen
      seen = make_seen

      # 'notified' keeps an array of message-ids of {IDCACHE_SIZE}
      # most recent notified mails.
      notified = self.notified

      interpolated['folders'].each do |folder|
        log "Selecting the folder: %s" % folder

        imap.select(Net::IMAP.encode_utf7(folder))
        uidvalidity = imap.uidvalidity
        lastseenuid = lastseen[uidvalidity]

        if lastseenuid.nil?
          # First visit: remember where the folder currently ends.
          maxseq = imap.responses['EXISTS'].last
          log "Recording the initial status: %s" % pluralize(maxseq, 'existing mail')
          seen[uidvalidity] = imap.fetch(maxseq, 'UID').last.attr['UID'] if maxseq > 0
          next
        end

        seen[uidvalidity] = lastseenuid
        is_unread = boolify(interpolated['conditions']['is_unread'])

        uids = []
        imap.uid_fetch((lastseenuid + 1)..-1, 'FLAGS').each do |data|
          uid, flags = data.attr.values_at('UID', 'FLAGS')
          seen[uidvalidity] = uid
          next if uid <= lastseenuid

          # nil selects everything; otherwise the unread state must agree.
          uids << uid if is_unread.nil? || (!flags.include?(:Seen)) == is_unread
        end

        noun =
          case is_unread
          when true then 'new unread mail'
          when false then 'new read mail'
          else 'new mail'
          end
        log pluralize(uids.size, noun)

        next if uids.empty?

        imap.uid_fetch_mails(uids).each { |mail| yield mail, notified }
      end

      self.notified = notified
      self.lastseen = seen
      save!
    end
  ensure
    log 'Connection closed'
  end
  # MIME types, in priority order, eligible to serve as the mail body;
  # falls back to the built-in list when the option is not set.
  def mime_types
    configured = interpolated['mime_types']
    configured || ['text/plain', 'text/enriched', 'text/html']
  end
  # Last-seen table rebuilt from memory['lastseen'].
  def lastseen
    stored = memory['lastseen']
    Seen.new(stored)
  end
  # Stores the last-seen table in memory, dropping the obsolete 'seen' key.
  def lastseen=(value)
    memory.delete('seen') # obsolete key
    memory['lastseen'] = value
  end
  335. def make_seen
  336. Seen.new
  337. end
  # Message-Id cache rebuilt from memory['notified'].
  def notified
    stored = memory['notified']
    Notified.new(stored)
  end
  # Stores the Message-Id cache in memory['notified'].
  def notified=(value)
    memory['notified'] = value
  end
  344. private
  # True when `value` matches the shell glob `pattern` under FNM_FLAGS.
  def glob_match?(pattern, value)
    File.fnmatch(pattern, value, FNM_FLAGS)
  end
  # Formats "<count> <noun>", with the noun inflected for the count via
  # String#pluralize.
  def pluralize(count, noun)
    inflected = noun.pluralize(count)
    Kernel.format("%d %s", count, inflected)
  end
  # Key under which headers are placed in the event payload: the inherited
  # value when it is set, otherwise 'headers'.
  def event_headers_key
    inherited = super
    inherited || 'headers'
  end
  # Net::IMAP subclass that remembers the selected folder and its
  # UIDVALIDITY, never returns nil from fetch calls, and can wrap fetched
  # headers in Message objects.
  class Client < ::Net::IMAP
    # Connects, yields the client to the block and always disconnects
    # afterwards, even when the block raises.
    def self.open(host, *args)
      imap = new(host, *args)
      yield imap
    ensure
      imap&.disconnect
    end

    attr_reader :uidvalidity

    # Selects `folder` and records it together with the UIDVALIDITY value
    # reported by the server.
    def select(folder)
      @folder = folder
      result = super(folder)
      @uidvalidity = responses['UIDVALIDITY'].last
      result
    end

    # Same as Net::IMAP#fetch, but yields an empty array instead of nil.
    def fetch(*args)
      super || []
    end

    # Same as Net::IMAP#uid_fetch, but yields an empty array instead of nil.
    def uid_fetch(*args)
      super || []
    end

    # Fetches the headers of the given UIDs and wraps each result in a
    # Message bound to this client and the currently selected folder.
    def uid_fetch_mails(set)
      uid_fetch(set, 'RFC822.HEADER').map do |data|
        Message.new(self, data, folder: @folder, uidvalidity: @uidvalidity)
      end
    end
  end
  # Hash of { uidvalidity => highest seen uid } whose entries can only grow.
  class Seen < Hash
    # @param hash [Hash, nil] a previously stored table; its keys may be
    #   strings (JSON round trip) and are converted to integers.
    def initialize(hash = nil)
      super()
      return unless hash

      # Deserialize a JSON hash which keys are strings
      hash.each do |uidvalidity, uid|
        self[uidvalidity.to_i] = uid
      end
    end

    # Stores `uid` only when nothing is stored yet or it is not smaller than
    # the current value.
    def []=(uidvalidity, uid)
      current = self[uidvalidity]
      return unless current.nil? || current <= uid

      super
    end
  end
  # Array of the most recently notified Message-Ids, capped at IDCACHE_SIZE
  # entries.
  class Notified < Array
    # @param array [Array, nil] previously stored Message-Ids, oldest first.
    def initialize(array = nil)
      super()
      replace(array) if array
    end

    # Appends `value` and then drops the oldest entries so that no more than
    # IDCACHE_SIZE remain. Trimming after the append keeps the cap exact;
    # trimming before it left the list one entry over the limit.
    #
    # @return [Notified] self, like Array#<<
    def <<(value)
      super
      slice!(0...-IDCACHE_SIZE) if size > IDCACHE_SIZE
      self
    end
  end
  # A mail in an IMAP folder. It wraps the Mail object parsed from the
  # fetched RFC822 header, and fetches the body structure and the full
  # message lazily through the owning client.
  class Message < SimpleDelegator
    DEFAULT_BODY_MIME_TYPES = %w[text/plain text/enriched text/html]

    attr_reader :uid, :folder, :uidvalidity

    # Adds #scrubbed, which calls the named method and passes the result
    # through `scrub`, rendering each invalid byte sequence as "<hex>".
    # Truthy results are cached per method name.
    module Scrubbed
      def scrubbed(method)
        (@scrubbed ||= {})[method.to_sym] ||=
          __send__(method).try(:scrub) { |bytes| "<#{bytes.unpack('H*')[0]}>" }
      end
    end

    include Scrubbed

    # @param client the IMAP client used for follow-up fetches and flag updates
    # @param fetch_data a fetch result whose `attr` holds 'UID' and 'RFC822.HEADER'
    # @param props [Hash] extra values stored as instance variables
    #   (the caller in this file passes :folder and :uidvalidity)
    def initialize(client, fetch_data, props = {})
      @client = client
      props.each { |key, value|
        instance_variable_set(:"@#{key}", value)
      }
      attr = fetch_data.attr
      @uid = attr['UID']
      super(Mail.read_from_string(attr['RFC822.HEADER']))
    end

    # Whether the BODYSTRUCTURE reported by the server indicates an
    # attachment. The answer is cached after the first call, including a
    # negative one, so the server is asked at most once per message.
    def has_attachment?
      # `||=` would re-run the fetch whenever the cached value is false.
      return @has_attachment if defined?(@has_attachment)

      data = @client.uid_fetch(@uid, 'BODYSTRUCTURE').first
      @has_attachment = data ? struct_has_attachment?(data.attr['BODYSTRUCTURE']) : false
    end

    # The whole message as returned for BODY[] (fetched with BODY.PEEK[]),
    # or '' when the server returns nothing. Cached.
    def raw_mail
      @raw_mail ||=
        if data = @client.uid_fetch(@uid, 'BODY.PEEK[]').first
          data.attr['BODY[]']
        else
          ''
        end
    end

    # The complete mail parsed from #raw_mail. Cached.
    def fetch
      @parsed ||= Mail.read_from_string(raw_mail)
    end

    # Non-attachment text parts whose MIME type is listed in `mime_types`.
    # For a multipart mail the parts are first sorted by that list. Each
    # returned part is extended with Scrubbed.
    def body_parts(mime_types = DEFAULT_BODY_MIME_TYPES)
      mail = fetch
      if mail.multipart?
        mail.body.set_sort_order(mime_types)
        mail.body.sort_parts!
        mail.all_parts
      else
        [mail]
      end.select { |part|
        if part.multipart? || part.attachment? || !part.text? ||
           !mime_types.include?(part.mime_type)
          false
        else
          part.extend(Scrubbed)
          true
        end
      }
    end

    # Adds the :Seen flag to this message on the server.
    def mark_as_read
      @client.uid_store(@uid, '+FLAGS', [:Seen])
    end

    private

    # A structure counts as having an attachment when it is multipart and
    # either its own subtype is 'MIXED' or one of its parts qualifies
    # (checked recursively).
    def struct_has_attachment?(struct)
      struct.multipart? && (
        struct.subtype == 'MIXED' ||
        struct.parts.any? { |part|
          struct_has_attachment?(part)
        }
      )
    end
  end
  477. end
  478. end