imap_folder_agent.rb 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628
  1. require 'base64'
  2. require 'delegate'
  3. require 'net/imap'
  4. require 'mail'
  5. module Agents
  6. class ImapFolderAgent < Agent
  7. include GoogleOauth2Concern
  8. include EventHeadersConcern
  9. cannot_receive_events!
  10. can_dry_run!
  11. default_schedule "every_30m"
  12. description <<~MD
  13. The Imap Folder Agent checks an IMAP server in specified folders and creates Events based on new mails found since the last run. In the first visit to a folder, this agent only checks for the initial status and does not create events.
  14. Specify an IMAP server to connect with `host`, and set `ssl` to true if the server supports IMAP over SSL. Specify `port` if you need to connect to a port other than standard (143 or 993 depending on the `ssl` value), and specify login credentials in `username` and `password`.
  15. Alternatively, if you want to use Gmail, go to the Services page and authenticate with Google beforehand, and then select the service. In this case, `host`, `ssl`, `port`, `username` and `password` are unnecessary and will be ignored.
  16. List the names of folders to check in `folders`.
  17. Specify an array of MIME types in 'mime_types' to tell which non-attachment part of a mail among its `text/*` parts should be used as mail body. The default value is `['text/plain', 'text/enriched', 'text/html']`.
  18. To narrow mails by conditions, build a `conditions` hash with the following keys:
  19. - `subject`
  20. - `body`
  21. Specify a regular expression to match against the decoded subject/body of each mail.
  22. Use the `(?i)` directive for case-insensitive search. For example, a pattern `(?i)alert` will match "alert", "Alert"or "ALERT". You can also make only a part of a pattern to work case-insensitively: `Re: (?i:alert)` will match either "Re: Alert" or "Re: alert", but not "RE: alert".
  23. When a mail has multiple non-attachment text parts, they are prioritized according to the `mime_types` option (as mentioned above) and the first part that matches a "body" pattern, if specified, will be chosen as the "body" value in a created event.
  24. Named captures will appear in the "matches" hash in a created event.
  25. - `from`, `to`, `cc`
  26. Specify a shell glob pattern string that is matched against mail addresses extracted from the corresponding header values of each mail.
  27. Patterns match addresses in case insensitive manner.
  28. Multiple pattern strings can be specified in an array, in which case a mail is selected if any of the patterns matches. (i.e. patterns are OR'd)
  29. - `is_unread`
  30. Setting this to true or false means only mails that is marked as unread or read respectively, are selected.
  31. If this key is unspecified or set to null, it is ignored.
  32. - `has_attachment`
  33. Setting this to true or false means only mails that does or does not have an attachment are selected.
  34. If this key is unspecified or set to null, it is ignored.
  35. Set `mark_as_read` to true to mark found mails as read.
  36. Set `delete` to true to delete found mails.
  37. Set `event_headers` to a list of header names you want to include in a `headers` hash in each created event, either in an array of string or in a comma-separated string.
  38. Set `event_headers_style` to one of the following values to normalize the keys of "headers" for downstream agents' convenience:
  39. * `capitalized` (default) - Header names are capitalized; e.g. "Content-Type"
  40. * `downcased` - Header names are downcased; e.g. "content-type"
  41. * `snakecased` - Header names are snakecased; e.g. "content_type"
  42. Set `include_raw_mail` to true to add a `raw_mail` value to each created event, which contains a *Base64-encoded* blob in the "RFC822" format defined in [the IMAP4 standard](https://tools.ietf.org/html/rfc3501). Note that while the result of Base64 encoding will be LF-terminated, its raw content will often be CRLF-terminated because of the nature of the e-mail protocols and formats. The primary use case for a raw mail blob is to pass to a Shell Command Agent with a command like `openssl enc -d -base64 | tr -d '\r' | procmail -Yf-`.
  43. Each agent instance memorizes the highest UID of mails that are found in the last run for each watched folder, so even if you change a set of conditions so that it matches mails that are missed previously, or if you alter the flag status of already found mails, they will not show up as new events.
  44. Also, in order to avoid duplicated notification it keeps a list of Message-Id's of 100 most recent mails, so if multiple mails of the same Message-Id are found, you will only see one event out of them.
  45. MD
  46. event_description <<~MD
  47. Events look like this:
  48. {
  49. "message_id": "...(Message-Id without angle brackets)...",
  50. "folder": "INBOX",
  51. "subject": "...",
  52. "from": "Nanashi <nanashi.gombeh@example.jp>",
  53. "to": ["Jane <jane.doe@example.com>"],
  54. "cc": [],
  55. "date": "2014-05-10T03:47:20+0900",
  56. "mime_type": "text/plain",
  57. "body": "Hello,\n\n...",
  58. "matches": {
  59. }
  60. }
  61. Additionally, "headers" will be included if the `event_headers` option is set, and "raw_mail" if the `include_raw_mail` option is set.
  62. MD
  # Size used by Notified to bound the list of remembered Message-Ids.
  IDCACHE_SIZE = 100

  # Flags passed to File.fnmatch? when matching address patterns.  Only the
  # flags this Ruby's File class actually defines are OR'ed together.
  FNM_FLAGS = %i[FNM_CASEFOLD FNM_EXTGLOB]
    .select { |name| File.const_defined?(name) }
    .map { |name| File.const_get(name) }
    .reduce(0) { |flags, bit| flags | bit }
  # Tells whether the agent looks healthy: an event was created within the
  # configured `expected_update_period_in_days` and there are no recent
  # error logs.  The error-log check only runs when the first check passes.
  def working?
    period = interpolated['expected_update_period_in_days']
    recently_emitted = event_created_within?(period)
    recently_emitted && !recent_error_logs?
  end
  # Returns the option set a freshly created agent starts with.
  def default_options
    connection = {
      'host' => 'imap.gmail.com',
      'ssl' => true,
      'username' => 'your.account',
      'password' => 'your.password',
    }

    { 'expected_update_period_in_days' => "1" }
      .merge(connection)
      .merge('folders' => %w[INBOX], 'conditions' => {})
  end
  # Checks the configured options and records one message on `errors` for
  # every problem found.  Checks run in this order: credentials (only when
  # no service is selected), port, boolean flags, mime_types, folders,
  # conditions, expected_update_period_in_days.
  def validate_options
    unless service
      %w[host username password].each do |key|
        next if String === options[key]

        errors.add(:base, '%s is required and must be a string' % key)
      end
    end

    port = options['port']
    if port.present? && !is_positive_integer?(port)
      errors.add(:base, "port must be a positive integer")
    end

    %w[ssl mark_as_read delete include_raw_mail].each do |key|
      flag = options[key]
      next unless flag.present? && boolify(flag).nil?

      errors.add(:base, '%s must be a boolean value' % key)
    end

    # mime_types is optional; when given it must be a non-empty array of
    # "text/..." strings.
    mime_types = options['mime_types']
    if mime_types.nil?
      # nothing to validate
    elsif Array === mime_types
      only_text = mime_types.all? { |mime_type|
        String === mime_type && mime_type.start_with?('text/')
      }
      errors.add(:base, 'mime_types may only contain strings that match "text/*".') unless only_text
      errors.add(:base, 'mime_types should not be empty') if mime_types.empty?
    else
      errors.add(:base, 'mime_types must be an array')
    end

    # folders follows the same shape rules: nil is accepted here, otherwise
    # a non-empty array of strings is required.
    folders = options['folders']
    if folders.nil?
      # nothing to validate
    elsif Array === folders
      only_strings = folders.all? { |folder| String === folder }
      errors.add(:base, 'folders may only contain strings') unless only_strings
      errors.add(:base, 'folders should not be empty') if folders.empty?
    else
      errors.add(:base, 'folders must be an array')
    end

    conditions = options['conditions']
    if Hash === conditions
      conditions.each do |key, value|
        # Blank values are not validated.
        next unless value.present?

        case key
        when 'subject', 'body'
          if String === value
            begin
              Regexp.new(value)
            rescue StandardError
              errors.add(:base, 'conditions.%s contains an invalid regexp' % key)
            end
          else
            errors.add(:base, 'conditions.%s contains a non-string object' % key)
          end
        when 'from', 'to', 'cc'
          Array(value).each do |pattern|
            unless String === pattern
              errors.add(:base, 'conditions.%s contains a non-string object' % key)
              next
            end

            begin
              # Trial match against an empty string just to surface errors.
              glob_match?(pattern, '')
            rescue StandardError
              errors.add(:base, 'conditions.%s contains an invalid glob pattern' % key)
            end
          end
        when 'is_unread', 'has_attachment'
          unless [true, false].include?(boolify(value))
            errors.add(:base, 'conditions.%s must be a boolean value or null' % key)
          end
        end
      end
    else
      errors.add(:base, 'conditions must be a hash')
    end

    period = options['expected_update_period_in_days']
    if period.present? && !is_positive_integer?(period)
      errors.add(:base, "Invalid expected_update_period_in_days format")
    end
  end
  # Deliberately empty: overrides Oauthable#validate_service because a
  # service is optional in this agent.
  def validate_service; end
  # Creates an event for every mail yielded by #each_unread_mail that meets
  # all configured `conditions` and whose Message-Id has not been notified
  # yet.
  #
  # Condition handling:
  # - `subject` / `body`: regular expressions; named captures are collected
  #   into the event's "matches" hash.  For `body`, the first body part that
  #   matches becomes the event body.
  # - `from` / `to` / `cc`: glob patterns, OR'ed, matched against the
  #   addresses of the corresponding header.
  # - `has_attachment`: compared with the mail's attachment status; a value
  #   that boolifies to nil (unspecified/null) is ignored.
  # - `is_unread`: already applied by #each_unread_mail.
  # - anything else is logged and ignored.
  #
  # Mails that pass the conditions are marked as read and/or deleted when
  # `mark_as_read` / `delete` are set, except during a dry run.
  def check
    each_unread_mail { |mail, notified|
      message_id = mail.message_id
      body_parts = mail.body_parts(mime_types)
      matched_part = nil
      matches = {}

      selected = interpolated['conditions'].all? { |key, value|
        case key
        when 'subject'
          next true unless value.present?

          m = Regexp.new(value).match(mail.scrubbed(:subject))
          next false unless m

          m.names.each { |name| matches[name] = m[name] }
          true
        when 'body'
          next true unless value.present?

          re = Regexp.new(value)
          # The block value is the matching part (truthy) or nil.
          matched_part = body_parts.find { |part|
            m = re.match(part.scrubbed(:decoded))
            next false unless m

            m.names.each { |name| matches[name] = m[name] }
            true
          }
        when 'from', 'to', 'cc'
          next true unless value.present?

          begin
            # Mail::Field really needs to define respond_to_missing?
            # so we could use try(:addresses) here.
            addresses = mail.header[key].addresses
          rescue NoMethodError
            next false
          end
          addresses.any? { |address|
            Array(value).any? { |pattern| glob_match?(pattern, address) }
          }
        when 'has_attachment'
          # A null/blank value means "don't care"; comparing nil with the
          # mail's boolean status would otherwise reject every mail.
          wanted = boolify(value)
          wanted.nil? || wanted == mail.has_attachment?
        when 'is_unread'
          true # already filtered out by each_unread_mail
        else
          log 'Unknown condition key ignored: %s' % key
          true
        end
      }
      next unless selected

      if notified.include?(message_id)
        log 'Ignoring mail: %s (already notified)' % message_id
      else
        # Fall back to the highest-priority body part when no `body`
        # pattern selected one.
        matched_part ||= body_parts.first
        if matched_part
          mime_type = matched_part.mime_type
          body = matched_part.scrubbed(:decoded)
        else
          mime_type = 'text/plain'
          body = ''
        end

        log 'Emitting an event for mail: %s' % message_id

        payload = {
          'message_id' => message_id,
          'folder' => mail.folder,
          'subject' => mail.scrubbed(:subject),
          'from' => mail.from_addrs.first,
          'to' => mail.to_addrs,
          'cc' => mail.cc_addrs,
          'date' =>
            begin
              mail.date.iso8601
            rescue StandardError
              nil
            end,
          'mime_type' => mime_type,
          'body' => body,
          'matches' => matches,
          'has_attachment' => mail.has_attachment?,
        }

        if boolify(interpolated['include_raw_mail'])
          payload['raw_mail'] = Base64.encode64(mail.raw_mail)
        end

        if interpolated['event_headers'].present?
          # Repeated header fields are joined with newlines.
          headers = mail.header.each_with_object({}) { |field, hash|
            name = field.name
            hash[name] = (v = hash[name]) ? "#{v}\n#{field.value}" : field.value.to_s
          }
          payload.update(event_headers_payload(headers))
        end

        create_event(payload:)

        notified << message_id if message_id
      end

      if boolify(interpolated['mark_as_read'])
        log 'Marking as read'
        mail.mark_as_read unless dry_run?
      end

      if boolify(interpolated['delete'])
        log 'Deleting'
        mail.delete unless dry_run?
      end
    }
  end
  # Connects to the IMAP server, visits every configured folder and yields
  # each new mail together with the Notified list to the given block.
  #
  # When a service is selected, Gmail's host/port and XOAUTH2 credentials
  # are used; otherwise host, port, ssl, username and password come from
  # the interpolated options.  On the first visit to a folder only the
  # current highest UID is recorded and nothing is yielded.  The updated
  # "notified" and "lastseen" state is stored and saved after all folders
  # have been processed without an exception.
  def each_unread_mail
    if service
      host = 'imap.gmail.com'
      port = 993
      ssl = true
      username = google_oauth2_email
      password = google_oauth2_access_token
    else
      host, port, ssl, username = interpolated.values_at(:host, :port, :ssl, :username)
      password = interpolated[:password]
    end

    ssl = boolify(ssl)
    port = port.present? ? Integer(port) : nil

    log "Connecting to #{host}#{':%d' % port if port}#{' via SSL' if ssl}"

    Client.open(host, port:, ssl:) do |imap|
      log "Logging in as #{username}"
      if service
        imap.authenticate('XOAUTH2', username, password)
      else
        imap.login(username, password)
      end

      # State from the previous run: { uidvalidity => last seen uid, ... }
      previous = lastseen
      # State being built for this run.
      current = make_seen
      # Message-ids of the most recently notified mails.
      notified_ids = notified

      interpolated['folders'].each do |folder|
        log "Selecting the folder: %s" % folder
        imap.select(Net::IMAP.encode_utf7(folder))
        uidvalidity = imap.uidvalidity
        lastseenuid = previous[uidvalidity]

        if lastseenuid.nil?
          # First visit: remember where the folder currently ends.
          maxseq = imap.responses['EXISTS'].last
          log "Recording the initial status: %s" % pluralize(maxseq, 'existing mail')
          current[uidvalidity] = imap.fetch(maxseq, 'UID').last.attr['UID'] if maxseq > 0
          next
        end

        current[uidvalidity] = lastseenuid
        is_unread = boolify(interpolated['conditions']['is_unread'])

        uids = []
        imap.uid_fetch(((lastseenuid + 1)..-1), 'FLAGS').each do |data|
          uid, flags = data.attr.values_at('UID', 'FLAGS')
          current[uidvalidity] = uid
          # NOTE(review): presumably the server can return a message at or
          # below the requested start of an open-ended range -- confirm.
          next if uid <= lastseenuid

          # nil selects everything; true/false must equal "not flagged Seen".
          uids << uid if is_unread.nil? || (!flags.include?(:Seen)) == is_unread
        end

        noun = { true => 'new unread mail', false => 'new read mail' }.fetch(is_unread, 'new mail')
        log pluralize(uids.size, noun)
        next if uids.empty?

        imap.uid_fetch_mails(uids).each do |mail|
          yield mail, notified_ids
        end
      end

      self.notified = notified_ids
      self.lastseen = current
      save!
    end
  ensure
    log 'Connection closed'
  end
  # MIME types eligible as mail body, in priority order: the configured
  # `mime_types` option when set, otherwise the built-in default list.
  def mime_types
    configured = interpolated['mime_types']
    configured || %w[text/plain text/enriched text/html]
  end
  # Last-seen UIDs from agent memory, wrapped in a Seen.
  def lastseen
    stored = memory['lastseen']
    Seen.new(stored)
  end

  # Stores the last-seen UIDs in agent memory and drops the obsolete
  # 'seen' entry.
  def lastseen=(value)
    memory.delete('seen') # obsolete key
    memory['lastseen'] = value
  end

  # Returns a new, empty Seen.
  def make_seen
    Seen.new
  end
  # Remembered Message-Ids from agent memory, wrapped in a Notified.
  def notified
    remembered = memory['notified']
    Notified.new(remembered)
  end

  # Stores the list of notified Message-Ids in agent memory.
  def notified=(value)
    memory['notified'] = value
  end
  375. private
  # True when `value` matches the shell glob `pattern` under FNM_FLAGS.
  def glob_match?(pattern, value)
    File.fnmatch(pattern, value, FNM_FLAGS)
  end
  # Formats a count with its noun, pluralized for that count.
  def pluralize(count, noun)
    format("%d %s", count, noun.pluralize(count))
  end
  # Key under which headers are placed in the event payload: whatever the
  # inherited implementation returns, or 'headers' when that is nil/false.
  def event_headers_key
    inherited = super
    inherited || 'headers'
  end
  # Net::IMAP subclass that remembers the selected folder and its
  # UIDVALIDITY, never returns nil from fetches, and wraps fetched mails in
  # Message objects.
  class Client < ::Net::IMAP
    class << self
      # Opens a connection, yields it to the block and disconnects
      # afterwards, whether or not the block raised.  Returns the block's
      # value.
      #
      # Keyword options (e.g. `port:`, `ssl:`) are forwarded to `new` as
      # keywords rather than being folded into a positional Hash; extra
      # positional arguments are still passed through unchanged.
      def open(host, *args, **options)
        imap = new(host, *args, **options)
        yield imap
      ensure
        # `imap` is nil when `new` itself raised.
        imap&.disconnect
      end

      private

      def authenticators
        # The authenticators table is stored in the Net::IMAP instance.
        Net::IMAP.send(:authenticators)
      end
    end

    attr_reader :uidvalidity

    # Selects `folder` and records it together with the last UIDVALIDITY
    # response.  Returns whatever Net::IMAP#select returns.
    def select(folder)
      ret = super(@folder = folder)
      @uidvalidity = responses['UIDVALIDITY'].last
      ret
    end

    # Same as Net::IMAP#fetch, but returns [] in place of a nil/false
    # result.
    def fetch(*args)
      super || []
    end

    # Same as Net::IMAP#uid_fetch, but returns [] in place of a nil/false
    # result.
    def uid_fetch(*args)
      super || []
    end

    # Fetches the headers of the mails in `set` and wraps each result in a
    # Message tagged with the currently selected folder and UIDVALIDITY.
    def uid_fetch_mails(set)
      uid_fetch(set, 'RFC822.HEADER').map { |data|
        Message.new(self, data, folder: @folder, uidvalidity: @uidvalidity)
      }
    end
  end
  # Hash of { uidvalidity => uid } whose values never decrease.
  class Seen < Hash
    # Builds the table from an optional hash; keys are converted with to_i
    # because a JSON-deserialized hash has string keys.
    def initialize(hash = nil)
      super()
      return unless hash

      hash.each_pair { |uidvalidity, uid| self[uidvalidity.to_i] = uid }
    end

    # Stores `uid` only when no value exists yet for `uidvalidity` or the
    # existing value is not greater than `uid`.
    def []=(uidvalidity, uid)
      current = self[uidvalidity]
      super if current.nil? || current <= uid
    end
  end
  # Array of recently notified Message-Ids that holds at most IDCACHE_SIZE
  # entries after an append through #<<.
  class Notified < Array
    # Starts from the given array's elements, or empty when nil.
    def initialize(array = nil)
      super()
      replace(array) if array
    end

    # Appends `value`, first dropping as many of the oldest entries as
    # needed so that the size after the append does not exceed
    # IDCACHE_SIZE.  Returns self, like Array#<<.
    def <<(value)
      overflow = size - IDCACHE_SIZE + 1
      shift(overflow) if overflow > 0
      super
    end
  end
  # Wraps a mail parsed from the fetched RFC822 headers and adds lazy,
  # memoized access to data that needs further IMAP fetches (attachment
  # status, raw mail, body parts) as well as flag operations.
  class Message < SimpleDelegator
    DEFAULT_BODY_MIME_TYPES = %w[text/plain text/enriched text/html]

    attr_reader :uid, :folder, :uidvalidity

    module Scrubbed
      # Returns the result of calling `method` on self with invalid byte
      # sequences replaced by "<hex>"; results are cached per method name.
      def scrubbed(method)
        (@scrubbed ||= {})[method.to_sym] ||=
          __send__(method).try(:scrub) { |bytes| "<#{bytes.unpack1('H*')}>" }
      end
    end

    include Scrubbed

    # client     - object used for follow-up fetches and flag updates.
    # fetch_data - fetch result whose `attr` holds 'UID' and 'RFC822.HEADER'.
    # props      - extra values, each stored in the instance variable named
    #              after its key (e.g. folder, uidvalidity).
    def initialize(client, fetch_data, props = {})
      @client = client
      props.each { |key, value|
        instance_variable_set(:"@#{key}", value)
      }
      attr = fetch_data.attr
      @uid = attr['UID']
      super(Mail.read_from_string(attr['RFC822.HEADER']))
    end

    # Whether the mail's BODYSTRUCTURE indicates an attachment.  The answer
    # is fetched once and cached, including a negative one, so repeated
    # calls do not trigger another fetch.
    def has_attachment?
      return @has_attachment if defined?(@has_attachment)

      data = @client.uid_fetch(@uid, 'BODYSTRUCTURE').first
      @has_attachment = data ? struct_has_attachment?(data.attr['BODYSTRUCTURE']) : false
    end

    # The complete raw mail, fetched with BODY.PEEK[] on first use; an
    # empty string when the fetch returns nothing.
    def raw_mail
      @raw_mail ||=
        if data = @client.uid_fetch(@uid, 'BODY.PEEK[]').first
          data.attr['BODY[]']
        else
          ''
        end
    end

    # The full mail parsed from #raw_mail (memoized).
    def fetch
      @parsed ||= Mail.read_from_string(raw_mail)
    end

    # Returns the non-multipart, non-attachment text parts whose MIME type
    # is listed in `mime_types`.  For a multipart mail the parts are first
    # sorted by the order of `mime_types`.  Each returned part is extended
    # with Scrubbed.
    def body_parts(mime_types = DEFAULT_BODY_MIME_TYPES)
      mail = fetch
      if mail.multipart?
        mail.body.set_sort_order(mime_types)
        mail.body.sort_parts!
        mail.all_parts
      else
        [mail]
      end.select { |part|
        if part.multipart? || part.attachment? || !part.text? ||
           !mime_types.include?(part.mime_type)
          false
        else
          part.extend(Scrubbed)
          true
        end
      }
    end

    # Adds the :Seen flag to this mail.
    def mark_as_read
      @client.uid_store(@uid, '+FLAGS', [:Seen])
    end

    # Adds the :Deleted flag to this mail and then calls expunge on the
    # client.
    def delete
      @client.uid_store(@uid, '+FLAGS', [:Deleted])
      @client.expunge
    end

    private

    # A structure counts as having an attachment when it is multipart and
    # either its subtype is 'MIXED' or any of its parts does (recursively).
    def struct_has_attachment?(struct)
      struct.multipart? && (
        struct.subtype == 'MIXED' ||
        struct.parts.any? { |part|
          struct_has_attachment?(part)
        }
      )
    end
  end
  517. end
  518. end