imap_folder_agent.rb 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. require 'delegate'
  2. require 'net/imap'
  3. require 'mail'
  4. module Agents
  5. class ImapFolderAgent < Agent
  6. cannot_receive_events!
  7. default_schedule "every_30m"
  8. description <<-MD
  9. The ImapFolderAgent checks an IMAP server in specified folders
  10. and creates Events based on new mails found since the last run.
  11. On the first visit to a folder, this agent only checks for the
  12. initial status and does not create events.
  13. Specify an IMAP server to connect with `host`, and set `ssl` to
  14. true if the server supports IMAP over SSL. Specify `port` if
  15. you need to connect to a port other than standard (143 or 993
  16. depending on the `ssl` value).
  17. Specify login credentials in `username` and `password`.
  18. List the names of folders to check in `folders`.
  19. To narrow mails by conditions, build a `conditions` hash with
  20. the following keys:
  21. - "subject"
  22. - "body"
  23. Specify a regular expression to match against the decoded
  24. subject/body of each mail.
  25. Use the `(?i)` directive for case-insensitive search. For
  26. example, a pattern `(?i)alert` will match "alert", "Alert"
  27. or "ALERT". You can also make only a part of a pattern to
  28. work case-insensitively: `Re: (?i:alert)` will match either
  29. "Re: Alert" or "Re: alert", but not "RE: alert".
  30. When a mail has multiple non-attachment text parts, they are
  31. prioritized according to the `mime_types` option (see
  32. below) and the first part that matches a "body" pattern, if
  33. specified, will be chosen as the "body" value in a created
  34. event.
  35. Named captures will appear in the "matches" hash in a created
  36. event.
  37. - "from", "to", "cc"
  38. Specify a shell glob pattern string that is matched against
  39. mail addresses extracted from the corresponding header
  40. values of each mail.
  41. Patterns match addresses in case insensitive manner.
  42. Multiple pattern strings can be specified in an array, in
  43. which case a mail is selected if any of the patterns
  44. matches. (i.e. patterns are OR'd)
  45. - "mime_types"
  46. Specify an array of MIME types to tell which non-attachment
  47. part of a mail among its text/* parts should be used as mail
  48. body. The default value is `['text/plain', 'text/enriched',
  49. 'text/html']`.
  50. - "is_unread"
  51. Setting this to true or false means only mails that are
  52. marked as unread or read, respectively, are selected.
  53. If this key is unspecified or set to null, it is ignored.
  54. - "has_attachment"
  55. Setting this to true or false means only mails that do or do
  56. not have an attachment are selected.
  57. If this key is unspecified or set to null, it is ignored.
  58. Set `mark_as_read` to true to mark found mails as read.
  59. Each agent instance memorizes the highest UID of mails that are
  60. found in the last run for each watched folder, so even if you
  61. change a set of conditions so that it matches mails that are
  62. missed previously, or if you alter the flag status of already
  63. found mails, they will not show up as new events.
  64. Also, in order to avoid duplicated notification it keeps a list
  65. of Message-Id's of 100 most recent mails, so if multiple mails
  66. of the same Message-Id are found, you will only see one event
  67. out of them.
  68. MD
  69. event_description <<-MD
  70. Events look like this:
  71. {
  72. "folder": "INBOX",
  73. "subject": "...",
  74. "from": "Nanashi <nanashi.gombeh@example.jp>",
  75. "to": ["Jane <jane.doe@example.com>"],
  76. "cc": [],
  77. "date": "2014-05-10T03:47:20+0900",
  78. "mime_type": "text/plain",
  79. "body": "Hello,\n\n...",
  80. "matches": {
  81. }
  82. }
  83. MD
  # Upper bound on the number of Message-Ids kept for duplicate suppression.
  IDCACHE_SIZE = 100

  # Flags passed to File.fnmatch? when matching address glob patterns.
  # A flag is OR'd in only if this Ruby's File class actually defines it.
  FNM_FLAGS = %i[FNM_CASEFOLD FNM_EXTGLOB]
              .select { |name| File.const_defined?(name) }
              .map { |name| File.const_get(name) }
              .reduce(0, :|)
  # Reports whether the agent looks healthy: an event must have been created
  # within the configured `expected_update_period_in_days`, and there must be
  # no recent error logs.  The error-log check only runs when the first
  # condition holds.
  def working?
    period = interpolated['expected_update_period_in_days']
    recently_active = event_created_within?(period)
    recently_active && !recent_error_logs?
  end
  # Returns the option values a newly created agent starts with.  A fresh
  # hash is built on every call.
  def default_options
    defaults = {}
    defaults['expected_update_period_in_days'] = "1"
    defaults['host'] = 'imap.gmail.com'
    defaults['ssl'] = true
    defaults['username'] = 'your.account'
    defaults['password'] = 'your.password'
    defaults['folders'] = ['INBOX']
    defaults['conditions'] = {}
    defaults
  end
  # Validates the agent options, adding one message to `errors` (on :base)
  # for every problem found.  Checks run in this order: credentials, port,
  # boolean flags, mime_types, folders, conditions, and finally
  # expected_update_period_in_days.
  def validate_options
    # host / username / password must each be given as a String.
    %w[host username password].each do |key|
      next if options[key].is_a?(String)

      errors.add(:base, format('%s is required and must be a string', key))
    end

    port = options['port']
    if port.present? && !is_positive_integer?(port)
      errors.add(:base, "port must be a positive integer")
    end

    # Optional flags: only checked when a value is present.
    %w[ssl mark_as_read].each do |key|
      flag = options[key]
      next unless flag.present?
      next unless boolify(flag).nil?

      errors.add(:base, format('%s must be a boolean value', key))
    end

    # mime_types may be omitted; otherwise a non-empty array of "text/*".
    mime_types = options['mime_types']
    if mime_types.nil?
      # nothing to validate
    elsif mime_types.is_a?(Array)
      text_only = mime_types.all? do |mime_type|
        mime_type.is_a?(String) && mime_type.start_with?('text/')
      end
      errors.add(:base, 'mime_types may only contain strings that match "text/*".') unless text_only
      errors.add(:base, 'mime_types should not be empty') if mime_types.empty?
    else
      errors.add(:base, 'mime_types must be an array')
    end

    # folders may be omitted; otherwise a non-empty array of strings.
    folders = options['folders']
    if folders.nil?
      # nothing to validate
    elsif folders.is_a?(Array)
      strings_only = folders.all? { |folder| folder.is_a?(String) }
      errors.add(:base, 'folders may only contain strings') unless strings_only
      errors.add(:base, 'folders should not be empty') if folders.empty?
    else
      errors.add(:base, 'folders must be an array')
    end

    # conditions is mandatory and must be a hash; blank values are skipped
    # and keys not listed below are not validated.
    conditions = options['conditions']
    if conditions.is_a?(Hash)
      conditions.each do |key, value|
        next unless value.present?

        if %w[subject body].include?(key)
          # Must be a string that compiles as a regular expression.
          if value.is_a?(String)
            begin
              Regexp.new(value)
            rescue StandardError
              errors.add(:base, format('conditions.%s contains an invalid regexp', key))
            end
          else
            errors.add(:base, format('conditions.%s contains a non-string object', key))
          end
        elsif %w[from to cc].include?(key)
          # One glob pattern or an array of them; each must be a string.
          Array(value).each do |pattern|
            if pattern.is_a?(String)
              begin
                glob_match?(pattern, '')
              rescue StandardError
                errors.add(:base, format('conditions.%s contains an invalid glob pattern', key))
              end
            else
              errors.add(:base, format('conditions.%s contains a non-string object', key))
            end
          end
        elsif %w[is_unread has_attachment].include?(key)
          unless [true, false].include?(boolify(value))
            errors.add(:base, format('conditions.%s must be a boolean value or null', key))
          end
        end
      end
    else
      errors.add(:base, 'conditions must be a hash')
    end

    period = options['expected_update_period_in_days']
    if period.present? && !is_positive_integer?(period)
      errors.add(:base, "Invalid expected_update_period_in_days format")
    end
  end
  # Examines every new mail yielded by each_unread_mail, applies the
  # configured `conditions`, and creates one event per matching mail whose
  # Message-Id has not been notified yet.  Matching mails are marked as read
  # when `mark_as_read` is true, whether or not an event was created.
  #
  # Condition handling:
  # - "subject"/"body": regexp match; named captures are collected into the
  #   event's "matches" hash.  For "body", the first body part that matches
  #   becomes the event body.
  # - "from"/"to"/"cc": at least one address of that header must match one of
  #   the glob patterns.  A mail lacking the header does not match.
  # - "has_attachment": compared with the mail's attachment status; a value
  #   that does not boolify to true/false (e.g. null) is ignored.
  # - "is_unread": already applied by each_unread_mail.
  # - any other key is logged and ignored.
  def check
    each_unread_mail { |mail, notified|
      message_id = mail.message_id
      body_parts = mail.body_parts(mime_types)
      matched_part = nil
      matches = {}

      interpolated['conditions'].all? { |key, value|
        case key
        when 'subject'
          value.present? or next true
          re = Regexp.new(value)
          if m = re.match(mail.subject)
            m.names.each { |name|
              matches[name] = m[name]
            }
            true
          else
            false
          end
        when 'body'
          value.present? or next true
          re = Regexp.new(value)
          matched_part = body_parts.find { |part|
            if m = re.match(part.decoded)
              m.names.each { |name|
                matches[name] = m[name]
              }
              true
            else
              false
            end
          }
        when 'from', 'to', 'cc'
          value.present? or next true
          begin
            # header[key] is nil when the mail has no such header; a field
            # that does not respond to #addresses is treated the same way
            # (presumably an unparsable header -- TODO confirm).
            addresses = mail.header[key].addresses
          rescue NoMethodError
            next false
          end
          addresses.any? { |address|
            Array(value).any? { |pattern|
              glob_match?(pattern, address)
            }
          }
        when 'has_attachment'
          # A null/unrecognized value means "don't care", as documented.
          expected = boolify(value)
          expected.nil? || expected == mail.has_attachment?
        when 'is_unread'
          true  # already filtered out by each_unread_mail
        else
          log 'Unknown condition key ignored: %s' % key
          true
        end
      } or next

      if notified.include?(message_id)
        log 'Ignoring mail: %s (already notified)' % message_id
      else
        # Fall back to the highest-priority text part when no "body"
        # condition selected one.
        matched_part ||= body_parts.first

        if matched_part
          mime_type = matched_part.mime_type
          body = matched_part.decoded
        else
          mime_type = 'text/plain'
          body = ''
        end

        log 'Emitting an event for mail: %s' % message_id

        create_event :payload => {
          'folder' => mail.folder,
          'subject' => mail.subject,
          'from' => mail.from_addrs.first,
          'to' => mail.to_addrs,
          'cc' => mail.cc_addrs,
          'date' => (mail.date.iso8601 rescue nil),
          'mime_type' => mime_type,
          'body' => body,
          'matches' => matches,
          'has_attachment' => mail.has_attachment?,
        }

        notified << message_id if message_id
      end

      if boolify(interpolated['mark_as_read'])
        log 'Marking as read'
        mail.mark_as_read
      end
    }
  end
  # Connects to the configured IMAP server, logs in, and walks through each
  # configured folder, yielding `mail, notified` for every mail whose UID is
  # greater than the one recorded in the previous run (filtered by the
  # `is_unread` condition when that is set).
  #
  # The first time a folder (identified by its UIDVALIDITY) is visited, only
  # the UID of its last mail is recorded and nothing is yielded.
  #
  # After all folders are processed, the notified Message-Id list and the
  # per-folder last-seen UIDs are written to memory and the agent is saved.
  # If anything raises before that point, nothing is persisted.
  # 'Connection closed' is logged on every exit path.
  def each_unread_mail
    host, port, ssl, username = interpolated.values_at(:host, :port, :ssl, :username)
    ssl = boolify(ssl)
    port = (Integer(port) if port.present?)

    log "Connecting to #{host}#{':%d' % port if port}#{' via SSL' if ssl}"
    Client.open(host, port, ssl) { |imap|
      log "Logging in as #{username}"
      imap.login(username, interpolated[:password])

      # `previous` holds { uidvalidity => lastseenuid, ... } from the last
      # run; `current` collects the values to be stored by this run.
      previous = lastseen
      current = make_seen

      # Message-ids of the {IDCACHE_SIZE} most recently notified mails.
      notified_ids = notified

      interpolated['folders'].each { |folder|
        log "Selecting the folder: %s" % folder

        imap.select(folder)
        uidvalidity = imap.uidvalidity

        lastseenuid = previous[uidvalidity]

        if lastseenuid.nil?
          maxseq = imap.responses['EXISTS'].last

          log "Recording the initial status: %s" % pluralize(maxseq, 'existing mail')

          if maxseq > 0
            current[uidvalidity] = imap.fetch(maxseq, 'UID').last.attr['UID']
          end

          next
        end

        current[uidvalidity] = lastseenuid
        is_unread = boolify(interpolated['conditions']['is_unread'])

        # uid_fetch may return nil instead of an empty array when no message
        # matches (e.g. the folder has been emptied), so normalize it.
        fetched = imap.uid_fetch((lastseenuid + 1)..-1, 'FLAGS') || []

        uids = fetched.each_with_object([]) { |data, ret|
          uid, flags = data.attr.values_at('UID', 'FLAGS')
          current[uidvalidity] = uid
          # The server can answer "n:*" with an already seen mail; skip it.
          next if uid <= lastseenuid

          # Accept when is_unread is unset, or when it equals the mail's
          # actual unread status.
          case is_unread
          when nil, !flags.include?(:Seen)
            ret << uid
          end
        }

        log pluralize(uids.size,
                      case is_unread
                      when true
                        'new unread mail'
                      when false
                        'new read mail'
                      else
                        'new mail'
                      end)

        next if uids.empty?

        imap.uid_fetch_mails(uids).each { |mail|
          yield mail, notified_ids
        }
      }

      self.notified = notified_ids
      self.lastseen = current
      save!
    }
  ensure
    log 'Connection closed'
  end
  # MIME types, in priority order, of the text parts usable as a mail body.
  # Falls back to plain, enriched, then HTML text when the `mime_types`
  # option is not set.
  def mime_types
    configured = interpolated['mime_types']
    configured || ['text/plain', 'text/enriched', 'text/html']
  end
  # Builds a Seen table from the 'lastseen' entry stored in memory.
  def lastseen
    stored = memory['lastseen']
    Seen.new(stored)
  end

  # Stores the given table under memory['lastseen'], first dropping the
  # obsolete 'seen' key.
  def lastseen=(value)
    memory.delete('seen') # obsolete key
    memory['lastseen'] = value
  end

  # Returns a new, empty Seen table.
  def make_seen
    Seen.new
  end
  # Builds a Notified list from the 'notified' entry stored in memory.
  def notified
    stored = memory['notified']
    Notified.new(stored)
  end

  # Stores the given list under memory['notified'].
  def notified=(value)
    memory['notified'] = value
  end
  347. private
  # Returns true when `value` converts via Kernel#Integer to an integer
  # greater than zero.  Zero, negative numbers and anything Integer() rejects
  # (nil, non-numeric strings, ...) yield false.
  def is_positive_integer?(value)
    Integer(value) > 0
  rescue StandardError
    false
  end
  # Tests `value` against the shell glob `pattern` using the flags collected
  # in FNM_FLAGS.
  def glob_match?(pattern, value)
    File.fnmatch(pattern, value, FNM_FLAGS)
  end
  # Formats a count followed by the noun, inflected for that count by
  # String#pluralize (e.g. "3 new mails").
  def pluralize(count, noun)
    format('%d %s', count, noun.pluralize(count))
  end
  # Net::IMAP subclass that remembers the selected folder and its
  # UIDVALIDITY, and wraps fetched mails in Message objects.
  class Client < ::Net::IMAP
    class << self
      # Creates a client for host/port/ssl, yields it, and disconnects it
      # afterwards even if the block raises.  Returns the block's value.
      def open(host, port, ssl)
        imap = new(host, port, ssl)
        yield imap
      ensure
        # imap is nil when `new` itself failed.
        imap.disconnect unless imap.nil?
      end
    end

    # UIDVALIDITY reported for the most recently selected folder.
    attr_reader :uidvalidity

    # Selects `folder`, recording its name and the last UIDVALIDITY response.
    # Returns whatever Net::IMAP#select returns.
    def select(folder)
      ret = super(@folder = folder)
      @uidvalidity = responses['UIDVALIDITY'].last
      ret
    end

    # Fetches the headers of the mails identified by the UID set and returns
    # them as an array of Message objects tagged with the current folder and
    # UIDVALIDITY.  Returns an empty array when nothing was fetched.
    def uid_fetch_mails(set)
      # uid_fetch may return nil rather than [] when no message matches.
      fetched = uid_fetch(set, 'RFC822.HEADER') || []
      fetched.map { |data|
        Message.new(self, data, folder: @folder, uidvalidity: @uidvalidity)
      }
    end
  end
  # Hash of { uidvalidity => highest seen UID } whose entries can only stay
  # the same or grow.
  class Seen < Hash
    # hash - optional stored table; its keys are converted with #to_i,
    #        since a hash deserialized from JSON has string keys.
    def initialize(hash = nil)
      super()
      (hash || {}).each_pair do |uidvalidity, uid|
        self[uidvalidity.to_i] = uid
      end
    end

    # Stores `uid` unless a larger UID is already recorded for `uidvalidity`.
    def []=(uidvalidity, uid)
      current = self[uidvalidity]
      return unless current.nil? || current <= uid

      super
    end
  end
  # Array of recently notified Message-Ids, capped at IDCACHE_SIZE entries
  # when appended to with #<<.
  class Notified < Array
    # array - optional stored list used as the initial contents.
    def initialize(array = nil)
      super()
      replace(array) if array
    end

    # Appends `value`, then drops the oldest entries so that at most
    # IDCACHE_SIZE remain.  Trimming after the append (rather than before)
    # keeps the list from growing to IDCACHE_SIZE + 1.  Returns self.
    def <<(value)
      super
      slice!(0...-IDCACHE_SIZE) if size > IDCACHE_SIZE
      self
    end
  end
  # One mail on the server.  Delegates to a Mail object parsed from the
  # fetched header only; the full message and the body structure are
  # fetched from the server on demand and cached.
  class Message < SimpleDelegator
    DEFAULT_BODY_MIME_TYPES = %w[text/plain text/enriched text/html]

    attr_reader :uid, :folder, :uidvalidity

    # client     - IMAP client used for the on-demand fetches and flag updates.
    # fetch_data - fetch result whose #attr holds 'UID' and 'RFC822.HEADER'.
    # props      - extra values stored as instance variables named after
    #              their keys (the visible caller passes folder: and
    #              uidvalidity:).
    def initialize(client, fetch_data, props = {})
      @client = client
      props.each { |key, value|
        instance_variable_set(:"@#{key}", value)
      }
      attr = fetch_data.attr
      @uid = attr['UID']
      super(Mail.read_from_string(attr['RFC822.HEADER']))
    end

    # Tells whether the mail has an attachment, judged from its
    # BODYSTRUCTURE.  The answer is fetched once and cached; `defined?` is
    # used instead of `||=` so that a false answer is cached too.
    def has_attachment?
      return @has_attachment if defined?(@has_attachment)

      data = @client.uid_fetch(@uid, 'BODYSTRUCTURE').first
      @has_attachment = struct_has_attachment?(data.attr['BODYSTRUCTURE'])
    end

    # Fetches (once) and returns the whole message parsed as a Mail object.
    # BODY.PEEK[] is requested, which per the IMAP spec does not set the
    # \Seen flag.
    def fetch
      @parsed ||=
        begin
          data = @client.uid_fetch(@uid, 'BODY.PEEK[]').first
          Mail.read_from_string(data.attr['BODY[]'])
        end
    end

    # Returns the non-attachment text parts whose MIME type is listed in
    # `mime_types`.  For a multipart mail the parts are first sorted by the
    # order of `mime_types`; a non-multipart mail is treated as a single
    # part.
    def body_parts(mime_types = DEFAULT_BODY_MIME_TYPES)
      mail = fetch
      if mail.multipart?
        mail.body.set_sort_order(mime_types)
        mail.body.sort_parts!
        mail.all_parts
      else
        [mail]
      end.reject { |part|
        part.multipart? || part.attachment? || !part.text? ||
          !mime_types.include?(part.mime_type)
      }
    end

    # Adds the :Seen flag to this mail on the server.
    def mark_as_read
      @client.uid_store(@uid, '+FLAGS', [:Seen])
    end

    private

    # True when `struct` is multipart and either its subtype is MIXED or any
    # of its parts, recursively, satisfies the same test.
    def struct_has_attachment?(struct)
      struct.multipart? && (
        struct.subtype == 'MIXED' ||
        struct.parts.any? { |part|
          struct_has_attachment?(part)
        }
      )
    end
  end
  459. end
  460. end