feedjira_extension.rb 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. require 'feedjira'
  2. require 'digest'
  3. require 'mail'
  4. module FeedjiraExtension
  # Attribute names shared by the wrapper classes below. The lists are frozen
  # so the class definitions that iterate over them cannot be altered by an
  # accidental in-place mutation.

  # Members of the Author struct (also the Atom author child elements).
  AUTHOR_ATTRS = %i[name email uri].freeze
  # XML attributes read from a link element by AtomLink.
  LINK_ATTRS = %i[href rel type hreflang title length].freeze
  # XML attributes read from an enclosure element by Enclosure.
  ENCLOSURE_ATTRS = %i[url type length].freeze
  # Value object for a feed author; its members are the names listed in
  # AUTHOR_ATTRS.
  class Author < Struct.new(*AUTHOR_ATTRS)
    # True when every member is nil.
    def empty?
      values.compact.empty?
    end

    # Serializes the author as one JSON string made of the present members
    # joined by a space, with the email wrapped in <...> and the uri in (...).
    #
    # @param options passed through to String#to_json
    def to_json(options = nil)
      parts = members.flat_map do |key|
        value = self[key].presence
        # An empty array contributes nothing once flat_map flattens it.
        next [] unless value

        case key
        when :email then "<#{value}>"
        when :uri then "(#{value})"
        else value
        end
      end

      parts.join(' ').to_json(options)
    end
  end
  # Author parsed from an Atom author element: each name in AUTHOR_ATTRS is
  # declared through SAXMachine's `element` macro.
  class AtomAuthor < Author
    include SAXMachine

    AUTHOR_ATTRS.each { |field| element field }
  end
  # Author parsed from the text content of an RSS author-like element.
  class RssAuthor < Author
    include SAXMachine

    # Stores the raw text and tries to read it as a mail address.
    # When the address cannot be constructed the whole text becomes the name;
    # otherwise name and email are taken from the address, each falling back
    # to nil if reading it raises.
    #
    # @param content the element's text
    def content=(content)
      @content = content

      begin
        parsed = Mail::Address.new(content)
      rescue StandardError
        self.name = content
        return
      end

      self.name =
        begin
          parsed.name
        rescue StandardError
          nil
        end
      self.email =
        begin
          parsed.address
        rescue StandardError
          nil
        end
    end

    # NOTE(review): declared after the custom writer above, presumably so
    # SAXMachine keeps that writer instead of generating its own — confirm
    # before reordering.
    value :content
  end
  # Wrapper for an enclosure element exposing the XML attributes named in
  # ENCLOSURE_ATTRS.
  class Enclosure
    include SAXMachine

    ENCLOSURE_ATTRS.each { |attr_name| attribute attr_name }

    # Serializes only the attributes that hold a truthy value.
    #
    # @param options passed through to Hash#to_json
    def to_json(options = nil)
      present = {}
      ENCLOSURE_ATTRS.each do |attr_name|
        value = __send__(attr_name)
        present[attr_name] = value if value
      end
      present.to_json(options)
    end
  end
  # Wrapper for an Atom-style link element exposing the XML attributes named
  # in LINK_ATTRS.
  class AtomLink
    include SAXMachine

    LINK_ATTRS.each { |attr_name| attribute attr_name }

    # Serializes only the attributes that hold a truthy value.
    #
    # @param options passed through to Hash#to_json
    def to_json(options = nil)
      present = {}
      LINK_ATTRS.each do |attr_name|
        value = __send__(attr_name)
        present[attr_name] = value if value
      end
      present.to_json(options)
    end
  end
  # Wrapper for an RSS link element whose text value is exposed as `href`.
  class RssLinkElement
    include SAXMachine

    value :href

    # Serializes the link as a JSON object with a single `href` key.
    #
    # @param options passed through to Hash#to_json
    def to_json(options = nil)
      payload = { href: href }
      payload.to_json(options)
    end
  end
  # Mixin that collects author elements into `_authors` and exposes the
  # non-empty ones through `authors`.
  #
  # Only author handling lives here. The link helpers (`alternate_link`,
  # `url`) are defined by HasLinks, which also provides the `links` reader
  # they depend on.
  module HasAuthors
    # Declares the author elements on the including parser class.
    #
    # @param mod the class including this module; it must respond to
    #   `sax_config` and `elements`
    def self.included(mod)
      mod.module_exec do
        case name
        when /RSS/
          # Every RSS author-like tag feeds the same `_authors` collection.
          %w[
            itunes:author
            dc:creator
            author
            managingEditor
          ].each do |tag|
            # Drop whatever is already registered for the tag before
            # re-declaring it as a collection.
            sax_config.top_level_elements[tag].clear
            elements tag, class: RssAuthor, as: :_authors
          end
        else
          elements :author, class: AtomAuthor, as: :_authors
        end

        # Authors with at least one non-nil attribute.
        def authors
          _authors.reject(&:empty?)
        end
      end
    end
  end
  # Mixin that parses a single enclosure element and derives an image URL
  # from it.
  module HasEnclosure
    # Re-declares the enclosure element on the including parser class.
    #
    # @param mod the class including this module
    def self.included(mod)
      mod.module_exec do
        sax_config.top_level_elements['enclosure'].clear
        element :enclosure, class: Enclosure

        # The enclosure when its type starts with "image/", otherwise nil.
        def image_enclosure
          enclosure if %r{\Aimage/} === enclosure.try!(:type)
        end

        # URL of the image enclosure, memoized once a truthy value is found.
        def image
          return @image if @image

          @image = image_enclosure.try!(:url)
        end
      end
    end
  end
  # Mixin that replaces the parser's link handling with a `links` collection
  # and derives `alternate_link` and `url` from it.
  module HasLinks
    # Declares the link elements on the including parser class.
    #
    # * Classes whose name matches /RSS/ collect plain link elements as
    #   RssLinkElement objects in `rss_links`.
    # * Those that also match /FeedBurner/ additionally collect `atom10:link`
    #   elements as AtomLink objects, and `links` is both lists combined.
    # * Every other class collects link elements as AtomLink objects.
    #
    # @param mod the class including this module; it must respond to
    #   `sax_config` and `elements`
    def self.included(mod)
      mod.module_exec do
        # Remove any existing mapping for link so the declarations below are
        # the only ones.
        sax_config.top_level_elements['link'].clear
        sax_config.collection_elements['link'].clear

        case name
        when /RSS/
          elements :link, class: RssLinkElement, as: :rss_links

          case name
          when /FeedBurner/
            # FeedBurner feeds carry their Atom links under the `atom10`
            # prefix, so the element name must be spelled exactly that way.
            elements :'atom10:link', class: AtomLink, as: :atom_links

            # RSS links first, then the Atom links; memoized.
            def links
              @links ||= [*rss_links, *atom_links]
            end
          else
            alias_method :links, :rss_links
          end
        else
          elements :link, class: AtomLink, as: :links
        end

        # First AtomLink with rel "alternate" whose type is "text/html" or
        # missing; nil when there is none.
        def alternate_link
          links.find { |link|
            link.is_a?(AtomLink) &&
              link.rel == 'alternate' &&
              (link.type == 'text/html' || link.type.nil?)
          }
        end

        # href of the alternate link, falling back to the first link; memoized.
        def url
          @url ||= (alternate_link || links.first).try!(:href)
        end
      end
    end
  end
  # Mixin providing `published` / `updated` writers that parse date strings
  # and can safely be called several times on the same object.
  #
  # Values that cannot be parsed are ignored, so a malformed date never
  # discards (or raises against) a value stored earlier.
  module HasTimestamps
    attr_reader :published, :updated

    # Keep the "oldest" publish time found.
    #
    # @param value raw date text; unparseable values are ignored
    def published=(value)
      parsed = parse_datetime(value)
      # Comparing nil against a stored DateTime would raise NoMethodError.
      return if parsed.nil?

      @published = parsed if @published.nil? || parsed < @published
    end

    # Keep the most recent update time found.
    #
    # @param value raw date text; unparseable values are ignored
    def updated=(value)
      parsed = parse_datetime(value)
      # Comparing nil against a stored DateTime would raise NoMethodError.
      return if parsed.nil?

      @updated = parsed if @updated.nil? || parsed > @updated
    end

    # `published` formatted through `iso8601`, via `try`.
    def date_published
      published.try(:iso8601)
    end

    # `updated`, or `published` when no update time is stored, formatted
    # through `iso8601`, via `try`.
    def last_updated
      (updated || published).try(:iso8601)
    end

    private

    # Parses a date string.
    #
    # @param string text to parse
    # @return [DateTime, nil] nil when DateTime.parse raises
    def parse_datetime(string)
      DateTime.parse(string)
    rescue StandardError
      nil
    end
  end
  # Extensions applied to feed entry classes: pulls in the author, enclosure,
  # link and timestamp mixins and supplies a fallback `id`.
  module FeedEntryExtensions
    # Includes the shared mixins into the entry class, in this order.
    #
    # @param mod the entry class including this module
    def self.included(mod)
      [HasAuthors, HasEnclosure, HasLinks, HasTimestamps].each do |mixin|
        mod.send(:include, mixin)
      end
    end

    # The entry's own id when present, otherwise the MD5 hex digest of its
    # content, its summary, or the empty string (first one that is set).
    def id
      explicit = entry_id
      return explicit if explicit

      Digest::MD5.hexdigest(content || summary || '')
    end
  end
  # Extensions applied to feed (parser) classes: shared mixins, extra
  # feed-level elements, and propagation of FeedEntryExtensions to the
  # classes used for `entries`.
  module FeedExtensions
    # Sets up the including parser class. The order of the statements is
    # kept as is: mixins first, then element declarations, then the entry
    # classes.
    #
    # @param mod the feed class including this module
    def self.included(mod)
      mod.module_exec do
        [HasAuthors, HasEnclosure, HasLinks, HasTimestamps].each do |mixin|
          include mixin
        end

        element :id, as: :feed_id
        element :generator
        elements :rights
        element :published
        element :updated
        element :icon

        case name
        when /RSS/
          element :guid, as: :feed_id
          element :copyright
          element :pubDate, as: :published
          element :'dc:date', as: :published
          element :lastBuildDate, as: :updated
          element :image, value: :url, as: :icon

          # Prefer the parsed copyright element; otherwise fall back to the
          # module-level implementation built from `rights`.
          def copyright
            @copyright || super
          end
        end

        # Extend every class configured as the data class of an `entries`
        # collection.
        sax_config.collection_elements.each_value do |configs|
          configs.each do |config|
            next unless config.accessor == 'entries'

            entry_class = config.data_class
            next unless entry_class.is_a?(Class)

            entry_class.send(:include, FeedEntryExtensions)
          end
        end
      end
    end

    # All `rights` values joined by newlines, passed through `presence`.
    def copyright
      combined = rights.join("\n")
      combined.presence
    end
  end
  # Mix FeedExtensions into every parser class returned by
  # Feedjira::Feed.feed_classes.
  Feedjira::Feed.feed_classes.each { |parser| parser.send(:include, FeedExtensions) }
  244. end