feedjira_extension.rb 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. require 'feedjira'
  2. require 'digest'
  3. require 'mail'
  4. module FeedjiraExtension
  # Members of the Author struct; AtomAuthor also declares one SAX
  # element per entry.
  AUTHOR_ATTRS = %i[name email uri].freeze

  # XML attributes captured by AtomLink.
  LINK_ATTRS = %i[href rel type hreflang title length].freeze

  # XML attributes captured by Enclosure.
  ENCLOSURE_ATTRS = %i[url type length].freeze
  # Value object describing a feed or entry author. Its members are
  # the symbols listed in AUTHOR_ATTRS.
  class Author < Struct.new(*AUTHOR_ATTRS)
    # Returns true when every member is nil.
    def empty?
      to_a.compact.empty?
    end

    # Serializes the author as a single JSON string made of the
    # present members joined by spaces; the email is wrapped in angle
    # brackets and the URI in parentheses. Members without a present
    # value are skipped.
    def to_json(options = nil)
      parts = each_pair.flat_map do |member, text|
        # An empty array contributes nothing to the flattened result.
        next [] unless text.presence

        case member
        when :email then "<#{text}>"
        when :uri then "(#{text})"
        else text
        end
      end

      parts.join(' ').to_json(options)
    end
  end
  # Author whose members are declared as SAX elements, one per entry
  # of AUTHOR_ATTRS.
  class AtomAuthor < Author
    include SAXMachine

    AUTHOR_ATTRS.each { |member| element member }
  end
  # Author built from the text content of an element. The text is
  # parsed as a mail address to obtain name and email; when parsing
  # raises, the raw text becomes the name.
  class RssAuthor < Author
    include SAXMachine

    # Stores the raw content and derives +name+ / +email+ from it.
    # Defined ahead of the `value :content` declaration below, as in the
    # original ordering.
    def content=(content)
      @content = content
      parsed = Mail::Address.new(content)
    rescue
      # Not parseable as an address: use the text as the name.
      self.name = content
    else
      self.name = parsed.name rescue nil
      self.email = parsed.address rescue nil
    end

    value :content
  end
  # Author populated from the itunes:name and itunes:email child
  # elements.
  class ITunesRssOwner < Author
    include SAXMachine

    { name: :'itunes:name', email: :'itunes:email' }.each do |member, tag|
      element tag, as: member
    end
  end
  # SAX mapping that captures the XML attributes listed in
  # ENCLOSURE_ATTRS.
  class Enclosure
    include SAXMachine

    ENCLOSURE_ATTRS.each { |attr_name| attribute attr_name }

    # Serializes the truthy attributes as a JSON object, in
    # ENCLOSURE_ATTRS order.
    def to_json(options = nil)
      present = {}
      ENCLOSURE_ATTRS.each do |attr_name|
        attr_value = __send__(attr_name)
        present[attr_name] = attr_value if attr_value
      end
      present.to_json(options)
    end
  end
  # SAX mapping that captures the XML attributes listed in LINK_ATTRS.
  class AtomLink
    include SAXMachine

    LINK_ATTRS.each { |attr_name| attribute attr_name }

    # Returns true when every attribute in LINK_ATTRS is nil.
    def empty?
      LINK_ATTRS.none? { |attr_name| !__send__(attr_name).nil? }
    end

    # Serializes the truthy attributes as a JSON object, in LINK_ATTRS
    # order.
    def to_json(options = nil)
      present = {}
      LINK_ATTRS.each do |attr_name|
        attr_value = __send__(attr_name)
        present[attr_name] = attr_value if attr_value
      end
      present.to_json(options)
    end
  end
  # SAX mapping whose element value is exposed as +href+.
  class RssLinkElement
    include SAXMachine

    value :href

    # A link counts as empty unless its href is a String.
    def empty?
      !(String === href)
    end

    # Serializes as {"href": ...} when href is a String, otherwise as
    # an empty JSON object.
    def to_json(options = nil)
      # Ignore non-string values, because SaxMachine leaks its
      # internal value :no_buffer when the content of an element
      # is empty.
      payload = href.is_a?(String) ? { href: href } : {}
      payload.to_json(options)
    end
  end
  # Mixin that gives a parser class an +authors+ reader backed by the
  # +_authors+ collection.
  module HasAuthors
    # Registers the author elements on the including class. Classes
    # whose name matches /RSS/ collect four RSS author-like elements as
    # RssAuthor; all others collect <author> as AtomAuthor.
    def self.included(mod)
      mod.module_exec do
        if /RSS/ === name
          %w[itunes:author dc:creator author managingEditor].each do |tag|
            # Clear any existing top-level mapping for this tag before
            # registering the collection.
            sax_config.top_level_elements[tag].clear
            elements tag, class: RssAuthor, as: :_authors
          end
        else
          elements :author, class: AtomAuthor, as: :_authors
        end

        # Collected authors with the empty ones removed.
        def authors
          _authors.reject(&:empty?)
        end
      end
    end
  end
  # Mixin that maps <enclosure> to an Enclosure object and derives an
  # image URL from it.
  module HasEnclosure
    # Replaces any existing top-level 'enclosure' mapping on the
    # including class with one that builds an Enclosure.
    def self.included(mod)
      mod.module_exec do
        sax_config.top_level_elements['enclosure'].clear
        element :enclosure, class: Enclosure

        # The enclosure when its type starts with "image/", else nil.
        def image_enclosure
          enclosure if %r{\Aimage/} === enclosure.try!(:type)
        end

        # Memoized URL of the image enclosure, if any.
        def image
          @image ||= image_enclosure.try!(:url)
        end
      end
    end
  end
  # Mixin that collects every <link> element of a feed or entry and
  # derives +url+ from the collected links.
  module HasLinks
    # Replaces the existing 'link' mappings on the including class.
    # The set of collections registered depends on the class name:
    #
    # * /RSS/  - <link> elements are collected as RssLinkElement; if the
    #   name also matches /FeedBurner/, <atom10:link> elements are
    #   collected as AtomLink and both lists are merged. +url+ falls
    #   back to the collected links only when the existing +url+ is
    #   falsy.
    # * /Atom/ - <link> elements are collected as AtomLink and +url+ is
    #   taken from the collected links.
    def self.included(mod)
      mod.module_exec do
        sax_config.top_level_elements['link'].clear
        sax_config.collection_elements['link'].clear

        case name
        when /RSS/
          elements :link, class: RssLinkElement, as: :rss_links

          case name
          when /FeedBurner/
            # FeedBurner RSS feeds carry Atom links under the atom10
            # namespace prefix.
            elements :'atom10:link', class: AtomLink, as: :atom_links

            def _links
              [*rss_links, *atom_links]
            end
          else
            alias_method :_links, :rss_links
          end

          # Prepended so that `super` reaches the class's own +url+.
          prepend(
            Module.new {
              def url
                super || (alternate_link || links.first).try!(:href)
              end
            }
          )
        when /Atom/
          elements :link, class: AtomLink, as: :_links

          def url
            (alternate_link || links.first).try!(:href)
          end
        end

        # Collected links with the empty ones removed.
        def links
          _links.reject(&:empty?)
        end

        # First AtomLink with rel="alternate" whose type is text/html
        # or unspecified; nil when there is none.
        def alternate_link
          links.find { |link|
            link.is_a?(AtomLink) &&
              link.rel == 'alternate' &&
              (link.type == 'text/html' || link.type.nil?)
          }
        end
      end
    end
  end
  # Mixin providing +published+ / +updated+ accessors that can be
  # assigned several times (once per matching element) and keep the
  # earliest publish time and the latest update time respectively.
  module HasTimestamps
    attr_reader :published, :updated

    # Keep the "oldest" publish time found.
    # Values that cannot be parsed are ignored, so a malformed date
    # never discards or compares against a previously stored one.
    def published=(value)
      parsed = parse_datetime(value)
      return unless parsed

      @published = parsed if @published.nil? || parsed < @published
    end

    # Keep the most recent update time found.
    # Values that cannot be parsed are ignored.
    def updated=(value)
      parsed = parse_datetime(value)
      return unless parsed

      @updated = parsed if @updated.nil? || parsed > @updated
    end

    # ISO 8601 form of +published+, or nil when it is unset.
    def date_published
      published.try(:iso8601)
    end

    # ISO 8601 form of +updated+, falling back to +published+; nil when
    # neither is set.
    def last_updated
      (updated || published).try(:iso8601)
    end

    private

    # Parses +string+ with DateTime.parse; returns nil when parsing
    # raises (malformed text, nil, non-string input).
    def parse_datetime(string)
      DateTime.parse(string)
    rescue StandardError
      nil
    end
  end
  # Mixin applied to feed entry classes: pulls in the author,
  # enclosure, link and timestamp mixins and adds an +id+ fallback
  # chain.
  module FeedEntryExtensions
    # Includes the four Has* mixins into the including class, in order.
    def self.included(mod)
      [HasAuthors, HasEnclosure, HasLinks, HasTimestamps].each do |mixin|
        mod.send(:include, mixin)
      end
    end

    # Returns the entry id, then the @dc_identifier value, and finally
    # an MD5 hex digest of the content (or summary, or the empty
    # string) when neither of the former is set.
    def id
      explicit = entry_id || @dc_identifier
      return explicit if explicit

      Digest::MD5.hexdigest(content || summary || '')
    end
  end
  # Mixin applied to feed parser classes: pulls in the shared mixins,
  # registers extra feed-level elements, and extends the entry classes
  # found in the parser's SAX configuration.
  module FeedExtensions
    # Configures the including class. The statements below are order
    # dependent (element declarations, then name-specific additions,
    # then entry class extension).
    def self.included(mod)
      mod.module_exec do
        include HasAuthors
        include HasEnclosure
        include HasLinks
        include HasTimestamps

        element :id, as: :feed_id
        element :generator
        elements :rights
        element :published
        element :updated
        element :icon

        if /RSS/ === name
          element :guid, as: :feed_id
          element :copyright
          element :pubDate, as: :published
          element :'dc:date', as: :published
          element :lastBuildDate, as: :updated
          element :image, value: :url, as: :icon

          # Prefer the parsed <copyright>; otherwise defer to the
          # +copyright+ found further up the ancestor chain.
          def copyright
            @copyright || super
          end

          if /ITunes/ === name
            sax_config.collection_elements['itunes:owner'].clear
            elements :"itunes:owner", as: :_itunes_owners, class: ITunesRssOwner
            private :_itunes_owners

            # Collected iTunes owners with the empty ones removed.
            def itunes_owners
              _itunes_owners.reject(&:empty?)
            end
          end
        else
          element :subtitle, as: :description unless method_defined?(:description)
        end

        # Extend every class that backs an 'entries' collection.
        sax_config.collection_elements.each_value do |configs|
          configs.each do |config|
            next unless config.accessor == 'entries'

            entry_class = config.data_class
            next unless entry_class.is_a?(Class)

            entry_class.send :include, FeedEntryExtensions
          end
        end
      end
    end

    # All <rights> values joined by newlines, or nil when the joined
    # text is blank.
    def copyright
      joined = rights.join("\n")
      joined.presence
    end
  end
  # Mix FeedExtensions into every parser class Feedjira reports.
  Feedjira.parsers.each { |parser| parser.send(:include, FeedExtensions) }
  273. end