1
0

human_task_agent.rb 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
  1. module Agents
  2. class HumanTaskAgent < Agent
  3. default_schedule "every_10m"
  4. gem_dependency_check { defined?(RTurk) }
  5. description <<~MD
  6. The Human Task Agent is used to create Human Intelligence Tasks (HITs) on Mechanical Turk.
  7. #{'## Include `rturk` in your Gemfile to use this Agent!' if dependencies_missing?}
  8. HITs can be created in response to events, or on a schedule. Set `trigger_on` to either `schedule` or `event`.
  9. # Schedule
  10. The schedule of this Agent is how often it should check for completed HITs, __NOT__ how often to submit one. To configure how often a new HIT
  11. should be submitted when in `schedule` mode, set `submission_period` to a number of hours.
  12. # Example
  13. If created with an event, all HIT fields can contain interpolated values via [liquid templating](https://github.com/huginn/huginn/wiki/Formatting-Events-using-Liquid).
  14. For example, if the incoming event was a Twitter event, you could make a HIT to rate its sentiment like this:
  15. {
  16. "expected_receive_period_in_days": 2,
  17. "trigger_on": "event",
  18. "hit": {
  19. "assignments": 1,
  20. "title": "Sentiment evaluation",
  21. "description": "Please rate the sentiment of this message: '{{message}}'",
  22. "reward": 0.05,
  23. "lifetime_in_seconds": "3600",
  24. "questions": [
  25. {
  26. "type": "selection",
  27. "key": "sentiment",
  28. "name": "Sentiment",
  29. "required": "true",
  30. "question": "Please select the best sentiment value:",
  31. "selections": [
  32. { "key": "happy", "text": "Happy" },
  33. { "key": "sad", "text": "Sad" },
  34. { "key": "neutral", "text": "Neutral" }
  35. ]
  36. },
  37. {
  38. "type": "free_text",
  39. "key": "feedback",
  40. "name": "Have any feedback for us?",
  41. "required": "false",
  42. "question": "Feedback",
  43. "default": "Type here...",
  44. "min_length": "2",
  45. "max_length": "2000"
  46. }
  47. ]
  48. }
  49. }
  50. As you can see, you configure the created HIT with the `hit` option. Required fields are `title`, which is the
  51. title of the created HIT, `description`, which is the description of the HIT, and `questions` which is an array of
  52. questions. Questions can be of `type` _selection_ or _free\\_text_. Both types require the `key`, `name`, `required`,
  53. `type`, and `question` configuration options. Additionally, _selection_ requires a `selections` array of options, each of
  54. which contain `key` and `text`. For _free\\_text_, the special configuration options are all optional, and are
  55. `default`, `min_length`, and `max_length`.
  56. By default, all answers are emitted in a single event. If you'd like separate events for each answer, set `separate_answers` to `true`.
  57. # Combining answers
  58. There are a couple of ways to combine HITs that have multiple `assignments`, all of which involve setting `combination_mode` at the top level.
  59. ## Taking the majority
  60. Option 1: if all of your `questions` are of `type` _selection_, you can set `combination_mode` to `take_majority`.
  61. This will cause the Agent to automatically select the majority vote for each question across all `assignments` and return it as `majority_answer`.
  62. If all selections are numeric, an `average_answer` will also be generated.
  63. Option 2: you can have the Agent ask additional human workers to rank the `assignments` and return the most highly ranked answer.
  64. To do this, set `combination_mode` to `poll` and provide a `poll_options` object. Here is an example:
  65. {
  66. "trigger_on": "schedule",
  67. "submission_period": 12,
  68. "combination_mode": "poll",
  69. "poll_options": {
  70. "title": "Take a poll about some jokes",
  71. "instructions": "Please rank these jokes from most funny (5) to least funny (1)",
  72. "assignments": 3,
  73. "row_template": "{{joke}}"
  74. },
  75. "hit": {
  76. "assignments": 5,
  77. "title": "Tell a joke",
  78. "description": "Please tell me a joke",
  79. "reward": 0.05,
  80. "lifetime_in_seconds": "3600",
  81. "questions": [
  82. {
  83. "type": "free_text",
  84. "key": "joke",
  85. "name": "Your joke",
  86. "required": "true",
  87. "question": "Joke",
  88. "min_length": "2",
  89. "max_length": "2000"
  90. }
  91. ]
  92. }
  93. }
  94. Resulting events will have the original `answers`, as well as the `poll` results, and a field called `best_answer` that contains the best answer as determined by the poll. (Note that `separate_answers` won't work when doing a poll.)
  95. # Other settings
  96. `lifetime_in_seconds` is the number of seconds a HIT is left on Amazon before it's automatically closed. The default is 1 day.
  97. As with most Agents, `expected_receive_period_in_days` is required if `trigger_on` is set to `event`.
  98. MD
  99. event_description <<~MD
  100. Events look like:
  101. {
  102. "answers": [
  103. {
  104. "feedback": "Hello!",
  105. "sentiment": "happy"
  106. }
  107. ]
  108. }
  109. MD
  # Validates the Agent's options, adding a message to `errors` for every
  # problem found. Nothing is raised and the return value is not meaningful.
  #
  # As a side effect, `options['hit']` and `options['hit']['questions']` are
  # defaulted to an empty Hash / Array so the checks below can index into them.
  def validate_options
    options['hit'] ||= {}
    options['hit']['questions'] ||= []
    # Re-read after defaulting instead of using the value of the `||=`
    # expression, so we always work on the object actually stored in `options`.
    hit_options = options['hit']
    questions = hit_options['questions']

    unless %w[schedule event].include?(options['trigger_on'])
      errors.add(:base, "'trigger_on' must be one of 'schedule' or 'event'")
    end

    unless hit_options['assignments'].present? && hit_options['assignments'].to_i > 0
      errors.add(:base, "'hit.assignments' should specify the number of HIT assignments to create")
    end

    errors.add(:base, "'hit.title' must be provided") unless hit_options['title'].present?
    errors.add(:base, "'hit.description' must be provided") unless hit_options['description'].present?
    errors.add(:base, "'hit.questions' must be provided") unless questions.present?

    case options['trigger_on']
    when "event"
      unless options['expected_receive_period_in_days'].present?
        errors.add(
          :base,
          "'expected_receive_period_in_days' is required when 'trigger_on' is set to 'event'"
        )
      end
    when "schedule"
      unless options['submission_period'].present? && options['submission_period'].to_i > 0
        errors.add(
          :base,
          "'submission_period' must be set to a positive number of hours when 'trigger_on' is set to 'schedule'"
        )
      end
    end

    required_question_keys = %w[key name required type question]
    if questions.any? { |question| required_question_keys.any? { |k| question[k].blank? } }
      errors.add(:base, "all questions must set 'key', 'name', 'required', 'type', and 'question'")
    end

    invalid_selection = questions.any? do |question|
      question['type'] == "selection" && (
        question['selections'].blank? ||
        question['selections'].any? { |s| s['key'].blank? || s['text'].blank? }
      )
    end
    if invalid_selection
      # The check above requires 'key' and 'text' on every selection, so the
      # message names those two fields (it previously said 'name').
      errors.add(
        :base,
        "all questions of type 'selection' must have a selections array with selections that set 'key' and 'text'"
      )
    end

    if take_majority? && questions.any? { |question| question['type'] != "selection" }
      errors.add(:base, "all questions must be of type 'selection' to use the 'take_majority' option")
    end

    return unless create_poll?

    poll_options = options['poll_options']
    poll_options_valid = poll_options.is_a?(Hash) &&
                         poll_options['title'].present? &&
                         poll_options['instructions'].present? &&
                         poll_options['row_template'].present? &&
                         poll_options['assignments'].to_i > 0
    return if poll_options_valid

    errors.add(
      :base,
      "poll_options is required when combination_mode is set to 'poll' and must have the keys 'title', 'instructions', 'row_template', and 'assignments'"
    )
  end
  # Returns the options a freshly created Agent starts with: an event-triggered
  # sentiment-rating HIT with one selection question and one free-text question.
  def default_options
    sentiment_choices = [%w[happy Happy], %w[sad Sad], %w[neutral Neutral]].map do |key, text|
      { 'key' => key, 'text' => text }
    end

    sentiment_question = {
      'type' => "selection",
      'key' => "sentiment",
      'name' => "Sentiment",
      'required' => "true",
      'question' => "Please select the best sentiment value:",
      'selections' => sentiment_choices
    }

    feedback_question = {
      'type' => "free_text",
      'key' => "feedback",
      'name' => "Have any feedback for us?",
      'required' => "false",
      'question' => "Feedback",
      'default' => "Type here...",
      'min_length' => "2",
      'max_length' => "2000"
    }

    hit = {
      'assignments' => 1,
      'title' => "Sentiment evaluation",
      'description' => "Please rate the sentiment of this message: '{{message}}'",
      'reward' => 0.05,
      'lifetime_in_seconds' => 86_400, # one day
      'questions' => [sentiment_question, feedback_question]
    }

    {
      'expected_receive_period_in_days' => 2,
      'trigger_on' => "event",
      'hit' => hit
    }
  end
  # Reports whether the Agent looks healthy: it has received an event within
  # the last `expected_receive_period_in_days` days and has no recent error logs.
  # Returns a falsy value (nil when nothing was ever received) otherwise.
  def working?
    received_at = last_receive_at
    return received_at unless received_at

    cutoff = interpolated['expected_receive_period_in_days'].to_i.days.ago
    received_at > cutoff && !recent_error_logs?
  end
  # Scheduled entry point: reviews completed HITs and, when running in
  # `schedule` mode, submits a new HIT once `submission_period` hours have
  # elapsed since the last scheduled submission recorded in memory.
  def check
    review_hits
    return unless interpolated['trigger_on'] == "schedule"

    period_in_seconds = interpolated['submission_period'].to_i * 60 * 60
    last_submitted_at = memory['last_schedule'] || 0
    return if last_submitted_at > Time.now.to_i - period_in_seconds

    memory['last_schedule'] = Time.now.to_i
    create_basic_hit
  end
  # Creates one HIT per incoming event, but only when the Agent is configured
  # with `trigger_on` set to "event"; otherwise the events are ignored.
  def receive(incoming_events)
    return unless interpolated['trigger_on'] == "event"

    incoming_events.each { |event| create_basic_hit(event) }
  end
  221. protected
  222. if defined?(RTurk)
  # True when answers should be combined by majority vote: either
  # `combination_mode` is "take_majority" or the `take_majority` option is
  # the string "true".
  def take_majority?
    settings = interpolated
    return true if settings['combination_mode'] == "take_majority"

    settings['take_majority'] == "true"
  end
  # True when `combination_mode` is "poll", i.e. completed HITs should be
  # followed by a ranking poll instead of emitting an event directly.
  def create_poll?
    mode = interpolated['combination_mode']
    mode == "poll"
  end
  # Looks up the Event that triggered the given HIT, using the `event_id`
  # stored in the HIT's memory entry. Returns nil when the entry is not a Hash.
  def event_for_hit(hit_id)
    metadata = memory['hits'][hit_id]
    return nil unless metadata.is_a?(Hash)

    Event.find_by_id(metadata['event_id'])
  end
  # Returns the `type` recorded in memory for the given HIT, falling back to
  # 'user' when the entry is not a Hash or has no type set.
  def hit_type(hit_id)
    metadata = memory['hits'][hit_id]
    recorded_type = metadata['type'] if metadata.is_a?(Hash)
    recorded_type || 'user'
  end
  # Reviews every reviewable MTurk HIT that this Agent created (tracked in
  # `memory['hits']`). A HIT is processed only once all of its assignments
  # exist and every one has the status "Submitted". Completed polls and
  # completed regular HITs are handled separately; afterwards all assignments
  # are approved, the HIT is disposed and its memory entry is removed.
  def review_hits
    reviewable_hit_ids = RTurk::GetReviewableHITs.create.hit_ids
    my_reviewed_hit_ids = reviewable_hit_ids & (memory['hits'] || {}).keys

    unless reviewable_hit_ids.empty?
      log "MTurk reports #{reviewable_hit_ids.length} HITs, of which I own [#{my_reviewed_hit_ids.to_sentence}]"
    end

    my_reviewed_hit_ids.each do |hit_id|
      hit = RTurk::Hit.new(hit_id)
      assignments = hit.assignments
      statuses = assignments.empty? ? "" : " with the statuses: #{assignments.map(&:status).to_sentence}"
      log "Looking at HIT #{hit_id}. I found #{assignments.length} assignments#{statuses}"

      next unless assignments.length == hit.max_assignments &&
                  assignments.all? { |assignment| assignment.status == "Submitted" }

      inbound_event = event_for_hit(hit_id)
      if hit_type(hit_id) == 'poll'
        emit_poll_results(hit_id, assignments, inbound_event)
      else
        process_completed_hit(hit_id, assignments, inbound_event)
      end

      assignments.each(&:approve!)
      hit.dispose!
      memory['hits'].delete(hit_id)
    end
  end

  # Emits the event for a completed poll HIT: the original answers, the raw
  # poll ratings, and the answer with the highest summed rating.
  def emit_poll_results(hit_id, assignments, inbound_event)
    log "Handling a poll: #{hit_id}"

    scores = Hash.new(0)
    assignments.each do |assignment|
      assignment.answers.each { |index, rating| scores[index] += rating.to_i }
    end
    top_answer, _top_score = scores.max_by { |_index, total| total }

    original_answers = memory['hits'][hit_id]['answers']
    # Poll questions are created with zero-based keys (see create_poll_hit), so
    # the winning key indexes the original answers directly. Subtracting one
    # made key "0" select the last answer and shifted every other winner.
    best_answer = top_answer && original_answers[top_answer.to_i]

    payload = {
      'answers' => original_answers,
      'poll' => assignments.map(&:answers),
      'best_answer' => best_answer
    }
    event = create_event(payload:)
    log("Event emitted with answer(s) for poll", outbound_event: event, inbound_event:)
  end

  # Handles a completed regular HIT: optionally adds majority statistics, then
  # either starts a follow-up poll, emits one event per answer, or emits a
  # single event containing all answers.
  def process_completed_hit(hit_id, assignments, inbound_event)
    payload = { 'answers' => assignments.map(&:answers) }
    payload.merge!(majority_results(assignments)) if take_majority?

    if create_poll?
      create_poll_hit(hit_id, assignments, inbound_event)
    elsif options[:separate_answers]
      payload['answers'].each.with_index do |answer, index|
        sub_payload = payload.except('answers').merge('answer' => answer)
        event = create_event payload: sub_payload
        log("Event emitted with answer ##{index}", outbound_event: event, inbound_event:)
      end
    else
      event = create_event(payload:)
      log("Event emitted with answer(s)", outbound_event: event, inbound_event:)
    end
  end

  # Builds the 'counts' and 'majority_answer' payload entries (plus
  # 'average_answer' when every selection key is numeric) from the assignments.
  def majority_results(assignments)
    counts = options['hit']['questions'].each_with_object({}) do |question, memo|
      question_counts = question['selections'].to_h { |selection| [selection['key'], 0] }
      assignments.each do |assignment|
        answers = ActiveSupport::HashWithIndifferentAccess.new(assignment.answers)
        answer = answers[question['key']]
        # An unanswered question, or an answer that is not one of the
        # configured selection keys, has no counter; skip it instead of
        # raising NoMethodError on nil.
        next unless question_counts.key?(answer)

        question_counts[answer] += 1
      end
      memo[question['key']] = question_counts
    end

    results = {
      'counts' => counts,
      'majority_answer' => counts.transform_values do |question_counts|
        question_counts.to_a.sort_by(&:last).last.first
      end
    }

    if all_questions_are_numeric?
      results['average_answer'] = counts.transform_values do |question_counts|
        total_votes = question_counts.values.sum
        weighted_sum = question_counts.sum { |value, count| value.to_s.to_f * count }
        # No countable votes means there is no average; avoid emitting NaN.
        total_votes.zero? ? nil : weighted_sum / total_votes.to_f
      end
    end

    results
  end

  # Creates a follow-up poll HIT asking workers to rate each assignment's
  # answers from 5 down to 1, and records it in memory via create_hit.
  def create_poll_hit(hit_id, assignments, inbound_event)
    poll_options = options['poll_options']
    selections = 5.times.map { |i| { 'key' => i + 1, 'text' => i + 1 } }.reverse

    questions = assignments.each_with_index.map do |assignment, index|
      {
        'type' => "selection",
        'name' => "Item #{index + 1}",
        'key' => index,
        'required' => "true",
        'question' => interpolate_string(poll_options['row_template'], assignment.answers),
        'selections' => selections
      }
    end

    poll_hit = create_hit(
      'title' => poll_options['title'],
      'description' => poll_options['instructions'],
      'questions' => questions,
      'assignments' => poll_options['assignments'],
      'lifetime_in_seconds' => poll_options['lifetime_in_seconds'],
      'reward' => poll_options['reward'],
      'payload' => inbound_event && inbound_event.payload,
      'metadata' => {
        'type' => 'poll',
        'original_hit' => hit_id,
        'answers' => assignments.map(&:answers),
        'event_id' => inbound_event && inbound_event.id
      }
    )

    log(
      "Poll HIT created with ID #{poll_hit.id} and URL #{poll_hit.url}. Original HIT: #{hit_id}",
      inbound_event:
    )
  end
  # True when every selection key of every question is a string that
  # round-trips through `to_f` or `to_i` (e.g. "3" or "2.5").
  def all_questions_are_numeric?
    numeric_key = lambda do |value|
      value == value.to_f.to_s || value == value.to_i.to_s
    end

    interpolated['hit']['questions'].none? do |question|
      question['selections'].any? { |selection| !numeric_key.call(selection['key']) }
    end
  end
  # Creates the HIT described by `options['hit']`, optionally on behalf of an
  # incoming event whose payload is passed along for interpolation and whose
  # id is stored as the HIT's metadata. Logs the new HIT's id and URL.
  def create_basic_hit(event = nil)
    hit_options = options['hit']
    source_payload = event && event.payload
    source_event_id = event && event.id

    hit = create_hit(
      'title' => hit_options['title'],
      'description' => hit_options['description'],
      'questions' => hit_options['questions'],
      'assignments' => hit_options['assignments'],
      'lifetime_in_seconds' => hit_options['lifetime_in_seconds'],
      'reward' => hit_options['reward'],
      'payload' => source_payload,
      'metadata' => { 'event_id' => source_event_id }
    )

    log("HIT created with ID #{hit.id} and URL #{hit.url}", inbound_event: event)
  end
  # Creates a HIT on Mechanical Turk and records its metadata in memory.
  #
  # `opts` uses string keys: 'title', 'description' and 'questions' are
  # interpolated against 'payload' (default {}); 'assignments' defaults to 1,
  # 'lifetime_in_seconds' to one day and 'reward' to 0.05; 'metadata'
  # (default {}) is stored under the new HIT's id in `memory['hits']`.
  # Returns the created RTurk HIT.
  def create_hit(opts = {})
    event_payload = opts['payload'] || {}
    title = interpolate_string(opts['title'], event_payload).strip
    description = interpolate_string(opts['description'], event_payload).strip
    questions = interpolate_options(opts['questions'], event_payload)

    assignment_count = (opts['assignments'] || 1).to_i
    lifetime = (opts['lifetime_in_seconds'] || 24 * 60 * 60).to_i
    reward = (opts['reward'] || 0.05).to_f

    created_hit = RTurk::Hit.create(title:) do |new_hit|
      new_hit.max_assignments = assignment_count
      new_hit.description = description
      new_hit.lifetime = lifetime
      new_hit.question_form AgentQuestionForm.new(title:, description:, questions:)
      new_hit.reward = reward
      # new_hit.qualifications.add :approval_rate, { gt: 80 }
    end

    # Read memory['hits'] back after defaulting it rather than using the value
    # of the `||=` expression, so the write lands in the stored object.
    memory['hits'] ||= {}
    memory['hits'][created_hit.id] = opts['metadata'] || {}
    created_hit
  end
  # RTurk question form that renders the Agent's title, description and
  # configured questions. Each question becomes a Question element whose
  # answer is either a radio-button selection or a free-text field.
  class AgentQuestionForm < RTurk::QuestionForm
    needs :title, :description, :questions

    # Emits the form content: an Overview followed by one Question per entry
    # in @questions, in order.
    def question_form_content
      Overview do
        Title do
          text @title
        end
        Text do
          text @description
        end
      end

      @questions.each_with_index do |entry, position|
        # Fall back to position-based identifiers when the config omits them.
        identifier = entry['key'] || "question_#{position}"
        display_name = entry['name'] || "Question ##{position}"
        required_flag = entry['required'] || 'true'

        Question do
          QuestionIdentifier do
            text identifier
          end
          DisplayName do
            text display_name
          end
          IsRequired do
            text required_flag
          end
          QuestionContent do
            Text do
              text entry['question']
            end
          end
          AnswerSpecification do
            if entry['type'] == "selection"
              render_selection_answer(entry['selections'])
            else
              render_free_text_answer(entry)
            end
          end
        end
      end
    end

    private

    # Emits a radio-button SelectionAnswer with one Selection per choice.
    def render_selection_answer(choices)
      SelectionAnswer do
        StyleSuggestion do
          text 'radiobutton'
        end
        Selections do
          choices.each do |choice|
            Selection do
              SelectionIdentifier do
                text choice['key']
              end
              Text do
                text choice['text']
              end
            end
          end
        end
      end
    end

    # Emits a FreeTextAnswer, adding length constraints and default text only
    # when the corresponding settings are present on the question.
    def render_free_text_answer(entry)
      length_attributes = {}
      length_attributes['minLength'] = entry['min_length'].to_s if entry['min_length'].present?
      length_attributes['maxLength'] = entry['max_length'].to_s if entry['max_length'].present?

      FreeTextAnswer do
        unless length_attributes.empty?
          Constraints do
            Length length_attributes
          end
        end
        if entry['default'].present?
          DefaultText do
            text entry['default']
          end
        end
      end
    end
  end
  467. end
  468. end
  469. end