1
0

test_metrics.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. import math
  2. from unittest.mock import MagicMock
  3. import pytest
  4. import torch
  5. from aphrodite.spec_decode.metrics import AsyncMetricsCollector
  6. def test_initial_call_returns_none():
  7. """Expect first call to get metrics to return None.
  8. """
  9. spec_decode_sampler = MagicMock()
  10. spec_decode_sampler.num_accepted_tokens = torch.tensor(0,
  11. dtype=torch.long,
  12. device='cuda')
  13. spec_decode_sampler.num_emitted_tokens = torch.tensor(0,
  14. dtype=torch.long,
  15. device='cuda')
  16. spec_decode_sampler.num_draft_tokens = 0
  17. collector = AsyncMetricsCollector(spec_decode_sampler)
  18. collector.init_gpu_tensors(rank=0)
  19. maybe_metrics = collector.maybe_collect_rejsample_metrics(k=5)
  20. assert maybe_metrics is None
  21. def test_second_call_returns_metrics():
  22. """Expect second call to not return None.
  23. """
  24. spec_decode_sampler = MagicMock()
  25. spec_decode_sampler.num_accepted_tokens = torch.tensor(0,
  26. dtype=torch.long,
  27. device='cuda')
  28. spec_decode_sampler.num_emitted_tokens = torch.tensor(0,
  29. dtype=torch.long,
  30. device='cuda')
  31. spec_decode_sampler.num_draft_tokens = 0
  32. collect_interval_s = 5.0
  33. timer = MagicMock()
  34. timer.side_effect = [
  35. 0.0, collect_interval_s + 0.1, collect_interval_s + 0.2
  36. ]
  37. collector = AsyncMetricsCollector(spec_decode_sampler=spec_decode_sampler,
  38. timer=timer,
  39. collect_interval_s=collect_interval_s)
  40. collector.init_gpu_tensors(rank=0)
  41. _ = collector.maybe_collect_rejsample_metrics(k=5)
  42. metrics = collector.maybe_collect_rejsample_metrics(k=5)
  43. assert metrics is not None
  44. @pytest.mark.parametrize("rank", [1, 2, 3, 4])
  45. def test_nonzero_rank_noop(rank):
  46. """Verify nonzero ranks don't collect metrics.
  47. """
  48. spec_decode_sampler = MagicMock()
  49. spec_decode_sampler.num_accepted_tokens = torch.tensor(0,
  50. dtype=torch.long,
  51. device='cuda')
  52. spec_decode_sampler.num_emitted_tokens = torch.tensor(0,
  53. dtype=torch.long,
  54. device='cuda')
  55. spec_decode_sampler.num_draft_tokens = 0
  56. collector = AsyncMetricsCollector(spec_decode_sampler)
  57. collector.init_gpu_tensors(rank=rank)
  58. _ = collector.maybe_collect_rejsample_metrics(k=5)
  59. metrics = collector.maybe_collect_rejsample_metrics(k=5)
  60. assert metrics is None
  61. def test_noop_until_time():
  62. """Verify metrics aren't collected until enough time passes.
  63. """
  64. spec_decode_sampler = MagicMock()
  65. spec_decode_sampler.num_accepted_tokens = torch.tensor(0,
  66. dtype=torch.long,
  67. device='cuda')
  68. spec_decode_sampler.num_emitted_tokens = torch.tensor(0,
  69. dtype=torch.long,
  70. device='cuda')
  71. spec_decode_sampler.num_draft_tokens = 0
  72. collect_interval_s = 5.0
  73. timer = MagicMock()
  74. timer.side_effect = [
  75. 0.0, collect_interval_s - 0.1, collect_interval_s - 0.1,
  76. collect_interval_s + 0.1, collect_interval_s + 0.1
  77. ]
  78. collector = AsyncMetricsCollector(spec_decode_sampler=spec_decode_sampler,
  79. timer=timer,
  80. collect_interval_s=collect_interval_s)
  81. collector.init_gpu_tensors(rank=0)
  82. _ = collector.maybe_collect_rejsample_metrics(k=5)
  83. metrics = collector.maybe_collect_rejsample_metrics(k=5)
  84. assert metrics is None
  85. _ = collector.maybe_collect_rejsample_metrics(k=5)
  86. metrics = collector.maybe_collect_rejsample_metrics(k=5)
  87. assert metrics is not None
  88. def test_timer_is_reset():
  89. """Verify that the internal timer inside AsyncMetricsCollector
  90. is reset after collection.
  91. """
  92. spec_decode_sampler = MagicMock()
  93. spec_decode_sampler.num_accepted_tokens = torch.tensor(0,
  94. dtype=torch.long,
  95. device='cuda')
  96. spec_decode_sampler.num_emitted_tokens = torch.tensor(0,
  97. dtype=torch.long,
  98. device='cuda')
  99. spec_decode_sampler.num_draft_tokens = 0
  100. collect_interval_s = 5.0
  101. timer = MagicMock()
  102. timer.side_effect = [
  103. 0.0,
  104. collect_interval_s + 0.1,
  105. collect_interval_s + 0.1,
  106. collect_interval_s + 0.2,
  107. collect_interval_s + 0.2,
  108. 2 * collect_interval_s + 0.1,
  109. 2 * collect_interval_s + 0.1,
  110. ]
  111. collector = AsyncMetricsCollector(spec_decode_sampler=spec_decode_sampler,
  112. timer=timer,
  113. collect_interval_s=collect_interval_s)
  114. collector.init_gpu_tensors(rank=0)
  115. _ = collector.maybe_collect_rejsample_metrics(k=5)
  116. metrics = collector.maybe_collect_rejsample_metrics(k=5)
  117. assert metrics is not None
  118. _ = collector.maybe_collect_rejsample_metrics(k=5)
  119. metrics = collector.maybe_collect_rejsample_metrics(k=5)
  120. assert metrics is None
  121. _ = collector.maybe_collect_rejsample_metrics(k=5)
  122. metrics = collector.maybe_collect_rejsample_metrics(k=5)
  123. assert metrics is not None
  124. @pytest.mark.parametrize("has_data", [True, False])
  125. def test_initial_metrics_has_correct_values(has_data: bool):
  126. """Test correctness of metrics data.
  127. """
  128. if has_data:
  129. num_accepted_tokens = 103
  130. num_emitted_tokens = 104
  131. num_draft_tokens = 105
  132. else:
  133. num_accepted_tokens = 0
  134. num_emitted_tokens = 0
  135. num_draft_tokens = 0
  136. k = 5
  137. max_num_emitted_tokens = AsyncMetricsCollector.get_max_num_emitted_tokens(
  138. num_draft_tokens, k)
  139. spec_decode_sampler = MagicMock()
  140. spec_decode_sampler.num_accepted_tokens = torch.tensor(num_accepted_tokens,
  141. dtype=torch.long,
  142. device='cuda')
  143. spec_decode_sampler.num_emitted_tokens = torch.tensor(num_emitted_tokens,
  144. dtype=torch.long,
  145. device='cuda')
  146. spec_decode_sampler.num_draft_tokens = num_draft_tokens
  147. collect_interval_s = 5.0
  148. timer = MagicMock()
  149. timer.side_effect = [
  150. 0.0, collect_interval_s + 0.1, collect_interval_s + 0.2
  151. ]
  152. collector = AsyncMetricsCollector(spec_decode_sampler=spec_decode_sampler,
  153. timer=timer,
  154. collect_interval_s=collect_interval_s)
  155. collector.init_gpu_tensors(rank=0)
  156. _ = collector.maybe_collect_rejsample_metrics(k)
  157. metrics = collector.maybe_collect_rejsample_metrics(k)
  158. assert metrics.num_spec_tokens == k
  159. assert metrics.accepted_tokens == num_accepted_tokens
  160. assert metrics.draft_tokens == num_draft_tokens
  161. assert metrics.emitted_tokens == num_emitted_tokens
  162. if has_data:
  163. assert (metrics.draft_acceptance_rate == num_accepted_tokens /
  164. num_draft_tokens)
  165. assert (metrics.system_efficiency == num_emitted_tokens /
  166. max_num_emitted_tokens)
  167. else:
  168. assert math.isnan(metrics.draft_acceptance_rate)
  169. assert math.isnan(metrics.system_efficiency)