benchmark.py 3.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. # ruff: noqa
  2. import random
  3. import string
  4. import timeit
  5. from typing import Callable
  6. import c_utils
  7. import matplotlib.pyplot as plt
  8. import numpy as np
  9. # Import both implementations
  10. from utils import extract_intermediate_diff as py_diff
  11. from utils import find_all_indices as py_indices
  12. from utils import find_common_prefix as py_prefix
  13. from utils import find_common_suffix as py_suffix
  14. def generate_random_string(length: int) -> str:
  15. """Generate a random string of given length."""
  16. return ''.join(random.choices(string.ascii_letters + string.digits + '{}[]":', k=length))
  17. def benchmark_function(func: Callable, *args, number: int = 10000) -> float:
  18. """Measure execution time of a function."""
  19. return timeit.timeit(lambda: func(*args), number=number) / number
  20. def run_benchmarks():
  21. # Test cases with increasing sizes
  22. sizes = [10, 100, 1000, 10000]
  23. results = {
  24. 'prefix': {'python': [], 'c': []},
  25. 'suffix': {'python': [], 'c': []},
  26. 'diff': {'python': [], 'c': []},
  27. 'indices': {'python': [], 'c': []}
  28. }
  29. for size in sizes:
  30. # Generate test strings
  31. s1 = generate_random_string(size)
  32. s2 = s1[:size//2] + generate_random_string(size//2) # Share prefix
  33. text = "hello " * (size // 5) # For find_all_indices
  34. # Benchmark prefix
  35. py_time = benchmark_function(py_prefix, s1, s2)
  36. c_time = benchmark_function(c_utils.find_common_prefix, s1, s2)
  37. results['prefix']['python'].append(py_time)
  38. results['prefix']['c'].append(c_time)
  39. # Benchmark suffix
  40. py_time = benchmark_function(py_suffix, s1, s2)
  41. c_time = benchmark_function(c_utils.find_common_suffix, s1, s2)
  42. results['suffix']['python'].append(py_time)
  43. results['suffix']['c'].append(c_time)
  44. # Benchmark diff
  45. py_time = benchmark_function(py_diff, s1, s2)
  46. c_time = benchmark_function(c_utils.extract_intermediate_diff, s1, s2)
  47. results['diff']['python'].append(py_time)
  48. results['diff']['c'].append(c_time)
  49. # Benchmark indices
  50. py_time = benchmark_function(py_indices, text, "hello")
  51. c_time = benchmark_function(c_utils.find_all_indices, text, "hello")
  52. results['indices']['python'].append(py_time)
  53. results['indices']['c'].append(c_time)
  54. print(f"\nResults for size {size}:")
  55. for func_name in results:
  56. speedup = results[func_name]['python'][-1] / results[func_name]['c'][-1]
  57. print(f"{func_name}:")
  58. print(f" Python: {results[func_name]['python'][-1]*1e6:.2f} µs")
  59. print(f" C: {results[func_name]['c'][-1]*1e6:.2f} µs")
  60. print(f" Speedup: {speedup:.2f}x")
  61. # Plot results
  62. fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
  63. plots = {
  64. 'prefix': ax1,
  65. 'suffix': ax2,
  66. 'diff': ax3,
  67. 'indices': ax4
  68. }
  69. for func_name, ax in plots.items():
  70. ax.plot(sizes, results[func_name]['python'], 'b-', label='Python')
  71. ax.plot(sizes, results[func_name]['c'], 'r-', label='C')
  72. ax.set_title(f'{func_name} Performance')
  73. ax.set_xlabel('Input Size')
  74. ax.set_ylabel('Time (seconds)')
  75. ax.set_xscale('log')
  76. ax.set_yscale('log')
  77. ax.legend()
  78. ax.grid(True)
  79. plt.tight_layout()
  80. plt.savefig('benchmark_results.png')
  81. plt.close()
  82. if __name__ == "__main__":
  83. run_benchmarks()