utils.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. def find_common_prefix(s1: str, s2: str) -> str:
  2. """
  3. Finds a common prefix that is shared between two strings, if there is one.
  4. Order of arguments is NOT important.
  5. This function is provided as a UTILITY for extracting information from JSON
  6. generated by partial_json_parser, to help in ensuring that the right tokens
  7. are returned in streaming, so that close-quotes, close-brackets and
  8. close-braces are not returned prematurely.
  9. e.g. find_common_prefix('{"fruit": "ap"}', '{"fruit": "apple"}') ->
  10. '{"fruit": "ap'
  11. """
  12. prefix = ""
  13. min_length = min(len(s1), len(s2))
  14. for i in range(0, min_length):
  15. if s1[i] == s2[i]:
  16. prefix += s1[i]
  17. else:
  18. break
  19. return prefix
  20. def find_common_suffix(s1: str, s2: str) -> str:
  21. """
  22. Finds a common suffix shared between two strings, if there is one. Order of
  23. arguments is NOT important.
  24. Stops when the suffix ends OR it hits an alphanumeric character
  25. e.g. find_common_suffix('{"fruit": "ap"}', '{"fruit": "apple"}') -> '"}'
  26. """
  27. suffix = ""
  28. min_length = min(len(s1), len(s2))
  29. for i in range(1, min_length + 1):
  30. if s1[-i] == s2[-i] and not s1[-i].isalnum():
  31. suffix = s1[-i] + suffix
  32. else:
  33. break
  34. return suffix
  35. def extract_intermediate_diff(curr: str, old: str) -> str:
  36. """
  37. Given two strings, extract the difference in the middle between two strings
  38. that are known to have a common prefix and/or suffix.
  39. This function is provided as a UTILITY for extracting information from JSON
  40. generated by partial_json_parser, to help in ensuring that the right tokens
  41. are returned in streaming, so that close-quotes, close-brackets and
  42. close-braces are not returned prematurely. The order of arguments IS
  43. important - the new version of the partially-parsed JSON must be the first
  44. argument, and the secnod argument must be from the previous generation.
  45. What it returns, is tokens that should be streamed to the client.
  46. e.g. extract_intermediate_diff('{"fruit": "apple"}', '{"fruit": "ap"}')
  47. -> 'ple'
  48. """
  49. suffix = find_common_suffix(curr, old)
  50. old = old[::-1].replace(suffix[::-1], "", 1)[::-1]
  51. prefix = find_common_prefix(curr, old)
  52. diff = curr
  53. if len(suffix):
  54. diff = diff[::-1].replace(suffix[::-1], "", 1)[::-1]
  55. if len(prefix):
  56. # replace the prefix only once in case it's mirrored
  57. diff = diff.replace(prefix, "", 1)
  58. return diff
  59. def find_all_indices(string, substring):
  60. """
  61. Find all (starting) indices of a substring in a given string. Useful for
  62. tool call extraction
  63. """
  64. indices = []
  65. index = -1
  66. while True:
  67. index = string.find(substring, index + 1)
  68. if index == -1:
  69. break
  70. indices.append(index)
  71. return indices