1
0
Эх сурвалжийг харах

Use codepoint offsets in identifier functions

CurrentIdentifierFinished and LastEnteredCharIsIdentifierChar incorrectly use
byte offsets with unicode lines. Convert those offsets to codepoint offsets.
micbou 7 жил өмнө
parent
commit
eb3c0cd8c1

+ 2 - 4
python/ycm/base.py

@@ -65,11 +65,10 @@ def CompletionStartColumn():
 
 
 def CurrentIdentifierFinished():
-  current_column = vimsupport.CurrentColumn()
+  line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn()
   previous_char_index = current_column - 1
   if previous_char_index < 0:
     return True
-  line = vimsupport.CurrentLineContents()
   filetype = vimsupport.CurrentFiletypes()[ 0 ]
   regex = identifier_utils.IdentifierRegexForFiletype( filetype )
 
@@ -82,10 +81,9 @@ def CurrentIdentifierFinished():
 
 
 def LastEnteredCharIsIdentifierChar():
-  current_column = vimsupport.CurrentColumn()
+  line, current_column = vimsupport.CurrentLineContentsAndCodepointColumn()
   if current_column - 1 < 0:
     return False
-  line = vimsupport.CurrentLineContents()
   filetype = vimsupport.CurrentFiletypes()[ 0 ]
   return (
     identifier_utils.StartOfLongestIdentifierEndingAtIndex(

+ 36 - 1
python/ycm/tests/base_test.py

@@ -211,6 +211,22 @@ def LastEnteredCharIsIdentifierChar_NotIdentChar_test():
       ok_( not base.LastEnteredCharIsIdentifierChar() )
 
 
+def LastEnteredCharIsIdentifierChar_Unicode_test():
+  with MockCurrentFiletypes():
+    # CurrentColumn returns a byte offset and the character ø is 2 bytes long.
+    with MockCurrentColumnAndLineContents( 5, 'føo(' ):
+      ok_( not base.LastEnteredCharIsIdentifierChar() )
+
+    with MockCurrentColumnAndLineContents( 4, 'føo(' ):
+      ok_( base.LastEnteredCharIsIdentifierChar() )
+
+    with MockCurrentColumnAndLineContents( 3, 'føo(' ):
+      ok_( base.LastEnteredCharIsIdentifierChar() )
+
+    with MockCurrentColumnAndLineContents( 1, 'føo(' ):
+      ok_( base.LastEnteredCharIsIdentifierChar() )
+
+
 def CurrentIdentifierFinished_Basic_test():
   with MockCurrentFiletypes():
     with MockCurrentColumnAndLineContents( 3, 'ab;' ):
@@ -234,11 +250,14 @@ def CurrentIdentifierFinished_NothingBeforeColumn_test():
 def CurrentIdentifierFinished_InvalidColumn_test():
   with MockCurrentFiletypes():
     with MockCurrentColumnAndLineContents( 5, '' ):
-      ok_( not base.CurrentIdentifierFinished() )
+      ok_( base.CurrentIdentifierFinished() )
 
     with MockCurrentColumnAndLineContents( 5, 'abc' ):
       ok_( not base.CurrentIdentifierFinished() )
 
+    with MockCurrentColumnAndLineContents( 4, 'ab;' ):
+      ok_( base.CurrentIdentifierFinished() )
+
 
 def CurrentIdentifierFinished_InMiddleOfLine_test():
   with MockCurrentFiletypes():
@@ -268,3 +287,19 @@ def CurrentIdentifierFinished_WhitespaceOnly_test():
 
     with MockCurrentColumnAndLineContents( 3, '\t\t\t\t' ):
       ok_( base.CurrentIdentifierFinished() )
+
+
+def CurrentIdentifierFinished_Unicode_test():
+  with MockCurrentFiletypes():
+    # CurrentColumn returns a byte offset and the character ø is 2 bytes long.
+    with MockCurrentColumnAndLineContents( 6, 'føo ' ):
+      ok_( base.CurrentIdentifierFinished() )
+
+    with MockCurrentColumnAndLineContents( 5, 'føo ' ):
+      ok_( base.CurrentIdentifierFinished() )
+
+    with MockCurrentColumnAndLineContents( 4, 'føo ' ):
+      ok_( not base.CurrentIdentifierFinished() )
+
+    with MockCurrentColumnAndLineContents( 3, 'føo ' ):
+      ok_( not base.CurrentIdentifierFinished() )

+ 13 - 2
python/ycm/vimsupport.py

@@ -29,8 +29,8 @@ import os
 import json
 import re
 from collections import defaultdict
-from ycmd.utils import ( GetCurrentDirectory, JoinLinesAsUnicode, ToBytes,
-                         ToUnicode )
+from ycmd.utils import ( ByteOffsetToCodepointOffset, GetCurrentDirectory,
+                         JoinLinesAsUnicode, ToBytes, ToUnicode )
 from ycmd import user_options_store
 
 BUFFER_COMMAND_MAP = { 'same-buffer'      : 'edit',
@@ -73,6 +73,17 @@ def CurrentLineContents():
   return ToUnicode( vim.current.line )
 
 
+def CurrentLineContentsAndCodepointColumn():
+  """Returns the line contents as a unicode string and the 0-based current
+  column as a codepoint offset. If the current column is outside the line,
+  returns the column position at the end of the line."""
+  line = CurrentLineContents()
+  byte_column = CurrentColumn()
+  # ByteOffsetToCodepointOffset expects a 1-based offset.
+  column = ByteOffsetToCodepointOffset( line, byte_column + 1 ) - 1
+  return line, column
+
+
 def TextAfterCursor():
   """Returns the text after CurrentColumn."""
   return ToUnicode( vim.current.line[ CurrentColumn(): ] )