Ver código fonte

Improve extraction of syntax keywords

Use the following strategy to extract identifiers from syntax
highlighting:
 - ignore match and region: they mostly contain arguments,
   syntax groups and regular expressions;
 - ignore "nextgroup=" if it is the first word, along with any
   "skipempty", "skipwhite", and "skipnl" arguments that
   immediately follow it;
 - ignore "contained" argument if first word;
 - add remaining words to the list of identifiers.
Fix a bug where the word "match" was extracted as an identifier even
though it comes from "syntax match" lines and is not a keyword of the
language being highlighted.
micbou 8 anos atrás
pai
commit
75d41d1137
2 arquivos alterados com 74 adições e 54 exclusões
  1. 51 40
      python/ycm/syntax_parse.py
  2. 23 14
      python/ycm/tests/syntax_parse_test.py

+ 51 - 40
python/ycm/syntax_parse.py

@@ -38,43 +38,28 @@ SYNTAX_GROUP_REGEX = re.compile(
       $""",
   re.VERBOSE )
 
-KEYWORD_REGEX = re.compile( r'^[\w,]+$' )
+KEYWORD_REGEX = re.compile( r'^(\w+),?$' )
 
 SYNTAX_ARGUMENT_REGEX = re.compile(
   r"^\w+=.*$" )
 
-SYNTAX_ARGUMENTS = set([
-  'cchar',
-  'conceal',
-  'contained',
-  'containedin',
-  'nextgroup',
-  'skipempty',
-  'skipnl',
-  'skipwhite',
-  'transparent',
-  'concealends',
-  'contains',
-  'display',
-  'extend',
-  'fold',
-  'oneline',
-  'keepend',
-  'excludenl',
-])
+SYNTAX_REGION_ARGUMENT_REGEX = re.compile(
+  r"^(?:matchgroup|start)=.*$")
 
-# We want to parse lines starting with these args
-ALLOWED_SYNTAX_ARGUMENTS = set([
-  'contained',
+# See ":h syn-nextgroup".
+SYNTAX_NEXTGROUP_ARGUMENTS = set([
+  'skipwhite',
+  'skipnl',
+  'skipempty'
 ])
 
-# These are the parent groups from which we want to extract keywords
+# These are the parent groups from which we want to extract keywords.
 ROOT_GROUPS = set([
   'Statement',
   'Boolean',
   'Include',
   'Type',
-  'Identifier',
+  'Identifier'
 ])
 
 
@@ -149,7 +134,7 @@ def _CreateInitialGroupMap():
   type_group       = SyntaxGroup( 'Type' )
   identifier_group = SyntaxGroup( 'Identifier' )
 
-  # See `:h group-name` for details on how the initial group hierarchy is built
+  # See ":h group-name" for details on how the initial group hierarchy is built.
   group_name_to_group = {
     'Statement': statement_group,
     'Type': type_group,
@@ -202,23 +187,49 @@ def _GetAllDescendentats( root_group ):
   return descendants
 
 
-def _ExtractKeywordsFromGroup( group ):
+def _ExtractKeywordsFromLine( line ):
+  if line.startswith( 'links to ' ):
+    return []
+
+  # Ignore "syntax match" lines (see ":h syn-match").
+  if line.startswith( 'match ' ):
+    return []
+
+  words = line.split()
+  if not words:
+    return []
+
+  # Ignore "syntax region" lines (see ":h syn-region"). They always start
+  # with matchgroup= or start= in the syntax list.
+  if SYNTAX_REGION_ARGUMENT_REGEX.match( words[ 0 ] ):
+    return []
+
+  # Ignore "nextgroup=" argument in first position and the arguments
+  # "skipwhite", "skipnl", and "skipempty" that immediately come after.
+  nextgroup_at_start = False
+  if words[ 0 ].startswith( 'nextgroup=' ):
+    nextgroup_at_start = True
+    words = words[ 1: ]
+
+  # Ignore "contained" argument in first position.
+  if words[ 0 ] == 'contained':
+    words = words[ 1: ]
+
   keywords = []
-  for line in group.lines:
-    if line.startswith( 'links to ' ):
+  for word in words:
+    if nextgroup_at_start and word in SYNTAX_NEXTGROUP_ARGUMENTS:
       continue
 
-    words = line.split()
-    if not words or ( words[ 0 ] in SYNTAX_ARGUMENTS and
-                      words[ 0 ] not in ALLOWED_SYNTAX_ARGUMENTS ):
-      continue
+    nextgroup_at_start = False
 
-    for word in words:
-      if ( word not in SYNTAX_ARGUMENTS and
-           not SYNTAX_ARGUMENT_REGEX.match( word ) and
-           KEYWORD_REGEX.match( word ) ):
+    keyword_matched = KEYWORD_REGEX.match( word )
+    if keyword_matched:
+      keywords.append( keyword_matched.group( 1 ) )
+  return keywords
 
-        if word.endswith( ',' ):
-          word = word[ :-1 ]
-        keywords.append( word )
+
+def _ExtractKeywordsFromGroup( group ):
+  keywords = []
+  for line in group.lines:
+    keywords.extend( _ExtractKeywordsFromLine( line ) )
   return keywords

+ 23 - 14
python/ycm/tests/syntax_parse_test.py

@@ -43,10 +43,10 @@ def KeywordsFromSyntaxListOutput_PythonSyntax_test():
     'bytearray', 'IndexError', 'all', 'help', 'vars', 'SyntaxError', 'global',
     'elif', 'unicode', 'sorted', 'memoryview', 'isinstance', 'except',
     'nonlocal', 'NameError', 'finally', 'BytesWarning', 'dict', 'IOError',
-    'pass', 'oct', 'match', 'bin', 'SystemExit', 'return', 'StandardError',
-    'format', 'TabError', 'break', 'next', 'not', 'UnicodeDecodeError',
-    'False', 'RuntimeWarning', 'list', 'iter', 'try', 'reload', 'Warning',
-    'round', 'dir', 'cmp', 'set', 'bytes', 'UnicodeTranslateError', 'intern',
+    'pass', 'oct', 'bin', 'SystemExit', 'return', 'StandardError', 'format',
+    'TabError', 'break', 'next', 'not', 'UnicodeDecodeError', 'False',
+    'RuntimeWarning', 'list', 'iter', 'try', 'reload', 'Warning', 'round',
+    'dir', 'cmp', 'set', 'bytes', 'UnicodeTranslateError', 'intern',
     'issubclass', 'yield', 'Ellipsis', 'hash', 'locals', 'BufferError',
     'slice', 'for', 'FloatingPointError', 'sum', 'VMSError', 'getattr', 'abs',
     'print', 'import', 'True', 'FutureWarning', 'ImportWarning', 'None',
@@ -77,8 +77,8 @@ def KeywordsFromSyntaxListOutput_PythonSyntax_test():
 def KeywordsFromSyntaxListOutput_CppSyntax_test():
   expected_keywords = (
     'int_fast32_t', 'FILE', 'size_t', 'bitor', 'typedef', 'const', 'struct',
-    'uint8_t', 'fpos_t', 'thread_local', 'unsigned', 'uint_least16_t', 'match',
-    'do', 'intptr_t', 'uint_least64_t', 'return', 'auto', 'void', '_Complex',
+    'uint8_t', 'fpos_t', 'thread_local', 'unsigned', 'uint_least16_t', 'do',
+    'intptr_t', 'uint_least64_t', 'return', 'auto', 'void', '_Complex',
     'break', '_Alignof', 'not', 'using', '_Static_assert', '_Thread_local',
     'public', 'uint_fast16_t', 'this', 'continue', 'char32_t', 'int16_t',
     'intmax_t', 'static', 'clock_t', 'sizeof', 'int_fast64_t', 'mbstate_t',
@@ -108,7 +108,7 @@ def KeywordsFromSyntaxListOutput_JavaSyntax_test():
   expected_keywords = (
     'code', 'text', 'cols', 'datetime', 'disabled', 'shape', 'codetype', 'alt',
     'compact', 'style', 'valuetype', 'short', 'finally', 'continue', 'extends',
-    'valign', 'match', 'bordercolor', 'do', 'return', 'rel', 'rules', 'void',
+    'valign', 'bordercolor', 'do', 'return', 'rel', 'rules', 'void',
     'nohref', 'abbr', 'background', 'scrolling', 'instanceof', 'name',
     'summary', 'try', 'default', 'noshade', 'coords', 'dir', 'frame', 'usemap',
     'ismap', 'static', 'hspace', 'vlink', 'for', 'selected', 'rev', 'vspace',
@@ -273,25 +273,25 @@ def ExtractKeywordsFromGroup_KeywordStarts_test():
   assert_that( syntax_parse._ExtractKeywordsFromGroup(
                  syntax_parse.SyntaxGroup( '', [
                    'foo bar',
-                   'transparent boo baa',
+                   'contained boo baa',
                    'zoo goo',
                  ] ) ),
-               contains_inanyorder( 'foo', 'bar', 'zoo', 'goo' ) )
+               contains_inanyorder( 'foo', 'bar', 'boo', 'baa', 'zoo', 'goo' ) )
 
 
 def ExtractKeywordsFromGroup_KeywordMiddle_test():
   assert_that( syntax_parse._ExtractKeywordsFromGroup(
                  syntax_parse.SyntaxGroup( '', [
-                   'foo oneline bar',
+                   'foo contained bar',
                    'zoo goo'
                  ] ) ),
-               contains_inanyorder( 'foo', 'bar', 'zoo', 'goo' ) )
+               contains_inanyorder( 'foo', 'contained', 'bar', 'zoo', 'goo' ) )
 
 
 def ExtractKeywordsFromGroup_KeywordAssign_test():
   assert_that( syntax_parse._ExtractKeywordsFromGroup(
                  syntax_parse.SyntaxGroup( '', [
-                   'foo end=zoo((^^//)) bar',
+                   'nextgroup=zoo skipwhite foo bar',
                    'zoo goo',
                  ] ) ),
                contains_inanyorder( 'foo', 'bar', 'zoo', 'goo' ) )
@@ -300,10 +300,19 @@ def ExtractKeywordsFromGroup_KeywordAssign_test():
 def ExtractKeywordsFromGroup_KeywordAssignAndMiddle_test():
   assert_that( syntax_parse._ExtractKeywordsFromGroup(
                  syntax_parse.SyntaxGroup( '', [
-                   'foo end=zoo((^^//)) transparent bar',
+                   'nextgroup=zoo foo skipnl bar',
                    'zoo goo',
                  ] ) ),
-               contains_inanyorder( 'foo', 'bar', 'zoo', 'goo' ) )
+               contains_inanyorder( 'foo', 'skipnl', 'bar', 'zoo', 'goo' ) )
+
+
+def ExtractKeywordsFromGroup_KeywordWithoutNextgroup_test():
+  assert_that( syntax_parse._ExtractKeywordsFromGroup(
+                 syntax_parse.SyntaxGroup( '', [
+                   'skipempty foo bar',
+                   'zoo goo',
+                 ] ) ),
+               contains_inanyorder( 'skipempty', 'foo', 'bar', 'zoo', 'goo' ) )
 
 
 def ExtractKeywordsFromGroup_ContainedSyntaxArgAllowed_test():