syntax_parse.py 5.7 KB


  1. # Copyright (C) 2013 Google Inc.
  2. #
  3. # This file is part of YouCompleteMe.
  4. #
  5. # YouCompleteMe is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation, either version 3 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # YouCompleteMe is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
  17. import re
  18. import vim
  19. from ycm import vimsupport
  # Header line of one group in the syntax listing:
  #   <group_name> <whitespace> xxx <whitespace> <content>
  # The pattern is anchored at the start of the line, so lines that begin with
  # whitespace never match.
  SYNTAX_GROUP_REGEX = re.compile(
    r"""^
        (?P<group_name>\w+)
        \s+
        xxx
        \s+
        (?P<content>.+?)
        $""",
    re.VERBOSE )

  # A word is only considered a keyword when it consists solely of word
  # characters and commas.
  KEYWORD_REGEX = re.compile( r'^[\w,]+$' )

  # Words of the form name=value are syntax arguments, never keywords.
  SYNTAX_ARGUMENT_REGEX = re.compile( r"^\w+=.*$" )

  # Bare words that are syntax arguments and must not be reported as keywords.
  SYNTAX_ARGUMENTS = set( [
    'cchar', 'conceal', 'concealends', 'contained', 'containedin', 'contains',
    'display', 'excludenl', 'extend', 'fold', 'keepend', 'nextgroup',
    'oneline', 'skipempty', 'skipnl', 'skipwhite', 'transparent',
  ] )

  # Lines starting with one of these arguments are still parsed for keywords;
  # lines starting with any other member of SYNTAX_ARGUMENTS are skipped.
  ALLOWED_SYNTAX_ARGUMENTS = set( [ 'contained' ] )

  # Parent groups whose descendants are searched for keywords.
  ROOT_GROUPS = set( [
    'Boolean', 'Identifier', 'Include', 'Statement', 'Type',
  ] )
  class SyntaxGroup( object ):
    """One named syntax group together with its text lines and child groups."""

    def __init__( self, name, lines = None ):
      # A falsy `lines` (None or an empty list) yields a fresh list, so
      # instances never share a default list.
      self.name = name
      self.lines = lines or []
      self.children = []
  def SyntaxKeywordsForCurrentBuffer():
    """Return the set of keywords parsed from Vim's `syntax list` output.

    The listing is captured by redirecting command output into the buffer
    variable b:ycm_syntax, which is then read back and handed to
    _KeywordsFromSyntaxListOutput.
    """
    capture_commands = (
      'redir => b:ycm_syntax',
      'silent! syntax list',
      'redir END',
    )
    for ex_command in capture_commands:
      vim.command( ex_command )
    return _KeywordsFromSyntaxListOutput(
      vimsupport.GetVariableValue( 'b:ycm_syntax' ) )
  def _KeywordsFromSyntaxListOutput( syntax_output ):
    """Return the set of keywords contained in `syntax_output`.

    The text is parsed into groups, the groups are connected to their parents,
    and keywords are gathered from every descendant of each group named in
    ROOT_GROUPS.
    """
    groups_by_name = _SyntaxGroupsFromOutput( syntax_output )
    _ConnectGroupChildren( groups_by_name )

    found = set()
    for root_name in ROOT_GROUPS:
      for group in _GetAllDescendentats( groups_by_name[ root_name ] ):
        found.update( _ExtractKeywordsFromGroup( group ) )
    return found
  def _SyntaxGroupsFromOutput( syntax_output ):
    """Parse syntax listing text into a dict of group name -> SyntaxGroup.

    The dict starts out as the result of _CreateInitialGroupMap(). The text is
    split on newlines and processed line by line:
      - empty lines are skipped;
      - a line matching SYNTAX_GROUP_REGEX starts a group, and its stripped
        content becomes a line of that group;
      - any other line starting with a space or a tab is stripped and added to
        the group started most recently;
      - anything else, including every line before the first group header,
        is ignored.

    When a header names a group that is already in the dict, the lines are
    added to that existing object instead of replacing it. Replacing it would
    discard the children already attached to it and would leave the object
    referenced from its parent's children without any lines.
    """
    group_name_to_group = _CreateInitialGroupMap()
    current_group = None
    for line in syntax_output.split( '\n' ):
      if not line:
        continue

      match = SYNTAX_GROUP_REGEX.search( line )
      if match:
        name = match.group( 'group_name' )
        current_group = group_name_to_group.get( name )
        if current_group is None:
          current_group = SyntaxGroup( name )
          group_name_to_group[ name ] = current_group
        current_group.lines.append( match.group( 'content' ).strip() )
      elif current_group is not None and line[ 0 ] in ( ' ', '\t' ):
        # Continuation line of the group whose header was seen last.
        current_group.lines.append( line.strip() )
    return group_name_to_group
  def _CreateInitialGroupMap():
    """Return a fresh dict of group name -> SyntaxGroup with a fixed hierarchy.

    The five root groups are created first; each listed child is then created,
    stored in the dict under its own name and appended to its root's children.
    """
    # See `:h group-name` for details on how the initial group hierarchy is built
    hierarchy = (
      ( 'Statement', ( 'Conditional', 'Repeat', 'Label',
                       'Operator', 'Keyword', 'Exception' ) ),
      ( 'Type', ( 'StorageClass', 'Structure', 'Typedef' ) ),
      ( 'Boolean', () ),
      ( 'Include', () ),
      ( 'Identifier', ( 'Function', ) ),
    )

    group_map = {}
    for root_name, _ in hierarchy:
      group_map[ root_name ] = SyntaxGroup( root_name )

    for root_name, child_names in hierarchy:
      root = group_map[ root_name ]
      for child_name in child_names:
        child = SyntaxGroup( child_name )
        group_map[ child_name ] = child
        root.children.append( child )
    return group_map
  def _ConnectGroupChildren( group_name_to_group ):
    """Append every group to the children of each group it links to.

    A group line of the form 'links to <name>' names a parent. The group is
    appended to that parent's `children` when <name> is a key of
    `group_name_to_group`; links to unknown names are ignored. The groups are
    modified in place and nothing is returned.
    """
    links_to = 'links to '
    # .values() instead of .itervalues(): the latter only exists on Python 2
    # dicts, while .values() is available on both Python 2 and Python 3.
    for group in group_name_to_group.values():
      for line in group.lines:
        if not line.startswith( links_to ):
          continue
        parent_group = group_name_to_group.get( line[ len( links_to ): ] )
        if parent_group is not None:
          parent_group.children.append( group )
  def _GetAllDescendentats( root_group ):
    """Return every group reachable from `root_group` through `children`.

    Groups are listed in depth-first pre-order: a child comes first, then that
    child's own descendants, then the next child. `root_group` itself is never
    part of the result.

    Each group is reported once, at its first position in that order. Groups
    are tracked by identity, so a cycle in the children graph ends the walk
    instead of recursing without bound, and a group reachable through several
    parents is not repeated.
    """
    descendants = []
    visited = set( [ id( root_group ) ] )
    # Explicit stack; children are pushed reversed so they pop in list order.
    pending = list( reversed( root_group.children ) )
    while pending:
      group = pending.pop()
      if id( group ) in visited:
        continue
      visited.add( id( group ) )
      descendants.append( group )
      pending.extend( reversed( group.children ) )
    return descendants
  def _ExtractKeywordsFromGroup( group ):
    """Return the list of keywords found in the lines of `group`.

    Lines starting with 'links to ' are skipped, as are lines whose first word
    is in SYNTAX_ARGUMENTS but not in ALLOWED_SYNTAX_ARGUMENTS. On the
    remaining lines a word is kept when it is not in SYNTAX_ARGUMENTS, does not
    match SYNTAX_ARGUMENT_REGEX and matches KEYWORD_REGEX; a single trailing
    comma is removed from kept words.
    """
    def IsKeyword( token ):
      return ( token not in SYNTAX_ARGUMENTS and
               SYNTAX_ARGUMENT_REGEX.match( token ) is None and
               KEYWORD_REGEX.match( token ) is not None )

    found = []
    for raw_line in group.lines:
      if raw_line.startswith( 'links to ' ):
        continue

      tokens = raw_line.split()
      if not tokens:
        continue

      leading = tokens[ 0 ]
      if leading in SYNTAX_ARGUMENTS and leading not in ALLOWED_SYNTAX_ARGUMENTS:
        continue

      found.extend( token[ :-1 ] if token.endswith( ',' ) else token
                    for token in tokens if IsKeyword( token ) )
    return found