123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201 |
- // Copyright (C) 2011, 2012 Google Inc.
- //
- // This file is part of YouCompleteMe.
- //
- // YouCompleteMe is free software: you can redistribute it and/or modify
- // it under the terms of the GNU General Public License as published by
- // the Free Software Foundation, either version 3 of the License, or
- // (at your option) any later version.
- //
- // YouCompleteMe is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU General Public License for more details.
- //
- // You should have received a copy of the GNU General Public License
- // along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
- #include "IdentifierUtils.h"
- #include "Utils.h"
- #include "standard.h"
- #include <boost/unordered_map.hpp>
- #include <boost/assign/list_of.hpp>
- #include <boost/regex.hpp>
- #include <boost/algorithm/string/regex.hpp>
- namespace YouCompleteMe {
- namespace fs = boost::filesystem;
- namespace {
// Regex whose matches are erased by RemoveIdentifierFreeText(). It is an
// alternation of five patterns; every repetition is lazy ('*?') so that each
// match stops at the first possible terminator.
const char *const COMMENT_AND_STRING_REGEX =
  // Line comment introduced by '//'.
  "//.*?$"

  // Line comment introduced by '#'.
  "|" "#.*?$"

  // C-style block comment, '/* ... */'.
  "|" "/\\*.*?\\*/"

  // Single-quoted string, '...'. Things to mind:
  //   1. the opening quote must not itself be escaped (the lookbehind);
  //   2. an escaped backslash (\\) inside the string;
  //   3. an escaped single quote (\') inside the string.
  "|" "(?<!\\\\)'(?:\\\\\\\\|\\\\'|.)*?'"

  // Double-quoted string, "...". Same three caveats as above, with the
  // double quote taking the place of the single quote.
  "|" "(?<!\\\\)\"(?:\\\\\\\\|\\\\\"|.)*?\"";
// Regex for a single identifier: a letter or underscore followed by any
// number of word characters. Used by ExtractIdentifiersFromText().
const char *const IDENTIFIER_REGEX =
  "[_a-zA-Z]\\w*";
// Regex matching one line of a tags file. The tag format is described at:
//   http://ctags.sourceforge.net/FORMAT
// TL;DR: The only supported format is the one Exuberant Ctags emits.
//
// Capture groups:
//   1. the identifier
//   2. the path of the file containing the identifier (absolute, or relative
//      to the tags file)
//   3. the language of that file, taken from the "language:" field
const char *const TAG_REGEX =
  // Field 1: the identifier.
  "^([^\\t\\n\\r]+)"
  // Fields are separated by a TAB char.
  "\\t"
  // Field 2: the path to the file that has the identifier.
  "([^\\t\\n\\r]+)"
  // Lazily skip everything up to the language field.
  "\\t.*?"
  // The language of the file.
  "language:([^\\t\\n\\r]+)"
  // Lazily consume the remainder of the line.
  ".*?$";
// Only used as the equality comparer for the below unordered_map which stores
// const char* pointers and not std::string but needs to compare based on
// string values and not pointer values.
// When passed a const char* this will create a temporary std::string for
// comparison, but it's fast enough for our use case.
//
// This deliberately does not derive from std::binary_function: that base was
// deprecated in C++11 and removed in C++17, and the functor only needs to
// provide operator().
struct StringEqualityComparer {
  // Returns true when |a| and |b| hold the same characters.
  bool operator()( const std::string &a, const std::string &b ) const {
    return a == b;
  }
};
- // List of languages Exuberant Ctags supports:
- // ctags --list-languages
- // To map a language name to a filetype, see this file:
- // :e $VIMRUNTIME/filetype.vim
- // This is a map of const char* and not std::string to prevent issues with
- // static initialization.
- const boost::unordered_map < const char *,
- const char *,
- boost::hash< std::string >,
- StringEqualityComparer > LANG_TO_FILETYPE =
- boost::assign::map_list_of
- ( "Ant" , "ant" )
- ( "Asm" , "asm" )
- ( "Awk" , "awk" )
- ( "Basic" , "basic" )
- ( "C++" , "cpp" )
- ( "C#" , "cs" )
- ( "C" , "c" )
- ( "COBOL" , "cobol" )
- ( "DosBatch" , "dosbatch" )
- ( "Eiffel" , "eiffel" )
- ( "Erlang" , "erlang" )
- ( "Fortran" , "fortran" )
- ( "HTML" , "html" )
- ( "Java" , "java" )
- ( "JavaScript" , "javascript" )
- ( "Lisp" , "lisp" )
- ( "Lua" , "lua" )
- ( "Make" , "make" )
- ( "MatLab" , "matlab" )
- ( "OCaml" , "ocaml" )
- ( "Pascal" , "pascal" )
- ( "Perl" , "perl" )
- ( "PHP" , "php" )
- ( "Python" , "python" )
- ( "REXX" , "rexx" )
- ( "Ruby" , "ruby" )
- ( "Scheme" , "scheme" )
- ( "Sh" , "sh" )
- ( "SLang" , "slang" )
- ( "SML" , "sml" )
- ( "SQL" , "sql" )
- ( "Tcl" , "tcl" )
- ( "Tex" , "tex" )
- ( "Vera" , "vera" )
- ( "Verilog" , "verilog" )
- ( "VHDL" , "vhdl" )
- ( "Vim" , "vim" )
- ( "YACC" , "yacc" );
// Sentinel handed to FindWithDefault() as the fallback value; getting it back
// means the language has no entry in LANG_TO_FILETYPE.
const char *const NOT_FOUND =
  "YCMFOOBAR_NOT_FOUND";
- } // unnamed namespace
- std::string RemoveIdentifierFreeText( std::string text ) {
- boost::erase_all_regex( text, boost::regex( COMMENT_AND_STRING_REGEX ) );
- return text;
- }
- std::vector< std::string > ExtractIdentifiersFromText(
- const std::string &text ) {
- std::string::const_iterator start = text.begin();
- std::string::const_iterator end = text.end();
- boost::smatch matches;
- const boost::regex expression( IDENTIFIER_REGEX );
- std::vector< std::string > identifiers;
- while ( boost::regex_search( start, end, matches, expression ) ) {
- identifiers.push_back( matches[ 0 ] );
- start = matches[ 0 ].second;
- }
- return identifiers;
- }
- FiletypeIdentifierMap ExtractIdentifiersFromTagsFile(
- const fs::path &path_to_tag_file ) {
- FiletypeIdentifierMap filetype_identifier_map;
- std::string tags_file_contents;
- try {
- tags_file_contents = ReadUtf8File( path_to_tag_file );
- } catch ( ... ) {
- return filetype_identifier_map;
- }
- std::string::const_iterator start = tags_file_contents.begin();
- std::string::const_iterator end = tags_file_contents.end();
- boost::smatch matches;
- const boost::regex expression( TAG_REGEX );
- const boost::match_flag_type options = boost::match_not_dot_newline;
- while ( boost::regex_search( start, end, matches, expression, options ) ) {
- start = matches[ 0 ].second;
- std::string language( matches[ 3 ] );
- std::string filetype = FindWithDefault( LANG_TO_FILETYPE,
- language.c_str(),
- NOT_FOUND );
- if ( filetype == NOT_FOUND )
- continue;
- std::string identifier( matches[ 1 ] );
- fs::path path( matches[ 2 ].str() );
- if ( path.is_relative() )
- path = path_to_tag_file.parent_path() / path;
- filetype_identifier_map[ filetype ][ path.string() ].push_back( identifier );
- }
- return filetype_identifier_map;
- }
- } // namespace YouCompleteMe
|