|
@@ -16,13 +16,20 @@
|
|
|
// along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
#include "IdentifierUtils.h"
|
|
|
+#include "Utils.h"
|
|
|
#include "standard.h"
|
|
|
|
|
|
+#include <boost/unordered_map.hpp>
|
|
|
+#include <boost/assign/list_of.hpp>
|
|
|
#include <boost/regex.hpp>
|
|
|
#include <boost/algorithm/string/regex.hpp>
|
|
|
|
|
|
namespace YouCompleteMe {
|
|
|
|
|
|
+namespace fs = boost::filesystem;
|
|
|
+
|
|
|
+namespace {
|
|
|
+
|
|
|
const char *COMMENT_AND_STRING_REGEX =
|
|
|
"//.*?$" // Anything following '//'
|
|
|
"|"
|
|
@@ -40,6 +47,69 @@ const char *COMMENT_AND_STRING_REGEX =
|
|
|
|
|
|
const char *IDENTIFIER_REGEX = "[_a-zA-Z]\\w*";
|
|
|
|
|
|
+// For details on the tag format supported, see here for details:
|
|
|
+// http://ctags.sourceforge.net/FORMAT
|
|
|
+// TL;DR: The only supported format is the one Exuberant Ctags emits.
|
|
|
+const char *TAG_REGEX =
|
|
|
+ "^([^\\t\\n\\r]+)" // The first field is the identifier
|
|
|
+ "\\t" // A TAB char is the field separator
|
|
|
+ // The second field is the path to the file that has the identifier; either
|
|
|
+ // absolute or relative to the tags file.
|
|
|
+ "([^\\t\\n\\r]+)"
|
|
|
+ "\\t.*?" // Non-greedy everything
|
|
|
+ "language:([^\\t\\n\\r]+)" // We want to capture the language of the file
|
|
|
+ ".*?$";
|
|
|
+
|
|
|
+
|
|
|
+// List of languages Exuberant Ctags supports:
|
|
|
+// ctags --list-languages
|
|
|
+// To map a language name to a filetype, see this file:
|
|
|
+// :e $VIMRUNTIME/filetype.vim
|
|
|
+const boost::unordered_map< std::string, std::string > LANG_TO_FILETYPE =
|
|
|
+ boost::assign::map_list_of
|
|
|
+ ( std::string( "Ant" ), std::string( "ant" ) )
|
|
|
+ ( std::string( "Asm" ), std::string( "asm" ) )
|
|
|
+ ( std::string( "Awk" ), std::string( "awk" ) )
|
|
|
+ ( std::string( "Basic" ), std::string( "basic" ) )
|
|
|
+ ( std::string( "C++" ), std::string( "cpp" ) )
|
|
|
+ ( std::string( "C#" ), std::string( "cs" ) )
|
|
|
+ ( std::string( "C" ), std::string( "c" ) )
|
|
|
+ ( std::string( "COBOL" ), std::string( "cobol" ) )
|
|
|
+ ( std::string( "DosBatch" ), std::string( "dosbatch" ) )
|
|
|
+ ( std::string( "Eiffel" ), std::string( "eiffel" ) )
|
|
|
+ ( std::string( "Erlang" ), std::string( "erlang" ) )
|
|
|
+ ( std::string( "Fortran" ), std::string( "fortran" ) )
|
|
|
+ ( std::string( "HTML" ), std::string( "html" ) )
|
|
|
+ ( std::string( "Java" ), std::string( "java" ) )
|
|
|
+ ( std::string( "JavaScript" ), std::string( "javascript" ) )
|
|
|
+ ( std::string( "Lisp" ), std::string( "lisp" ) )
|
|
|
+ ( std::string( "Lua" ), std::string( "lua" ) )
|
|
|
+ ( std::string( "Make" ), std::string( "make" ) )
|
|
|
+ ( std::string( "MatLab" ), std::string( "matlab" ) )
|
|
|
+ ( std::string( "OCaml" ), std::string( "ocaml" ) )
|
|
|
+ ( std::string( "Pascal" ), std::string( "pascal" ) )
|
|
|
+ ( std::string( "Perl" ), std::string( "perl" ) )
|
|
|
+ ( std::string( "PHP" ), std::string( "php" ) )
|
|
|
+ ( std::string( "Python" ), std::string( "python" ) )
|
|
|
+ ( std::string( "REXX" ), std::string( "rexx" ) )
|
|
|
+ ( std::string( "Ruby" ), std::string( "ruby" ) )
|
|
|
+ ( std::string( "Scheme" ), std::string( "scheme" ) )
|
|
|
+ ( std::string( "Sh" ), std::string( "sh" ) )
|
|
|
+ ( std::string( "SLang" ), std::string( "slang" ) )
|
|
|
+ ( std::string( "SML" ), std::string( "sml" ) )
|
|
|
+ ( std::string( "SQL" ), std::string( "sql" ) )
|
|
|
+ ( std::string( "Tcl" ), std::string( "tcl" ) )
|
|
|
+ ( std::string( "Tex" ), std::string( "tex" ) )
|
|
|
+ ( std::string( "Vera" ), std::string( "vera" ) )
|
|
|
+ ( std::string( "Verilog" ), std::string( "verilog" ) )
|
|
|
+ ( std::string( "VHDL" ), std::string( "vhdl" ) )
|
|
|
+ ( std::string( "Vim" ), std::string( "vim" ) )
|
|
|
+ ( std::string( "YACC" ), std::string( "yacc" ) );
|
|
|
+
|
|
|
+const std::string NOT_FOUND = "YCMFOOBAR_NOT_FOUND";
|
|
|
+
|
|
|
+} // unnamed namespace
|
|
|
+
|
|
|
|
|
|
std::string RemoveIdentifierFreeText( std::string text ) {
|
|
|
boost::erase_all_regex( text, boost::regex( COMMENT_AND_STRING_REGEX ) );
|
|
@@ -52,8 +122,8 @@ std::vector< std::string > ExtractIdentifiersFromText(
|
|
|
std::string::const_iterator start = text.begin();
|
|
|
std::string::const_iterator end = text.end();
|
|
|
|
|
|
- boost::match_results< std::string::const_iterator > matches;
|
|
|
- boost::regex expression( IDENTIFIER_REGEX );
|
|
|
+ boost::smatch matches;
|
|
|
+ const boost::regex expression( IDENTIFIER_REGEX );
|
|
|
|
|
|
std::vector< std::string > identifiers;
|
|
|
|
|
@@ -65,4 +135,46 @@ std::vector< std::string > ExtractIdentifiersFromText(
|
|
|
return identifiers;
|
|
|
}
|
|
|
|
|
|
+
|
|
|
+FiletypeIdentifierMap ExtractIdentifiersFromTagsFile(
|
|
|
+ const fs::path &path_to_tag_file ) {
|
|
|
+ FiletypeIdentifierMap filetype_identifier_map;
|
|
|
+ std::string tags_file_contents;
|
|
|
+
|
|
|
+ try {
|
|
|
+ tags_file_contents = ReadUtf8File( path_to_tag_file );
|
|
|
+ } catch (...) {
|
|
|
+ return filetype_identifier_map;
|
|
|
+ }
|
|
|
+
|
|
|
+ std::string::const_iterator start = tags_file_contents.begin();
|
|
|
+ std::string::const_iterator end = tags_file_contents.end();
|
|
|
+
|
|
|
+ boost::smatch matches;
|
|
|
+ const boost::regex expression( TAG_REGEX );
|
|
|
+ const boost::match_flag_type options = boost::match_not_dot_newline;
|
|
|
+
|
|
|
+ while ( boost::regex_search( start, end, matches, expression, options ) ) {
|
|
|
+ start = matches[ 0 ].second;
|
|
|
+
|
|
|
+ std::string language( matches[ 3 ] );
|
|
|
+ std::string filetype = FindWithDefault( LANG_TO_FILETYPE,
|
|
|
+ language,
|
|
|
+ NOT_FOUND );
|
|
|
+
|
|
|
+ if ( filetype == NOT_FOUND )
|
|
|
+ continue;
|
|
|
+
|
|
|
+ std::string identifier( matches[ 1 ] );
|
|
|
+ fs::path path( matches[ 2 ] );
|
|
|
+
|
|
|
+ if ( path.is_relative() )
|
|
|
+ path = path_to_tag_file.parent_path() / path;
|
|
|
+
|
|
|
+ filetype_identifier_map[ filetype ][ path.string() ].push_back( identifier );
|
|
|
+ }
|
|
|
+
|
|
|
+ return filetype_identifier_map;
|
|
|
+}
|
|
|
+
|
|
|
} // namespace YouCompleteMe
|