Result.cpp 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. // Copyright (C) 2011, 2012 Google Inc.
  2. //
  3. // This file is part of YouCompleteMe.
  4. //
  5. // YouCompleteMe is free software: you can redistribute it and/or modify
  6. // it under the terms of the GNU General Public License as published by
  7. // the Free Software Foundation, either version 3 of the License, or
  8. // (at your option) any later version.
  9. //
  10. // YouCompleteMe is distributed in the hope that it will be useful,
  11. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. // GNU General Public License for more details.
  14. //
  15. // You should have received a copy of the GNU General Public License
  16. // along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
  17. #include "Result.h"
  18. #include "standard.h"
  19. #include "Utils.h"
  20. #include <boost/algorithm/string.hpp>
  21. #include <boost/function.hpp>
  22. #include <algorithm>
  23. #include <locale>
  24. using boost::algorithm::istarts_with;
  25. namespace YouCompleteMe {
  26. namespace {
  27. char ChangeCharCase( char c ) {
  28. if ( std::isupper( c, std::locale() ) )
  29. return std::tolower( c, std::locale() );
  30. return std::toupper( c, std::locale() );
  31. }
  32. bool CharLessThanWithLowercasePriority(const char &first,
  33. const char &second) {
  34. char swap_first = ChangeCharCase( first );
  35. char swap_second = ChangeCharCase( second );
  36. return swap_first < swap_second;
  37. }
  38. bool StringLessThanWithLowercasePriority(const std::string &first,
  39. const std::string &second) {
  40. return std::lexicographical_compare(
  41. first.begin(), first.end(),
  42. second.begin(), second.end(),
  43. boost::function< bool( const char&, const char& ) >(
  44. &CharLessThanWithLowercasePriority ) );
  45. }
  46. int LongestCommonSubsequenceLength( const std::string &first,
  47. const std::string &second ) {
  48. const std::string &longer = first.size() > second.size() ? first : second;
  49. const std::string &shorter = first.size() > second.size() ? second : first;
  50. int longer_len = longer.size();
  51. int shorter_len = shorter.size();
  52. std::vector<int> previous( shorter_len + 1, 0 );
  53. std::vector<int> current( shorter_len + 1, 0 );
  54. for ( int i = 0; i < longer_len; ++i ) {
  55. for ( int j = 0; j < shorter_len; ++j ) {
  56. if ( toupper( longer[ i ] ) == toupper( shorter[ j ] ) )
  57. current[ j + 1 ] = previous[ j ] + 1;
  58. else
  59. current[ j + 1 ] = std::max( current[ j ], previous[ j + 1 ] );
  60. }
  61. for ( int j = 0; j < shorter_len; ++j ) {
  62. previous[ j + 1 ] = current[ j + 1 ];
  63. }
  64. }
  65. return current[ shorter_len ];
  66. }
  67. int NumWordBoundaryCharMatches( const std::string &query,
  68. const std::string &word_boundary_chars ) {
  69. return LongestCommonSubsequenceLength( query, word_boundary_chars );
  70. }
  71. } // unnamed namespace
  72. Result::Result()
  73. :
  74. query_is_empty_( true ),
  75. is_subsequence_( false ),
  76. first_char_same_in_query_and_text_( false ),
  77. ratio_of_word_boundary_chars_in_query_( 0 ),
  78. word_boundary_char_utilization_( 0 ),
  79. query_is_candidate_prefix_( false ),
  80. text_is_lowercase_( false ),
  81. char_match_index_sum_( 0 ),
  82. text_( NULL ) {
  83. }
  84. Result::Result( bool is_subsequence )
  85. :
  86. query_is_empty_( true ),
  87. is_subsequence_( is_subsequence ),
  88. first_char_same_in_query_and_text_( false ),
  89. ratio_of_word_boundary_chars_in_query_( 0 ),
  90. word_boundary_char_utilization_( 0 ),
  91. query_is_candidate_prefix_( false ),
  92. text_is_lowercase_( false ),
  93. char_match_index_sum_( 0 ),
  94. text_( NULL ) {
  95. }
  96. Result::Result( bool is_subsequence,
  97. const std::string *text,
  98. bool text_is_lowercase,
  99. int char_match_index_sum,
  100. const std::string &word_boundary_chars,
  101. const std::string &query )
  102. :
  103. query_is_empty_( true ),
  104. is_subsequence_( is_subsequence ),
  105. first_char_same_in_query_and_text_( false ),
  106. ratio_of_word_boundary_chars_in_query_( 0 ),
  107. word_boundary_char_utilization_( 0 ),
  108. query_is_candidate_prefix_( false ),
  109. text_is_lowercase_( text_is_lowercase ),
  110. char_match_index_sum_( char_match_index_sum ),
  111. text_( text ) {
  112. if ( is_subsequence )
  113. SetResultFeaturesFromQuery( word_boundary_chars, query );
  114. }
  115. bool Result::operator< ( const Result &other ) const {
  116. // Yes, this is ugly but it also needs to be fast. Since this is called a
  117. // bazillion times, we have to make sure only the required comparisons are
  118. // made, and no more.
  119. if ( !query_is_empty_ ) {
  120. if ( first_char_same_in_query_and_text_ !=
  121. other.first_char_same_in_query_and_text_ ) {
  122. return first_char_same_in_query_and_text_;
  123. }
  124. bool equal_wb_ratios = AlmostEqual(
  125. ratio_of_word_boundary_chars_in_query_,
  126. other.ratio_of_word_boundary_chars_in_query_ );
  127. bool equal_wb_utilization = AlmostEqual(
  128. word_boundary_char_utilization_,
  129. other.word_boundary_char_utilization_ );
  130. if ( AlmostEqual( ratio_of_word_boundary_chars_in_query_, 1.0 ) ||
  131. AlmostEqual( other.ratio_of_word_boundary_chars_in_query_, 1.0 ) ) {
  132. if ( !equal_wb_ratios ) {
  133. return ratio_of_word_boundary_chars_in_query_ >
  134. other.ratio_of_word_boundary_chars_in_query_;
  135. }
  136. else {
  137. if ( !equal_wb_utilization )
  138. return word_boundary_char_utilization_ >
  139. other.word_boundary_char_utilization_;
  140. }
  141. }
  142. if ( query_is_candidate_prefix_ != other.query_is_candidate_prefix_ )
  143. return query_is_candidate_prefix_;
  144. if ( !equal_wb_ratios ) {
  145. return ratio_of_word_boundary_chars_in_query_ >
  146. other.ratio_of_word_boundary_chars_in_query_;
  147. }
  148. else {
  149. if ( !equal_wb_utilization )
  150. return word_boundary_char_utilization_ >
  151. other.word_boundary_char_utilization_;
  152. }
  153. if ( char_match_index_sum_ != other.char_match_index_sum_ )
  154. return char_match_index_sum_ < other.char_match_index_sum_;
  155. if ( text_->length() != other.text_->length() )
  156. return text_->length() < other.text_->length();
  157. if ( text_is_lowercase_ != other.text_is_lowercase_ )
  158. return text_is_lowercase_;
  159. }
  160. // Lexicographic comparison, but we prioritize lowercase letters over
  161. // uppercase ones. So "foo" < "Foo".
  162. return StringLessThanWithLowercasePriority( *text_, *other.text_ );
  163. }
  164. void Result::SetResultFeaturesFromQuery(
  165. const std::string &word_boundary_chars,
  166. const std::string &query ) {
  167. query_is_empty_ = query.empty();
  168. if ( query.empty() || text_->empty() )
  169. return;
  170. first_char_same_in_query_and_text_ =
  171. toupper( query[ 0 ] ) == toupper( ( *text_ )[ 0 ] );
  172. int num_wb_matches = NumWordBoundaryCharMatches( query,
  173. word_boundary_chars );
  174. ratio_of_word_boundary_chars_in_query_ =
  175. num_wb_matches / static_cast< double >( query.length() );
  176. word_boundary_char_utilization_ =
  177. num_wb_matches / static_cast< double >( word_boundary_chars.length() );
  178. query_is_candidate_prefix_ = istarts_with( *text_, query );
  179. }
  180. } // namespace YouCompleteMe