match.c (6023B)
1 #include <ctype.h> 2 #include <string.h> 3 #include <strings.h> 4 #include <stdio.h> 5 #include <float.h> 6 #include <math.h> 7 #include <stdlib.h> 8 9 #include "match.h" 10 #include "bonus.h" 11 12 #include "config.h" 13 14 char *strcasechr(const char *s, char c) { 15 const char accept[3] = {c, toupper(c), 0}; 16 return strpbrk(s, accept); 17 } 18 19 int has_match(const char *needle, const char *haystack) { 20 while (*needle) { 21 char nch = *needle++; 22 23 if (!(haystack = strcasechr(haystack, nch))) { 24 return 0; 25 } 26 haystack++; 27 } 28 return 1; 29 } 30 31 #define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0) 32 33 #define max(a, b) (((a) > (b)) ? (a) : (b)) 34 35 struct match_struct { 36 int needle_len; 37 int haystack_len; 38 39 char lower_needle[MATCH_MAX_LEN]; 40 char lower_haystack[MATCH_MAX_LEN]; 41 42 score_t match_bonus[MATCH_MAX_LEN]; 43 }; 44 45 static void precompute_bonus(const char *haystack, score_t *match_bonus) { 46 /* Which positions are beginning of words */ 47 char last_ch = '/'; 48 for (int i = 0; haystack[i]; i++) { 49 char ch = haystack[i]; 50 match_bonus[i] = COMPUTE_BONUS(last_ch, ch); 51 last_ch = ch; 52 } 53 } 54 55 static void setup_match_struct(struct match_struct *match, const char *needle, const char *haystack) { 56 match->needle_len = strlen(needle); 57 match->haystack_len = strlen(haystack); 58 59 if (match->haystack_len > MATCH_MAX_LEN || match->needle_len > match->haystack_len) { 60 return; 61 } 62 63 for (int i = 0; i < match->needle_len; i++) 64 match->lower_needle[i] = tolower(needle[i]); 65 66 for (int i = 0; i < match->haystack_len; i++) 67 match->lower_haystack[i] = tolower(haystack[i]); 68 69 precompute_bonus(haystack, match->match_bonus); 70 } 71 72 static inline void match_row(const struct match_struct *match, int row, score_t *curr_D, score_t *curr_M, const score_t *last_D, const score_t *last_M) { 73 int n = match->needle_len; 74 int m = match->haystack_len; 75 int i = row; 76 77 const char *lower_needle = match->lower_needle; 78 const char *lower_haystack = match->lower_haystack; 79 const score_t *match_bonus = match->match_bonus; 80 81 score_t prev_score = SCORE_MIN; 82 score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; 83 84 for (int j = 0; j < m; j++) { 85 if (lower_needle[i] == lower_haystack[j]) { 86 score_t score = SCORE_MIN; 87 if (!i) { 88 score = (j * SCORE_GAP_LEADING) + match_bonus[j]; 89 } else if (j) { /* i > 0 && j > 0*/ 90 score = max( 91 last_M[j - 1] + match_bonus[j], 92 93 /* consecutive match, doesn't stack with match_bonus */ 94 last_D[j - 1] + SCORE_MATCH_CONSECUTIVE); 95 } 96 curr_D[j] = score; 97 curr_M[j] = prev_score = max(score, prev_score + gap_score); 98 } else { 99 curr_D[j] = SCORE_MIN; 100 curr_M[j] = prev_score = prev_score + gap_score; 101 } 102 } 103 } 104 105 score_t match(const char *needle, const char *haystack) { 106 if (!*needle) 107 return SCORE_MIN; 108 109 struct match_struct match; 110 setup_match_struct(&match, needle, haystack); 111 112 int n = match.needle_len; 113 int m = match.haystack_len; 114 115 if (m > MATCH_MAX_LEN || n > m) { 116 /* 117 * Unreasonably large candidate: return no score 118 * If it is a valid match it will still be returned, it will 119 * just be ranked below any reasonably sized candidates 120 */ 121 return SCORE_MIN; 122 } else if (n == m) { 123 /* Since this method can only be called with a haystack which 124 * matches needle. If the lengths of the strings are equal the 125 * strings themselves must also be equal (ignoring case). 126 */ 127 return SCORE_MAX; 128 } 129 130 /* 131 * D[][] Stores the best score for this position ending with a match. 132 * M[][] Stores the best possible score at this position. 133 */ 134 score_t D[2][MATCH_MAX_LEN], M[2][MATCH_MAX_LEN]; 135 136 score_t *last_D, *last_M; 137 score_t *curr_D, *curr_M; 138 139 last_D = D[0]; 140 last_M = M[0]; 141 curr_D = D[1]; 142 curr_M = M[1]; 143 144 for (int i = 0; i < n; i++) { 145 match_row(&match, i, curr_D, curr_M, last_D, last_M); 146 147 SWAP(curr_D, last_D, score_t *); 148 SWAP(curr_M, last_M, score_t *); 149 } 150 151 return last_M[m - 1]; 152 } 153 154 score_t match_positions(const char *needle, const char *haystack, size_t *positions) { 155 if (!*needle) 156 return SCORE_MIN; 157 158 struct match_struct match; 159 setup_match_struct(&match, needle, haystack); 160 161 int n = match.needle_len; 162 int m = match.haystack_len; 163 164 if (m > MATCH_MAX_LEN || n > m) { 165 /* 166 * Unreasonably large candidate: return no score 167 * If it is a valid match it will still be returned, it will 168 * just be ranked below any reasonably sized candidates 169 */ 170 return SCORE_MIN; 171 } else if (n == m) { 172 /* Since this method can only be called with a haystack which 173 * matches needle. If the lengths of the strings are equal the 174 * strings themselves must also be equal (ignoring case). 175 */ 176 if (positions) 177 for (int i = 0; i < n; i++) 178 positions[i] = i; 179 return SCORE_MAX; 180 } 181 182 /* 183 * D[][] Stores the best score for this position ending with a match. 184 * M[][] Stores the best possible score at this position. 185 */ 186 score_t (*D)[MATCH_MAX_LEN], (*M)[MATCH_MAX_LEN]; 187 M = malloc(sizeof(score_t) * MATCH_MAX_LEN * n); 188 D = malloc(sizeof(score_t) * MATCH_MAX_LEN * n); 189 190 score_t *last_D, *last_M; 191 score_t *curr_D, *curr_M; 192 193 for (int i = 0; i < n; i++) { 194 curr_D = &D[i][0]; 195 curr_M = &M[i][0]; 196 197 match_row(&match, i, curr_D, curr_M, last_D, last_M); 198 199 last_D = curr_D; 200 last_M = curr_M; 201 } 202 203 /* backtrace to find the positions of optimal matching */ 204 if (positions) { 205 int match_required = 0; 206 for (int i = n - 1, j = m - 1; i >= 0; i--) { 207 for (; j >= 0; j--) { 208 /* 209 * There may be multiple paths which result in 210 * the optimal weight. 211 * 212 * For simplicity, we will pick the first one 213 * we encounter, the latest in the candidate 214 * string. 215 */ 216 if (D[i][j] != SCORE_MIN && 217 (match_required || D[i][j] == M[i][j])) { 218 /* If this score was determined using 219 * SCORE_MATCH_CONSECUTIVE, the 220 * previous character MUST be a match 221 */ 222 match_required = 223 i && j && 224 M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE; 225 positions[i] = j--; 226 break; 227 } 228 } 229 } 230 } 231 232 score_t result = M[n - 1][m - 1]; 233 234 free(M); 235 free(D); 236 237 return result; 238 }