systemSetup

system setup, configuration and dotfiles
git clone https://noulin.net/git/systemSetup.git
Log | Files | Refs | README | LICENSE

match.c (6023B)


      1 #include <ctype.h>
      2 #include <string.h>
      3 #include <strings.h>
      4 #include <stdio.h>
      5 #include <float.h>
      6 #include <math.h>
      7 #include <stdlib.h>
      8 
      9 #include "match.h"
     10 #include "bonus.h"
     11 
     12 #include "config.h"
     13 
     14 char *strcasechr(const char *s, char c) {
     15 	const char accept[3] = {c, toupper(c), 0};
     16 	return strpbrk(s, accept);
     17 }
     18 
     19 int has_match(const char *needle, const char *haystack) {
     20 	while (*needle) {
     21 		char nch = *needle++;
     22 
     23 		if (!(haystack = strcasechr(haystack, nch))) {
     24 			return 0;
     25 		}
     26 		haystack++;
     27 	}
     28 	return 1;
     29 }
     30 
     31 #define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0)
     32 
     33 #define max(a, b) (((a) > (b)) ? (a) : (b))
     34 
     35 struct match_struct {
     36 	int needle_len;
     37 	int haystack_len;
     38 
     39 	char lower_needle[MATCH_MAX_LEN];
     40 	char lower_haystack[MATCH_MAX_LEN];
     41 
     42 	score_t match_bonus[MATCH_MAX_LEN];
     43 };
     44 
     45 static void precompute_bonus(const char *haystack, score_t *match_bonus) {
     46 	/* Which positions are beginning of words */
     47 	char last_ch = '/';
     48 	for (int i = 0; haystack[i]; i++) {
     49 		char ch = haystack[i];
     50 		match_bonus[i] = COMPUTE_BONUS(last_ch, ch);
     51 		last_ch = ch;
     52 	}
     53 }
     54 
     55 static void setup_match_struct(struct match_struct *match, const char *needle, const char *haystack) {
     56 	match->needle_len = strlen(needle);
     57 	match->haystack_len = strlen(haystack);
     58 
     59 	if (match->haystack_len > MATCH_MAX_LEN || match->needle_len > match->haystack_len) {
     60 		return;
     61 	}
     62 
     63 	for (int i = 0; i < match->needle_len; i++)
     64 		match->lower_needle[i] = tolower(needle[i]);
     65 
     66 	for (int i = 0; i < match->haystack_len; i++)
     67 		match->lower_haystack[i] = tolower(haystack[i]);
     68 
     69 	precompute_bonus(haystack, match->match_bonus);
     70 }
     71 
     72 static inline void match_row(const struct match_struct *match, int row, score_t *curr_D, score_t *curr_M, const score_t *last_D, const score_t *last_M) {
     73 	int n = match->needle_len;
     74 	int m = match->haystack_len;
     75 	int i = row;
     76 
     77 	const char *lower_needle = match->lower_needle;
     78 	const char *lower_haystack = match->lower_haystack;
     79 	const score_t *match_bonus = match->match_bonus;
     80 
     81 	score_t prev_score = SCORE_MIN;
     82 	score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
     83 
     84 	for (int j = 0; j < m; j++) {
     85 		if (lower_needle[i] == lower_haystack[j]) {
     86 			score_t score = SCORE_MIN;
     87 			if (!i) {
     88 				score = (j * SCORE_GAP_LEADING) + match_bonus[j];
     89 			} else if (j) { /* i > 0 && j > 0*/
     90 				score = max(
     91 						last_M[j - 1] + match_bonus[j],
     92 
     93 						/* consecutive match, doesn't stack with match_bonus */
     94 						last_D[j - 1] + SCORE_MATCH_CONSECUTIVE);
     95 			}
     96 			curr_D[j] = score;
     97 			curr_M[j] = prev_score = max(score, prev_score + gap_score);
     98 		} else {
     99 			curr_D[j] = SCORE_MIN;
    100 			curr_M[j] = prev_score = prev_score + gap_score;
    101 		}
    102 	}
    103 }
    104 
    105 score_t match(const char *needle, const char *haystack) {
    106 	if (!*needle)
    107 		return SCORE_MIN;
    108 
    109 	struct match_struct match;
    110 	setup_match_struct(&match, needle, haystack);
    111 
    112 	int n = match.needle_len;
    113 	int m = match.haystack_len;
    114 
    115 	if (m > MATCH_MAX_LEN || n > m) {
    116 		/*
    117 		 * Unreasonably large candidate: return no score
    118 		 * If it is a valid match it will still be returned, it will
    119 		 * just be ranked below any reasonably sized candidates
    120 		 */
    121 		return SCORE_MIN;
    122 	} else if (n == m) {
    123 		/* Since this method can only be called with a haystack which
    124 		 * matches needle. If the lengths of the strings are equal the
    125 		 * strings themselves must also be equal (ignoring case).
    126 		 */
    127 		return SCORE_MAX;
    128 	}
    129 
    130 	/*
    131 	 * D[][] Stores the best score for this position ending with a match.
    132 	 * M[][] Stores the best possible score at this position.
    133 	 */
    134 	score_t D[2][MATCH_MAX_LEN], M[2][MATCH_MAX_LEN];
    135 
    136 	score_t *last_D, *last_M;
    137 	score_t *curr_D, *curr_M;
    138 
    139 	last_D = D[0];
    140 	last_M = M[0];
    141 	curr_D = D[1];
    142 	curr_M = M[1];
    143 
    144 	for (int i = 0; i < n; i++) {
    145 		match_row(&match, i, curr_D, curr_M, last_D, last_M);
    146 
    147 		SWAP(curr_D, last_D, score_t *);
    148 		SWAP(curr_M, last_M, score_t *);
    149 	}
    150 
    151 	return last_M[m - 1];
    152 }
    153 
    154 score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
    155 	if (!*needle)
    156 		return SCORE_MIN;
    157 
    158 	struct match_struct match;
    159 	setup_match_struct(&match, needle, haystack);
    160 
    161 	int n = match.needle_len;
    162 	int m = match.haystack_len;
    163 
    164 	if (m > MATCH_MAX_LEN || n > m) {
    165 		/*
    166 		 * Unreasonably large candidate: return no score
    167 		 * If it is a valid match it will still be returned, it will
    168 		 * just be ranked below any reasonably sized candidates
    169 		 */
    170 		return SCORE_MIN;
    171 	} else if (n == m) {
    172 		/* Since this method can only be called with a haystack which
    173 		 * matches needle. If the lengths of the strings are equal the
    174 		 * strings themselves must also be equal (ignoring case).
    175 		 */
    176 		if (positions)
    177 			for (int i = 0; i < n; i++)
    178 				positions[i] = i;
    179 		return SCORE_MAX;
    180 	}
    181 
    182 	/*
    183 	 * D[][] Stores the best score for this position ending with a match.
    184 	 * M[][] Stores the best possible score at this position.
    185 	 */
    186 	score_t (*D)[MATCH_MAX_LEN], (*M)[MATCH_MAX_LEN];
    187 	M = malloc(sizeof(score_t) * MATCH_MAX_LEN * n);
    188 	D = malloc(sizeof(score_t) * MATCH_MAX_LEN * n);
    189 
    190 	score_t *last_D, *last_M;
    191 	score_t *curr_D, *curr_M;
    192 
    193 	for (int i = 0; i < n; i++) {
    194 		curr_D = &D[i][0];
    195 		curr_M = &M[i][0];
    196 
    197 		match_row(&match, i, curr_D, curr_M, last_D, last_M);
    198 
    199 		last_D = curr_D;
    200 		last_M = curr_M;
    201 	}
    202 
    203 	/* backtrace to find the positions of optimal matching */
    204 	if (positions) {
    205 		int match_required = 0;
    206 		for (int i = n - 1, j = m - 1; i >= 0; i--) {
    207 			for (; j >= 0; j--) {
    208 				/*
    209 				 * There may be multiple paths which result in
    210 				 * the optimal weight.
    211 				 *
    212 				 * For simplicity, we will pick the first one
    213 				 * we encounter, the latest in the candidate
    214 				 * string.
    215 				 */
    216 				if (D[i][j] != SCORE_MIN &&
    217 				    (match_required || D[i][j] == M[i][j])) {
    218 					/* If this score was determined using
    219 					 * SCORE_MATCH_CONSECUTIVE, the
    220 					 * previous character MUST be a match
    221 					 */
    222 					match_required =
    223 					    i && j &&
    224 					    M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE;
    225 					positions[i] = j--;
    226 					break;
    227 				}
    228 			}
    229 		}
    230 	}
    231 
    232 	score_t result = M[n - 1][m - 1];
    233 
    234 	free(M);
    235 	free(D);
    236 
    237 	return result;
    238 }