(************************************************************************* ** String Similarity (Fast Method) ** ** For: 1998 UMCP Programming Contest ** Author: David Mount ** Date: March 3, 1998 ** ** This program inputs two character strings, and outputs their ** longest matching subsequence. X and Y holds the two strings, ** and Z holds the longest matching subsequence (LMS), and lx, ly, ** and lz are their respective lengths. ** ** This algorithm uses the technique of dynamic to solve the ** problem. We construct a 2-dimensional array L, where L[i,j] ** contains the length of the longest matching subsequence of ** X[1..i] and Y[1..j]. To build this array, we first ** initialize L[0,*] = L[*,0] = 0. Then we use the following ** rule for setting L[i,j]: ** ** if X[i] = Y[j] then we may assume that this shared character ** will be the last character of the LMS of X[1..i] and ** Y[1..j]. Thus, the length of the LMS is: L[i-1,j-1]+1 ** if X[i] <> Y[j] then there are two possibilities. Either ** X[i] is not the last character of the LMS, in which ** case the LMS will be the LMS of X[1..i-1] and Y[1..j] ** (whose length is L[i-1,j] -or- ** ** Y[j] is not the last character of the LMS, in which ** case the LMS will be the LMS of X[1..i] and Y[1..j-1] ** (whose length is L[i,j-1] -or- ** ** Neither X[i] or Y[j] is the last character of the ** LMS. (We do not need to do anything in this case, ** since it is handled by either of the other two cases.) ** ** Since we want to maximize the length of the LMS, we ** take the larger of these two possibilities: ** ** LMS[i,j] = max(L[i-1,j], L[i,j-1]). ** ** After computing this array, L[lx,ly] is the length of the LMS. ** To get the LMS, we record ``indicators'' with each element of ** L. In particular, Ind[i,j] has the following values: ** ** takeBoth means taking the character X[i]=Y[j] for the LMS ** (when X[i]=Y[j]), ** delX means that X[i] <> Y[j] and L[i-1,j] <= L[i,j-1] ** so we should not take X[i], ** delY means that X[i] <> Y[j] and L[i-1,j] > L[i,j-1] ** so we should not take Y[j], ** done this is stored in Ind[0,0] to terminate the loop. ** ** The final LMS is constructed by working backwards through the ** table, determining which characters to take and which to skip. ** **************************************************************************) program lcs(input, output); const MAXLEN = 50; type Sequence = array [1..MAXLEN] of char; (* character sequence *) Indicator = (done, delX, delY, takeBoth); (* indicator value *) var X: Sequence; (* input sequences *) Y: Sequence; Z: Sequence; (* output sequence *) L: array [0..MAXLEN,0..MAXLEN] of integer; (* array of sequence lengths *) Ind: array [0..MAXLEN,0..MAXLEN] of Indicator;(* array of indicators *) lx, ly, lz: integer; (* lengths of X, Y, Z *) i, j, k: integer; begin lx := 0; write('First string: '); while (not eoln(input)) do begin (* input X *) lx := lx+1; read(X[lx]); write(X[lx]); end; readln; writeln; ly := 0; write('Second string: '); while (not eoln(input)) do begin (* input Y *) ly := ly+1; read(Y[ly]); write(Y[ly]); end; readln; writeln; for i := 0 to lx do begin (* init 0th row/col of L *) L[i, 0] := 0; Ind[i, 0] := delX; end; for j := 0 to ly do begin L[0, j] := 0; Ind[0, j] := delY; end; Ind[0, 0] := done; for i := 1 to lx do begin (* build tables L and I *) for j := 1 to ly do begin if (X[i] = Y[j]) then begin (* characters match *) L[i,j] := L[i-1,j-1] + 1; Ind[i,j] := takeBoth; end else begin (* charcters do not match *) if (L[i-1,j] > L[i,j-1]) then begin L[i,j] := L[i-1,j]; (* better to delete X[i] *) Ind[i,j] := delX; end else begin (* better to delete Y[j] *) L[i,j] := L[i,j-1]; Ind[i,j] := delY; end; end; end; end; (*--------------(print table for debugging)------------------------------- write(' '); for j := 1 to ly do write(Y[j]:4); writeln; for i := 0 to lx do begin if (i <> 0) then write(X[i], ' ') else write(' '); for j := 0 to ly do begin write(L[i,j]:3); if (Ind[i,j] = takeBoth) then write('`') else if (Ind[i,j] = delX) then write('^') else if (Ind[i,j] = delY) then write('<') else write('*'); end; writeln; end; --------------------------------------------------------------------------*) lz := L[lx, ly]; (* this is the LMS length *) k := lz; i := lx; j := ly; while (Ind[i,j] <> done) do begin (* copy LMS to Z *) if (Ind[i,j] = takeBoth) then begin (* take this char for LMS *) Z[k] := X[i]; k := k-1; i := i-1; j := j-1; end else if (Ind[i,j] = delX) then i := i-1 (* X[i] not in LMS *) else if (Ind[i,j] = delY) then j := j-1; (* Y[j] not in LMS *) end; writeln('Similarity: ', lz:2); (* output length of LMS *) write('LMS: '); for i := 1 to lz do begin (* output LMS *) write(Z[i]); end; writeln; end.