#! /usr/bin/python import sys import cgi def normalise( str ): return str.replace( " ", "" ).lower() class Difference: def __init__( self, a, b ): self.a = a self.b = b self.memo = {} def distance( self, i = None, j = None ): return self.beta( i, j )[0] def similarity( self, i = None, j = None ): l = max( (len( self.a ), len( self.b )) ) return 1.0 - (float( self.distance( i, j ) ) / float( l )) def trace( self, i = None, j = None ): "Gets a trace of the difference; |.<> etc." return self.beta( i, j )[1] def encoding( self, i = None, j = None ): "Encodes the difference as a list of changes in mutation style (X23Y etc)" trace = self.trace( i, j ) codes = [] n = 0 m = 0 for ch in trace: if (ch == "|"): n = n + 1 m = m + 1 elif (ch == ">"): code = self.a[n] + str( n ) codes.append( code ) n = n + 1 elif (ch == "<"): code = str( n ) + self.b[m] codes.append( code ) m = m + 1 elif (ch == "."): code = self.a[n] + str( n ) + self.b[m] codes.append( code ) n = n + 1 m = m + 1 else: raise ValueError, ("invalid trace symbol: " + ch) m = 0 return codes def beta( self, i = None, j = None ): "I don't know a good name for this function." if (i == None): i = len( self.a ) - 1 if (j == None): j = len( self.b ) - 1 if ((i < 0) or (j < 0)): return (0, "") p = (i, j) if (self.memo.has_key( p )): return self.memo[p] # refactor up = self.beta( (i - 1), j ) up2 = ((up[0] + 1), (up[1] + ">")) left = self.beta( i, (j - 1) ) left2 = ((left[0] + 1), (left[1] + "<")) diag = self.beta( (i - 1), (j - 1) ) if (self.a[i] == self.b[j]): diag2 = ((diag[0]), (diag[1] + "|")) else: diag2 = ((diag[0] + 1), (diag[1] + ".")) d = min( (up2, left2, diag2) ) # /refactor self.memo[p] = d return d