Levenshtein Distance Calculation using Dynamic Programming and ForkJoin Framework

Here are 3 different Java Codes for calculating the Levenshtein Distance between 2 strings which is described here – http://en.wikipedia.org/wiki/Levenshtein_distance

Code 1 – As is described in the above WIKI – follows the traditional Matrix based way of calculating the distance

Code 2 – Using Recursive Dynamic Programming – starting to compare each character from the beginning of the String

Code 3 – Same as Code 3 but uses – Java 7 – Fork and Join Pool to use threads to do the above calculation

Enjoy

Code 1

———————————————————————————————————————–

import java.util.*;

public class LevenshteinDistancePlain
{
private final String str1;
private final String str2;
private final int str1Len;
private final int str2Len;

int[][] matrix;
LevenshteinDistancePlain(String str1, String str2)
{
this.str1 = str1;
this.str2 = str2;
this.str1Len = str1.length();
this.str2Len = str2.length();

matrix = new int[str1Len][];
for(int i=0;i<str1Len;i++)
matrix[i] = new int[str2Len];
}

protected Integer compute()
{
for(int i=0;i<str1Len;i++)
matrix[i][0] = i;

for(int i=0;i<str2Len;i++)
matrix[0][i] = i;

for(int i=1;i<str1Len;i++)
{
for(int j=1;j<str2Len;j++)
{
if ( str1.charAt(i) == str2.charAt(j) )
matrix[i][j] = matrix[i-1][j-1];
else
{
int one = matrix[i-1][j] + 1;
int two = matrix[i][j-1] + 1;
int three = matrix[i-1][j-1] + 1;

matrix[i][j] = Math.min(one, Math.min(two, three));
}
}
}
return matrix[str1Len-1][str2Len-1];
}

public void printMatrix()
{
System.out.println("Distance Matrix");

for(int i=0;i<str1Len;i++)
{
for(int j=0;j= 2 )
{
LevenshteinDistancePlain l = new LevenshteinDistancePlain(args[0], args[1]);
int result = l.compute();
System.out.println("Lev Distance Between : " + args[0] + ", " + args[1] + "= " + result);
l.printMatrix();
}
else
{
LevenshteinDistancePlain l = new LevenshteinDistancePlain(str1, str2);
int result = l.compute();
System.out.println("Lev Distance Between : " + str1 + ", " + str2 + "= " + result);
l.printMatrix();
}
}
}

———————————————————————————————————————–

Code 2
———————————————————————————————————————–

import java.util.*;

public class LevenshteinDistanceUsingDynamicProgramming
{

LevenshteinDistanceUsingDynamicProgramming(String str1, String str2)
{
System.out.println("Lev Distance between : " + str1 + ", " + str2 + " = " + compute(str1, str2));
}

protected int compute(String str1, String str2)
{
// System.out.println(str1 + ", " + str2);
if ( str1.length() == 0 )
return str2.length();
if ( str2.length() == 0 )
return str1.length();

String newStr1 = str1.substring(1);
String newStr2 = str2.substring(1);
if ( str1.charAt(0) == str2.charAt(0) )
return compute(newStr1, newStr2);
else
{
// get the String after the 1st Char of str1 and str2
int one = 1 + compute(newStr1, newStr2);
int two = 1 + compute(newStr1, str2);
int three = 1 + compute(str1, newStr2);
return Math.min(one, Math.min(two, three));
}
}

public static void main(String[] args)
{
String str1 = "BostonMassachusett";
String str2 = "BostonMasachusets";
if ( args.length >= 2 )
new LevenshteinDistanceUsingDynamicProgramming(args[0], args[1]);
else
new LevenshteinDistanceUsingDynamicProgramming(str1, str2);
}
}

———————————————————————————————————————–

Code 3
———————————————————————————————————————–

import java.util.concurrent.*;
import java.util.*;

public class LevenshteinDistanceUsingDynamicProgrammingForkAndJoin
{

static class LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask extends RecursiveTask
{

String str1, str2;

LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask(String s1, String s2)
{
str1 = s1;
str2 = s2;
}

@Override
protected Integer compute()
{
if ( str1.length() == 0 )
return str2.length();
if ( str2.length() == 0 )
return str1.length();

String newStr1 = str1.substring(1);
String newStr2 = str2.substring(1);
if ( str1.charAt(0) == str2.charAt(0) )
{
LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask l = new LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask(newStr1, newStr2);
return l.compute();
}
else
{
// get the String after the 1st Char of str1 and str2
LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask t1 = new LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask(newStr1, newStr2);
LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask t2 = new LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask(newStr1, str2);
LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask t3 = new LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask(str1, newStr2);
invokeAll(t1, t2, t3);
return Math.min(1+t1.join(), Math.min(1+t2.join(), 1+t3.join()));
}
}
}

public static void main(String[] args)
{
String str1 = "BostonMassachusett";
String str2 = "BostonMasachusets";
str1 = "SumitPal";
str2 = "SummitPaul";
ForkJoinPool pool = new ForkJoinPool(20);
if ( args.length >= 2 )
{
LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask l = new LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask(args[0], args[1]);
Integer result = pool.invoke(l);
System.out.println("Lev Distance between : " + args[0] + ", " + args[1] + " = " + result);
}
else
{
LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask l = new LevenshteinDistanceUsingDynamicProgrammingForkAndJoinTask(str1, str2);
Integer result = pool.invoke(l);
System.out.println("Lev Distance between : " + str1 + ", " + str2 + " = " + result);
}
}
}

———————————————————————————————————————–

Advertisements

3 responses to “Levenshtein Distance Calculation using Dynamic Programming and ForkJoin Framework

  1. good day Sir. i stumbled upon your post while searching java implementations for solving the Levenshtein distance. I have tried the second variation and it works great. I also tried the third code with Java 7. However, it has an error on this line:

    return Math.min(1+t1.join(), Math.min(1+t2.join(), 1+t3.join()));

    the return of the ‘.join’ method is an Object type and thus cannot be added to the integers. I really wanted to try your code because I believe that it can speed up the computation of the distances for very long strings. I hope you could help me with this Sir.

    thank you!

  2. do they also use levenshtein insize lucene?

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s