Blogs   

Levenshtein Distance (Edit Distance)

Published on 11 Mar 2025  ·  Written by Aditya Mayukh Som

Problem Link

Note that in space optimization solution, only assigning prev = curr assigns only reference in java, hence both prev and curr points to the same memory location. Thus updating one will update the other and that leads to wrong answer, so always use a third temporary array to swap prev and curr or assign a new array to curr using curr = new int[m]; after assigning curr to prev.

import java.util.Arrays;

public class Solution
{
    private static int minimum(final int num0, final int ...nums)
    {
        int mini = num0;
        int len = nums.length;

        for(int i = 0; i < len; ++i)
        {
            mini = (mini > nums[i]) ? nums[i] : mini;
        }

        return mini;
    }

    private static void printGrid(final int[][] grid)
    {
        for(int[] row: grid)
        {
            System.out.println(Arrays.toString(row));
        }
    }

    private static void printGrid(final boolean[][] grid)
    {
        for(boolean[] row: grid)
        {
            System.out.println(Arrays.toString(row));
        }
    }

    /*
    Returns a new matrix in which the locations where values are different contains
    true, and the locations where values are same, contains false. Supports ragged arrays.
    */
    private static boolean[][] gridDiff(final int[][] g1, final int[][] g2) throws Exception
    {
        int n1 = g1.length;
        int n2 = g2.length;

        if(n1 != n2)
        {
            throw new Exception("grid number of rows must be same to compute diff");
        }

        // If n1 is equal to zero, it implies that n2 is also zero
        // hence the diff will be a boolean empty array
        if(n1 == 0)
        {
            return new boolean[][]{};
        }

        // default value of boolean is false, and we need to set true to only those
        // positions where the values in the grid are actually different.
        boolean[][] diff = new boolean[n1][];

        for(int i = 0, m1, m2; i < n1; ++i)
        {
            m1 = g1[i].length;
            m2 = g2[i].length;

            if(m1 != m2)
            {
                throw new Exception("grid column sizes must be same to compare diff");
            }

            diff[i] = new boolean[m1];

            for(int j = 0; j < m1; ++j)
            {
                if(g1[i][j] != g2[i][j])
                {
                    diff[i][j] = true;
                }
            }
        }

        return diff;
    }

    private static int levenshteinMemHelper(String s1, String s2, int i, int j, int[][] dp)
    {
        // There is a possibility that it can be called with either
        // i and j lesses than zero, which will throw an exception,
        // but if we think about the conditions, each time either i
        // decreases by 1 or j decreases by 1 or both decreases by 1
        // so if for cases where either of them or both of them are
        // zero is handled, then this function won't ever get negative
        // values for either i or j.

        char c1 = s1.charAt(i);
        char c2 = s2.charAt(j);

        if(i == 0 && j == 0)
        {
            return dp[i][j] = (c1 == c2) ? 0 : 1;
        }

        if(i == 0)
        {
            boolean exists = s2.substring(j + 1).indexOf(c1) >= 0;
            return dp[i][j] = exists ? j : j + 1;
        }

        if(j == 0)
        {
            boolean exists = s1.substring(i + 1).indexOf(c2) >= 0;
            return dp[i][j] = exists ? i : i + 1;
        }

        if(dp[i][j] != -1)
        {
            return dp[i][j];
        }

        int dist, insert, delete, replace;

        if(c1 == c2)
        {
            dist = 0 + levenshteinMemHelper(s1, s2, i - 1, j - 1, dp);
        }
        else
        {
            insert = 1 + levenshteinMemHelper(s1, s2, i, j - 1, dp);
            delete = 1 + levenshteinMemHelper(s1, s2, i - 1, j, dp);
            replace = 1 + levenshteinMemHelper(s1, s2, i - 1, j - 1, dp);
            dist = minimum(insert, delete, replace);
        }

        dp[i][j] = dist;

        return dist;
    }

    private static int levenshteinMem(String s1, String s2)
    {
        int n = s1.length();
        int m = s2.length();

        int[][] dp = new int[n][m];

        for(int[] row: dp)
        {
            Arrays.fill(row, -1);
        }

        return levenshteinMemHelper(s1, s2, n - 1, m - 1, dp);
    }

    public static int levenshteinTab(String s1, String s2)
    {
        int n = s1.length();
        int m = s2.length();

        if(n == 0)
        {
            return m;
        }

        if(m == 0)
        {
            return n;
        }

        int[][] dp = new int[n][m];

        // whether string with length one, i.e. the first characters match
        final boolean unitMatch = s1.charAt(0) == s2.charAt(0);

        // if both the string are of length one, then if they are equal, no ops
        // needed, if they are not equal, one ops needed to convert one to other.
        dp[0][0] = unitMatch ? 0 : 1;

        // we need to remember that if one string is of unit length, then if the
        // character in that unit length string appears anywhere inside the other
        // string (whose length is j + 1), only j characters needs to be added,
        // if the character itself does not exist, then we need one operation to
        // replace the character with one which exists in the large string, hence
        // effectively matching one character, so only j more characters needs to
        // be inserted. So if the string does not exists, (j + 1) ops are required.
        //
        // ex: ('l' represents length of the string)
        // s1 = 'ADI' -> l1 = 3 -> i = 2
        // s2 = 'D'   -> l2 = 1 -> j = 0
        // here only 'A' and 'I' are needed to be inserted, so 2 ops needed.
        //
        // s1 = 'ADI' -> l1 = 3 -> i = 2
        // s2 = 'P'   -> l2 = 1 -> j = 0
        // here we need to first change 'P' to one of the chars present in s1
        // let's say to 'I', so that needs one op, and then insert 'A' and 'D'
        // hence in total 3 ops are required if s2 (length 1) is not present in s1.

        // loop when i is equal to zero
        boolean c1Exists = unitMatch;
        for(int j = 1; j < m; ++j)
        {
            c1Exists |= s1.charAt(0) == s2.charAt(j);
            dp[0][j] = c1Exists ? j : j + 1;
        }

        // loop when j is equal to zero,
        boolean c2Exists = unitMatch;
        for(int i = 1; i < n; ++i)
        {
            c2Exists |= s1.charAt(i) == s2.charAt(0);
            dp[i][0] = c2Exists ? i : i + 1;
        }


        int ins, del, rep;

        for(int i = 1; i < n; ++i)
        {
            for(int j = 1; j < m; ++j)
            {
                if(s1.charAt(i) == s2.charAt(j))
                {
                    dp[i][j] = 0 + dp[i - 1][j - 1];
                }
                else
                {
                    // if the strings do not match, then one change must be
                    // done, that can be of either insert, delete or replace
                    // type, so one is added to each of the choice.
                    ins = 1 + dp[i][j - 1];
                    del = 1 + dp[i - 1][j];
                    rep = 1 + dp[i - 1][j - 1];
                    dp[i][j] = minimum(ins, del, rep);
                }
            }
        }

        int tab = dp[n - 1][m - 1];
        return tab;
    }

    public static int levenshteinOpt(String s1, String s2)
    {
        int n, m, i, j, ins, del, rep;
        boolean c1Exists, c2Exists;
        int[] prev, curr, temp;

        n = s1.length();
        m = s2.length();

        if(n == 0) return m;
        if(m == 0) return n;

        prev = new int[m];
        curr = new int[m];
        temp = new int[m];

        // whether string with length one, i.e. the first characters match
        c1Exists = c2Exists = s1.charAt(0) == s2.charAt(0);

        for(j = 0; j < m; ++j)
        {
            c1Exists |= s1.charAt(0) == s2.charAt(j);
            prev[j] = c1Exists ? j : j + 1;
        }


        for(i = 1; i < n; ++i)
        {
            c2Exists |= s1.charAt(i) == s2.charAt(0);
            curr[0] = c2Exists ? i : i + 1;

            for(j = 1; j < m; ++j)
            {
                if(s1.charAt(i) == s2.charAt(j))
                {
                    curr[j] = 0 + prev[j - 1];
                }
                else
                {
                    // if the strings do not match, then one change must be
                    // done, that can be of either insert, delete or replace
                    // type, so one is added to each of the choice.
                    ins = 1 + curr[j - 1];
                    del = 1 + prev[j];
                    rep = 1 + prev[j - 1];
                    curr[j] = minimum(ins, del, rep);
                }
            }

            prev = curr;
            curr = temp;
            temp = prev;
        }

        return prev[m - 1];
    }

    public static int editDistance(String s1, String s2)
    {
        return levenshteinOpt(s1, s2);
    }
}
Written by Aditya Mayukh Som.