3

[Golang] Levenshtein Distance Recursive Implementation

 2 years ago
source link: http://siongui.github.io/2017/04/04/go-levenshtein-distance-recursive-implementation/
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.

[Golang] Levenshtein Distance Recursive Implementation

Updated: April 05, 2017

I read post of the BK-tree referred by HN [1], and in the post, an interesting algorithm called Levenshtein distance [2] is used. Levenshtein distance (LD) is a measure of the similarity between two strings, the minimum number of single-character edits (insertions, deletions or substitutions) required to change one string into the other.

The recursive implementation of LD in Wikipedia is not very difficult, so I port the code from C to Go as an exercise.

If you want a more efficient algorithm (Wagner-Fischer algorithm), see [4].

Run Code on Go Playground

util.go | repository | view raw

package ld

func StringToRuneSlice(s string) []rune {
	var r []rune
	for _, runeValue := range s {
		r = append(r, runeValue)
	}
	return r
}

func minimum(a, b, c int) int {
	min := a
	if min > b {
		min = b
	}
	if min > c {
		min = c
	}
	return min
}

recursive.go | repository | view raw

package ld

// lenS and lenT are the number of characters in string s and t respectively
func Recursive(s []rune, lenS int, t []rune, lenT int) int {
	var cost int

	// base case: empty strings
	if lenS == 0 {
		return lenT
	}
	if lenT == 0 {
		return lenS
	}

	// test if last characters of the strings match
	if s[lenS-1] == t[lenT-1] {
		cost = 0
	} else {
		cost = 1
	}

	// return minimum of delete char from s, delete char from t, and delete char from both
	return minimum(
		Recursive(s, lenS-1, t, lenT)+1,
		Recursive(s, lenS, t, lenT-1)+1,
		Recursive(s, lenS-1, t, lenT-1)+cost,
	)
}

func LevenshteinDistance(s, t string) int {
	srs := StringToRuneSlice(s)
	trs := StringToRuneSlice(t)
	return Recursive(srs, len(srs), trs, len(trs))
}

ld_test.go | repository | view raw

package ld

import (
	"testing"
)

func TestLevenshteinDistance(t *testing.T) {
	if LevenshteinDistance("soccer", "otter") != 3 {
		t.Error(`"soccer", "otter"`)
	}

	if LevenshteinDistance("你好他他是誰", "好我我是誰") != 3 {
		t.Error(`"你好他他是誰", "好我我是誰"`)
	}

	if LevenshteinDistance("kitten", "sitten") != 1 {
		t.Error(`"kitten", "sitten"`)
	}

	if LevenshteinDistance("sitten", "sittin") != 1 {
		t.Error(`"sitten", "sittin"`)
	}

	if LevenshteinDistance("sittin", "sitting") != 1 {
		t.Error(`"sittin", "sitting"`)
	}

	if LevenshteinDistance("sort", "some") != 2 {
		t.Error(`"sort", "some"`)
	}

	if LevenshteinDistance("sort", "same") != 3 {
		t.Error(`"sort", "same"`)
	}

	if LevenshteinDistance("sort", "soft") != 1 {
		t.Error(`"sort", "soft"`)
	}
}

Tested on:


References:

[3][Golang] Iterate Over UTF-8 Strings (non-ASCII strings)


About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK