Some work
This commit is contained in:
99
helpers/levenshtein.go
Normal file
99
helpers/levenshtein.go
Normal file
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
Copyright © Brian Buller <brian@bullercodeworks.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
package helpers
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
const levMinLngthThreshold = 32
|
||||
|
||||
func ComputeLevDistance(a, b string) int {
|
||||
if len(a) == 0 {
|
||||
return utf8.RuneCountInString(b)
|
||||
}
|
||||
if len(b) == 0 {
|
||||
return utf8.RuneCountInString(a)
|
||||
}
|
||||
|
||||
if a == b {
|
||||
return 0
|
||||
}
|
||||
s1 := []rune(a)
|
||||
s2 := []rune(b)
|
||||
|
||||
if len(s1) > len(s2) {
|
||||
s1, s2 = s2, s1
|
||||
}
|
||||
|
||||
s1, s2 = trimLongestCommonSuffix(s1, s2)
|
||||
s1, s2 = trimLongestCommonPrefix(s1, s2)
|
||||
|
||||
lenS1 := len(s1)
|
||||
lenS2 := len(s2)
|
||||
|
||||
// Init the row
|
||||
var x []uint16
|
||||
if lenS1+1 > levMinLngthThreshold {
|
||||
x = make([]uint16, lenS1+1)
|
||||
} else {
|
||||
x = make([]uint16, levMinLngthThreshold)
|
||||
x = x[:lenS1+1]
|
||||
}
|
||||
|
||||
for i := 1; i < len(x); i++ {
|
||||
x[i] = uint16(i)
|
||||
}
|
||||
|
||||
_ = x[lenS1]
|
||||
y := x[1:]
|
||||
y = y[:lenS1]
|
||||
for i := 0; i < lenS2; i++ {
|
||||
prev := uint16(i + 1)
|
||||
for j := 0; j < lenS1; j++ {
|
||||
current := x[j]
|
||||
if s2[i] != s1[j] {
|
||||
current = min(x[j], prev, y[j]) + 1
|
||||
}
|
||||
x[j] = prev
|
||||
prev = current
|
||||
}
|
||||
x[lenS1] = prev
|
||||
}
|
||||
return int(x[lenS1])
|
||||
}
|
||||
|
||||
func trimLongestCommonSuffix(a, b []rune) ([]rune, []rune) {
|
||||
m := min(len(a), len(b))
|
||||
a2 := a[len(a)-m:]
|
||||
b2 := b[len(b)-m:]
|
||||
i := len(a2)
|
||||
b2 = b2[:i]
|
||||
for ; i > 0 && a2[i-1] == b2[i-1]; i-- {
|
||||
}
|
||||
return a[:len(a)-len(a2)+i], b[:len(b)-len(b2)+i]
|
||||
}
|
||||
|
||||
func trimLongestCommonPrefix(a, b []rune) ([]rune, []rune) {
|
||||
var i int
|
||||
for m := min(len(a), len(b)); i < m && a[i] == b[i]; i++ {
|
||||
}
|
||||
return a[i:], b[i:]
|
||||
}
|
||||
Reference in New Issue
Block a user