// Copyright 2016 ALRUX Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package levenshtein
|
|
|
|
import (
|
|
"testing"
|
|
)
|
|
|
|
// e holds the expected results for one Test_Metrics table entry. The table
// fills it positionally, so the field order must not change.
type e struct {
	// cost is compared with the first value returned by Calculate and with
	// the result of Distance. lp and ls are compared with the second and
	// third values returned by Calculate (reported as "Prefix" and "Suffix"
	// in failure messages).
	cost, lp, ls int
	// sim and match are compared with the results of Similarity and Match.
	sim, match float64
}
|
|
|
|
func Test_Metrics(t *testing.T) {
|
|
var (
|
|
cases = []struct {
|
|
s1 string
|
|
s2 string
|
|
desc string
|
|
p *Params
|
|
exp e
|
|
}{
|
|
// When the values are the same...
|
|
{"", "", "", nil, e{0, 0, 0, 1, 1}},
|
|
{"1", "1", "", nil, e{0, 1, 0, 1, 1}},
|
|
{"12", "12", "", nil, e{0, 2, 0, 1, 1}},
|
|
{"123", "123", "", nil, e{0, 3, 0, 1, 1}},
|
|
{"1234", "1234", "", nil, e{0, 4, 0, 1, 1}},
|
|
{"12345", "12345", "", nil, e{0, 5, 0, 1, 1}},
|
|
{"password", "password", "", nil, e{0, 8, 0, 1, 1}},
|
|
|
|
// When one of the values is empty...
|
|
{"", "1", "", nil, e{1, 0, 0, 0, 0}},
|
|
{"", "12", "", nil, e{2, 0, 0, 0, 0}},
|
|
{"", "123", "", nil, e{3, 0, 0, 0, 0}},
|
|
{"", "1234", "", nil, e{4, 0, 0, 0, 0}},
|
|
{"", "12345", "", nil, e{5, 0, 0, 0, 0}},
|
|
{"", "password", "", nil, e{8, 0, 0, 0, 0}},
|
|
{"1", "", "", nil, e{1, 0, 0, 0, 0}},
|
|
{"12", "", "", nil, e{2, 0, 0, 0, 0}},
|
|
{"123", "", "", nil, e{3, 0, 0, 0, 0}},
|
|
{"1234", "", "", nil, e{4, 0, 0, 0, 0}},
|
|
{"12345", "", "", nil, e{5, 0, 0, 0, 0}},
|
|
{"password", "", "", nil, e{8, 0, 0, 0, 0}},
|
|
|
|
// When a single character is inserted or removed...
|
|
{"password", "1password", "", nil, e{1, 0, 8, 8.0 / 9, 8.0 / 9}},
|
|
{"password", "p1assword", "", nil, e{1, 1, 7, 8.0 / 9, 8.1 / 9}},
|
|
{"password", "pa1ssword", "", nil, e{1, 2, 6, 8.0 / 9, 8.2 / 9}},
|
|
{"password", "pas1sword", "", nil, e{1, 3, 5, 8.0 / 9, 8.3 / 9}},
|
|
{"password", "pass1word", "", nil, e{1, 4, 4, 8.0 / 9, 8.4 / 9}},
|
|
{"password", "passw1ord", "", nil, e{1, 5, 3, 8.0 / 9, 8.4 / 9}},
|
|
{"password", "passwo1rd", "", nil, e{1, 6, 2, 8.0 / 9, 8.4 / 9}},
|
|
{"password", "passwor1d", "", nil, e{1, 7, 1, 8.0 / 9, 8.4 / 9}},
|
|
{"password", "password1", "", nil, e{1, 8, 0, 8.0 / 9, 8.4 / 9}},
|
|
{"password", "assword", "", nil, e{1, 0, 7, 7.0 / 8, 7.0 / 8}},
|
|
{"password", "pssword", "", nil, e{1, 1, 6, 7.0 / 8, 7.1 / 8}},
|
|
{"password", "pasword", "", nil, e{1, 3, 4, 7.0 / 8, 7.3 / 8}},
|
|
{"password", "passord", "", nil, e{1, 4, 3, 7.0 / 8, 7.4 / 8}},
|
|
{"password", "passwrd", "", nil, e{1, 5, 2, 7.0 / 8, 7.4 / 8}},
|
|
{"password", "passwod", "", nil, e{1, 6, 1, 7.0 / 8, 7.4 / 8}},
|
|
{"password", "passwor", "", nil, e{1, 7, 0, 7.0 / 8, 7.4 / 8}},
|
|
|
|
// When a single character is replaced...
|
|
{"password", "Xassword", "", nil, e{1, 0, 7, 7.0 / 8, 7.0 / 8}},
|
|
{"password", "pXssword", "", nil, e{1, 1, 6, 7.0 / 8, 7.1 / 8}},
|
|
{"password", "paXsword", "", nil, e{1, 2, 5, 7.0 / 8, 7.2 / 8}},
|
|
{"password", "pasXword", "", nil, e{1, 3, 4, 7.0 / 8, 7.3 / 8}},
|
|
{"password", "passXord", "", nil, e{1, 4, 3, 7.0 / 8, 7.4 / 8}},
|
|
{"password", "passwXrd", "", nil, e{1, 5, 2, 7.0 / 8, 7.4 / 8}},
|
|
{"password", "passwoXd", "", nil, e{1, 6, 1, 7.0 / 8, 7.4 / 8}},
|
|
{"password", "passworX", "", nil, e{1, 7, 0, 7.0 / 8, 7.4 / 8}},
|
|
|
|
// If characters are taken off the front and added to the back and all of
|
|
// the characters are unique, then the distance is two times the number of
|
|
// characters shifted, until you get halfway (and then it becomes easier
|
|
// to shift from the other direction).
|
|
{"12345678", "23456781", "", nil, e{2, 0, 0, 6. / 8, 6. / 8}},
|
|
{"12345678", "34567812", "", nil, e{4, 0, 0, 4. / 8, 4. / 8}},
|
|
{"12345678", "45678123", "", nil, e{6, 0, 0, 2. / 8, 2. / 8}},
|
|
{"12345678", "56781234", "", nil, e{8, 0, 0, 0, 0}},
|
|
{"12345678", "67812345", "", nil, e{6, 0, 0, 2. / 8, 2. / 8}},
|
|
{"12345678", "78123456", "", nil, e{4, 0, 0, 4. / 8, 4. / 8}},
|
|
{"12345678", "81234567", "", nil, e{2, 0, 0, 6. / 8, 6. / 8}},
|
|
|
|
// If all the characters are unique and the values are reversed, then the
|
|
// distance is the number of characters for an even number of characters,
|
|
// and one less for an odd number of characters (since the middle
|
|
// character will stay the same).
|
|
{"12", "21", "", nil, e{2, 0, 0, 0, 0}},
|
|
{"123", "321", "", nil, e{2, 0, 0, 1. / 3, 1. / 3}},
|
|
{"1234", "4321", "", nil, e{4, 0, 0, 0, 0}},
|
|
{"12345", "54321", "", nil, e{4, 0, 0, 1. / 5, 1. / 5}},
|
|
{"123456", "654321", "", nil, e{6, 0, 0, 0, 0}},
|
|
{"1234567", "7654321", "", nil, e{6, 0, 0, 1. / 7, 1. / 7}},
|
|
{"12345678", "87654321", "", nil, e{8, 0, 0, 0, 0}},
|
|
|
|
// The results are the same regardless of the string order,
|
|
// with the default parameters...
|
|
{"password", "1234", "", nil, e{8, 0, 0, 0, 0}},
|
|
{"1234", "password", "", nil, e{8, 0, 0, 0, 0}},
|
|
{"password", "pass1", "", nil, e{4, 4, 0, 4. / 8, 4. / 8}},
|
|
{"pass1", "password", "", nil, e{4, 4, 0, 4. / 8, 4. / 8}},
|
|
{"password", "passwor", "", nil, e{1, 7, 0, 7.0 / 8, 7.4 / 8}},
|
|
{"passwor", "password", "", nil, e{1, 7, 0, 7.0 / 8, 7.4 / 8}},
|
|
// ... but not necessarily so with custom costs:
|
|
{"password", "1234", " (D=2)", NewParams().DelCost(2), e{12, 0, 0, 0, 0}},
|
|
{"1234", "password", " (D=2)", NewParams().DelCost(2), e{8, 0, 0, 0, 0}},
|
|
{"password", "pass1", " (D=2)", NewParams().DelCost(2), e{7, 4, 0, 4. / 11, 4. / 11}},
|
|
{"pass1", "password", " (D=2)", NewParams().DelCost(2), e{4, 4, 0, 4. / 8, 4. / 8}},
|
|
{"password", "pass1", " (S=3)", NewParams().SubCost(3), e{5, 4, 0, 8. / 13, 8. / 13}},
|
|
{"password", "passwor", " (D=2)", NewParams().DelCost(2), e{2, 7, 0, 7.0 / 9, 7.8 / 9}},
|
|
{"passwor", "password", " (D=2)", NewParams().DelCost(2), e{1, 7, 0, 7.0 / 8, 7.4 / 8}},
|
|
|
|
// When setting a maxCost (should not affect Similarity() and Match())...
|
|
{"password", "1password2", "(maxCost=6)", NewParams().MaxCost(6), e{2, 0, 0, 8. / 10, 8. / 10}},
|
|
{"password", "pass1234", "(maxCost=1)", NewParams().MaxCost(1), e{2, 4, 0, 4. / 8, 4. / 8}},
|
|
{"pass1word", "passwords1", "(maxCost=2)", NewParams().MaxCost(2), e{3, 4, 0, 7. / 10, 8.2 / 10}},
|
|
{"password", "1passwo", " (D=2,maxCost=1)", NewParams().DelCost(2).MaxCost(1), e{2, 0, 0, 4. / 9, 4. / 9}},
|
|
{"pwd", "password", " (I=0,maxCost=0)", NewParams().InsCost(0).MaxCost(0), e{0, 1, 1, 1, 1}},
|
|
{"passXword", "password", "(maxCost=10)", NewParams().MaxCost(10), e{1, 4, 4, 8. / 9, 8.4 / 9}},
|
|
{"passXord", "password", "(S=3,maxCost=17)", NewParams().SubCost(3).MaxCost(17), e{2, 4, 3, 14. / 16, 14.8 / 16}},
|
|
// ... no change because the Calculate is calculated without getting into the main algorithm:
|
|
{"password", "pass", "(maxCost=1)", NewParams().MaxCost(1), e{4, 4, 0, 4. / 8, 4. / 8}},
|
|
{"password", "1234", " (D=2,maxCost=1)", NewParams().DelCost(2).MaxCost(1), e{8, 0, 0, 0, 0}},
|
|
|
|
// When setting a minScore (should not affect Calculate() and Distance())...
|
|
{"password", "pass1", "(minScore=0.3)", NewParams().MinScore(.3), e{4, 4, 0, 4. / 8, 4. / 8}},
|
|
{"password", "pass1", "(minScore=0.6)", NewParams().MinScore(.6), e{4, 4, 0, 0, 0}},
|
|
{"password", "pass1wor", "(minScore=0.9)", NewParams().MinScore(.9), e{2, 4, 0, 0, 0}},
|
|
{"password", "password", "(minScore=1.1)", NewParams().MinScore(1.1), e{0, 8, 0, 0, 0}},
|
|
|
|
// The rest of these are miscellaneous examples. They will
|
|
// be illustrated using the following key:
|
|
// = (the characters are equal)
|
|
// + (the character is inserted)
|
|
// - (the character is removed)
|
|
// # (the character is replaced)
|
|
|
|
// Mississippi
|
|
// ippississiM
|
|
// -=##====##=+ --> 6
|
|
{"Mississippi", "ippississiM", "", nil, e{6, 0, 0, 5. / 11, 5. / 11}},
|
|
|
|
// eieio
|
|
// oieie
|
|
// #===# --> 2
|
|
{"eieio", "oieie", "", nil, e{2, 0, 0, 3. / 5, 3. / 5}},
|
|
|
|
// brad+angelina
|
|
// bra ngelina
|
|
// ===+++======= --> 3
|
|
{"brad+angelina", "brangelina", "", nil, e{3, 3, 7, 10. / 13, 10.9 / 13}},
|
|
|
|
// test international chars
|
|
// naive
|
|
// naïve
|
|
// ==#== --> 1
|
|
{"naive", "naïve", "", nil, e{1, 2, 2, 4. / 5, 4.2 / 5}},
|
|
}
|
|
)
|
|
for _, c := range cases {
|
|
par := c.p
|
|
if par == nil {
|
|
par = defaultParams
|
|
}
|
|
cost, lp, ls := Calculate([]rune(c.s1), []rune(c.s2), par.maxCost, par.insCost, par.subCost, par.delCost)
|
|
if cost != c.exp.cost {
|
|
t.Errorf("Cost: %q -> %q%s: got %d, want %d", c.s1, c.s2, c.desc, cost, c.exp.cost)
|
|
}
|
|
if lp != c.exp.lp {
|
|
t.Errorf("Prefix: %q -> %q%s: got %d, want %d", c.s1, c.s2, c.desc, lp, c.exp.lp)
|
|
}
|
|
if ls != c.exp.ls {
|
|
t.Errorf("Suffix: %q -> %q%s: got %d, want %d", c.s1, c.s2, c.desc, ls, c.exp.ls)
|
|
}
|
|
|
|
dist := Distance(c.s1, c.s2, c.p)
|
|
if dist != c.exp.cost {
|
|
t.Errorf("Distance: %q -> %q%s: got %d, want %d", c.s1, c.s2, c.desc, dist, c.exp.cost)
|
|
}
|
|
|
|
sim := Similarity(c.s1, c.s2, c.p)
|
|
off := sim - c.exp.sim
|
|
if off < 0 {
|
|
off = -off
|
|
}
|
|
if off > 1e-15 {
|
|
t.Errorf("Similarity: %q -> %q%s: got %f, want %f (off %g)", c.s1, c.s2, c.desc, sim, c.exp.sim, off)
|
|
}
|
|
|
|
match := Match(c.s1, c.s2, c.p)
|
|
off = match - c.exp.match
|
|
if off < 0 {
|
|
off = -off
|
|
}
|
|
if off > 1e-15 {
|
|
t.Errorf("Match: %q -> %q%s: got %f, want %f (off %g)", c.s1, c.s2, c.desc, match, c.exp.match, off)
|
|
}
|
|
}
|
|
}
|