67 lines
1.6 KiB
Go
67 lines
1.6 KiB
Go
|
package textseg
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"reflect"
|
||
|
"strings"
|
||
|
"testing"
|
||
|
"unicode/utf8"
|
||
|
)
|
||
|
|
||
|
func TestScanGraphemeClusters(t *testing.T) {
|
||
|
tests := unicodeGraphemeTests
|
||
|
|
||
|
for i, test := range tests {
|
||
|
t.Run(fmt.Sprintf("%03d-%x", i, test.input), func(t *testing.T) {
|
||
|
got, err := AllTokens(test.input, ScanGraphemeClusters)
|
||
|
|
||
|
if err != nil {
|
||
|
t.Fatalf("unexpected error: %s", err)
|
||
|
}
|
||
|
|
||
|
if !reflect.DeepEqual(got, test.output) {
|
||
|
// Also get the rune values resulting from decoding utf8,
|
||
|
// since they are generally easier to look up to figure out
|
||
|
// what's failing.
|
||
|
runes := make([]string, 0, len(test.input))
|
||
|
seqs := make([][]byte, 0, len(test.input))
|
||
|
categories := make([]string, 0, len(test.input))
|
||
|
buf := test.input
|
||
|
for len(buf) > 0 {
|
||
|
r, size := utf8.DecodeRune(buf)
|
||
|
runes = append(runes, fmt.Sprintf("0x%04x", r))
|
||
|
seqs = append(seqs, buf[:size])
|
||
|
categories = append(categories, _GraphemeRuneType(r).String())
|
||
|
buf = buf[size:]
|
||
|
}
|
||
|
|
||
|
t.Errorf(
|
||
|
"wrong result\ninput: %s\nutf8s: %s\nrunes: %s\ncats: %s\ngot: %s\nwant: %s",
|
||
|
formatBytes(test.input),
|
||
|
formatByteRanges(seqs),
|
||
|
strings.Join(runes, " "),
|
||
|
strings.Join(categories, " "),
|
||
|
formatByteRanges(got),
|
||
|
formatByteRanges(test.output),
|
||
|
)
|
||
|
}
|
||
|
})
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func formatBytes(buf []byte) string {
|
||
|
strs := make([]string, len(buf))
|
||
|
for i, b := range buf {
|
||
|
strs[i] = fmt.Sprintf("0x%02x", b)
|
||
|
}
|
||
|
return strings.Join(strs, " ")
|
||
|
}
|
||
|
|
||
|
func formatByteRanges(bufs [][]byte) string {
|
||
|
strs := make([]string, len(bufs))
|
||
|
for i, b := range bufs {
|
||
|
strs[i] = formatBytes(b)
|
||
|
}
|
||
|
return strings.Join(strs, " | ")
|
||
|
}
|