82 lines
2.7 KiB
Go
82 lines
2.7 KiB
Go
// Copyright 2014 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package ucd_test
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
|
|
"golang.org/x/text/internal/ucd"
|
|
)
|
|
|
|
func Example() {
|
|
// Read rune-by-rune from UnicodeData.
|
|
var count int
|
|
p := ucd.New(strings.NewReader(unicodeData))
|
|
for p.Next() {
|
|
count++
|
|
if lower := p.Runes(ucd.SimpleLowercaseMapping); lower != nil {
|
|
fmt.Printf("lower(%U) -> %U\n", p.Rune(0), lower[0])
|
|
}
|
|
}
|
|
if err := p.Err(); err != nil {
|
|
fmt.Println(err)
|
|
}
|
|
fmt.Println("Number of runes visited:", count)
|
|
|
|
// Read raw ranges from Scripts.
|
|
p = ucd.New(strings.NewReader(scripts), ucd.KeepRanges)
|
|
for p.Next() {
|
|
start, end := p.Range(0)
|
|
fmt.Printf("%04X..%04X: %s\n", start, end, p.String(1))
|
|
}
|
|
if err := p.Err(); err != nil {
|
|
fmt.Println(err)
|
|
}
|
|
|
|
// Output:
|
|
// lower(U+00C0) -> U+00E0
|
|
// lower(U+00C1) -> U+00E1
|
|
// lower(U+00C2) -> U+00E2
|
|
// lower(U+00C3) -> U+00E3
|
|
// lower(U+00C4) -> U+00E4
|
|
// Number of runes visited: 6594
|
|
// 0000..001F: Common
|
|
// 0020..0020: Common
|
|
// 0021..0023: Common
|
|
// 0024..0024: Common
|
|
}
|
|
|
|
// Excerpt from UnicodeData.txt
|
|
const unicodeData = `
|
|
00B9;SUPERSCRIPT ONE;No;0;EN;<super> 0031;;1;1;N;SUPERSCRIPT DIGIT ONE;;;;
|
|
00BA;MASCULINE ORDINAL INDICATOR;Lo;0;L;<super> 006F;;;;N;;;;;
|
|
00BB;RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK;Pf;0;ON;;;;;Y;RIGHT POINTING GUILLEMET;;;;
|
|
00BC;VULGAR FRACTION ONE QUARTER;No;0;ON;<fraction> 0031 2044 0034;;;1/4;N;FRACTION ONE QUARTER;;;;
|
|
00BD;VULGAR FRACTION ONE HALF;No;0;ON;<fraction> 0031 2044 0032;;;1/2;N;FRACTION ONE HALF;;;;
|
|
00BE;VULGAR FRACTION THREE QUARTERS;No;0;ON;<fraction> 0033 2044 0034;;;3/4;N;FRACTION THREE QUARTERS;;;;
|
|
00BF;INVERTED QUESTION MARK;Po;0;ON;;;;;N;;;;;
|
|
00C0;LATIN CAPITAL LETTER A WITH GRAVE;Lu;0;L;0041 0300;;;;N;LATIN CAPITAL LETTER A GRAVE;;;00E0;
|
|
00C1;LATIN CAPITAL LETTER A WITH ACUTE;Lu;0;L;0041 0301;;;;N;LATIN CAPITAL LETTER A ACUTE;;;00E1;
|
|
00C2;LATIN CAPITAL LETTER A WITH CIRCUMFLEX;Lu;0;L;0041 0302;;;;N;LATIN CAPITAL LETTER A CIRCUMFLEX;;;00E2;
|
|
00C3;LATIN CAPITAL LETTER A WITH TILDE;Lu;0;L;0041 0303;;;;N;LATIN CAPITAL LETTER A TILDE;;;00E3;
|
|
00C4;LATIN CAPITAL LETTER A WITH DIAERESIS;Lu;0;L;0041 0308;;;;N;LATIN CAPITAL LETTER A DIAERESIS;;;00E4;
|
|
|
|
# A legacy rune range.
|
|
3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
|
|
4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
|
|
`
|
|
|
|
// Excerpt from Scripts.txt
|
|
const scripts = `
|
|
# Property: Script
|
|
# ================================================
|
|
|
|
0000..001F ; Common # Cc [32] <control-0000>..<control-001F>
|
|
0020 ; Common # Zs SPACE
|
|
0021..0023 ; Common # Po [3] EXCLAMATION MARK..NUMBER SIGN
|
|
0024 ; Common # Sc DOLLAR SIGN
|
|
`
|