Documentation
¶
Overview ¶
Example ¶
// Example demonstrating token.Tokenize: words split across lines by a
// hyphen (e.g. "Poma-\ntomus") are concatenated into a single token,
// while other whitespace (including "\v", "\t", "\r\n") separates tokens.
package main

import (
	"fmt"

	"github.com/gnames/gner/ent/token"
)

func main() {
	text := "one\vtwo Poma- \t\r\n tomus " +
		"dash -\nstandalone " +
		"Tora-\nBora\n\rthree 1778,\n"
	// The identity callback leaves each token unchanged.
	res := token.Tokenize([]rune(text), func(t token.TokenNER) token.TokenNER { return t })
	fmt.Println(res[0].Cleaned())
	fmt.Println(res[2].Cleaned())
}
Output:
one
Pomatomus
Index ¶
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type TokenNER ¶ added in v0.1.1
// TokenNER represents a word separated by spaces in a text. Words split
// by new lines are concatenated.
type TokenNER interface {
// Raw is a verbatim presentation of a token as it appears in a text.
Raw() []rune
// Start is the index of the first rune of a token. The first rune
// does not have to be alpha-numeric.
Start() int
// End is the index of the last rune of a token. The last rune does not
// have to be alpha-numeric.
End() int
// Line returns the line number of the token in the text.
Line() int
// SetLine sets the line number of the token.
SetLine(int)
// Cleaned is a presentation of a token after normalization.
Cleaned() string
// SetCleaned substitutes existing cleaned text with a new one.
SetCleaned(string)
// ProcessToken computes a clean version of a name as well as properties
// of the token.
ProcessToken()
}
TokenNER represents a word separated by spaces in a text. Words split by new lines are concatenated.
Click to show internal directories.
Click to hide internal directories.