Documentation
¶
Overview ¶
Package bamboo implements text processing for Vietnamese
Index ¶
- Constants
- Variables
- func AddMarkToChar(chr rune, mark uint8) rune
- func AddMarkToTonelessChar(chr rune, mark uint8) rune
- func AddToneToChar(chr rune, tone uint8) rune
- func Encode(charsetName string, input string) string
- func FindMarkPosition(chr rune) int
- func FindVowelPosition(chr rune) int
- func Flatten(composition []*Transformation, mode Mode) string
- func GetCharsetNames() []string
- func GetInputMethodDefinitions() map[string]InputMethodDefinition
- func HasAnyVietnameseRune(word string) bool
- func HasAnyVietnameseVowel(word string) bool
- func IsAlpha(c rune) bool
- func IsPunctuationMark(key rune) bool
- func IsSpace(key rune) bool
- func IsVietnameseRune(lowerKey rune) bool
- func IsVowel(chr rune) bool
- func IsWordBreakSymbol(key rune) bool
- type BambooEngine
- func (e *BambooEngine) CanProcessKey(key rune) bool
- func (e *BambooEngine) GetFlag() uint
- func (e *BambooEngine) GetInputMethod() InputMethod
- func (e *BambooEngine) GetProcessedString(mode Mode) string
- func (e *BambooEngine) IsValid(inputIsFullComplete bool) bool
- func (e *BambooEngine) ProcessKey(key rune, mode Mode)
- func (e *BambooEngine) ProcessString(str string, mode Mode)
- func (e *BambooEngine) RemoveLastChar(refreshLastToneTarget bool)
- func (e *BambooEngine) Reset()
- func (e *BambooEngine) RestoreLastWord(toVietnamese bool)
- func (e *BambooEngine) SetFlag(flag uint)
- type EffectType
- type IEngine
- type InputMethod
- type InputMethodDefinition
- type Mark
- type Mode
- type Rule
- type Tone
- type Transformation
Constants ¶
const (
EfreeToneMarking uint = 1 << iota
EstdToneStyle
EautoCorrectEnabled
EstdFlags = EfreeToneMarking | EstdToneStyle | EautoCorrectEnabled
)
const UNICODE = "Unicode"
Variables ¶
var InputMethodDefinitions = map[string]InputMethodDefinition{
"Telex": {
"z": "XoaDauThanh",
"s": "DauSac",
"f": "DauHuyen",
"r": "DauHoi",
"x": "DauNga",
"j": "DauNang",
"a": "A_Â",
"e": "E_Ê",
"o": "O_Ô",
"w": "UOA_ƯƠĂ",
"d": "D_Đ",
},
"VNI": {
"0": "XoaDauThanh",
"1": "DauSac",
"2": "DauHuyen",
"3": "DauHoi",
"4": "DauNga",
"5": "DauNang",
"6": "AEO_ÂÊÔ",
"7": "UO_ƯƠ",
"8": "A_Ă",
"9": "D_Đ",
},
"VIQR": {
"0": "XoaDauThanh",
"'": "DauSac",
"`": "DauHuyen",
"?": "DauHoi",
"~": "DauNga",
".": "DauNang",
"^": "AEO_ÂÊÔ",
"+": "UO_ƯƠ",
"*": "UO_ƯƠ",
"(": "A_Ă",
"d": "D_Đ",
},
"Microsoft layout": {
"8": "DauSac",
"5": "DauHuyen",
"6": "DauHoi",
"7": "DauNga",
"9": "DauNang",
"1": "__ă",
"!": "_Ă",
"2": "__â",
"@": "_Â",
"3": "__ê",
"#": "_Ê",
"4": "__ô",
"$": "_Ô",
"0": "__đ",
")": "_Đ",
"[": "__ư",
"{": "_Ư",
"]": "__ơ",
"}": "_Ơ",
},
"Telex 2": {
"z": "XoaDauThanh",
"s": "DauSac",
"f": "DauHuyen",
"r": "DauHoi",
"x": "DauNga",
"j": "DauNang",
"a": "A_Â",
"e": "E_Ê",
"o": "O_Ô",
"w": "UOA_ƯƠĂ__Ư",
"d": "D_Đ",
"]": "__ư",
"[": "__ơ",
"}": "_Ư",
"{": "_Ơ",
},
"Telex + VNI": {
"z": "XoaDauThanh",
"s": "DauSac",
"f": "DauHuyen",
"r": "DauHoi",
"x": "DauNga",
"j": "DauNang",
"a": "A_Â",
"e": "E_Ê",
"o": "O_Ô",
"w": "UOA_ƯƠĂ",
"d": "D_Đ",
"0": "XoaDauThanh",
"1": "DauSac",
"2": "DauHuyen",
"3": "DauHoi",
"4": "DauNga",
"5": "DauNang",
"6": "AEO_ÂÊÔ",
"7": "UO_ƯƠ",
"8": "A_Ă",
"9": "D_Đ",
},
"Telex + VNI + VIQR": {
"z": "XoaDauThanh",
"s": "DauSac",
"f": "DauHuyen",
"r": "DauHoi",
"x": "DauNga",
"j": "DauNang",
"a": "A_Â",
"e": "E_Ê",
"o": "O_Ô",
"w": "UOA_ƯƠĂ",
"d": "D_Đ",
"0": "XoaDauThanh",
"1": "DauSac",
"2": "DauHuyen",
"3": "DauHoi",
"4": "DauNga",
"5": "DauNang",
"6": "AEO_ÂÊÔ",
"7": "UO_ƯƠ",
"8": "A_Ă",
"9": "D_Đ",
"'": "DauSac",
"`": "DauHuyen",
"?": "DauHoi",
"~": "DauNga",
".": "DauNang",
"^": "AEO_ÂÊÔ",
"+": "UO_ƯƠ",
"*": "UO_ƯƠ",
"(": "A_Ă",
"\\": "D_Đ",
},
"VNI Bàn phím tiếng Pháp": {
"&": "XoaDauThanh",
"é": "DauSac",
"\"": "DauHuyen",
"'": "DauHoi",
"(": "DauNga",
"-": "DauNang",
"è": "AEO_ÂÊÔ",
"_": "UO_ƯƠ",
"ç": "A_Ă",
"à": "D_Đ",
},
"Telex W": {
"z": "XoaDauThanh",
"s": "DauSac",
"f": "DauHuyen",
"r": "DauHoi",
"x": "DauNga",
"j": "DauNang",
"a": "A_Â",
"e": "E_Ê",
"o": "O_Ô",
"w": "UOA_ƯƠĂ__Ư",
"d": "D_Đ",
},
}
var PunctuationMarks = []rune{
',', ';', ':', '.', '"', '\'', '!', '?', ' ',
'<', '>', '=', '+', '-', '*', '/', '\\',
'_', '~', '`', '@', '#', '$', '%', '^', '&', '(', ')', '{', '}', '[', ']',
'|',
}
var Vowels = []rune("aàáảãạăằắẳẵặâầấẩẫậeèéẻẽẹêềếểễệiìíỉĩịoòóỏõọôồốổỗộơờớởỡợuùúủũụưừứửữựyỳýỷỹỵ")
Functions ¶
func AddMarkToChar ¶
func AddMarkToTonelessChar ¶
func AddToneToChar ¶
func FindMarkPosition ¶
func FindVowelPosition ¶
func Flatten ¶
func Flatten(composition []*Transformation, mode Mode) string
func GetCharsetNames ¶
func GetCharsetNames() []string
func GetInputMethodDefinitions ¶
func GetInputMethodDefinitions() map[string]InputMethodDefinition
func HasAnyVietnameseRune ¶
func HasAnyVietnameseVowel ¶
func IsPunctuationMark ¶
func IsVietnameseRune ¶
func IsWordBreakSymbol ¶
Types ¶
type BambooEngine ¶
type BambooEngine struct {
// contains filtered or unexported fields
}
BambooEngine implements the IEngine interface
func (*BambooEngine) CanProcessKey ¶
func (e *BambooEngine) CanProcessKey(key rune) bool
CanProcessKey checks if the engine can process the given key
func (*BambooEngine) GetFlag ¶
func (e *BambooEngine) GetFlag() uint
GetFlag returns the current engine flags
func (*BambooEngine) GetInputMethod ¶
func (e *BambooEngine) GetInputMethod() InputMethod
GetInputMethod returns the current input method
func (*BambooEngine) GetProcessedString ¶
func (e *BambooEngine) GetProcessedString(mode Mode) string
GetProcessedString returns the processed text based on the specified mode
func (*BambooEngine) IsValid ¶
func (e *BambooEngine) IsValid(inputIsFullComplete bool) bool
IsValid checks if the current composition is valid
func (*BambooEngine) ProcessKey ¶
func (e *BambooEngine) ProcessKey(key rune, mode Mode)
ProcessKey processes a single key input
func (*BambooEngine) ProcessString ¶
func (e *BambooEngine) ProcessString(str string, mode Mode)
ProcessString processes a string of characters
func (*BambooEngine) RemoveLastChar ¶
func (e *BambooEngine) RemoveLastChar(refreshLastToneTarget bool)
RemoveLastChar removes the last character from the composition. It finds the last APPENDING transformation and all the transformations that add effects to it.
func (*BambooEngine) RestoreLastWord ¶
func (e *BambooEngine) RestoreLastWord(toVietnamese bool)
RestoreLastWord restores the last word to its original or Vietnamese form
func (*BambooEngine) SetFlag ¶
func (e *BambooEngine) SetFlag(flag uint)
SetFlag sets the engine flags
type EffectType ¶
type EffectType int
const (
Appending EffectType = iota
MarkTransformation
ToneTransformation
Replacing
)
type IEngine ¶
type IEngine interface {
SetFlag(uint)
GetFlag() uint
GetInputMethod() InputMethod
ProcessKey(rune, Mode)
ProcessString(string, Mode)
GetProcessedString(Mode) string
IsValid(bool) bool
CanProcessKey(rune) bool
RemoveLastChar(bool)
RestoreLastWord(bool)
Reset()
}
IEngine defines the interface for Vietnamese text processing engines
func NewEngine ¶
func NewEngine(inputMethod InputMethod, flag uint) IEngine
NewEngine creates a new instance of the Bamboo engine
type InputMethod ¶
type InputMethod struct {
Name string
Rules []Rule
SuperKeys []rune
ToneKeys []rune
AppendingKeys []rune
Keys []rune
}
InputMethod represents a Vietnamese input method configuration
func ParseInputMethod ¶
func ParseInputMethod(imDef map[string]InputMethodDefinition, imName string) InputMethod
ParseInputMethod returns an InputMethod by name from a definition map
type InputMethodDefinition ¶
type Rule ¶
type Rule struct {
Key rune
Effect uint8 // (Tone, Mark)
EffectType EffectType
EffectOn rune
Result rune
AppendedRules []Rule
}
Rule represents a transformation rule for Vietnamese input
func ParseRules ¶
ParseRules converts a definition string into a slice of rules
func ParseToneLessRule ¶
func ParseTonelessRules ¶
type Transformation ¶
type Transformation struct {
Rule Rule
Target *Transformation
IsUpperCase bool
}
Transformation represents a single transformation operation