bamboo

package module
v0.0.0-...-50b3136 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 12, 2025 License: MIT Imports: 5 Imported by: 0

README

Text processing library for Vietnamese

License

The MIT License (MIT) Copyright (C) 2018 Luong Thanh Lam

Documentation

Overview

Package bamboo implements text processing for Vietnamese.

Index

Constants

View Source
// Engine option flags. Every flag occupies its own bit, so several of them
// can be OR-ed together into a single uint (the flag type taken by NewEngine
// and SetFlag).
// NOTE(review): what each flag switches on is not visible here — confirm the
// semantics against the engine implementation before documenting them.
const (
	// EfreeToneMarking is bit 0 (value 1).
	EfreeToneMarking uint = 1 << iota
	// EstdToneStyle is bit 1 (value 2).
	EstdToneStyle
	// EautoCorrectEnabled is bit 2 (value 4).
	EautoCorrectEnabled
)

// EstdFlags is the union of the three single-bit flags above (value 7).
const EstdFlags = EfreeToneMarking | EstdToneStyle | EautoCorrectEnabled
View Source
// UNICODE is the literal name "Unicode".
// NOTE(review): presumably a charset name of the kind accepted by Encode and
// returned by GetCharsetNames — confirm against the charset table.
const UNICODE = "Unicode"

Variables

View Source
// InputMethodDefinitions holds the built-in input methods, keyed by the
// method's display name. Each InputMethodDefinition maps a typed key (a
// one-character string) to a rule-definition string.
//
// Two shapes of definition string appear below:
//   - a bare name: XoaDauThanh, DauSac, DauHuyen, DauHoi, DauNga, DauNang.
//     The names are Vietnamese for "remove tone mark" and for the acute,
//     grave, hook, tilde and dot tone marks respectively;
//   - letters separated by underscores, e.g. "A_Â" or "UOA_ƯƠĂ__Ư".
//     NOTE(review): this looks like "<base letters>_<resulting letters>" with
//     further "_"-separated fields for extra behaviour — confirm against
//     ParseRules / ParseTonelessRules before editing entries.
var InputMethodDefinitions = map[string]InputMethodDefinition{
	// Telex: every trigger is a letter key.
	"Telex": {
		"z": "XoaDauThanh",
		"s": "DauSac",
		"f": "DauHuyen",
		"r": "DauHoi",
		"x": "DauNga",
		"j": "DauNang",
		"a": "A_Â",
		"e": "E_Ê",
		"o": "O_Ô",
		"w": "UOA_ƯƠĂ",
		"d": "D_Đ",
	},
	// VNI: every trigger is a digit key.
	"VNI": {
		"0": "XoaDauThanh",
		"1": "DauSac",
		"2": "DauHuyen",
		"3": "DauHoi",
		"4": "DauNga",
		"5": "DauNang",
		"6": "AEO_ÂÊÔ",
		"7": "UO_ƯƠ",
		"8": "A_Ă",
		"9": "D_Đ",
	},
	// VIQR: triggers are punctuation keys, plus "0" and "d".
	// Both "+" and "*" carry the same UO_ƯƠ definition.
	"VIQR": {
		"0": "XoaDauThanh",
		"'": "DauSac",
		"`": "DauHuyen",
		"?": "DauHoi",
		"~": "DauNga",
		".": "DauNang",
		"^": "AEO_ÂÊÔ",
		"+": "UO_ƯƠ",
		"*": "UO_ƯƠ",
		"(": "A_Ă",
		"d": "D_Đ",
	},
	// Microsoft layout: digit and bracket keys. Letter entries come in pairs:
	// one key yields the lower-case letter ("__x" form) and its companion
	// symbol key yields the upper-case letter ("_X" form). There is no
	// XoaDauThanh entry in this method.
	"Microsoft layout": {
		"8": "DauSac",
		"5": "DauHuyen",
		"6": "DauHoi",
		"7": "DauNga",
		"9": "DauNang",
		"1": "__ă",
		"!": "_Ă",
		"2": "__â",
		"@": "_Â",
		"3": "__ê",
		"#": "_Ê",
		"4": "__ô",
		"$": "_Ô",
		"0": "__đ",
		")": "_Đ",
		"[": "__ư",
		"{": "_Ư",
		"]": "__ơ",
		"}": "_Ơ",
	},
	// Telex 2: the Telex entries with an extended "w" definition, plus
	// bracket/brace keys that yield ư/ơ and Ư/Ơ directly.
	"Telex 2": {
		"z": "XoaDauThanh",
		"s": "DauSac",
		"f": "DauHuyen",
		"r": "DauHoi",
		"x": "DauNga",
		"j": "DauNang",
		"a": "A_Â",
		"e": "E_Ê",
		"o": "O_Ô",
		"w": "UOA_ƯƠĂ__Ư",
		"d": "D_Đ",
		"]": "__ư",
		"[": "__ơ",
		"}": "_Ư",
		"{": "_Ơ",
	},
	// Telex + VNI: the union of the "Telex" and "VNI" entries above.
	"Telex + VNI": {
		"z": "XoaDauThanh",
		"s": "DauSac",
		"f": "DauHuyen",
		"r": "DauHoi",
		"x": "DauNga",
		"j": "DauNang",
		"a": "A_Â",
		"e": "E_Ê",
		"o": "O_Ô",
		"w": "UOA_ƯƠĂ",
		"d": "D_Đ",
		"0": "XoaDauThanh",
		"1": "DauSac",
		"2": "DauHuyen",
		"3": "DauHoi",
		"4": "DauNga",
		"5": "DauNang",
		"6": "AEO_ÂÊÔ",
		"7": "UO_ƯƠ",
		"8": "A_Ă",
		"9": "D_Đ",
	},
	// Telex + VNI + VIQR: the Telex and VNI entries plus the VIQR punctuation
	// keys. It additionally maps "\\" to D_Đ ("d" already carries D_Đ from
	// the Telex entries).
	"Telex + VNI + VIQR": {
		"z":  "XoaDauThanh",
		"s":  "DauSac",
		"f":  "DauHuyen",
		"r":  "DauHoi",
		"x":  "DauNga",
		"j":  "DauNang",
		"a":  "A_Â",
		"e":  "E_Ê",
		"o":  "O_Ô",
		"w":  "UOA_ƯƠĂ",
		"d":  "D_Đ",
		"0":  "XoaDauThanh",
		"1":  "DauSac",
		"2":  "DauHuyen",
		"3":  "DauHoi",
		"4":  "DauNga",
		"5":  "DauNang",
		"6":  "AEO_ÂÊÔ",
		"7":  "UO_ƯƠ",
		"8":  "A_Ă",
		"9":  "D_Đ",
		"'":  "DauSac",
		"`":  "DauHuyen",
		"?":  "DauHoi",
		"~":  "DauNga",
		".":  "DauNang",
		"^":  "AEO_ÂÊÔ",
		"+":  "UO_ƯƠ",
		"*":  "UO_ƯƠ",
		"(":  "A_Ă",
		"\\": "D_Đ",
	},
	// The method name is Vietnamese for "VNI, French keyboard". It carries
	// the same ten definitions as "VNI", attached to different keys; several
	// of those keys (é, è, ç, à) are non-ASCII.
	"VNI Bàn phím tiếng Pháp": {
		"&":  "XoaDauThanh",
		"é":  "DauSac",
		"\"": "DauHuyen",
		"'":  "DauHoi",
		"(":  "DauNga",
		"-":  "DauNang",
		"è":  "AEO_ÂÊÔ",
		"_":  "UO_ƯƠ",
		"ç":  "A_Ă",
		"à":  "D_Đ",
	},
	// Telex W: identical to "Telex" except for the extended "w" definition.
	"Telex W": {
		"z": "XoaDauThanh",
		"s": "DauSac",
		"f": "DauHuyen",
		"r": "DauHoi",
		"x": "DauNga",
		"j": "DauNang",
		"a": "A_Â",
		"e": "E_Ê",
		"o": "O_Ô",
		"w": "UOA_ƯƠĂ__Ư",
		"d": "D_Đ",
	},
}
View Source
// PunctuationMarks lists ASCII punctuation and symbol runes, plus the space
// character.
// NOTE(review): presumably the set consulted by IsPunctuationMark — confirm.
var PunctuationMarks = []rune{
	// Sentence punctuation, quotes and the space character.
	',', ';', ':', '.', '"', '\'', '!', '?', ' ',
	// Comparison and arithmetic symbols, slash and backslash.
	'<', '>', '=', '+', '-', '*', '/', '\\',
	// Remaining symbols and the three bracket pairs.
	'_', '~', '`', '@', '#', '$', '%', '^', '&', '(', ')', '{', '}', '[', ']',
	'|',
}
View Source
// Vowels lists the lower-case Vietnamese vowel forms as 12 consecutive groups
// of six runes (72 in total). The groups are, in order: a, ă, â, e, ê, i, o,
// ô, ơ, u, ư, y. Inside each group the first rune is the plain vowel, followed
// by its grave, acute, hook, tilde and dot-below forms.
// NOTE(review): presumably FindVowelPosition and the tone helpers depend on
// this exact ordering — do not reorder without checking them.
var Vowels = []rune("" +
	"aàáảãạ" + "ăằắẳẵặ" + "âầấẩẫậ" +
	"eèéẻẽẹ" + "êềếểễệ" +
	"iìíỉĩị" +
	"oòóỏõọ" + "ôồốổỗộ" + "ơờớởỡợ" +
	"uùúủũụ" + "ưừứửữự" +
	"yỳýỷỹỵ")

Functions

func AddMarkToChar

func AddMarkToChar(chr rune, mark uint8) rune

func AddMarkToTonelessChar

func AddMarkToTonelessChar(chr rune, mark uint8) rune

func AddToneToChar

func AddToneToChar(chr rune, tone uint8) rune

func Encode

func Encode(charsetName string, input string) string

func FindMarkPosition

func FindMarkPosition(chr rune) int

func FindVowelPosition

func FindVowelPosition(chr rune) int

func Flatten

func Flatten(composition []*Transformation, mode Mode) string

func GetCharsetNames

func GetCharsetNames() []string

func GetInputMethodDefinitions

func GetInputMethodDefinitions() map[string]InputMethodDefinition

func HasAnyVietnameseRune

func HasAnyVietnameseRune(word string) bool

func HasAnyVietnameseVowel

func HasAnyVietnameseVowel(word string) bool

func IsAlpha

func IsAlpha(c rune) bool

func IsPunctuationMark

func IsPunctuationMark(key rune) bool

func IsSpace

func IsSpace(key rune) bool

func IsVietnameseRune

func IsVietnameseRune(lowerKey rune) bool

func IsVowel

func IsVowel(chr rune) bool

func IsWordBreakSymbol

func IsWordBreakSymbol(key rune) bool

Types

type BambooEngine

// BambooEngine implements the IEngine interface. NewEngine is the
// constructor shown for it in this package and hands the engine back as an
// IEngine.
type BambooEngine struct {
	// contains filtered or unexported fields
}

BambooEngine implements the IEngine interface

func (*BambooEngine) CanProcessKey

func (e *BambooEngine) CanProcessKey(key rune) bool

CanProcessKey checks if the engine can process the given key

func (*BambooEngine) GetFlag

func (e *BambooEngine) GetFlag() uint

GetFlag returns the current engine flags

func (*BambooEngine) GetInputMethod

func (e *BambooEngine) GetInputMethod() InputMethod

GetInputMethod returns the current input method

func (*BambooEngine) GetProcessedString

func (e *BambooEngine) GetProcessedString(mode Mode) string

GetProcessedString returns the processed text based on the specified mode

func (*BambooEngine) IsValid

func (e *BambooEngine) IsValid(inputIsFullComplete bool) bool

IsValid checks if the current composition is valid

func (*BambooEngine) ProcessKey

func (e *BambooEngine) ProcessKey(key rune, mode Mode)

ProcessKey processes a single key input

func (*BambooEngine) ProcessString

func (e *BambooEngine) ProcessString(str string, mode Mode)

ProcessString processes a string of characters

func (*BambooEngine) RemoveLastChar

func (e *BambooEngine) RemoveLastChar(refreshLastToneTarget bool)

RemoveLastChar removes the last character from the composition. It finds the last APPENDING transformation and all the transformations that add effects to it.

func (*BambooEngine) Reset

func (e *BambooEngine) Reset()

Reset clears the current composition

func (*BambooEngine) RestoreLastWord

func (e *BambooEngine) RestoreLastWord(toVietnamese bool)

RestoreLastWord restores the last word to its original or Vietnamese form

func (*BambooEngine) SetFlag

func (e *BambooEngine) SetFlag(flag uint)

SetFlag sets the engine flags

type EffectType

// EffectType classifies the kind of effect a Rule carries (it is the type of
// the Rule.EffectType field). The constants are consecutive integers
// starting at 0.
// NOTE(review): the per-constant descriptions below are inferred from the
// names only — confirm against the engine implementation.
type EffectType int
const (
	// Appending (0): presumably the key adds a new character to the composition.
	Appending EffectType = iota
	// MarkTransformation (1): presumably applies a Mark to an existing character.
	MarkTransformation
	// ToneTransformation (2): presumably applies a Tone to an existing character.
	ToneTransformation
	// Replacing (3): presumably substitutes one character for another.
	Replacing
)

type IEngine

// IEngine is the contract for Vietnamese text processing engines; in this
// package it is implemented by BambooEngine and returned by NewEngine.
type IEngine interface {
	// SetFlag sets the engine flags.
	SetFlag(flag uint)
	// GetFlag returns the current engine flags.
	GetFlag() uint
	// GetInputMethod returns the current input method.
	GetInputMethod() InputMethod
	// ProcessKey processes a single key input.
	ProcessKey(key rune, mode Mode)
	// ProcessString processes a string of characters.
	ProcessString(str string, mode Mode)
	// GetProcessedString returns the processed text based on the given mode.
	GetProcessedString(mode Mode) string
	// IsValid checks if the current composition is valid.
	IsValid(inputIsFullComplete bool) bool
	// CanProcessKey checks if the engine can process the given key.
	CanProcessKey(key rune) bool
	// RemoveLastChar removes the last character from the composition.
	RemoveLastChar(refreshLastToneTarget bool)
	// RestoreLastWord restores the last word to its original or Vietnamese form.
	RestoreLastWord(toVietnamese bool)
	// Reset clears the current composition.
	Reset()
}

IEngine defines the interface for Vietnamese text processing engines

func NewEngine

func NewEngine(inputMethod InputMethod, flag uint) IEngine

NewEngine creates a new instance of the Bamboo engine

type InputMethod

// InputMethod represents a Vietnamese input method configuration, as
// returned by ParseInputMethod for a named entry of a definition map.
// NOTE(review): the field descriptions below are inferred from the field
// names and types — confirm against ParseInputMethod.
type InputMethod struct {
	// Name is presumably the input method's name (the imName it was parsed from).
	Name          string
	// Rules is presumably the full list of rules parsed from the definition.
	Rules         []Rule
	// SuperKeys: meaning not visible from here — TODO confirm.
	SuperKeys     []rune
	// ToneKeys is presumably the set of keys that trigger a tone effect.
	ToneKeys      []rune
	// AppendingKeys is presumably the set of keys whose rules append characters.
	AppendingKeys []rune
	// Keys is presumably every key the input method reacts to.
	Keys          []rune
}

InputMethod represents a Vietnamese input method configuration

func ParseInputMethod

func ParseInputMethod(imDef map[string]InputMethodDefinition, imName string) InputMethod

ParseInputMethod returns an InputMethod by name from a definition map

type InputMethodDefinition

// InputMethodDefinition is the textual description of one input method: it
// maps a typed key (written as a string) to a rule-definition string such as
// "DauSac" or "A_Â". InputMethodDefinitions holds the built-in ones.
type InputMethodDefinition map[string]string

type Mark

// Mark identifies a diacritical mark. It is a uint8, the same width as the
// raw `mark uint8` parameters taken by AddMarkToChar and
// AddMarkToTonelessChar.
type Mark uint8

Mark represents Vietnamese diacritical marks

// Mark values, numbered consecutively from 0.
// NOTE(review): the letter examples below are inferred from the constant
// names and Vietnamese orthography — confirm against the mark tables.
const (
	// MarkNone (0): no diacritical mark.
	MarkNone Mark = iota
	// MarkHat (1): presumably the circumflex, as in â, ê, ô.
	MarkHat
	// MarkBreve (2): presumably the breve, as in ă.
	MarkBreve
	// MarkHorn (3): presumably the horn, as in ơ, ư.
	MarkHorn
	// MarkDash (4): presumably the stroke, as in đ.
	MarkDash
	// MarkRaw (5): meaning not visible from here — TODO confirm.
	MarkRaw
)

func FindMarkFromChar

func FindMarkFromChar(chr rune) (Mark, bool)

type Mode

// Mode selects how the engine processes keys and renders text; it is passed
// to ProcessKey, ProcessString, GetProcessedString and Flatten. The Mode
// constants are single-bit values, so a Mode can hold several of them OR-ed
// together.
type Mode uint

Mode represents the different operation modes for the Bamboo engine

// Mode bits. Each constant is a distinct power of two (1 << iota).
// NOTE(review): the behaviour attached to each bit is not visible here; the
// names are the only hint — confirm against the engine and Flatten.
const (
	// VietnameseMode is bit 0 (value 1).
	VietnameseMode Mode = 1 << iota
	// EnglishMode is bit 1 (value 2).
	EnglishMode
	// ToneLess is bit 2 (value 4).
	ToneLess
	// MarkLess is bit 3 (value 8).
	MarkLess
	// LowerCase is bit 4 (value 16).
	LowerCase
	// FullText is bit 5 (value 32).
	FullText
	// PunctuationMode is bit 6 (value 64).
	PunctuationMode
	// InReverseOrder is bit 7 (value 128).
	InReverseOrder
)

type Rule

// Rule represents a transformation rule for Vietnamese input. Rules are
// produced by ParseRules / ParseTonelessRules from a key and a definition
// string.
// NOTE(review): the field descriptions below are inferred from names and
// types — confirm against the parser.
type Rule struct {
	// Key is presumably the typed key that triggers the rule.
	Key           rune
	// Effect stores a Tone or a Mark value (both are uint8); presumably read
	// and written through GetTone/SetTone and GetMark/SetMark.
	Effect        uint8 // (Tone, Mark)
	// EffectType says which kind of effect this rule carries.
	EffectType    EffectType
	// EffectOn is presumably the character the rule applies to.
	EffectOn      rune
	// Result is presumably the character produced when the rule applies.
	Result        rune
	// AppendedRules: nested rules; their role is not visible here — TODO confirm.
	AppendedRules []Rule
}

Rule represents a transformation rule for Vietnamese input

func ParseRules

func ParseRules(key rune, line string) []Rule

ParseRules converts a definition string into a slice of rules

func ParseToneLessRule

func ParseToneLessRule(key, effectiveOn, result rune, effect Mark) []Rule

func ParseTonelessRules

func ParseTonelessRules(key rune, line string) []Rule

func (*Rule) GetMark

func (r *Rule) GetMark() Mark

func (*Rule) GetTone

func (r *Rule) GetTone() Tone

func (*Rule) SetMark

func (r *Rule) SetMark(mark Mark)

func (*Rule) SetTone

func (r *Rule) SetTone(tone Tone)

type Tone

// Tone identifies a Vietnamese tone mark. It is a uint8, the same width as
// the raw `tone uint8` parameter taken by AddToneToChar.
type Tone uint8

Tone represents Vietnamese tone marks

// Tone values, numbered consecutively from 0. The order (none, grave, acute,
// hook, tilde, dot) matches the order of the six forms inside each vowel
// group of Vowels.
// NOTE(review): presumably the tone helpers index into Vowels with these
// values — confirm before renumbering.
const (
	// ToneNone (0): no tone mark.
	ToneNone Tone = iota
	// ToneGrave (1): grave accent, as in à.
	ToneGrave
	// ToneAcute (2): acute accent, as in á.
	ToneAcute
	// ToneHook (3): hook above, as in ả.
	ToneHook
	// ToneTilde (4): tilde, as in ã.
	ToneTilde
	// ToneDot (5): dot below, as in ạ.
	ToneDot
)

func FindToneFromChar

func FindToneFromChar(chr rune) Tone

type Transformation

// Transformation represents a single transformation operation; a composition
// is a slice of *Transformation (see Flatten).
// NOTE(review): the field descriptions below are inferred from names and
// types — confirm against the engine.
type Transformation struct {
	// Rule is the rule this transformation applies.
	Rule        Rule
	// Target is presumably the earlier transformation this one adds an effect
	// to; presumably nil when it stands on its own.
	Target      *Transformation
	// IsUpperCase presumably records that the key was typed in upper case.
	IsUpperCase bool
}

Transformation represents a single transformation operation

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL