390 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			390 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			Go
		
	
	
	
package snowballstem
 | 
						|
 | 
						|
import (
 | 
						|
	"log"
 | 
						|
	"strings"
 | 
						|
	"unicode/utf8"
 | 
						|
)
 | 
						|
 | 
						|
// Env represents the Snowball execution environment: the string being
// processed together with the byte offsets that the methods in this file
// read and update.
type Env struct {
	current       string // text being processed; replaced by SetCurrent and ReplaceS
	Cursor        int    // current byte offset into current
	Limit         int    // upper byte offset for forward operations; NewEnv sets it to len(current)
	LimitBackward int    // lower byte offset for backward operations; NewEnv sets it to 0
	Bra           int    // start byte offset of the slice used by SliceFrom and SliceTo
	Ket           int    // end byte offset of the slice used by SliceFrom and SliceTo
}
 | 
						|
 | 
						|
// NewEnv creates a new Snowball execution environment on the provided string
 | 
						|
func NewEnv(val string) *Env {
 | 
						|
	return &Env{
 | 
						|
		current:       val,
 | 
						|
		Cursor:        0,
 | 
						|
		Limit:         len(val),
 | 
						|
		LimitBackward: 0,
 | 
						|
		Bra:           0,
 | 
						|
		Ket:           len(val),
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Current returns the string currently held by the environment.
func (env *Env) Current() string {
	return env.current
}
 | 
						|
 | 
						|
func (env *Env) SetCurrent(s string) {
 | 
						|
	env.current = s
 | 
						|
	env.Cursor = 0
 | 
						|
	env.Limit = len(s)
 | 
						|
	env.LimitBackward = 0
 | 
						|
	env.Bra = 0
 | 
						|
	env.Ket = len(s)
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) ReplaceS(bra, ket int, s string) int32 {
 | 
						|
	adjustment := int32(len(s)) - (int32(ket) - int32(bra))
 | 
						|
	result, _ := splitAt(env.current, bra)
 | 
						|
	rsplit := ket
 | 
						|
	if ket < bra {
 | 
						|
		rsplit = bra
 | 
						|
	}
 | 
						|
	_, rhs := splitAt(env.current, rsplit)
 | 
						|
	result += s
 | 
						|
	result += rhs
 | 
						|
 | 
						|
	newLim := int32(env.Limit) + adjustment
 | 
						|
	env.Limit = int(newLim)
 | 
						|
 | 
						|
	if env.Cursor >= ket {
 | 
						|
		newCur := int32(env.Cursor) + adjustment
 | 
						|
		env.Cursor = int(newCur)
 | 
						|
	} else if env.Cursor > bra {
 | 
						|
		env.Cursor = bra
 | 
						|
	}
 | 
						|
 | 
						|
	env.current = result
 | 
						|
	return adjustment
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) EqS(s string) bool {
 | 
						|
	if env.Cursor >= env.Limit {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
 | 
						|
	if strings.HasPrefix(env.current[env.Cursor:], s) {
 | 
						|
		env.Cursor += len(s)
 | 
						|
		for !onCharBoundary(env.current, env.Cursor) {
 | 
						|
			env.Cursor++
 | 
						|
		}
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) EqSB(s string) bool {
 | 
						|
	if int32(env.Cursor)-int32(env.LimitBackward) < int32(len(s)) {
 | 
						|
		return false
 | 
						|
	} else if !onCharBoundary(env.current, env.Cursor-len(s)) ||
 | 
						|
		!strings.HasPrefix(env.current[env.Cursor-len(s):], s) {
 | 
						|
		return false
 | 
						|
	} else {
 | 
						|
		env.Cursor -= len(s)
 | 
						|
		return true
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) SliceFrom(s string) bool {
 | 
						|
	bra, ket := env.Bra, env.Ket
 | 
						|
	env.ReplaceS(bra, ket, s)
 | 
						|
	return true
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) NextChar() {
 | 
						|
	env.Cursor++
 | 
						|
	for !onCharBoundary(env.current, env.Cursor) {
 | 
						|
		env.Cursor++
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) PrevChar() {
 | 
						|
	env.Cursor--
 | 
						|
	for !onCharBoundary(env.current, env.Cursor) {
 | 
						|
		env.Cursor--
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) ByteIndexForHop(delta int32) int32 {
 | 
						|
	if delta > 0 {
 | 
						|
		res := env.Cursor
 | 
						|
		for delta > 0 {
 | 
						|
			res++
 | 
						|
			delta--
 | 
						|
			for res <= len(env.current) && !onCharBoundary(env.current, res) {
 | 
						|
				res++
 | 
						|
			}
 | 
						|
		}
 | 
						|
		return int32(res)
 | 
						|
	} else if delta < 0 {
 | 
						|
		res := env.Cursor
 | 
						|
		for delta < 0 {
 | 
						|
			res--
 | 
						|
			delta++
 | 
						|
			for res >= 0 && !onCharBoundary(env.current, res) {
 | 
						|
				res--
 | 
						|
			}
 | 
						|
		}
 | 
						|
		return int32(res)
 | 
						|
	} else {
 | 
						|
		return int32(env.Cursor)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) InGrouping(chars []byte, min, max int32) bool {
 | 
						|
	if env.Cursor >= env.Limit {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
 | 
						|
	r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
 | 
						|
	if r != utf8.RuneError {
 | 
						|
		if r > max || r < min {
 | 
						|
			return false
 | 
						|
		}
 | 
						|
		r -= min
 | 
						|
		if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
 | 
						|
			return false
 | 
						|
		}
 | 
						|
		env.NextChar()
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) InGroupingB(chars []byte, min, max int32) bool {
 | 
						|
	if env.Cursor <= env.LimitBackward {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	env.PrevChar()
 | 
						|
	r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
 | 
						|
	if r != utf8.RuneError {
 | 
						|
		env.NextChar()
 | 
						|
		if r > max || r < min {
 | 
						|
			return false
 | 
						|
		}
 | 
						|
		r -= min
 | 
						|
		if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
 | 
						|
			return false
 | 
						|
		}
 | 
						|
		env.PrevChar()
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) OutGrouping(chars []byte, min, max int32) bool {
 | 
						|
	if env.Cursor >= env.Limit {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
 | 
						|
	if r != utf8.RuneError {
 | 
						|
		if r > max || r < min {
 | 
						|
			env.NextChar()
 | 
						|
			return true
 | 
						|
		}
 | 
						|
		r -= min
 | 
						|
		if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
 | 
						|
			env.NextChar()
 | 
						|
			return true
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) OutGroupingB(chars []byte, min, max int32) bool {
 | 
						|
	if env.Cursor <= env.LimitBackward {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	env.PrevChar()
 | 
						|
	r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
 | 
						|
	if r != utf8.RuneError {
 | 
						|
		env.NextChar()
 | 
						|
		if r > max || r < min {
 | 
						|
			env.PrevChar()
 | 
						|
			return true
 | 
						|
		}
 | 
						|
		r -= min
 | 
						|
		if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
 | 
						|
			env.PrevChar()
 | 
						|
			return true
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) SliceDel() bool {
 | 
						|
	return env.SliceFrom("")
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) Insert(bra, ket int, s string) {
 | 
						|
	adjustment := env.ReplaceS(bra, ket, s)
 | 
						|
	if bra <= env.Bra {
 | 
						|
		env.Bra = int(int32(env.Bra) + adjustment)
 | 
						|
	}
 | 
						|
	if bra <= env.Ket {
 | 
						|
		env.Ket = int(int32(env.Ket) + adjustment)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) SliceTo() string {
 | 
						|
	return env.current[env.Bra:env.Ket]
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) FindAmong(amongs []*Among, ctx interface{}) int32 {
 | 
						|
	var i int32
 | 
						|
	j := int32(len(amongs))
 | 
						|
 | 
						|
	c := env.Cursor
 | 
						|
	l := env.Limit
 | 
						|
 | 
						|
	var commonI, commonJ int
 | 
						|
 | 
						|
	firstKeyInspected := false
 | 
						|
	for {
 | 
						|
		k := i + ((j - i) >> 1)
 | 
						|
		var diff int32
 | 
						|
		common := min(commonI, commonJ)
 | 
						|
		w := amongs[k]
 | 
						|
		for lvar := common; lvar < len(w.Str); lvar++ {
 | 
						|
			if c+common == l {
 | 
						|
				diff--
 | 
						|
				break
 | 
						|
			}
 | 
						|
			diff = int32(env.current[c+common]) - int32(w.Str[lvar])
 | 
						|
			if diff != 0 {
 | 
						|
				break
 | 
						|
			}
 | 
						|
			common++
 | 
						|
		}
 | 
						|
		if diff < 0 {
 | 
						|
			j = k
 | 
						|
			commonJ = common
 | 
						|
		} else {
 | 
						|
			i = k
 | 
						|
			commonI = common
 | 
						|
		}
 | 
						|
		if j-i <= 1 {
 | 
						|
			if i > 0 {
 | 
						|
				break
 | 
						|
			}
 | 
						|
			if j == i {
 | 
						|
				break
 | 
						|
			}
 | 
						|
			if firstKeyInspected {
 | 
						|
				break
 | 
						|
			}
 | 
						|
			firstKeyInspected = true
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	for {
 | 
						|
		w := amongs[i]
 | 
						|
		if commonI >= len(w.Str) {
 | 
						|
			env.Cursor = c + len(w.Str)
 | 
						|
			if w.F != nil {
 | 
						|
				res := w.F(env, ctx)
 | 
						|
				env.Cursor = c + len(w.Str)
 | 
						|
				if res {
 | 
						|
					return w.B
 | 
						|
				}
 | 
						|
			} else {
 | 
						|
				return w.B
 | 
						|
			}
 | 
						|
		}
 | 
						|
		i = w.A
 | 
						|
		if i < 0 {
 | 
						|
			return 0
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) FindAmongB(amongs []*Among, ctx interface{}) int32 {
 | 
						|
	var i int32
 | 
						|
	j := int32(len(amongs))
 | 
						|
 | 
						|
	c := env.Cursor
 | 
						|
	lb := env.LimitBackward
 | 
						|
 | 
						|
	var commonI, commonJ int
 | 
						|
 | 
						|
	firstKeyInspected := false
 | 
						|
 | 
						|
	for {
 | 
						|
		k := i + ((j - i) >> 1)
 | 
						|
		diff := int32(0)
 | 
						|
		common := min(commonI, commonJ)
 | 
						|
		w := amongs[k]
 | 
						|
		for lvar := len(w.Str) - int(common) - 1; lvar >= 0; lvar-- {
 | 
						|
			if c-common == lb {
 | 
						|
				diff--
 | 
						|
				break
 | 
						|
			}
 | 
						|
			diff = int32(env.current[c-common-1]) - int32(w.Str[lvar])
 | 
						|
			if diff != 0 {
 | 
						|
				break
 | 
						|
			}
 | 
						|
			// Count up commons. But not one character but the byte width of that char
 | 
						|
			common++
 | 
						|
		}
 | 
						|
		if diff < 0 {
 | 
						|
			j = k
 | 
						|
			commonJ = common
 | 
						|
		} else {
 | 
						|
			i = k
 | 
						|
			commonI = common
 | 
						|
		}
 | 
						|
		if j-i <= 1 {
 | 
						|
			if i > 0 {
 | 
						|
				break
 | 
						|
			}
 | 
						|
			if j == i {
 | 
						|
				break
 | 
						|
			}
 | 
						|
			if firstKeyInspected {
 | 
						|
				break
 | 
						|
			}
 | 
						|
			firstKeyInspected = true
 | 
						|
		}
 | 
						|
	}
 | 
						|
	for {
 | 
						|
		w := amongs[i]
 | 
						|
		if commonI >= len(w.Str) {
 | 
						|
			env.Cursor = c - len(w.Str)
 | 
						|
			if w.F != nil {
 | 
						|
				res := w.F(env, ctx)
 | 
						|
				env.Cursor = c - len(w.Str)
 | 
						|
				if res {
 | 
						|
					return w.B
 | 
						|
				}
 | 
						|
			} else {
 | 
						|
				return w.B
 | 
						|
			}
 | 
						|
		}
 | 
						|
		i = w.A
 | 
						|
		if i < 0 {
 | 
						|
			return 0
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Debug logs count and lineNumber through the standard log package. It does
// not read or modify the environment.
func (env *Env) Debug(count, lineNumber int) {
	log.Printf("snowball debug, count: %d, line: %d", count, lineNumber)
}
 | 
						|
 | 
						|
func (env *Env) Clone() *Env {
 | 
						|
	clone := *env
 | 
						|
	return &clone
 | 
						|
}
 | 
						|
 | 
						|
func (env *Env) AssignTo() string {
 | 
						|
	return env.Current()
 | 
						|
}
 |