You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			71 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Go
		
	
			
		
		
	
	
			71 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Go
		
	
| /*
 | |
|  * Copyright 2022 ByteDance Inc.
 | |
|  *
 | |
|  * Licensed under the Apache License, Version 2.0 (the "License");
 | |
|  * you may not use this file except in compliance with the License.
 | |
|  * You may obtain a copy of the License at
 | |
|  *
 | |
|  *     http://www.apache.org/licenses/LICENSE-2.0
 | |
|  *
 | |
|  * Unless required by applicable law or agreed to in writing, software
 | |
|  * distributed under the License is distributed on an "AS IS" BASIS,
 | |
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|  * See the License for the specific language governing permissions and
 | |
|  * limitations under the License.
 | |
|  */
 | |
| 
 | |
| package utf8
 | |
| 
 | |
| import (
 | |
|     `github.com/bytedance/sonic/internal/rt`
 | |
|     `github.com/bytedance/sonic/internal/native/types`
 | |
|     `github.com/bytedance/sonic/internal/native`
 | |
| )
 | |
| 
 | |
| // CorrectWith corrects the invalid utf8 byte with repl string.
 | |
| func CorrectWith(dst []byte, src []byte, repl string) []byte {
 | |
|     sstr := rt.Mem2Str(src)
 | |
|     sidx := 0
 | |
| 
 | |
|     /* state machine records the invalid postions */
 | |
|     m := types.NewStateMachine()
 | |
|     m.Sp = 0 // invalid utf8 numbers
 | |
| 
 | |
|     for sidx < len(sstr) {
 | |
|         scur  := sidx
 | |
|         ecode := native.ValidateUTF8(&sstr, &sidx, m)
 | |
| 
 | |
|         if m.Sp != 0 {
 | |
|             if m.Sp > len(sstr) {
 | |
|                 panic("numbers of invalid utf8 exceed the string len!")
 | |
|             }
 | |
|         }
 | |
|         
 | |
|         for i := 0; i < m.Sp; i++ {
 | |
|             ipos := m.Vt[i] // invalid utf8 position
 | |
|             dst  = append(dst, sstr[scur:ipos]...)
 | |
|             dst  = append(dst, repl...)
 | |
|             scur = m.Vt[i] + 1
 | |
|         }
 | |
|         /* append the remained valid utf8 bytes */
 | |
|         dst = append(dst, sstr[scur:sidx]...)
 | |
| 
 | |
|         /* not enough space, reset and continue */
 | |
|         if ecode != 0 {
 | |
|             m.Sp = 0
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     types.FreeStateMachine(m)
 | |
|     return dst
 | |
| }
 | |
| 
 | |
| // Validate is a simd-accelereated drop-in replacement for the standard library's utf8.Valid.
 | |
| func Validate(src []byte) bool {
 | |
|     return ValidateString(rt.Mem2Str(src))
 | |
| }
 | |
| 
 | |
| // ValidateString as Validate, but for string.
 | |
| func ValidateString(src string) bool {
 | |
|     return native.ValidateUTF8Fast(&src) == 0
 | |
| } |