Don't treat BOM escape sequence as hidden character. (#18909)
* Don't treat BOM escape sequence as hidden character. - The BOM sequence is a common, harmless escape sequence; it shouldn't be shown as a hidden character. - Follows GitHub's behavior. - Resolves #18837 Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
		
							parent
							
								
									329b959160
								
							
						
					
					
						commit
						bf2867dec2
					
				|  | @ -63,6 +63,7 @@ func EscapeControlBytes(text []byte) (EscapeStatus, []byte) { | ||||||
| func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) { | func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) { | ||||||
| 	buf := make([]byte, 4096) | 	buf := make([]byte, 4096) | ||||||
| 	readStart := 0 | 	readStart := 0 | ||||||
|  | 	runeCount := 0 | ||||||
| 	var n int | 	var n int | ||||||
| 	var writePos int | 	var writePos int | ||||||
| 
 | 
 | ||||||
|  | @ -79,6 +80,8 @@ readingloop: | ||||||
| 
 | 
 | ||||||
| 		for i < len(bs) { | 		for i < len(bs) { | ||||||
| 			r, size := utf8.DecodeRune(bs[i:]) | 			r, size := utf8.DecodeRune(bs[i:]) | ||||||
|  | 			runeCount++ | ||||||
|  | 
 | ||||||
| 			// Now handle the codepoints
 | 			// Now handle the codepoints
 | ||||||
| 			switch { | 			switch { | ||||||
| 			case r == utf8.RuneError: | 			case r == utf8.RuneError: | ||||||
|  | @ -113,6 +116,8 @@ readingloop: | ||||||
| 				lineHasRTLScript = false | 				lineHasRTLScript = false | ||||||
| 				lineHasLTRScript = false | 				lineHasLTRScript = false | ||||||
| 
 | 
 | ||||||
|  | 			case runeCount == 1 && r == 0xFEFF: // UTF BOM
 | ||||||
|  | 				// the first BOM is safe
 | ||||||
| 			case r == '\r' || r == '\t' || r == ' ': | 			case r == '\r' || r == '\t' || r == ' ': | ||||||
| 				// These are acceptable control characters and space characters
 | 				// These are acceptable control characters and space characters
 | ||||||
| 			case unicode.IsSpace(r): | 			case unicode.IsSpace(r): | ||||||
|  |  | ||||||
|  | @ -129,6 +129,14 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, | ||||||
| 			"\n" + `if access_level != "user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>" {` + "\n", | 			"\n" + `if access_level != "user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>" {` + "\n", | ||||||
| 		status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true}, | 		status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true}, | ||||||
| 	}, | 	}, | ||||||
|  | 	{ | ||||||
|  | 		// UTF-8/16/32 all use the same codepoint for BOM
 | ||||||
|  | 		// Gitea could read UTF-16/32 content and convert into UTF-8 internally then render it, so we only process UTF-8 internally
 | ||||||
|  | 		name:   "UTF BOM", | ||||||
|  | 		text:   "\xef\xbb\xbftest", | ||||||
|  | 		result: "\xef\xbb\xbftest", | ||||||
|  | 		status: EscapeStatus{HasLTRScript: true}, | ||||||
|  | 	}, | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func TestEscapeControlString(t *testing.T) { | func TestEscapeControlString(t *testing.T) { | ||||||
|  | @ -163,10 +171,18 @@ func TestEscapeControlReader(t *testing.T) { | ||||||
| 	// lets add some control characters to the tests
 | 	// lets add some control characters to the tests
 | ||||||
| 	tests := make([]escapeControlTest, 0, len(escapeControlTests)*3) | 	tests := make([]escapeControlTest, 0, len(escapeControlTests)*3) | ||||||
| 	copy(tests, escapeControlTests) | 	copy(tests, escapeControlTests) | ||||||
|  | 
 | ||||||
|  | 	// if there is a BOM, we should keep the BOM
 | ||||||
|  | 	addPrefix := func(prefix, s string) string { | ||||||
|  | 		if strings.HasPrefix(s, "\xef\xbb\xbf") { | ||||||
|  | 			return s[:3] + prefix + s[3:] | ||||||
|  | 		} | ||||||
|  | 		return prefix + s | ||||||
|  | 	} | ||||||
| 	for _, test := range escapeControlTests { | 	for _, test := range escapeControlTests { | ||||||
| 		test.name += " (+Control)" | 		test.name += " (+Control)" | ||||||
| 		test.text = "\u001E" + test.text | 		test.text = addPrefix("\u001E", test.text) | ||||||
| 		test.result = `<span class="escaped-code-point" data-escaped="[U+001E]"><span class="char">` + "\u001e" + `</span></span>` + test.result | 		test.result = addPrefix(`<span class="escaped-code-point" data-escaped="[U+001E]"><span class="char">`+"\u001e"+`</span></span>`, test.result) | ||||||
| 		test.status.Escaped = true | 		test.status.Escaped = true | ||||||
| 		test.status.HasControls = true | 		test.status.HasControls = true | ||||||
| 		tests = append(tests, test) | 		tests = append(tests, test) | ||||||
|  | @ -174,8 +190,8 @@ func TestEscapeControlReader(t *testing.T) { | ||||||
| 
 | 
 | ||||||
| 	for _, test := range escapeControlTests { | 	for _, test := range escapeControlTests { | ||||||
| 		test.name += " (+Mark)" | 		test.name += " (+Mark)" | ||||||
| 		test.text = "\u0300" + test.text | 		test.text = addPrefix("\u0300", test.text) | ||||||
| 		test.result = `<span class="escaped-code-point" data-escaped="[U+0300]"><span class="char">` + "\u0300" + `</span></span>` + test.result | 		test.result = addPrefix(`<span class="escaped-code-point" data-escaped="[U+0300]"><span class="char">`+"\u0300"+`</span></span>`, test.result) | ||||||
| 		test.status.Escaped = true | 		test.status.Escaped = true | ||||||
| 		test.status.HasMarks = true | 		test.status.HasMarks = true | ||||||
| 		tests = append(tests, test) | 		tests = append(tests, test) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue