Fix non-ASCII search on database (#18437)

Use `ToUpperASCII` for issue search on SQLite databases, because SQLite's `UPPER(x)` only transforms ASCII letters. Resolves #18429
This commit is contained in:
Gusted 2022-02-01 13:59:25 +01:00 committed by GitHub
parent 7f2530e004
commit bb5f859ec0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 52 additions and 1 deletions

View file

@ -23,6 +23,7 @@ import (
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/references"
"code.gitea.io/gitea/modules/setting"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
@ -1862,7 +1863,12 @@ func GetRepoIssueStats(repoID, uid int64, filterMode int, isPull bool) (numOpen,
func SearchIssueIDsByKeyword(ctx context.Context, kw string, repoIDs []int64, limit, start int) (int64, []int64, error) {
repoCond := builder.In("repo_id", repoIDs)
subQuery := builder.Select("id").From("issue").Where(repoCond)
kw = strings.ToUpper(kw)
// SQLite's UPPER function only transforms ASCII letters.
if setting.Database.UseSQLite3 {
kw = util.ToUpperASCII(kw)
} else {
kw = strings.ToUpper(kw)
}
cond := builder.And(
repoCond,
builder.Or(

View file

@ -170,3 +170,14 @@ func CryptoRandomBytes(length int64) ([]byte, error) {
_, err := rand.Read(buf)
return buf, err
}
// ToUpperASCII returns a copy of s in which every ASCII lower-case letter
// ('a'..'z') is replaced by its upper-case counterpart. All other bytes,
// including those that make up multi-byte UTF-8 sequences, are copied
// through unchanged.
func ToUpperASCII(s string) string {
	// Distance between a lower-case ASCII letter and its upper-case form.
	const caseGap = 'a' - 'A'

	out := make([]byte, len(s))
	for i := 0; i < len(s); i++ {
		ch := s[i]
		if ch >= 'a' && ch <= 'z' {
			ch -= caseGap
		}
		out[i] = ch
	}
	return string(out)
}

View file

@ -186,3 +186,37 @@ func Test_OptionalBool(t *testing.T) {
assert.Equal(t, OptionalBoolTrue, OptionalBoolParse("t"))
assert.Equal(t, OptionalBoolTrue, OptionalBoolParse("True"))
}
// StringTest is a test case for any function which accepts and returns a
// single string.
type StringTest struct {
	in  string // value passed to the function under test
	out string // value the function is expected to return
}
// upperTests pairs input strings with the result expected from ToUpperASCII.
// The cases cover empty, ASCII-only and mixed inputs; in every expected value
// the non-ASCII characters are identical to those of the input.
var upperTests = []StringTest{
	{in: "", out: ""},
	{in: "ONLYUPPER", out: "ONLYUPPER"},
	{in: "abc", out: "ABC"},
	{in: "AbC123", out: "ABC123"},
	{in: "azAZ09_", out: "AZAZ09_"},
	{in: "longStrinGwitHmixofsmaLLandcAps", out: "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
	{in: "long\u0250string\u0250with\u0250nonascii\u2C6Fchars", out: "LONG\u0250STRING\u0250WITH\u0250NONASCII\u2C6FCHARS"},
	{in: "\u0250\u0250\u0250\u0250\u0250", out: "\u0250\u0250\u0250\u0250\u0250"},
	{in: "a\u0080\U0010FFFF", out: "A\u0080\U0010FFFF"},
	{in: "lél", out: "LéL"},
}
// TestToUpperASCII runs ToUpperASCII on the input of every case in upperTests
// and compares the result with the case's expected output.
func TestToUpperASCII(t *testing.T) {
	for _, tc := range upperTests {
		// assert.Equal takes the expected value first and the actual value
		// second; keeping that order makes failure output label them correctly.
		assert.Equal(t, tc.out, ToUpperASCII(tc.in))
	}
}
// BenchmarkToUpper measures ToUpperASCII with one sub-benchmark per entry of
// upperTests; each sub-benchmark is named after its input string.
func BenchmarkToUpper(b *testing.B) {
	for _, tc := range upperTests {
		input := tc.in
		b.Run(input, func(sub *testing.B) {
			for iter := 0; iter < sub.N; iter++ {
				ToUpperASCII(input)
			}
		})
	}
}