From 477ef462510d69a8a31e008fb6e64059dc6cc148 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Thu, 21 Feb 2019 13:01:28 +0800 Subject: [PATCH] Add more tests and docs for issue indexer, add db indexer type for searching from database (#6144) * add more tests and docs for issue indexer, add db indexer type for searching from database * fix typo * fix typo * fix lint * improve docs --- custom/conf/app.ini.sample | 12 ++++- .../doc/advanced/config-cheat-sheet.en-us.md | 5 +++ .../doc/advanced/config-cheat-sheet.zh-cn.md | 14 ++++++ models/issue.go | 34 ++++++++++++++ models/issue_test.go | 25 +++++++++++ modules/indexer/issues/db.go | 45 +++++++++++++++++++ modules/indexer/issues/indexer.go | 11 ++++- modules/indexer/issues/indexer_test.go | 4 ++ modules/indexer/issues/queue.go | 16 ++++++- modules/indexer/issues/queue_channel.go | 8 +++- modules/indexer/issues/queue_disk.go | 10 ++--- modules/setting/indexer.go | 1 + 12 files changed, 174 insertions(+), 11 deletions(-) create mode 100644 modules/indexer/issues/db.go diff --git a/custom/conf/app.ini.sample b/custom/conf/app.ini.sample index 9b1712b02..9323e5a0d 100644 --- a/custom/conf/app.ini.sample +++ b/custom/conf/app.ini.sample @@ -253,9 +253,19 @@ DB_RETRIES = 10 ; Backoff time per DB retry (time.Duration) DB_RETRY_BACKOFF = 3s - [indexer] +; Issue indexer type, currently supported: bleve or db, default is bleve +ISSUE_INDEXER_TYPE = bleve +; Issue indexer storage path, available when ISSUE_INDEXER_TYPE is bleve ISSUE_INDEXER_PATH = indexers/issues.bleve +; Issue indexer queue, currently supported: channel or levelqueue, default is levelqueue +ISSUE_INDEXER_QUEUE_TYPE = levelqueue +; When ISSUE_INDEXER_QUEUE_TYPE is levelqueue, this is the path where the queue will be saved, +; default is indexers/issues.queue +ISSUE_INDEXER_QUEUE_DIR = indexers/issues.queue +; Number of queued items to index in one batch, default is 20 +ISSUE_INDEXER_QUEUE_BATCH_NUMBER = 20 + ; repo indexer by default disabled, since it uses a lot of disk space 
REPO_INDEXER_ENABLED = false REPO_INDEXER_PATH = indexers/repos.bleve diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index b7708084e..a7e8f7673 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -154,7 +154,12 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`. ## Indexer (`indexer`) +- `ISSUE_INDEXER_TYPE`: **bleve**: Issue indexer type, currently supported: bleve or db. If it is db, the issue indexer settings below are ignored. - `ISSUE_INDEXER_PATH`: **indexers/issues.bleve**: Index file used for issue search. +- `ISSUE_INDEXER_QUEUE_TYPE`: **levelqueue**: Issue indexer queue, currently supported: channel or levelqueue. +- `ISSUE_INDEXER_QUEUE_DIR`: **indexers/issues.queue**: When `ISSUE_INDEXER_QUEUE_TYPE` is levelqueue, this is the path where the queue will be saved. +- `ISSUE_INDEXER_QUEUE_BATCH_NUMBER`: **20**: Number of queued items to index in one batch. + - `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space). - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. - `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request. 
diff --git a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md index 1181b8b41..b1b2f11f2 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md +++ b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md @@ -82,6 +82,20 @@ menu: - `PATH`: Tidb 或者 SQLite3 数据文件存放路径。 - `LOG_SQL`: **true**: 显示生成的SQL,默认为真。 + +## Indexer (`indexer`) + +- `ISSUE_INDEXER_TYPE`: **bleve**: 工单索引类型,当前支持 `bleve` 或 `db`,当为 `db` 时其它工单索引项可不用设置。 +- `ISSUE_INDEXER_PATH`: **indexers/issues.bleve**: 工单索引文件存放路径,当索引类型为 `bleve` 时有效。 +- `ISSUE_INDEXER_QUEUE_TYPE`: **levelqueue**: 工单索引队列类型,当前支持 `channel` 或 `levelqueue`。 +- `ISSUE_INDEXER_QUEUE_DIR`: **indexers/issues.queue**: 当 `ISSUE_INDEXER_QUEUE_TYPE` 为 `levelqueue` 时,保存索引队列的磁盘路径。 +- `ISSUE_INDEXER_QUEUE_BATCH_NUMBER`: **20**: 队列处理中批量提交数量。 + +- `REPO_INDEXER_ENABLED`: **false**: 是否启用代码搜索(启用后会占用比较大的磁盘空间)。 +- `REPO_INDEXER_PATH`: **indexers/repos.bleve**: 用于代码搜索的索引文件路径。 +- `UPDATE_BUFFER_LEN`: **20**: 代码索引请求的缓冲区长度。 +- `MAX_FILE_SIZE`: **1048576**: 进行解析的源代码文件的最大长度,小于该值时才会索引。 + ## Security (`security`) - `INSTALL_LOCK`: 是否允许运行安装向导,(跟管理员账号有关,十分重要)。 diff --git a/models/issue.go b/models/issue.go index 8de26c2b1..503ff32b8 100644 --- a/models/issue.go +++ b/models/issue.go @@ -1684,6 +1684,40 @@ func GetRepoIssueStats(repoID, uid int64, filterMode int, isPull bool) (numOpen return openResult, closedResult } +// SearchIssueIDsByKeyword search issues on database +func SearchIssueIDsByKeyword(kw string, repoID int64, limit, start int) (int64, []int64, error) { + var repoCond = builder.Eq{"repo_id": repoID} + var subQuery = builder.Select("id").From("issue").Where(repoCond) + var cond = builder.And( + repoCond, + builder.Or( + builder.Like{"name", kw}, + builder.Like{"content", kw}, + builder.In("id", builder.Select("issue_id"). + From("comment"). 
+ Where(builder.And( + builder.Eq{"type": CommentTypeComment}, + builder.In("issue_id", subQuery), + builder.Like{"content", kw}, + )), + ), + ), + ) + + var ids = make([]int64, 0, limit) + err := x.Distinct("id").Table("issue").Where(cond).Limit(limit, start).Find(&ids) + if err != nil { + return 0, nil, err + } + + total, err := x.Distinct("id").Table("issue").Where(cond).Count() + if err != nil { + return 0, nil, err + } + + return total, ids, nil +} + func updateIssue(e Engine, issue *Issue) error { _, err := e.ID(issue.ID).AllCols().Update(issue) if err != nil { diff --git a/models/issue_test.go b/models/issue_test.go index cec7e8b47..1a7e45ae0 100644 --- a/models/issue_test.go +++ b/models/issue_test.go @@ -295,3 +295,28 @@ func TestIssue_loadTotalTimes(t *testing.T) { assert.NoError(t, ms.loadTotalTimes(x)) assert.Equal(t, int64(3662), ms.TotalTrackedTime) } + +func TestIssue_SearchIssueIDsByKeyword(t *testing.T) { + assert.NoError(t, PrepareTestDatabase()) + + total, ids, err := SearchIssueIDsByKeyword("issue2", 1, 10, 0) + assert.NoError(t, err) + assert.EqualValues(t, 1, total) + assert.EqualValues(t, []int64{2}, ids) + + total, ids, err = SearchIssueIDsByKeyword("first", 1, 10, 0) + assert.NoError(t, err) + assert.EqualValues(t, 1, total) + assert.EqualValues(t, []int64{1}, ids) + + total, ids, err = SearchIssueIDsByKeyword("for", 1, 10, 0) + assert.NoError(t, err) + assert.EqualValues(t, 4, total) + assert.EqualValues(t, []int64{1, 2, 3, 5}, ids) + + // issue1's comment id 2 + total, ids, err = SearchIssueIDsByKeyword("good", 1, 10, 0) + assert.NoError(t, err) + assert.EqualValues(t, 1, total) + assert.EqualValues(t, []int64{1}, ids) +} diff --git a/modules/indexer/issues/db.go b/modules/indexer/issues/db.go new file mode 100644 index 000000000..6e7f0c1a6 --- /dev/null +++ b/modules/indexer/issues/db.go @@ -0,0 +1,45 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. 
+// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package issues + +import "code.gitea.io/gitea/models" + +// DBIndexer implements the Indexer interface using the database's LIKE search +type DBIndexer struct { +} + +// Init dummy function +func (db *DBIndexer) Init() (bool, error) { + return false, nil +} + +// Index dummy function +func (db *DBIndexer) Index(issue []*IndexerData) error { + return nil +} + +// Delete dummy function +func (db *DBIndexer) Delete(ids ...int64) error { + return nil +} + +// Search searches issues by keyword in the database and returns the matching issue IDs +func (db *DBIndexer) Search(kw string, repoID int64, limit, start int) (*SearchResult, error) { + total, ids, err := models.SearchIssueIDsByKeyword(kw, repoID, limit, start) + if err != nil { + return nil, err + } + var result = SearchResult{ + Total: total, + Hits: make([]Match, 0, limit), + } + for _, id := range ids { + result.Hits = append(result.Hits, Match{ + ID: id, + RepoID: repoID, + }) + } + return &result, nil +} diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index 41af5c36b..c5d6d05a6 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -33,7 +33,8 @@ type Match struct { // SearchResult represents search results type SearchResult struct { - Hits []Match + Total int64 + Hits []Match } // Indexer defines an inteface to indexer issues contents @@ -54,6 +55,7 @@ var ( // all issue index done. 
func InitIssueIndexer(syncReindex bool) error { var populate bool + var dummyQueue bool switch setting.Indexer.IssueType { case "bleve": issueIndexer = NewBleveIndexer(setting.Indexer.IssuePath) @@ -62,10 +64,17 @@ func InitIssueIndexer(syncReindex bool) error { return err } populate = !exist + case "db": + issueIndexer = &DBIndexer{} + dummyQueue = true default: return fmt.Errorf("unknow issue indexer type: %s", setting.Indexer.IssueType) } + if dummyQueue { + return nil + } + var err error switch setting.Indexer.IssueIndexerQueueType { case setting.LevelQueueType: diff --git a/modules/indexer/issues/indexer_test.go b/modules/indexer/issues/indexer_test.go index 1b6bdec53..66087e3e6 100644 --- a/modules/indexer/issues/indexer_test.go +++ b/modules/indexer/issues/indexer_test.go @@ -48,4 +48,8 @@ func TestSearchIssues(t *testing.T) { ids, err = SearchIssuesByKeyword(1, "for") assert.NoError(t, err) assert.EqualValues(t, []int64{1, 2, 3, 5}, ids) + + ids, err = SearchIssuesByKeyword(1, "good") + assert.NoError(t, err) + assert.EqualValues(t, []int64{1}, ids) } diff --git a/modules/indexer/issues/queue.go b/modules/indexer/issues/queue.go index 6f4ee4c13..f93e5c47a 100644 --- a/modules/indexer/issues/queue.go +++ b/modules/indexer/issues/queue.go @@ -7,5 +7,19 @@ package issues // Queue defines an interface to save an issue indexer queue type Queue interface { Run() error - Push(*IndexerData) + Push(*IndexerData) error +} + +// DummyQueue represents an empty queue +type DummyQueue struct { +} + +// Run starts to run the queue +func (b *DummyQueue) Run() error { + return nil +} + +// Push pushes data to indexer +func (b *DummyQueue) Push(*IndexerData) error { + return nil } diff --git a/modules/indexer/issues/queue_channel.go b/modules/indexer/issues/queue_channel.go index 99a90ad49..bd92f6b7b 100644 --- a/modules/indexer/issues/queue_channel.go +++ b/modules/indexer/issues/queue_channel.go @@ -33,6 +33,11 @@ func (c *ChannelQueue) Run() error { for { select { case 
data := <-c.queue: + if data.IsDelete { + c.indexer.Delete(data.IDs...) + continue + } + datas = append(datas, data) if len(datas) >= c.batchNumber { c.indexer.Index(datas) @@ -51,6 +56,7 @@ func (c *ChannelQueue) Run() error { } // Push will push the indexer data to queue -func (c *ChannelQueue) Push(data *IndexerData) { +func (c *ChannelQueue) Push(data *IndexerData) error { c.queue <- data + return nil } diff --git a/modules/indexer/issues/queue_disk.go b/modules/indexer/issues/queue_disk.go index 97bacdf99..be6867f9c 100644 --- a/modules/indexer/issues/queue_disk.go +++ b/modules/indexer/issues/queue_disk.go @@ -94,14 +94,10 @@ func (l *LevelQueue) Run() error { } // Push will push the indexer data to queue -func (l *LevelQueue) Push(data *IndexerData) { +func (l *LevelQueue) Push(data *IndexerData) error { bs, err := json.Marshal(data) if err != nil { - log.Error(4, "Marshal: %v", err) - return - } - err = l.queue.LPush(bs) - if err != nil { - log.Error(4, "LPush: %v", err) + return err } + return l.queue.LPush(bs) } diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go index 245ebb049..0e09ed2b8 100644 --- a/modules/setting/indexer.go +++ b/modules/setting/indexer.go @@ -38,6 +38,7 @@ var ( func newIndexerService() { sec := Cfg.Section("indexer") + Indexer.IssueType = sec.Key("ISSUE_INDEXER_TYPE").MustString("bleve") Indexer.IssuePath = sec.Key("ISSUE_INDEXER_PATH").MustString(path.Join(AppDataPath, "indexers/issues.bleve")) if !filepath.IsAbs(Indexer.IssuePath) { Indexer.IssuePath = path.Join(AppWorkPath, Indexer.IssuePath)