From 00370f17a4f7cba31e8a2617ebdbff139739d6d4 Mon Sep 17 00:00:00 2001
From: dark-angel <70754989+inferno-umar@users.noreply.github.com>
Date: Wed, 7 Feb 2024 14:27:16 +0530
Subject: [PATCH] [gitea] fix: Elasticsearch: Request Entity Too Large #28117
 (#29062)

Fix for Gitea sending everything to Elasticsearch for indexing as a single,
unbatched request, as reported in #28117.

This issue occurred in large repositories when Gitea tried to index their
code with Elasticsearch.

I've applied the necessary changes, which take the batch length from the
config below (app.ini)

```
[queue.code_indexer]
BATCH_LENGTH=
```

and send all requests to Elasticsearch in chunks of the configured size.

(cherry picked from commit 5c0fc9087211f01375f208d679a1e6de0685320c)
---
 .../indexer/code/elasticsearch/elasticsearch.go  | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go
index 2fadbfeb06..0f70f13485 100644
--- a/modules/indexer/code/elasticsearch/elasticsearch.go
+++ b/modules/indexer/code/elasticsearch/elasticsearch.go
@@ -180,11 +180,17 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st
 	}
 
 	if len(reqs) > 0 {
-		_, err := b.inner.Client.Bulk().
-			Index(b.inner.VersionedIndexName()).
-			Add(reqs...).
-			Do(ctx)
-		return err
+		esBatchSize := 50
+
+		for i := 0; i < len(reqs); i += esBatchSize {
+			_, err := b.inner.Client.Bulk().
+				Index(b.inner.VersionedIndexName()).
+				Add(reqs[i:min(i+esBatchSize, len(reqs))]...).
+				Do(ctx)
+			if err != nil {
+				return err
+			}
+		}
 	}
 	return nil
 }
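
For reference, below is a minimal, self-contained sketch (not part of the patch) of the same chunking pattern the hunk applies to the bulk requests, shown on a plain string slice. The names `sendBatch` and `batchSize` are hypothetical stand-ins; `min` on integers is the Go 1.21 builtin that the patched code also relies on.

```go
package main

import "fmt"

// sendBatch stands in for the Bulk().Index(...).Add(chunk...).Do(ctx) call
// made in the patched Index method; here it only reports the chunk size.
func sendBatch(chunk []string) error {
	fmt.Printf("sending %d requests\n", len(chunk))
	return nil
}

func main() {
	// Build a fake request list larger than one batch.
	reqs := make([]string, 0, 120)
	for i := 0; i < 120; i++ {
		reqs = append(reqs, fmt.Sprintf("req-%d", i))
	}

	batchSize := 50 // mirrors esBatchSize in the patch
	for i := 0; i < len(reqs); i += batchSize {
		// min clamps the upper bound so the final, shorter chunk is still sent.
		end := min(i+batchSize, len(reqs))
		if err := sendBatch(reqs[i:end]); err != nil {
			fmt.Println("bulk request failed:", err)
			return
		}
	}
}
```

With 120 requests and a batch size of 50, this sends chunks of 50, 50, and 20; returning on the first error mirrors the patch's behaviour of aborting as soon as a bulk call fails.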