From 460a2b0edffe71d9e64633beaa1071fcf4a33369 Mon Sep 17 00:00:00 2001 From: FuXiaoHei Date: Wed, 6 Sep 2023 15:41:06 +0800 Subject: [PATCH] Artifacts retention and auto clean up (#26131) Currently, Artifact does not have an expiration and automatic cleanup mechanism, and this feature needs to be added. It contains the following key points: - [x] add global artifact retention days option in config file. Default value is 90 days. - [x] add cron task to clean up expired artifacts. It should run once a day. - [x] support custom retention period from `retention-days: 5` in `upload-artifact@v3`. - [x] artifacts link in actions view should be non-clickable text when expired. --- custom/conf/app.example.ini | 2 + .../config-cheat-sheet.en-us.md | 7 ++++ models/actions/artifact.go | 38 ++++++++++++----- models/migrations/migrations.go | 2 + models/migrations/v1_21/v274.go | 36 ++++++++++++++++ modules/setting/actions.go | 14 +++++-- options/locale/locale_en-US.ini | 1 + routers/api/actions/artifacts.go | 28 +++++++++++-- routers/api/actions/artifacts_chunks.go | 2 +- routers/web/repo/actions/view.go | 14 +++++-- services/actions/cleanup.go | 42 +++++++++++++++++++ services/cron/tasks_basic.go | 18 ++++++++ .../integration/api_actions_artifact_test.go | 42 ++++++++++++++++++- 13 files changed, 221 insertions(+), 25 deletions(-) create mode 100644 models/migrations/v1_21/v274.go create mode 100644 services/actions/cleanup.go diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index dd673190a..a2fab2fd5 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -2564,6 +2564,8 @@ LEVEL = Info ;; ;; Default platform to get action plugins, `github` for `https://github.com`, `self` for the current Gitea instance. 
;DEFAULT_ACTIONS_URL = github +;; Default artifact retention time in days, default is 90 days +;ARTIFACT_RETENTION_DAYS = 90 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/docs/content/administration/config-cheat-sheet.en-us.md b/docs/content/administration/config-cheat-sheet.en-us.md index 4158f14cb..7e8befb8b 100644 --- a/docs/content/administration/config-cheat-sheet.en-us.md +++ b/docs/content/administration/config-cheat-sheet.en-us.md @@ -955,6 +955,12 @@ Default templates for project boards: - `SCHEDULE`: **@midnight** : Interval as a duration between each synchronization, it will always attempt synchronization when the instance starts. - `UPDATE_EXISTING`: **true**: Create new users, update existing user data and disable users that are not in external source anymore (default) or only create new users if UPDATE_EXISTING is set to false. +#### Cron - Cleanup Expired Actions Assets (`cron.cleanup_actions`) + +- `ENABLED`: **true**: Enable cleanup expired actions assets job. +- `RUN_AT_START`: **true**: Run job at start time (if ENABLED). +- `SCHEDULE`: **@midnight** : Cron syntax for the job. + ### Extended cron tasks (not enabled by default) #### Cron - Garbage collect all repositories (`cron.git_gc_repos`) @@ -1381,6 +1387,7 @@ PROXY_HOSTS = *.github.com - `DEFAULT_ACTIONS_URL`: **github**: Default platform to get action plugins, `github` for `https://github.com`, `self` for the current Gitea instance. - `STORAGE_TYPE`: **local**: Storage type for actions logs, `local` for local disk or `minio` for s3 compatible object storage service, default is `local` or other name defined with `[storage.xxx]` - `MINIO_BASE_PATH`: **actions_log/**: Minio base path on the bucket only available when STORAGE_TYPE is `minio` +- `ARTIFACT_RETENTION_DAYS`: **90**: Number of days to keep artifacts. Values less than or equal to 0 fall back to the default of 90 days. 
`DEFAULT_ACTIONS_URL` indicates where the Gitea Actions runners should find the actions with relative path. For example, `uses: actions/checkout@v3` means `https://github.com/actions/checkout@v3` since the value of `DEFAULT_ACTIONS_URL` is `github`. diff --git a/models/actions/artifact.go b/models/actions/artifact.go index 800dcd0d5..849a90fd1 100644 --- a/models/actions/artifact.go +++ b/models/actions/artifact.go @@ -9,19 +9,21 @@ package actions import ( "context" "errors" + "time" "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" ) +// ArtifactStatus is the status of an artifact, uploading, expired or need-delete +type ArtifactStatus int64 + const ( - // ArtifactStatusUploadPending is the status of an artifact upload that is pending - ArtifactStatusUploadPending = 1 - // ArtifactStatusUploadConfirmed is the status of an artifact upload that is confirmed - ArtifactStatusUploadConfirmed = 2 - // ArtifactStatusUploadError is the status of an artifact upload that is errored - ArtifactStatusUploadError = 3 + ArtifactStatusUploadPending ArtifactStatus = iota + 1 // 1, ArtifactStatusUploadPending is the status of an artifact upload that is pending + ArtifactStatusUploadConfirmed // 2, ArtifactStatusUploadConfirmed is the status of an artifact upload that is confirmed + ArtifactStatusUploadError // 3, ArtifactStatusUploadError is the status of an artifact upload that is errored + ArtifactStatusExpired // 4, ArtifactStatusExpired is the status of an artifact that is expired ) func init() { @@ -45,9 +47,10 @@ type ActionArtifact struct { Status int64 `xorm:"index"` // The status of the artifact, uploading, expired or need-delete CreatedUnix timeutil.TimeStamp `xorm:"created"` UpdatedUnix timeutil.TimeStamp `xorm:"updated index"` + ExpiredUnix timeutil.TimeStamp `xorm:"index"` // The time when the artifact will be expired } -func CreateArtifact(ctx context.Context, t *ActionTask, artifactName, artifactPath string) 
(*ActionArtifact, error) { +func CreateArtifact(ctx context.Context, t *ActionTask, artifactName, artifactPath string, expiredDays int64) (*ActionArtifact, error) { if err := t.LoadJob(ctx); err != nil { return nil, err } @@ -61,7 +64,8 @@ func CreateArtifact(ctx context.Context, t *ActionTask, artifactName, artifactPa RepoID: t.RepoID, OwnerID: t.OwnerID, CommitSHA: t.CommitSHA, - Status: ArtifactStatusUploadPending, + Status: int64(ArtifactStatusUploadPending), + ExpiredUnix: timeutil.TimeStamp(time.Now().Unix() + 3600*24*expiredDays), } if _, err := db.GetEngine(ctx).Insert(artifact); err != nil { return nil, err @@ -126,15 +130,16 @@ func ListUploadedArtifactsByRunID(ctx context.Context, runID int64) ([]*ActionAr type ActionArtifactMeta struct { ArtifactName string FileSize int64 + Status int64 } // ListUploadedArtifactsMeta returns all uploaded artifacts meta of a run func ListUploadedArtifactsMeta(ctx context.Context, runID int64) ([]*ActionArtifactMeta, error) { arts := make([]*ActionArtifactMeta, 0, 10) return arts, db.GetEngine(ctx).Table("action_artifact"). - Where("run_id=? AND status=?", runID, ArtifactStatusUploadConfirmed). + Where("run_id=? AND (status=? OR status=?)", runID, ArtifactStatusUploadConfirmed, ArtifactStatusExpired). GroupBy("artifact_name"). - Select("artifact_name, sum(file_size) as file_size"). + Select("artifact_name, sum(file_size) as file_size, max(status) as status"). Find(&arts) } @@ -149,3 +154,16 @@ func ListArtifactsByRunIDAndName(ctx context.Context, runID int64, name string) arts := make([]*ActionArtifact, 0, 10) return arts, db.GetEngine(ctx).Where("run_id=? AND artifact_name=?", runID, name).Find(&arts) } + +// ListNeedExpiredArtifacts returns all need expired artifacts but not deleted +func ListNeedExpiredArtifacts(ctx context.Context) ([]*ActionArtifact, error) { + arts := make([]*ActionArtifact, 0, 10) + return arts, db.GetEngine(ctx). + Where("expired_unix < ? 
AND status = ?", timeutil.TimeStamp(time.Now().Unix()), ArtifactStatusUploadConfirmed).Find(&arts) +} + +// SetArtifactExpired sets an artifact to expired +func SetArtifactExpired(ctx context.Context, artifactID int64) error { + _, err := db.GetEngine(ctx).Where("id=? AND status = ?", artifactID, ArtifactStatusUploadConfirmed).Cols("status").Update(&ActionArtifact{Status: int64(ArtifactStatusExpired)}) + return err +} diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go index 9f4acda23..40df1cd62 100644 --- a/models/migrations/migrations.go +++ b/models/migrations/migrations.go @@ -528,6 +528,8 @@ var migrations = []Migration{ NewMigration("Add Version to ActionRun table", v1_21.AddVersionToActionRunTable), // v273 -> v274 NewMigration("Add Action Schedule Table", v1_21.AddActionScheduleTable), + // v274 -> v275 + NewMigration("Add Actions artifacts expiration date", v1_21.AddExpiredUnixColumnInActionArtifactTable), } // GetCurrentDBVersion returns the current db version diff --git a/models/migrations/v1_21/v274.go b/models/migrations/v1_21/v274.go new file mode 100644 index 000000000..df5994f15 --- /dev/null +++ b/models/migrations/v1_21/v274.go @@ -0,0 +1,36 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package v1_21 //nolint +import ( + "time" + + "code.gitea.io/gitea/modules/timeutil" + + "xorm.io/xorm" +) + +func AddExpiredUnixColumnInActionArtifactTable(x *xorm.Engine) error { + type ActionArtifact struct { + ExpiredUnix timeutil.TimeStamp `xorm:"index"` // time when the artifact will be expired + } + if err := x.Sync(new(ActionArtifact)); err != nil { + return err + } + return updateArtifactsExpiredUnixTo90Days(x) +} + +func updateArtifactsExpiredUnixTo90Days(x *xorm.Engine) error { + sess := x.NewSession() + defer sess.Close() + + if err := sess.Begin(); err != nil { + return err + } + expiredTime := time.Now().AddDate(0, 0, 90).Unix() + if _, err := sess.Exec(`UPDATE action_artifact SET expired_unix=? WHERE status='2' AND expired_unix is NULL`, expiredTime); err != nil { + return err + } + + return sess.Commit() +} diff --git a/modules/setting/actions.go b/modules/setting/actions.go index a13330dcd..bfc502c0c 100644 --- a/modules/setting/actions.go +++ b/modules/setting/actions.go @@ -13,10 +13,11 @@ import ( // Actions settings var ( Actions = struct { - LogStorage *Storage // how the created logs should be stored - ArtifactStorage *Storage // how the created artifacts should be stored - Enabled bool - DefaultActionsURL defaultActionsURL `ini:"DEFAULT_ACTIONS_URL"` + LogStorage *Storage // how the created logs should be stored + ArtifactStorage *Storage // how the created artifacts should be stored + ArtifactRetentionDays int64 `ini:"ARTIFACT_RETENTION_DAYS"` + Enabled bool + DefaultActionsURL defaultActionsURL `ini:"DEFAULT_ACTIONS_URL"` }{ Enabled: false, DefaultActionsURL: defaultActionsURLGitHub, @@ -76,5 +77,10 @@ func loadActionsFrom(rootCfg ConfigProvider) error { Actions.ArtifactStorage, err = getStorage(rootCfg, "actions_artifacts", "", actionsSec) + // default to 90 days in Github Actions + if Actions.ArtifactRetentionDays <= 0 { + Actions.ArtifactRetentionDays = 90 + } + return err } diff --git 
a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index bdb9b0c9d..4f5f0383e 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -2731,6 +2731,7 @@ dashboard.reinit_missing_repos = Reinitialize all missing Git repositories for w dashboard.sync_external_users = Synchronize external user data dashboard.cleanup_hook_task_table = Cleanup hook_task table dashboard.cleanup_packages = Cleanup expired packages +dashboard.cleanup_actions = Cleanup actions expired logs and artifacts dashboard.server_uptime = Server Uptime dashboard.current_goroutine = Current Goroutines dashboard.current_memory_usage = Current Memory Usage diff --git a/routers/api/actions/artifacts.go b/routers/api/actions/artifacts.go index 946ea11e7..c45dc667a 100644 --- a/routers/api/actions/artifacts.go +++ b/routers/api/actions/artifacts.go @@ -170,8 +170,9 @@ func (ar artifactRoutes) buildArtifactURL(runID int64, artifactHash, suffix stri } type getUploadArtifactRequest struct { - Type string - Name string + Type string + Name string + RetentionDays int64 } type getUploadArtifactResponse struct { @@ -192,10 +193,16 @@ func (ar artifactRoutes) getUploadArtifactURL(ctx *ArtifactContext) { return } + // set retention days + retentionQuery := "" + if req.RetentionDays > 0 { + retentionQuery = fmt.Sprintf("?retentionDays=%d", req.RetentionDays) + } + // use md5(artifact_name) to create upload url artifactHash := fmt.Sprintf("%x", md5.Sum([]byte(req.Name))) resp := getUploadArtifactResponse{ - FileContainerResourceURL: ar.buildArtifactURL(runID, artifactHash, "upload"), + FileContainerResourceURL: ar.buildArtifactURL(runID, artifactHash, "upload"+retentionQuery), } log.Debug("[artifact] get upload url: %s", resp.FileContainerResourceURL) ctx.JSON(http.StatusOK, resp) @@ -219,8 +226,21 @@ func (ar artifactRoutes) uploadArtifact(ctx *ArtifactContext) { return } + // get artifact retention days + expiredDays := setting.Actions.ArtifactRetentionDays + if 
queryRetentionDays := ctx.Req.URL.Query().Get("retentionDays"); queryRetentionDays != "" { + expiredDays, err = strconv.ParseInt(queryRetentionDays, 10, 64) + if err != nil { + log.Error("Error parse retention days: %v", err) + ctx.Error(http.StatusBadRequest, "Error parse retention days") + return + } + } + log.Debug("[artifact] upload chunk, name: %s, path: %s, size: %d, retention days: %d", + artifactName, artifactPath, fileRealTotalSize, expiredDays) + // create or get artifact with name and path - artifact, err := actions.CreateArtifact(ctx, task, artifactName, artifactPath) + artifact, err := actions.CreateArtifact(ctx, task, artifactName, artifactPath, expiredDays) if err != nil { log.Error("Error create or get artifact: %v", err) ctx.Error(http.StatusInternalServerError, "Error create or get artifact") diff --git a/routers/api/actions/artifacts_chunks.go b/routers/api/actions/artifacts_chunks.go index 30d31b4d7..458d671cf 100644 --- a/routers/api/actions/artifacts_chunks.go +++ b/routers/api/actions/artifacts_chunks.go @@ -179,7 +179,7 @@ func mergeChunksForArtifact(ctx *ArtifactContext, chunks []*chunkFileItem, st st // save storage path to artifact log.Debug("[artifact] merge chunks to artifact: %d, %s", artifact.ID, storagePath) artifact.StoragePath = storagePath - artifact.Status = actions.ArtifactStatusUploadConfirmed + artifact.Status = int64(actions.ArtifactStatusUploadConfirmed) if err := actions.UpdateArtifactByID(ctx, artifact.ID, artifact); err != nil { return fmt.Errorf("update artifact error: %v", err) } diff --git a/routers/web/repo/actions/view.go b/routers/web/repo/actions/view.go index e4ca6a719..a9c285830 100644 --- a/routers/web/repo/actions/view.go +++ b/routers/web/repo/actions/view.go @@ -486,8 +486,9 @@ type ArtifactsViewResponse struct { } type ArtifactsViewItem struct { - Name string `json:"name"` - Size int64 `json:"size"` + Name string `json:"name"` + Size int64 `json:"size"` + Status string `json:"status"` } func 
ArtifactsView(ctx *context_module.Context) { @@ -510,9 +511,14 @@ func ArtifactsView(ctx *context_module.Context) { Artifacts: make([]*ArtifactsViewItem, 0, len(artifacts)), } for _, art := range artifacts { + status := "completed" + if art.Status == int64(actions_model.ArtifactStatusExpired) { + status = "expired" + } artifactsResponse.Artifacts = append(artifactsResponse.Artifacts, &ArtifactsViewItem{ - Name: art.ArtifactName, - Size: art.FileSize, + Name: art.ArtifactName, + Size: art.FileSize, + Status: status, }) } ctx.JSON(http.StatusOK, artifactsResponse) diff --git a/services/actions/cleanup.go b/services/actions/cleanup.go new file mode 100644 index 000000000..785eeb583 --- /dev/null +++ b/services/actions/cleanup.go @@ -0,0 +1,42 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package actions + +import ( + "context" + "time" + + "code.gitea.io/gitea/models/actions" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/storage" +) + +// Cleanup removes expired actions logs, data and artifacts +func Cleanup(taskCtx context.Context, olderThan time.Duration) error { + // TODO: clean up expired actions logs + + // clean up expired artifacts + return CleanupArtifacts(taskCtx) +} + +// CleanupArtifacts removes expired artifacts and set records expired status +func CleanupArtifacts(taskCtx context.Context) error { + artifacts, err := actions.ListNeedExpiredArtifacts(taskCtx) + if err != nil { + return err + } + log.Info("Found %d expired artifacts", len(artifacts)) + for _, artifact := range artifacts { + if err := storage.ActionsArtifacts.Delete(artifact.StoragePath); err != nil { + log.Error("Cannot delete artifact %d: %v", artifact.ID, err) + continue + } + if err := actions.SetArtifactExpired(taskCtx, artifact.ID); err != nil { + log.Error("Cannot set artifact %d expired: %v", artifact.ID, err) + continue + } + log.Info("Artifact %d set expired", artifact.ID) + } + return nil +} diff --git 
a/services/cron/tasks_basic.go b/services/cron/tasks_basic.go index 2a213ae51..3869382d2 100644 --- a/services/cron/tasks_basic.go +++ b/services/cron/tasks_basic.go @@ -13,6 +13,7 @@ import ( "code.gitea.io/gitea/models/webhook" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/services/actions" "code.gitea.io/gitea/services/auth" "code.gitea.io/gitea/services/migrations" mirror_service "code.gitea.io/gitea/services/mirror" @@ -156,6 +157,20 @@ func registerCleanupPackages() { }) } +func registerActionsCleanup() { + RegisterTaskFatal("cleanup_actions", &OlderThanConfig{ + BaseConfig: BaseConfig{ + Enabled: true, + RunAtStart: true, + Schedule: "@midnight", + }, + OlderThan: 24 * time.Hour, + }, func(ctx context.Context, _ *user_model.User, config Config) error { + realConfig := config.(*OlderThanConfig) + return actions.Cleanup(ctx, realConfig.OlderThan) + }) +} + func initBasicTasks() { if setting.Mirror.Enabled { registerUpdateMirrorTask() @@ -172,4 +187,7 @@ func initBasicTasks() { if setting.Packages.Enabled { registerCleanupPackages() } + if setting.Actions.Enabled { + registerActionsCleanup() + } } diff --git a/tests/integration/api_actions_artifact_test.go b/tests/integration/api_actions_artifact_test.go index 6590ca667..101bedde0 100644 --- a/tests/integration/api_actions_artifact_test.go +++ b/tests/integration/api_actions_artifact_test.go @@ -18,8 +18,9 @@ type uploadArtifactResponse struct { } type getUploadArtifactRequest struct { - Type string - Name string + Type string + Name string + RetentionDays int64 } func TestActionsArtifactUploadSingleFile(t *testing.T) { @@ -252,3 +253,40 @@ func TestActionsArtifactDownloadMultiFiles(t *testing.T) { assert.Equal(t, resp.Body.String(), body) } } + +func TestActionsArtifactUploadWithRetentionDays(t *testing.T) { + defer tests.PrepareTestEnv(t)() + + // acquire artifact upload url + req := NewRequestWithJSON(t, "POST", 
"/api/actions_pipeline/_apis/pipelines/workflows/791/artifacts", getUploadArtifactRequest{ + Type: "actions_storage", + Name: "artifact-retention-days", + RetentionDays: 9, + }) + req = addTokenAuthHeader(req, "Bearer 8061e833a55f6fc0157c98b883e91fcfeeb1a71a") + resp := MakeRequest(t, req, http.StatusOK) + var uploadResp uploadArtifactResponse + DecodeJSON(t, resp, &uploadResp) + assert.Contains(t, uploadResp.FileContainerResourceURL, "/api/actions_pipeline/_apis/pipelines/workflows/791/artifacts") + assert.Contains(t, uploadResp.FileContainerResourceURL, "?retentionDays=9") + + // get upload url + idx := strings.Index(uploadResp.FileContainerResourceURL, "/api/actions_pipeline/_apis/pipelines/") + url := uploadResp.FileContainerResourceURL[idx:] + "&itemPath=artifact-retention-days/abc.txt" + + // upload artifact chunk + body := strings.Repeat("A", 1024) + req = NewRequestWithBody(t, "PUT", url, strings.NewReader(body)) + req = addTokenAuthHeader(req, "Bearer 8061e833a55f6fc0157c98b883e91fcfeeb1a71a") + req.Header.Add("Content-Range", "bytes 0-1023/1024") + req.Header.Add("x-tfs-filelength", "1024") + req.Header.Add("x-actions-results-md5", "1HsSe8LeLWh93ILaw1TEFQ==") // base64(md5(body)) + MakeRequest(t, req, http.StatusOK) + + t.Logf("Create artifact confirm") + + // confirm artifact upload + req = NewRequest(t, "PATCH", "/api/actions_pipeline/_apis/pipelines/workflows/791/artifacts?artifactName=artifact-retention-days") + req = addTokenAuthHeader(req, "Bearer 8061e833a55f6fc0157c98b883e91fcfeeb1a71a") + MakeRequest(t, req, http.StatusOK) +}