diff --git a/internal/pkg/report/mask.go b/internal/pkg/report/mask.go new file mode 100644 index 00000000..1e7bbeb0 --- /dev/null +++ b/internal/pkg/report/mask.go @@ -0,0 +1,146 @@ +// Copyright 2025 The Forgejo Authors. +// SPDX-License-Identifier: MIT + +package report + +import ( + "cmp" + "slices" + "strings" + + runnerv1 "code.forgejo.org/forgejo/actions-proto/runner/v1" +) + +type masker struct { + replacer *strings.Replacer + lines []string + multiLines [][]string +} + +func newMasker() *masker { + return &masker{ + lines: make([]string, 0, 10), + multiLines: make([][]string, 0, 10), + } +} + +func (o *masker) add(secret string) { + if len(secret) == 0 { + return + } + o.replacer = nil + lines := strings.Split(strings.ReplaceAll(secret, "\r\n", "\n"), "\n") + if len(lines) > 1 { + o.multiLines = append(o.multiLines, lines) + // make sure the longest secret are replaced first + slices.SortFunc(o.multiLines, func(a, b []string) int { + return cmp.Compare(len(b), len(a)) + }) + } else { + o.lines = append(o.lines, lines[0]) + // make sure the longest secret are replaced first + slices.SortFunc(o.lines, func(a, b string) int { + return cmp.Compare(len(b), len(a)) + }) + } +} + +func (o *masker) getReplacer() *strings.Replacer { + if o.replacer == nil { + oldnew := make([]string, 0, len(o.lines)*2) + for _, line := range o.lines { + oldnew = append(oldnew, line, "***") + } + o.replacer = strings.NewReplacer(oldnew...) + } + return o.replacer +} + +func (o *masker) replaceLines(rows []*runnerv1.LogRow) { + r := o.getReplacer() + for _, row := range rows { + row.Content = r.Replace(row.Content) + } +} + +func (o *masker) maybeReplaceMultiline(multiLine []string, rows []*runnerv1.LogRow) bool { + equal, needMore := o.equalMultiline(multiLine, rows) + if needMore { + return needMore + } + if equal { + o.replaceMultiline(multiLine, rows) + } + return false +} + +func (o *masker) trimEOL(s string) string { + return strings.TrimRightFunc(s, func(r rune) bool { return r == '\r' || r == '\n' }) +} + +func (o *masker) equalMultiline(multiLine []string, rows []*runnerv1.LogRow) (equal, needMore bool) { + if len(rows) < 2 { + needMore = true + return + } + + lastIndex := len(multiLine) - 1 + first := multiLine[0] + if !strings.HasSuffix(o.trimEOL(rows[0].Content), first) { + return // unreachable because the caller checks that already + } + for i, line := range multiLine[1:lastIndex] { + rowIndex := i + 1 + if rowIndex >= len(rows) { + needMore = true + return + } + if o.trimEOL(rows[rowIndex].Content) != line { + return + } + } + last := multiLine[lastIndex] + if lastIndex >= len(rows) { + needMore = true + return + } + if !strings.HasPrefix(o.trimEOL(rows[lastIndex].Content), last) { + return + } + equal = true + return +} + +func (o *masker) replaceMultiline(multiLine []string, rows []*runnerv1.LogRow) { + lastIndex := len(multiLine) - 1 + first := multiLine[0] + rows[0].Content = strings.TrimSuffix(rows[0].Content, first) + "***" + for _, row := range rows[1:lastIndex] { + row.Content = "***" + } + last := multiLine[lastIndex] + rows[lastIndex].Content = "***" + strings.TrimPrefix(rows[lastIndex].Content, last) +} + +func (o *masker) replaceMultilines(rows []*runnerv1.LogRow) bool { + for _, multiLine := range o.multiLines { + for i, row := range rows { + if strings.HasSuffix(o.trimEOL(row.Content), multiLine[0]) { + needMore := o.maybeReplaceMultiline(multiLine, rows[i:]) + if needMore { + return needMore + } + } + } + } + return false +} + +func (o *masker) replace(rows []*runnerv1.LogRow, noMore bool) bool { + needMore := o.replaceMultilines(rows) + if !noMore && needMore { + return needMore + } + o.replaceLines(rows) + return false +} diff --git a/internal/pkg/report/mask_test.go b/internal/pkg/report/mask_test.go new file mode 100644 index 00000000..f6f332e4 --- /dev/null +++ b/internal/pkg/report/mask_test.go @@ -0,0 +1,257 @@ +// Copyright 2025 The Forgejo Authors. +// SPDX-License-Identifier: MIT + +package report + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestReporterMask(t *testing.T) { + lineOne := "secretOne" + lineTwo := lineOne + "secretTwoIsLongerThanAndStartsWithIt" + multiLineMixedSeparators := "A\nB\r\nC\r\nD" + multiLineOne := lineOne + ` +TWO +THREE` + multiLineTwo := multiLineOne + ` +FOUR +FIVE +SIX` + for _, testCase := range []struct { + name string + secrets []string + in string + out string + noMore bool + needMore bool + }{ + { + // + // a multiline secret is masked + // + name: "MultilineIsMasked", + secrets: []string{ + multiLineOne, + }, + in: fmt.Sprintf("line before\n%[1]s\nline after", multiLineOne), + out: "line before\n***\n***\n***\nline after\n", + needMore: false, + }, + { + // + // in a multiline secret \r\n is equivalent to \n and does + // not change how it is masked + // + name: "MultilineWithMixedLineSeparatorsIsMasked", + secrets: []string{ + multiLineMixedSeparators, + }, + in: fmt.Sprintf("line before\n%[1]s\nline after", multiLineMixedSeparators), + out: "line before\n***\n***\n***\n***\nline after\n", + needMore: false, + }, + { + // + // the last line of a multline secret is not a match + // + // + name: "MultilineLastLineDoesNotMatch", + secrets: []string{ + multiLineOne, + }, + in: fmt.Sprintf("%s\nTWO\nsomethingelse", lineOne), + out: lineOne + "\nTWO\nsomethingelse\n", + needMore: false, + }, + { + // + // non-multine secrets are masked + // + name: "SecretsAreMasked", + secrets: []string{ + "", + lineOne, + lineTwo, + }, + in: fmt.Sprintf("line before\n%[1]s\n%[2]s\nline after", lineOne, lineTwo), + out: "line before\n***\n***\nline after\n", + needMore: false, + }, + { + // + // the first line of a multiline secret may be found + // at the end of a line and the last line may be followed + // by a suffix, e.g. + // + // >>>ONE + // TWO + // THREE<<< + // + // and is expected to be replaced with + // + // >>>*** + // *** + // ***<<< + // + name: "MultilineWithSuffixAndPrefix", + secrets: []string{ + multiLineOne, + }, + in: fmt.Sprintf(">>>%[1]s<<<", multiLineOne), + out: ">>>***\n***\n***<<<\n", + needMore: false, + }, + { + // + // multiline secrets are considered first + // since only the first line of the multiLineOne secret + // is found, it needs more input to decide and does not + // mask anything. + // + // non-multiline secrets are not considered at all if + // a multiline secret needs more input and the + // lineOne secret is not masked even though it is found + // + // the first lines is found but not the second + // + name: "NeedMoreLines", + secrets: []string{ + lineOne, + multiLineOne, + }, + in: lineOne, + out: lineOne + "\n", + needMore: true, + }, + { + // + // the lines up to but not including the last are found + // + // See NeedMoreLines + // + name: "NeedMoreLinesVariation1", + secrets: []string{ + multiLineOne, + }, + in: fmt.Sprintf("%s\nTWO", lineOne), + out: lineOne + "\nTWO\n", + needMore: true, + }, + { + // + // the lines up to the third out of six are found + // + // See NeedMoreLines + // + name: "NeedMoreLinesVariation2", + secrets: []string{ + multiLineTwo, + }, + in: multiLineOne, + out: multiLineOne + "\n", + needMore: true, + }, + { + // + // a multiline secret will be masked if it is found + // even when another multiline secret needs more input + // + // however non-multiline secrets will not be masked + // + name: "NeedMoreLinesAndMultilinePartialMasking", + secrets: []string{ + lineOne, + multiLineOne, + }, + in: fmt.Sprintf(`%[1]s %[2]s +>>>%[3]s<<< +%[1]s`, lineOne, lineTwo, multiLineOne), + out: "secretOne secretOnesecretTwoIsLongerThanAndStartsWithIt\n>>>***\n***\n***<<<\nsecretOne\n", + needMore: true, + }, + { + // + // - oneline overlaps with lineTwo + // - oneLine overlaps with multiLineOne and multiLineTwo + // - multiLineOne overlaps with multiLineTwo + // + // they are all masked because the longest secret is masked + // first + // + name: "OverlappingSecrets", + secrets: []string{ + lineOne, + lineTwo, + multiLineOne, + multiLineTwo, + }, + in: fmt.Sprintf(`[[[%[1]s]]] {{{%[2]s}}} +>>>%[3]s<<< +(((%[4]s)))`, lineOne, lineTwo, multiLineOne, multiLineTwo), + out: `[[[***]]] {{{***}}} +>>>*** +*** +***<<< +(((*** +*** +*** +*** +*** +***))) +`, + needMore: false, + }, + { + // + // A multiline secret needing more lines does not + // prevent single line secrets from being masked + // when there is no more lines / these are the last + // available lines and there is no sense in hoping + // more will be available later. + // + name: "NeedMoreButNoMore", + secrets: []string{ + lineTwo, + multiLineOne, + }, + in: fmt.Sprintf(`[[[%[1]s]]] {{{%[2]s\nTWO}}}`, lineTwo, lineOne), + out: "[[[***]]] {{{secretOne\\nTWO}}}\n", + noMore: true, + needMore: false, + }, + { + // + // A variation where the partial multiline secret also + // happens to contain a single line secret (TWO) which + // needs to be masked. + // + // See NeedMoreButNoMore + // + name: "NeedMoreButNoMoreAndOverlappingSecret", + secrets: []string{ + "TWO", + lineTwo, + multiLineOne, + }, + in: fmt.Sprintf(`[[[%[1]s]]] {{{%[2]s\nTWO}}}`, lineTwo, lineOne), + out: "[[[***]]] {{{secretOne\\n***}}}\n", + noMore: true, + needMore: false, + }, + } { + t.Run(testCase.name, func(t *testing.T) { + m := newMasker() + for _, secret := range testCase.secrets { + m.add(secret) + } + rows := stringToRows(testCase.in) + needMore := m.replace(rows, testCase.noMore) + assert.Equal(t, testCase.needMore, needMore) + assert.Equal(t, testCase.out, rowsToString(rows)) + }) + } +} diff --git a/internal/pkg/report/reporter.go b/internal/pkg/report/reporter.go index 11f00d99..ec85de37 100644 --- a/internal/pkg/report/reporter.go +++ b/internal/pkg/report/reporter.go @@ -31,8 +31,7 @@ type Reporter struct { logOffset int logRows []*runnerv1.LogRow - logReplacer *strings.Replacer - oldnew []string + masker *masker reportInterval time.Duration state *runnerv1.TaskState @@ -44,24 +43,23 @@ type Reporter struct { } func NewReporter(ctx context.Context, cancel context.CancelFunc, c client.Client, task *runnerv1.Task, reportInterval time.Duration) *Reporter { - var oldnew []string + masker := newMasker() if v := task.Context.Fields["token"].GetStringValue(); v != "" { - oldnew = append(oldnew, v, "***") + masker.add(v) } if v := client.BackwardCompatibleContext(task, "runtime_token"); v != "" { - oldnew = append(oldnew, v, "***") + masker.add(v) } for _, v := range task.Secrets { - oldnew = append(oldnew, v, "***") + masker.add(v) } rv := &Reporter{ ctx: ctx, cancel: cancel, client: c, - oldnew: oldnew, + masker: masker, reportInterval: reportInterval, - logReplacer: strings.NewReplacer(oldnew...), state: &runnerv1.TaskState{ Id: task.Id, }, @@ -256,6 +254,28 @@ func (r *Reporter) Close(lastWords string) error { }, retry.Context(r.ctx)) } +type ErrRetry struct { + message string +} + +func (err ErrRetry) Error() string { + return err.message +} + +func (err ErrRetry) Is(target error) bool { + _, ok := target.(*ErrRetry) + return ok +} + +var ( + errRetryNeedMoreRows = "need more rows to figure out if multiline secrets must be masked" + errRetrySendAll = "not all logs are submitted %d remain" +) + +func NewErrRetry(message string, args ...any) error { + return &ErrRetry{message: fmt.Sprintf(message, args...)} +} + func (r *Reporter) ReportLog(noMore bool) error { r.clientM.Lock() defer r.clientM.Unlock() @@ -264,6 +284,14 @@ func (r *Reporter) ReportLog(noMore bool) error { rows := r.logRows r.stateMu.RUnlock() + if len(rows) == 0 { + return nil + } + + if needMore := r.masker.replace(rows, noMore); needMore { + return NewErrRetry(errRetryNeedMoreRows) + } + resp, err := r.client.UpdateLog(r.ctx, connect.NewRequest(&runnerv1.UpdateLogRequest{ TaskId: r.state.Id, Index: int64(r.logOffset), @@ -276,7 +304,7 @@ func (r *Reporter) ReportLog(noMore bool) error { ack := int(resp.Msg.AckIndex) if ack < r.logOffset { - return fmt.Errorf("submitted logs are lost") + return fmt.Errorf("submitted logs are lost %d < %d", ack, r.logOffset) } r.stateMu.Lock() @@ -284,8 +312,8 @@ func (r *Reporter) ReportLog(noMore bool) error { r.logOffset = ack r.stateMu.Unlock() - if noMore && ack < r.logOffset+len(rows) { - return fmt.Errorf("not all logs are submitted") + if noMore && len(r.logRows) > 0 { + return NewErrRetry(errRetrySendAll, len(r.logRows)) } return nil @@ -331,7 +359,7 @@ func (r *Reporter) ReportState() error { return true }) if len(noSent) > 0 { - return fmt.Errorf("there are still outputs that have not been sent: %v", noSent) + return NewErrRetry(errRetrySendAll, len(noSent)) } return nil @@ -376,7 +404,7 @@ func (r *Reporter) handleCommand(originalContent, command, parameters, value str switch command { case "add-mask": - r.addMask(value) + r.masker.add(value) return nil case "debug": if r.debugOutputEnabled { @@ -423,15 +451,8 @@ func (r *Reporter) parseLogRow(entry *log.Entry) *runnerv1.LogRow { } } - content = r.logReplacer.Replace(content) - return &runnerv1.LogRow{ Time: timestamppb.New(entry.Time), Content: strings.ToValidUTF8(content, "?"), } } - -func (r *Reporter) addMask(msg string) { - r.oldnew = append(r.oldnew, msg, "***") - r.logReplacer = strings.NewReplacer(r.oldnew...) -} diff --git a/internal/pkg/report/reporter_test.go b/internal/pkg/report/reporter_test.go index cbf1be97..d9cecafe 100644 --- a/internal/pkg/report/reporter_test.go +++ b/internal/pkg/report/reporter_test.go @@ -5,6 +5,8 @@ package report import ( "context" + "errors" + "fmt" "strings" "testing" "time" @@ -20,6 +22,42 @@ import ( "runner.forgejo.org/internal/pkg/client/mocks" ) +func rowsToString(rows []*runnerv1.LogRow) string { + s := "" + for _, row := range rows { + s += row.Content + "\n" + } + return s +} + +func stringToRows(s string) []*runnerv1.LogRow { + if s == "" { + return nil + } + lines := strings.Split(s, "\n") + rows := make([]*runnerv1.LogRow, 0, len(lines)) + for _, line := range lines { + rows = append(rows, &runnerv1.LogRow{Content: line}) + } + return rows +} + +func mockReporter(t *testing.T) (*Reporter, *mocks.Client, func()) { + t.Helper() + + client := mocks.NewClient(t) + ctx, cancel := context.WithCancel(context.Background()) + taskCtx, err := structpb.NewStruct(map[string]interface{}{}) + require.NoError(t, err) + reporter := NewReporter(ctx, cancel, client, &runnerv1.Task{ + Context: taskCtx, + }, time.Second) + close := func() { + assert.NoError(t, reporter.Close("")) + } + return reporter, client, close +} + func TestReporter_parseLogRow(t *testing.T) { tests := []struct { name string @@ -139,7 +177,7 @@ func TestReporter_parseLogRow(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { r := &Reporter{ - logReplacer: strings.NewReplacer(), + masker: newMasker(), debugOutputEnabled: tt.debugOutputEnabled, } for idx, arg := range tt.args { @@ -147,6 +185,7 @@ func TestReporter_parseLogRow(t *testing.T) { got := "" if rv != nil { + r.masker.replace([]*runnerv1.LogRow{rv}, true) got = rv.Content } @@ -158,7 +197,10 @@ func TestReporter_parseLogRow(t *testing.T) { func TestReporter_Fire(t *testing.T) { t.Run("ignore command lines", func(t *testing.T) { - client := mocks.NewClient(t) + reporter, client, close := mockReporter(t) + defer close() + reporter.ResetSteps(5) + client.On("UpdateLog", mock.Anything, mock.Anything).Return(func(_ context.Context, req *connect_go.Request[runnerv1.UpdateLogRequest]) (*connect_go.Response[runnerv1.UpdateLogResponse], error) { t.Logf("Received UpdateLog: %s", req.Msg.String()) return connect_go.NewResponse(&runnerv1.UpdateLogResponse{ @@ -169,16 +211,6 @@ func TestReporter_Fire(t *testing.T) { t.Logf("Received UpdateTask: %s", req.Msg.String()) return connect_go.NewResponse(&runnerv1.UpdateTaskResponse{}), nil }) - ctx, cancel := context.WithCancel(context.Background()) - taskCtx, err := structpb.NewStruct(map[string]interface{}{}) - require.NoError(t, err) - reporter := NewReporter(ctx, cancel, client, &runnerv1.Task{ - Context: taskCtx, - }, time.Second) - defer func() { - assert.NoError(t, reporter.Close("")) - }() - reporter.ResetSteps(5) dataStep0 := map[string]interface{}{ "stage": "Main", @@ -196,3 +228,128 @@ func TestReporter_Fire(t *testing.T) { assert.Equal(t, int64(3), reporter.state.Steps[0].LogLength) }) } + +func TestReporterReportLogLost(t *testing.T) { + reporter, client, _ := mockReporter(t) + reporter.logRows = stringToRows("A") + reporter.logOffset = 100 + + client.On("UpdateLog", mock.Anything, mock.Anything).Return(func(_ context.Context, req *connect_go.Request[runnerv1.UpdateLogRequest]) (*connect_go.Response[runnerv1.UpdateLogResponse], error) { + t.Logf("Received UpdateLog: %s", req.Msg.String()) + return connect_go.NewResponse(&runnerv1.UpdateLogResponse{ + AckIndex: 50, + }), nil + }) + + err := reporter.ReportLog(false) + require.Error(t, err) + assert.Equal(t, "submitted logs are lost 50 < 100", err.Error()) +} + +func TestReporterReportLogError(t *testing.T) { + reporter, client, _ := mockReporter(t) + reporter.logRows = stringToRows("A") + someError := "ERROR MESSAGE" + + client.On("UpdateLog", mock.Anything, mock.Anything).Return(func(_ context.Context, req *connect_go.Request[runnerv1.UpdateLogRequest]) (*connect_go.Response[runnerv1.UpdateLogResponse], error) { + t.Logf("Received UpdateLog: %s", req.Msg.String()) + return connect_go.NewResponse(&runnerv1.UpdateLogResponse{}), errors.New(someError) + }) + + err := reporter.ReportLog(false) + require.Error(t, err) + assert.Equal(t, someError, err.Error()) +} + +func TestReporterReportLog(t *testing.T) { + secret := "secretOne" + firstLine := "ONE" + multiLineSecret := firstLine + "\nTWO\nTHREE\n" + + for _, testCase := range []struct { + name string + outgoing string + received int + sent string + noMore bool + err error + }{ + { + name: "SecretsAreMasked", + outgoing: fmt.Sprintf(">>>%s<<< (((%s)))", secret, multiLineSecret), + sent: ">>>***<<< (((***\n***\n***\n***)))\n", + err: nil, + }, + { + name: "NoRowsToSend", + err: nil, + }, + { + name: "RetryToMaskSecrets", + outgoing: firstLine, + err: NewErrRetry(errRetryNeedMoreRows), + }, + { + name: "OnlyTheFirstLineIsReceived", + outgoing: "A\nB\nC", + received: 1, + sent: "A\n", + }, + { + name: "RetrySendAll", + outgoing: "A\nB\nC", + received: 1, + sent: "A\n", + noMore: true, + err: NewErrRetry(errRetrySendAll, 2), + }, + } { + t.Run(testCase.name, func(t *testing.T) { + reporter, client, _ := mockReporter(t) + reporter.masker.add(secret) + reporter.masker.add(multiLineSecret) + rows := stringToRows(testCase.outgoing) + reporter.logRows = rows + + sent := "" + if testCase.sent != "" { + client.On("UpdateLog", mock.Anything, mock.Anything).Return(func(_ context.Context, req *connect_go.Request[runnerv1.UpdateLogRequest]) (*connect_go.Response[runnerv1.UpdateLogResponse], error) { + t.Logf("UpdateLogRequest: %s", req.Msg.String()) + rows := req.Msg.Rows + if testCase.received > 0 { + rows = rows[:testCase.received] + } + sent += rowsToString(rows) + resp := &runnerv1.UpdateLogResponse{ + AckIndex: req.Msg.Index + int64(len(rows)), + } + t.Logf("UpdateLogResponse: %s", resp.String()) + return connect_go.NewResponse(resp), nil + }) + } + + err := reporter.ReportLog(testCase.noMore) + if testCase.err == nil { + assert.NoError(t, err) + } else { + if assert.ErrorIs(t, err, testCase.err) { + assert.Equal(t, err.Error(), testCase.err.Error()) + } + } + if testCase.sent != "" { + assert.Equal(t, testCase.sent, sent) + if testCase.received > 0 { + remain := len(rows) - testCase.received + assert.Equal(t, remain, len(reporter.logRows)) + assert.Equal(t, testCase.received, reporter.logOffset) + } else { + assert.Empty(t, reporter.logRows) + assert.Equal(t, len(rows), reporter.logOffset) + } + } else { + assert.Equal(t, len(rows), len(reporter.logRows)) + assert.Equal(t, 0, reporter.logOffset) + } + }) + } +} diff --git a/internal/pkg/testutils/mockvariable.go b/internal/pkg/testutils/mockvariable.go new file mode 100644 index 00000000..3307b89e --- /dev/null +++ b/internal/pkg/testutils/mockvariable.go @@ -0,0 +1,10 @@ +// Copyright 2025 The Forgejo Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package testutils + +func MockVariable[T any](p *T, v T) (reset func()) { + old := *p + *p = v + return func() { *p = old } +}