// loki/pkg/dataobj/encoder_test.go

package dataobj

import (
	"math"
	"testing"

	"github.com/stretchr/testify/assert"

	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/filemd"
	"github.com/grafana/loki/v3/pkg/scratch"
)

// Test_encoder_typeRefs feeds a series of section types into a single encoder
// and checks, for each one, the type ref returned by getTypeRef and the
// NameRef recorded for that ref in the encoder's rawTypes.
func Test_encoder_typeRefs(t *testing.T) {
	// lokiNamespace is the namespace shared by most of the cases below.
	const lokiNamespace = "github.com/grafana/loki"

	type testCase struct {
		name string
		in   SectionType

		wantRef     uint32
		wantNameRef *filemd.SectionType_NameRef
	}

	cases := []testCase{
		{
			name: "invalid",
			in:   SectionType{},

			wantRef:     0,
			wantNameRef: nil,
		},
		{
			name: "streams",
			in: SectionType{
				Namespace: lokiNamespace,
				Kind:      "streams",
			},

			wantRef:     1,
			wantNameRef: &filemd.SectionType_NameRef{NamespaceRef: 1, KindRef: 2},
		},
		{
			name: "logs",
			in: SectionType{
				Namespace: lokiNamespace,
				Kind:      "logs",
			},

			wantRef:     2,
			wantNameRef: &filemd.SectionType_NameRef{NamespaceRef: 1, KindRef: 3},
		},
		{
			name: "existing namespace, new kind",
			in: SectionType{
				Namespace: lokiNamespace,
				Kind:      "section-kind-1",
			},

			wantRef:     3,
			wantNameRef: &filemd.SectionType_NameRef{NamespaceRef: 1, KindRef: 4},
		},
		{
			// Same input as the previous case: the refs already handed out
			// are expected to be returned again.
			name: "existing namespace, existing kind",
			in: SectionType{
				Namespace: lokiNamespace,
				Kind:      "section-kind-1",
			},

			wantRef:     3,
			wantNameRef: &filemd.SectionType_NameRef{NamespaceRef: 1, KindRef: 4},
		},
		{
			name: "new namespace, existing kind",
			in: SectionType{
				Namespace: "new-namespace",
				Kind:      "section-kind-1",
			},

			wantRef:     4,
			wantNameRef: &filemd.SectionType_NameRef{NamespaceRef: 5, KindRef: 4},
		},
		{
			name: "new namespace, new kind",
			in: SectionType{
				Namespace: "new-namespace-2",
				Kind:      "section-kind-2",
			},

			wantRef:     5,
			wantNameRef: &filemd.SectionType_NameRef{NamespaceRef: 6, KindRef: 7},
		},
		{
			// Same namespace and kind as "streams", differing only in
			// Version: a new type ref is expected while the name ref is
			// unchanged.
			name: "existing type, new version",
			in: SectionType{
				Namespace: lokiNamespace,
				Kind:      "streams",
				Version:   math.MaxUint32,
			},

			wantRef:     6,
			wantNameRef: &filemd.SectionType_NameRef{NamespaceRef: 1, KindRef: 2},
		},
	}

	// Every case goes through the same encoder, in order, so each expectation
	// reflects the types streamed in by the cases before it.
	enc := newEncoder(scratch.NewMemory())

	for i := range cases {
		tc := cases[i]

		gotRef := enc.getTypeRef(tc.in)
		gotNameRef := enc.rawTypes[gotRef].NameRef

		assert.Equal(t, tc.wantRef, gotRef, "unexpected type ref for %s", tc.name)
		assert.Equal(t, tc.wantNameRef, gotNameRef, "unexpected name ref for %s", tc.name)
	}
}
refactor(dataobj): invert dependency between dataobj and sections (#17762) Originally, the dataobj package was a higher-level API around sections. This design caused it to become a bottleneck: * Implementing any new public behaviour for a section required bubbling it up to the dataobj API for it to be exposed, making it tedious to add new sections or update existing ones. * The `dataobj.Builder` pattern was focused on constructing dataobjs for storing log data, which will cause friction as we build objects around other use cases. This PR builds on top of the foundation laid out by #17704 and #17708, fully inverting the dependency between dataobj and sections: * The `dataobj` package has no knowledge of what sections exist, and can now be used for writing and reading generic sections. Section packages now create higher-level APIs around the abstractions provided by `dataobj`. * Section packages are now public, and callers interact directly with these packages for writing and reading section-specific data. * All logic for a section (encoding, decoding, buffering, reading) is now fully self-contained inside the section package. Previously, the implementation of each section was spread across three packages (`pkg/dataobj/internal/encoding`, `pkg/dataobj/internal/sections/SECTION`, `pkg/dataobj`). * Cutting a section is now a decision made by the caller rather than the section implementation. Previously, the logs section builder would create multiple sections. For the most part, this change is a no-op, with two exceptions: 1. Section cutting is now performed by the caller; however, this shouldn't result in any issues. 2. Removing the high-level `dataobj.Stream` and `dataobj.Record` types will temporarily reduce the allocation gains from #16988. I will address this after this PR is merged. 7 months ago			`package dataobj`

			`import (`
chore(dataobj): introduce the concept of section info "extensions" (#18832) 5 months ago			`"math"`
refactor(dataobj): invert dependency between dataobj and sections (#17762) Originally, the dataobj package was a higher-level API around sections. This design caused it to become a bottleneck: * Implementing any new public behaviour for a section required bubbling it up to the dataobj API for it to be exposed, making it tedious to add new sections or update existing ones. * The `dataobj.Builder` pattern was focused on constructing dataobjs for storing log data, which will cause friction as we build objects around other use cases. This PR builds on top of the foundation laid out by #17704 and #17708, fully inverting the dependency between dataobj and sections: * The `dataobj` package has no knowledge of what sections exist, and can now be used for writing and reading generic sections. Section packages now create higher-level APIs around the abstractions provided by `dataobj`. * Section packages are now public, and callers interact directly with these packages for writing and reading section-specific data. * All logic for a section (encoding, decoding, buffering, reading) is now fully self-contained inside the section package. Previously, the implementation of each section was spread across three packages (`pkg/dataobj/internal/encoding`, `pkg/dataobj/internal/sections/SECTION`, `pkg/dataobj`). * Cutting a section is now a decision made by the caller rather than the section implementation. Previously, the logs section builder would create multiple sections. For the most part, this change is a no-op, with two exceptions: 1. Section cutting is now performed by the caller; however, this shouldn't result in any issues. 2. Removing the high-level `dataobj.Stream` and `dataobj.Record` types will temporarily reduce the allocation gains from #16988. I will address this after this PR is merged. 7 months ago			`"testing"`

			`"github.com/stretchr/testify/assert"`

			`"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/filemd"`
chore(dataobj): add ability to buffer pending sections to disk (#18780) Signed-off-by: Robert Fratto <robertfratto@gmail.com> 5 months ago			`"github.com/grafana/loki/v3/pkg/scratch"`
refactor(dataobj): invert dependency between dataobj and sections (#17762) Originally, the dataobj package was a higher-level API around sections. This design caused it to become a bottleneck: * Implementing any new public behaviour for a section required bubbling it up to the dataobj API for it to be exposed, making it tedious to add new sections or update existing ones. * The `dataobj.Builder` pattern was focused on constructing dataobjs for storing log data, which will cause friction as we build objects around other use cases. This PR builds on top of the foundation laid out by #17704 and #17708, fully inverting the dependency between dataobj and sections: * The `dataobj` package has no knowledge of what sections exist, and can now be used for writing and reading generic sections. Section packages now create higher-level APIs around the abstractions provided by `dataobj`. * Section packages are now public, and callers interact directly with these packages for writing and reading section-specific data. * All logic for a section (encoding, decoding, buffering, reading) is now fully self-contained inside the section package. Previously, the implementation of each section was spread across three packages (`pkg/dataobj/internal/encoding`, `pkg/dataobj/internal/sections/SECTION`, `pkg/dataobj`). * Cutting a section is now a decision made by the caller rather than the section implementation. Previously, the logs section builder would create multiple sections. For the most part, this change is a no-op, with two exceptions: 1. Section cutting is now performed by the caller; however, this shouldn't result in any issues. 2. Removing the high-level `dataobj.Stream` and `dataobj.Record` types will temporarily reduce the allocation gains from #16988. I will address this after this PR is merged. 7 months ago			`)`

			`func Test_encoder_typeRefs(t *testing.T) {`
			`tt := []struct {`
			`name string`
			`input SectionType`

			`expectRef uint32`
			`expectNameRef *filemd.SectionType_NameRef`
			`}{`
			`{`
			`name: "invalid",`
chore(dataobj): remove support for deprecated fields (breaking change) (#18822) 5 months ago			`input: SectionType{},`
refactor(dataobj): invert dependency between dataobj and sections (#17762) Originally, the dataobj package was a higher-level API around sections. This design caused it to become a bottleneck: * Implementing any new public behaviour for a section required bubbling it up to the dataobj API for it to be exposed, making it tedious to add new sections or update existing ones. * The `dataobj.Builder` pattern was focused on constructing dataobjs for storing log data, which will cause friction as we build objects around other use cases. This PR builds on top of the foundation laid out by #17704 and #17708, fully inverting the dependency between dataobj and sections: * The `dataobj` package has no knowledge of what sections exist, and can now be used for writing and reading generic sections. Section packages now create higher-level APIs around the abstractions provided by `dataobj`. * Section packages are now public, and callers interact directly with these packages for writing and reading section-specific data. * All logic for a section (encoding, decoding, buffering, reading) is now fully self-contained inside the section package. Previously, the implementation of each section was spread across three packages (`pkg/dataobj/internal/encoding`, `pkg/dataobj/internal/sections/SECTION`, `pkg/dataobj`). * Cutting a section is now a decision made by the caller rather than the section implementation. Previously, the logs section builder would create multiple sections. For the most part, this change is a no-op, with two exceptions: 1. Section cutting is now performed by the caller; however, this shouldn't result in any issues. 2. Removing the high-level `dataobj.Stream` and `dataobj.Record` types will temporarily reduce the allocation gains from #16988. I will address this after this PR is merged. 7 months ago
			`expectRef: 0,`
			`expectNameRef: nil,`
			`},`
			`{`
chore(dataobj): introduce the concept of section info "extensions" (#18832) 5 months ago			`name: "streams",`
			`input: SectionType{`
			`Namespace: "github.com/grafana/loki",`
			`Kind: "streams",`
			`},`
refactor(dataobj): invert dependency between dataobj and sections (#17762) Originally, the dataobj package was a higher-level API around sections. This design caused it to become a bottleneck: * Implementing any new public behaviour for a section required bubbling it up to the dataobj API for it to be exposed, making it tedious to add new sections or update existing ones. * The `dataobj.Builder` pattern was focused on constructing dataobjs for storing log data, which will cause friction as we build objects around other use cases. This PR builds on top of the foundation laid out by #17704 and #17708, fully inverting the dependency between dataobj and sections: * The `dataobj` package has no knowledge of what sections exist, and can now be used for writing and reading generic sections. Section packages now create higher-level APIs around the abstractions provided by `dataobj`. * Section packages are now public, and callers interact directly with these packages for writing and reading section-specific data. * All logic for a section (encoding, decoding, buffering, reading) is now fully self-contained inside the section package. Previously, the implementation of each section was spread across three packages (`pkg/dataobj/internal/encoding`, `pkg/dataobj/internal/sections/SECTION`, `pkg/dataobj`). * Cutting a section is now a decision made by the caller rather than the section implementation. Previously, the logs section builder would create multiple sections. For the most part, this change is a no-op, with two exceptions: 1. Section cutting is now performed by the caller; however, this shouldn't result in any issues. 2. Removing the high-level `dataobj.Stream` and `dataobj.Record` types will temporarily reduce the allocation gains from #16988. I will address this after this PR is merged. 7 months ago
			`expectRef: 1,`
			`expectNameRef: &filemd.SectionType_NameRef{NamespaceRef: 1, KindRef: 2},`
			`},`
			`{`
chore(dataobj): introduce the concept of section info "extensions" (#18832) 5 months ago			`name: "logs",`
			`input: SectionType{`
			`Namespace: "github.com/grafana/loki",`
			`Kind: "logs",`
			`},`
refactor(dataobj): invert dependency between dataobj and sections (#17762) Originally, the dataobj package was a higher-level API around sections. This design caused it to become a bottleneck: * Implementing any new public behaviour for a section required bubbling it up to the dataobj API for it to be exposed, making it tedious to add new sections or update existing ones. * The `dataobj.Builder` pattern was focused on constructing dataobjs for storing log data, which will cause friction as we build objects around other use cases. This PR builds on top of the foundation laid out by #17704 and #17708, fully inverting the dependency between dataobj and sections: * The `dataobj` package has no knowledge of what sections exist, and can now be used for writing and reading generic sections. Section packages now create higher-level APIs around the abstractions provided by `dataobj`. * Section packages are now public, and callers interact directly with these packages for writing and reading section-specific data. * All logic for a section (encoding, decoding, buffering, reading) is now fully self-contained inside the section package. Previously, the implementation of each section was spread across three packages (`pkg/dataobj/internal/encoding`, `pkg/dataobj/internal/sections/SECTION`, `pkg/dataobj`). * Cutting a section is now a decision made by the caller rather than the section implementation. Previously, the logs section builder would create multiple sections. For the most part, this change is a no-op, with two exceptions: 1. Section cutting is now performed by the caller; however, this shouldn't result in any issues. 2. Removing the high-level `dataobj.Stream` and `dataobj.Record` types will temporarily reduce the allocation gains from #16988. I will address this after this PR is merged. 7 months ago
			`expectRef: 2,`
			`expectNameRef: &filemd.SectionType_NameRef{NamespaceRef: 1, KindRef: 3},`
			`},`
			`{`
			`name: "existing namespace, new kind",`
			`input: SectionType{`
			`Namespace: "github.com/grafana/loki",`
			`Kind: "section-kind-1",`
			`},`

			`expectRef: 3,`
			`expectNameRef: &filemd.SectionType_NameRef{NamespaceRef: 1, KindRef: 4},`
			`},`
			`{`
			`name: "existing namespace, existing kind",`
			`input: SectionType{`
			`Namespace: "github.com/grafana/loki",`
			`Kind: "section-kind-1",`
			`},`

			`expectRef: 3,`
			`expectNameRef: &filemd.SectionType_NameRef{NamespaceRef: 1, KindRef: 4},`
			`},`
			`{`
			`name: "new namespace, existing kind",`
			`input: SectionType{`
			`Namespace: "new-namespace",`
			`Kind: "section-kind-1",`
			`},`

			`expectRef: 4,`
			`expectNameRef: &filemd.SectionType_NameRef{NamespaceRef: 5, KindRef: 4},`
			`},`
			`{`
			`name: "new namespace, new kind",`
			`input: SectionType{`
			`Namespace: "new-namespace-2",`
			`Kind: "section-kind-2",`
			`},`

			`expectRef: 5,`
			`expectNameRef: &filemd.SectionType_NameRef{NamespaceRef: 6, KindRef: 7},`
			`},`
chore(dataobj): introduce the concept of section info "extensions" (#18832) 5 months ago			`{`
			`name: "existing type, new version",`
			`input: SectionType{`
			`Namespace: "github.com/grafana/loki",`
			`Kind: "streams",`
			`Version: math.MaxUint32,`
			`},`

			`expectRef: 6,`
			`expectNameRef: &filemd.SectionType_NameRef{NamespaceRef: 1, KindRef: 2},`
			`},`
refactor(dataobj): invert dependency between dataobj and sections (#17762) Originally, the dataobj package was a higher-level API around sections. This design caused it to become a bottleneck: * Implementing any new public behaviour for a section required bubbling it up to the dataobj API for it to be exposed, making it tedious to add new sections or update existing ones. * The `dataobj.Builder` pattern was focused on constructing dataobjs for storing log data, which will cause friction as we build objects around other use cases. This PR builds on top of the foundation laid out by #17704 and #17708, fully inverting the dependency between dataobj and sections: * The `dataobj` package has no knowledge of what sections exist, and can now be used for writing and reading generic sections. Section packages now create higher-level APIs around the abstractions provided by `dataobj`. * Section packages are now public, and callers interact directly with these packages for writing and reading section-specific data. * All logic for a section (encoding, decoding, buffering, reading) is now fully self-contained inside the section package. Previously, the implementation of each section was spread across three packages (`pkg/dataobj/internal/encoding`, `pkg/dataobj/internal/sections/SECTION`, `pkg/dataobj`). * Cutting a section is now a decision made by the caller rather than the section implementation. Previously, the logs section builder would create multiple sections. For the most part, this change is a no-op, with two exceptions: 1. Section cutting is now performed by the caller; however, this shouldn't result in any issues. 2. Removing the high-level `dataobj.Stream` and `dataobj.Record` types will temporarily reduce the allocation gains from #16988. I will address this after this PR is merged. 7 months ago			`}`

chore(dataobj): add ability to buffer pending sections to disk (#18780) Signed-off-by: Robert Fratto <robertfratto@gmail.com> 5 months ago			`enc := newEncoder(scratch.NewMemory())`
refactor(dataobj): invert dependency between dataobj and sections (#17762) Originally, the dataobj package was a higher-level API around sections. This design caused it to become a bottleneck: * Implementing any new public behaviour for a section required bubbling it up to the dataobj API for it to be exposed, making it tedious to add new sections or update existing ones. * The `dataobj.Builder` pattern was focused on constructing dataobjs for storing log data, which will cause friction as we build objects around other use cases. This PR builds on top of the foundation laid out by #17704 and #17708, fully inverting the dependency between dataobj and sections: * The `dataobj` package has no knowledge of what sections exist, and can now be used for writing and reading generic sections. Section packages now create higher-level APIs around the abstractions provided by `dataobj`. * Section packages are now public, and callers interact directly with these packages for writing and reading section-specific data. * All logic for a section (encoding, decoding, buffering, reading) is now fully self-contained inside the section package. Previously, the implementation of each section was spread across three packages (`pkg/dataobj/internal/encoding`, `pkg/dataobj/internal/sections/SECTION`, `pkg/dataobj`). * Cutting a section is now a decision made by the caller rather than the section implementation. Previously, the logs section builder would create multiple sections. For the most part, this change is a no-op, with two exceptions: 1. Section cutting is now performed by the caller; however, this shouldn't result in any issues. 2. Removing the high-level `dataobj.Stream` and `dataobj.Record` types will temporarily reduce the allocation gains from #16988. I will address this after this PR is merged. 7 months ago
			`// Test are run sequentially so we can check the behaviour of streaming types`
			`// in.`
			`for _, tc := range tt {`
			`typeRef := enc.getTypeRef(tc.input)`
			`nameRef := enc.rawTypes[typeRef].NameRef`

			`assert.Equal(t, tc.expectRef, typeRef, "unexpected type ref for %s", tc.name)`
			`assert.Equal(t, tc.expectNameRef, nameRef, "unexpected name ref for %s", tc.name)`
			`}`

			`}`