Display replicas, cores and nodes in sizing tool. (#8088)

**What this PR does / why we need it**:
Not every user wants to deploy Loki with Helm. That's why we now also
display the required replicas, nodes, and cores directly in the sizing tool.


![image](https://user-images.githubusercontent.com/2418440/212642958-17bc3fcd-0f6a-4ea8-8366-a3c28ddc6622.png)

This change also fixes two bugs in the algorithm: the 1 PB/day ingest cap
was mistakenly set to `1e12` bytes (1 TB) instead of `1e15`, and the
`replicasOnLastNode >= 0.0` check was always true where `> 0.0` was
intended. It also introduces tests for the algorithm's invariants.
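
The new tests use Go's `testing/quick` for property-based checks. Below is a self-contained sketch of that pattern, not the PR's code: `toySize` is a hypothetical stand-in for `calculateClusterSize`, and the one-core-per-replica, 4-cores-per-node packing is assumed purely for illustration.

```go
package sizing_test

import (
	"math"
	"testing"
	"testing/quick"
)

// toySize is a hypothetical stand-in for calculateClusterSize:
// it packs write replicas onto 4-core nodes, one core per replica.
func toySize(bytesDayIngest float64) (nodes, replicas int) {
	// Mirror the fixed cap: at most 1 PB/day (decimal prefix).
	bytesDayIngest = math.Min(bytesDayIngest, 1e15)
	// Assumed rate: one replica per 100 GB/day, and at least one replica.
	r := math.Max(math.Ceil(bytesDayIngest/1e11), 1)
	return int(math.Ceil(r / 4)), int(r)
}

// Property: any ingest yields a non-empty cluster whose replicas
// fit on the provisioned nodes.
func Test_ToyInvariant(t *testing.T) {
	f := func(ingest float64) bool {
		if ingest < 0 {
			ingest = -ingest
		}
		nodes, replicas := toySize(ingest)
		return nodes > 0 && replicas <= nodes*4
	}
	if err := quick.Check(f, nil); err != nil {
		t.Error(err)
	}
}
```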

**Checklist**
- [ ] Reviewed the `CONTRIBUTING.md` guide
- [x] Documentation added
- [x] Tests updated
- [ ] `CHANGELOG.md` updated
- [ ] Changes that require user attention or interaction to upgrade are
documented in `docs/sources/upgrading/_index.md`

Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: Kaviraj <kavirajkanagaraj@gmail.com>
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
Signed-off-by: Ashwanth Goli <iamashwanth@gmail.com>
Co-authored-by: Dimitar Dimitrov <dimitar.dimitrov@grafana.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Danny Kopping <danny.kopping@grafana.com>
Co-authored-by: Kaviraj Kanagaraj <kavirajkanagaraj@gmail.com>
Co-authored-by: liam-howe-maersk <118169897+liam-howe-maersk@users.noreply.github.com>
Co-authored-by: JordanRushing <rushing.jordan@gmail.com>
Co-authored-by: Trevor Whitney <trevorjwhitney@gmail.com>
Co-authored-by: Periklis Tsirakidis <periklis@redhat.com>
Co-authored-by: Ashwanth Goli <iamashwanth@gmail.com>
Branch: pull/8182/head
Commit f24da4a194 (parent da76b1394d) by Karsten Jeschkies, committed 2 years ago via GitHub.
Files changed:
1. cmd/logql-analyzer/main.go (1 changed line)
2. docs/sources/installation/sizing/index.md (47 changed lines)
3. pkg/sizing/algorithm.go (27 changed lines)
4. pkg/sizing/algorithm_test.go (25 changed lines)
5. pkg/sizing/helm.go (4 changed lines)
6. pkg/sizing/http.go (29 changed lines)
7. pkg/sizing/node.go (2 changed lines)

@@ -52,6 +52,7 @@ func createServer(cfg server.Config, logger log.Logger) (*server.Server, error)
 s.HTTP.Handle("/api/sizing/helm", http.HandlerFunc(sizingHandler.GenerateHelmValues)).Methods(http.MethodGet, http.MethodOptions)
 s.HTTP.Handle("/api/sizing/nodes", http.HandlerFunc(sizingHandler.Nodes)).Methods(http.MethodGet, http.MethodOptions)
+s.HTTP.Handle("/api/sizing/cluster", http.HandlerFunc(sizingHandler.Cluster)).Methods(http.MethodGet, http.MethodOptions)
 s.HTTP.HandleFunc("/ready", func(w http.ResponseWriter, _ *http.Request) {
 http.Error(w, "ready", http.StatusOK)
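
As a usage sketch, the new `/api/sizing/cluster` route can be queried like any GET endpoint. The base URL and port below are illustrative assumptions, not taken from the PR; the query parameter names and response fields match the diff.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

// ClusterSize mirrors the exported fields returned by the endpoint.
type ClusterSize struct {
	TotalNodes         int
	TotalReadReplicas  int
	TotalWriteReplicas int
	TotalCoresRequest  float64
}

func main() {
	q := url.Values{}
	q.Set("node-type", "t2.xlarge")
	q.Set("ingest", "1099511627776") // 1 TiB/day, in bytes
	q.Set("retention", "30")
	q.Set("queryperf", "Basic")

	// Hypothetical base URL; adjust to wherever the sizing API runs.
	resp, err := http.Get("http://localhost:3001/api/sizing/cluster?" + q.Encode())
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var size ClusterSize
	if err := json.NewDecoder(resp.Body).Decode(&size); err != nil {
		panic(err)
	}
	fmt.Printf("cluster size: %+v\n", size)
}
```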

@@ -20,18 +20,18 @@ This tool helps to generate a Helm Charts `values.yaml` file based on specified
 <div id="app">
-<label class="icon question" v-on:mouseover="help='node'" v-on:mouseleave="help=null">Node Type</label>
+<label>Node Type<i class="fa fa-question" v-on:mouseover="help='node'" v-on:mouseleave="help=null"></i></label>
 <select name="node-type" v-model="node">
 <option v-for="node of nodes">{{ node }}</option>
 </select>
-<label class="fa fa-question" v-on:mouseover="help='ingest'" v-on:mouseleave="help=null">Ingest</label>
+<label>Ingest<i class="fa fa-question" v-on:mouseover="help='ingest'" v-on:mouseleave="help=null"></i></label>
 <input v-model="ingest" name="ingest" placeholder="Desired ingest in GiB/day" type="number" max="1048576" min="0"/>
-<label class="fa fa-question" v-on:mouseover="help='retention'" v-on:mouseleave="help=null">Log retention period</label>
+<label>Log retention period<i class="fa fa-question" v-on:mouseover="help='retention'" v-on:mouseleave="help=null"></i></label>
 <input v-model="retention" name="retention" placeholder="Desired retention period in days" type="number" min="0"/>
-<label class="fa fa-question" v-on:mouseover="help='queryperf'" v-on:mouseleave="help=null">Query performance</label>
+<label>Query performance<i class="fa fa-question" v-on:mouseover="help='queryperf'" v-on:mouseleave="help=null"></i></label>
 <div id="queryperf" style="display: inline-flex;">
 <label for="basic">
 <input type="radio" id="basic" value="Basic" v-model="queryperf"/>Basic
@@ -42,6 +42,23 @@ This tool helps to generate a Helm Charts `values.yaml` file based on specified
 </label>
 </div>
+<div v-if="clusterSize">
+<table>
+<tr>
+<th>Read Replicas</th>
+<th>Write Replicas</th>
+<th>Nodes</th>
+<th>Cores</th>
+</tr>
+<tr>
+<td>{{ clusterSize.TotalReadReplicas }}</td>
+<td>{{ clusterSize.TotalWriteReplicas }}</td>
+<td>{{ clusterSize.TotalNodes }}</td>
+<td>{{ clusterSize.TotalCoresRequest }}</td>
+</tr>
+</table>
+</div>
 <a v-bind:href="helmURL" class="primary-button">Generate and download values file</a>
 <blockquote v-if="help">
@@ -97,14 +114,18 @@ createApp({
 ingest: null,
 retention: null,
 queryperf: 'Basic',
-help: null
+help: null,
+clusterSize: null
 }
 },
 computed: {
 helmURL() {
+return `${API_URL}/helm?${this.queryString}`
+},
+queryString() {
 const bytesDayIngest = this.ingest * 1024 * 1024 * 1024
-return `${API_URL}/helm?node-type=${encodeURIComponent(this.node)}&ingest=${encodeURIComponent(bytesDayIngest)}&retention=${encodeURIComponent(this.retention)}&queryperf=${encodeURIComponent(this.queryperf)}`
+return `node-type=${encodeURIComponent(this.node)}&ingest=${encodeURIComponent(bytesDayIngest)}&retention=${encodeURIComponent(this.retention)}&queryperf=${encodeURIComponent(this.queryperf)}`
 }
 },
@@ -117,7 +138,21 @@ createApp({
 async fetchNodeTypes() {
 const url = `${API_URL}/nodes`
 this.nodes = await (await fetch(url,{mode: 'cors'})).json()
 },
+async calculateClusterSize() {
+if (this.node == 'Loading...' || this.ingest == null || this.retention == null) {
+return
+}
+const url = `${API_URL}/cluster?${this.queryString}`
+this.clusterSize = await (await fetch(url,{mode: 'cors'})).json()
+}
 },
+watch: {
+node: 'calculateClusterSize',
+ingest: 'calculateClusterSize',
+retention: 'calculateClusterSize',
+queryperf: 'calculateClusterSize'
+}
 }).mount('#app')
 </script>

@@ -5,9 +5,10 @@ import (
 )
 type ClusterSize struct {
-totalNodes int
-totalReadReplicas int
-totalWriteReplicas int
+TotalNodes int
+TotalReadReplicas int
+TotalWriteReplicas int
+TotalCoresRequest float64
 expectedMaxReadThroughputBytesSec float64
 expectedMaxIngestBytesDay float64
@@ -23,7 +24,7 @@ const (
 func calculateClusterSize(nt NodeType, bytesDayIngest float64, qperf QueryPerf) ClusterSize {
 // 1 Petabyte per day is maximum. We use decimal prefix https://en.wikipedia.org/wiki/Binary_prefix
-bytesDayIngest = math.Min(bytesDayIngest, 1e12)
+bytesDayIngest = math.Min(bytesDayIngest, 1e15)
 bytesSecondIngest := bytesDayIngest / 86400
 numWriteReplicasNeeded := math.Ceil(bytesSecondIngest / nt.writePod.rateBytesSecond)
@@ -33,7 +34,7 @@ func calculateClusterSize(nt NodeType, bytesDayIngest float64, qperf QueryPerf)
 replicasOnLastNode := math.Mod(numWriteReplicasNeeded, writeReplicasPerNode)
 coresOnLastNode := 0.0
-if replicasOnLastNode >= 0.0 {
+if replicasOnLastNode > 0.0 {
 coresOnLastNode = math.Max(float64(nt.cores)-replicasOnLastNode*nt.writePod.cpuRequest, 0.0)
 }
@@ -44,12 +45,13 @@ func calculateClusterSize(nt NodeType, bytesDayIngest float64, qperf QueryPerf)
 readReplicasOnFullyPackedWriteNodes := readReplicasPerNode * fullyWritePackedNodes
 readReplicasOnPartiallyPackedWriteNodes := math.Floor(coresOnLastNode / nt.readPod.cpuRequest)
-basicQperfReadReplicas := readReplicasOnFullyPackedWriteNodes + readReplicasOnPartiallyPackedWriteNodes
+// Required read replicas without considering required query performance.
+baselineReadReplicas := readReplicasOnFullyPackedWriteNodes + readReplicasOnPartiallyPackedWriteNodes
 scaleUp := 0.25
 additionalReadReplicas := 0.0
 if qperf != Basic {
-additionalReadReplicas = basicQperfReadReplicas * scaleUp
+additionalReadReplicas = baselineReadReplicas * scaleUp
 }
 readReplicasPerEmptyNode := math.Floor(float64(nt.cores) / nt.readPod.cpuRequest)
@@ -58,15 +60,16 @@ func calculateClusterSize(nt NodeType, bytesDayIngest float64, qperf QueryPerf)
 actualNodesAddedForReads := calculateActualReadNodes(additionalNodesNeededForReads)
 actualReadReplicasAdded := actualNodesAddedForReads * readReplicasPerEmptyNode
-totalReadReplicas := actualReadReplicasAdded + basicQperfReadReplicas
+totalReadReplicas := actualReadReplicasAdded + baselineReadReplicas
 totalReadThroughputBytesSec := totalReadReplicas * nt.readPod.rateBytesSecond
 totalNodesNeeded := nodesNeededForWrites + actualNodesAddedForReads
+totalCoresLimit := numWriteReplicasNeeded*nt.writePod.cpuRequest + totalReadReplicas*nt.readPod.cpuRequest
 return ClusterSize{
-totalNodes: int(totalNodesNeeded),
-totalReadReplicas: int(totalReadReplicas),
-totalWriteReplicas: int(numWriteReplicasNeeded),
+TotalNodes: int(totalNodesNeeded),
+TotalReadReplicas: int(totalReadReplicas),
+TotalWriteReplicas: int(numWriteReplicasNeeded),
+TotalCoresRequest: totalCoresLimit,
 expectedMaxReadThroughputBytesSec: totalReadThroughputBytesSec,
 expectedMaxIngestBytesDay: (nt.writePod.rateBytesSecond * numWriteReplicasNeeded) * 86400,
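
As a back-of-the-envelope check of the corrected cap: 1 PB/day is roughly 11.6 GB/s of ingest. The write-pod rate below is a hypothetical figure chosen for illustration, not the PR's actual `writePod.rateBytesSecond`.

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	// The corrected cap is 1e15 bytes (1 PB/day, decimal prefix);
	// the old 1e12 capped ingest at just 1 TB/day.
	bytesDayIngest := math.Min(2e15, 1e15)      // requests above the cap are clamped
	bytesSecondIngest := bytesDayIngest / 86400 // ≈ 1.157e10 B/s ≈ 11.6 GB/s
	writeRate := 300.0 * 1024 * 1024            // hypothetical write-pod rate: 300 MiB/s

	// ceil(1.157e10 / 3.146e8) = 37 write replicas at the cap.
	fmt.Println(math.Ceil(bytesSecondIngest / writeRate))
}
```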

@@ -7,7 +7,7 @@ import (
 "github.com/stretchr/testify/require"
 )
-func Test_AlgorithmTest_Algorithm(t *testing.T) {
+func Test_Algorithm(t *testing.T) {
 f := func(ingest float64) bool {
 if ingest < 0 {
 ingest = -ingest
@@ -16,12 +16,12 @@ func Test_AlgorithmTest_Algorithm(t *testing.T) {
 for _, cloud := range NodeTypesByProvider {
 for _, node := range cloud {
 size := calculateClusterSize(node, ingest, Basic)
-postiveReplicas = size.totalNodes > 0.0 && size.totalReadReplicas > 0.0 && size.totalWriteReplicas > 0.0
+postiveReplicas = size.TotalNodes > 0.0 && size.TotalReadReplicas > 0.0 && size.TotalWriteReplicas > 0.0
 require.Truef(t, postiveReplicas, "Cluster size was empty: ingest=%d cluster=%v node=%v", ingest, size, node)
-require.InDelta(t, size.totalReadReplicas, size.totalWriteReplicas, 5.0, "Replicas have different sizes: ingest=%d node=%s", ingest, node.name)
+require.InDelta(t, size.TotalReadReplicas, size.TotalWriteReplicas, 5.0, "Replicas have different sizes: ingest=%d node=%s", ingest, node.name)
 size = calculateClusterSize(node, ingest, Super)
-postiveReplicas = size.totalNodes > 0.0 && size.totalReadReplicas > 0.0 && size.totalWriteReplicas > 0.0
+postiveReplicas = size.TotalNodes > 0.0 && size.TotalReadReplicas > 0.0 && size.TotalWriteReplicas > 0.0
 require.Truef(t, postiveReplicas, "Cluster size was empty: ingest=%d cluster=%v node=%v", ingest, size, node)
 }
 }
@@ -32,4 +32,21 @@ func Test_AlgorithmTest_Algorithm(t *testing.T) {
 if err := quick.Check(f, nil); err != nil {
 t.Error(err)
 }
+// Sanity check for 1TB/Day
+size := calculateClusterSize(NodeTypesByProvider["AWS"]["t2.xlarge"], 1e12, Basic)
+require.Equalf(t, 4, size.TotalNodes, "given ingest=1TB/day total nodes must be 4")
 }
+func Test_CoresNodeInvariant(t *testing.T) {
+for _, queryPerformance := range []QueryPerf{Basic, Super} {
+for _, ingest := range []float64{30, 300, 1000, 2000} {
+for _, cloud := range NodeTypesByProvider {
+for _, node := range cloud {
+size := calculateClusterSize(node, ingest, queryPerformance)
+require.LessOrEqualf(t, size.TotalCoresRequest, float64(size.TotalNodes*node.cores), "given ingest=%d node=%s total cores must be less than available cores", ingest, node.name)
+}
+}
+}
+}
+}

@@ -50,7 +50,7 @@ func constructHelmValues(cluster ClusterSize, nodeType NodeType) Values {
 AuthEnabled: false,
 },
 Read: Read{
-Replicas: cluster.totalReadReplicas,
+Replicas: cluster.TotalReadReplicas,
 Resources: Resources{
 Requests: struct {
 CPU float64 `json:"cpu"`
@@ -69,7 +69,7 @@ func constructHelmValues(cluster ClusterSize, nodeType NodeType) Values {
 },
 },
 Write: Write{
-Replicas: cluster.totalWriteReplicas,
+Replicas: cluster.TotalWriteReplicas,
 Resources: Resources{
 Requests: struct {
 CPU float64 `json:"cpu"`

@@ -33,12 +33,12 @@ func decodeMesage(req *http.Request, msg *Message) error {
 msg.Ingest, err = strconv.Atoi(req.FormValue("ingest"))
 if err != nil {
-return err
+return fmt.Errorf("cannot read ingest: %w", err)
 }
 msg.Retention, err = strconv.Atoi(req.FormValue("retention"))
 if err != nil {
-return err
+return fmt.Errorf("cannot read retention: %w", err)
 }
 msg.QueryPerformance = QueryPerf(strings.ToLower(req.FormValue("queryperf")))
@@ -73,7 +73,7 @@ func (h *Handler) GenerateHelmValues(w http.ResponseWriter, req *http.Request) {
 enc := yaml.NewEncoder(w)
 err = enc.Encode(helm)
 if err != nil {
-level.Error(h.logger).Log("msg", "could not encode Helm Char values", "error", err)
+level.Error(h.logger).Log("msg", "could not encode Helm Chart values", "error", err)
 }
 }
@@ -88,7 +88,7 @@ func (h *Handler) Nodes(w http.ResponseWriter, req *http.Request) {
 w.Header().Set("Content-Type", "application/json")
 err := json.NewEncoder(w).Encode(nodes)
 if err != nil {
-level.Error(h.logger).Log("msg", "could not encode Helm Char values", "error", err)
+level.Error(h.logger).Log("msg", "could not encode node values", "error", err)
 }
 }
@@ -96,6 +96,25 @@ func (h *Handler) respondError(w http.ResponseWriter, err error) {
 w.WriteHeader(http.StatusBadRequest)
 _, err = w.Write([]byte(fmt.Sprintf("error: %v", err)))
 if err != nil {
-level.Error(h.logger).Log("msg", "could not encode Helm Char values", "error", err)
+level.Error(h.logger).Log("msg", "could not write error message", "error", err)
 }
 }
+func (h *Handler) Cluster(w http.ResponseWriter, req *http.Request) {
+var msg Message
+err := decodeMesage(req, &msg)
+if err != nil {
+level.Error(h.logger).Log("error", err)
+h.respondError(w, err)
+return
+}
+cluster := calculateClusterSize(msg.NodeType, float64(msg.Ingest), msg.QueryPerformance)
+w.Header().Set("Content-Type", "application/json")
+err = json.NewEncoder(w).Encode(cluster)
+if err != nil {
+level.Error(h.logger).Log("msg", "could not encode cluster size", "error", err)
+}
+}
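
Note that `encoding/json` serializes only exported struct fields, so the `/cluster` response carries just the four new exported `ClusterSize` fields; the unexported `expectedMax...` fields are omitted. An illustrative response (values made up):

```json
{
  "TotalNodes": 4,
  "TotalReadReplicas": 8,
  "TotalWriteReplicas": 7,
  "TotalCoresRequest": 30.5
}
```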

@@ -26,7 +26,7 @@ var StandardWrite = NodePod{
 var StandardRead = NodePod{
 cpuRequest: 3,
-cpuLimit: 0, // Undefined
+cpuLimit: 3, // Was undefined (0). TODO: Is this a bug?
 memoryRequest: 6,
 memoryLimit: 8,
 rateBytesSecond: 768 * 1024 * 1024,
