fix(deps): update module github.com/parquet-go/parquet-go to v0.25.1 (main) (#18013)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
pull/18016/head
renovate[bot] 7 months ago committed by GitHub
parent 9d850faf3d
commit 97ea2b60b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 3
      go.mod
  2. 7
      go.sum
  3. 15
      vendor/github.com/olekukonko/tablewriter/.gitignore
  4. 22
      vendor/github.com/olekukonko/tablewriter/.travis.yml
  5. 19
      vendor/github.com/olekukonko/tablewriter/LICENSE.md
  6. 431
      vendor/github.com/olekukonko/tablewriter/README.md
  7. 52
      vendor/github.com/olekukonko/tablewriter/csv.go
  8. 967
      vendor/github.com/olekukonko/tablewriter/table.go
  9. 136
      vendor/github.com/olekukonko/tablewriter/table_with_color.go
  10. 93
      vendor/github.com/olekukonko/tablewriter/util.go
  11. 99
      vendor/github.com/olekukonko/tablewriter/wrap.go
  12. 9
      vendor/github.com/parquet-go/parquet-go/Makefile
  13. 2
      vendor/github.com/parquet-go/parquet-go/README.md
  14. 15
      vendor/github.com/parquet-go/parquet-go/buffer.go
  15. 107
      vendor/github.com/parquet-go/parquet-go/column_buffer.go
  16. 11
      vendor/github.com/parquet-go/parquet-go/column_path.go
  17. 77
      vendor/github.com/parquet-go/parquet-go/config.go
  18. 4
      vendor/github.com/parquet-go/parquet-go/convert.go
  19. 11
      vendor/github.com/parquet-go/parquet-go/dictionary.go
  20. 2
      vendor/github.com/parquet-go/parquet-go/dictionary_amd64.go
  21. 17
      vendor/github.com/parquet-go/parquet-go/encoding.go
  22. 7
      vendor/github.com/parquet-go/parquet-go/encoding/delta/byte_array.go
  23. 5
      vendor/github.com/parquet-go/parquet-go/encoding/delta/delta.go
  24. 2
      vendor/github.com/parquet-go/parquet-go/encoding/notsupported.go
  25. 5
      vendor/github.com/parquet-go/parquet-go/encoding/rle/rle.go
  26. 4
      vendor/github.com/parquet-go/parquet-go/encoding/thrift/debug.go
  27. 16
      vendor/github.com/parquet-go/parquet-go/encoding/thrift/decode.go
  28. 6
      vendor/github.com/parquet-go/parquet-go/encoding/thrift/encode.go
  29. 36
      vendor/github.com/parquet-go/parquet-go/file.go
  30. 14
      vendor/github.com/parquet-go/parquet-go/filter.go
  31. 24
      vendor/github.com/parquet-go/parquet-go/go.tools.mod
  32. 26
      vendor/github.com/parquet-go/parquet-go/go.tools.sum
  33. 2
      vendor/github.com/parquet-go/parquet-go/internal/debug/finalizer_on.go
  34. 15
      vendor/github.com/parquet-go/parquet-go/merge.go
  35. 8
      vendor/github.com/parquet-go/parquet-go/node.go
  36. 4
      vendor/github.com/parquet-go/parquet-go/null.go
  37. 2
      vendor/github.com/parquet-go/parquet-go/null_purego.go
  38. 39
      vendor/github.com/parquet-go/parquet-go/page.go
  39. 60
      vendor/github.com/parquet-go/parquet-go/print.go
  40. 2
      vendor/github.com/parquet-go/parquet-go/reader.go
  41. 4
      vendor/github.com/parquet-go/parquet-go/row.go
  42. 26
      vendor/github.com/parquet-go/parquet-go/schema.go
  43. 2
      vendor/github.com/parquet-go/parquet-go/search.go
  44. 7
      vendor/github.com/parquet-go/parquet-go/sorting.go
  45. 5
      vendor/github.com/parquet-go/parquet-go/transform.go
  46. 42
      vendor/github.com/parquet-go/parquet-go/type.go
  47. 2
      vendor/github.com/parquet-go/parquet-go/value.go
  48. 131
      vendor/github.com/parquet-go/parquet-go/writer.go
  49. 5
      vendor/modules.txt

@ -135,7 +135,7 @@ require (
github.com/influxdata/tdigest v0.0.2-0.20210216194612-fc98d27c9e8b
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db
github.com/ncw/swift/v2 v2.0.4
github.com/parquet-go/parquet-go v0.25.0
github.com/parquet-go/parquet-go v0.25.1
github.com/prometheus/alertmanager v0.28.1
github.com/prometheus/common/sigv4 v0.1.0
github.com/prometheus/otlptranslator v0.0.0-20250414121140-35db323fe9fb
@ -217,7 +217,6 @@ require (
github.com/muesli/termenv v0.16.0 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/ncw/swift v1.0.53 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/open-telemetry/opentelemetry-collector-contrib/internal/exp/metrics v0.116.0 // indirect
github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.116.0 // indirect
github.com/open-telemetry/opentelemetry-collector-contrib/processor/deltatocumulativeprocessor v0.116.0 // indirect

@ -899,7 +899,6 @@ github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
@ -996,8 +995,6 @@ github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DV
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo=
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
@ -1045,8 +1042,8 @@ github.com/oschwald/maxminddb-golang v1.13.0/go.mod h1:BU0z8BfFVhi1LQaonTwwGQlsH
github.com/ovh/go-ovh v1.6.0 h1:ixLOwxQdzYDx296sXcgS35TOPEahJkpjMGtzPadCjQI=
github.com/ovh/go-ovh v1.6.0/go.mod h1:cTVDnl94z4tl8pP1uZ/8jlVxntjSIf09bNcQ5TJSC7c=
github.com/pact-foundation/pact-go v1.0.4/go.mod h1:uExwJY4kCzNPcHRj+hCR/HBbOOIwwtUjcrb0b5/5kLM=
github.com/parquet-go/parquet-go v0.25.0 h1:GwKy11MuF+al/lV6nUsFw8w8HCiPOSAx1/y8yFxjH5c=
github.com/parquet-go/parquet-go v0.25.0/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw=
github.com/parquet-go/parquet-go v0.25.1 h1:l7jJwNM0xrk0cnIIptWMtnSnuxRkwq53S+Po3KG8Xgo=
github.com/parquet-go/parquet-go v0.25.1/go.mod h1:AXBuotO1XiBtcqJb/FKFyjBG4aqa3aQAAWF3ZPzCanY=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY=
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=

@ -1,15 +0,0 @@
# Created by .ignore support plugin (hsz.mobi)
### Go template
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
# Test binary, build with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out

@ -1,22 +0,0 @@
language: go
arch:
- ppc64le
- amd64
go:
- 1.3
- 1.4
- 1.5
- 1.6
- 1.7
- 1.8
- 1.9
- "1.10"
- tip
jobs:
exclude :
- arch : ppc64le
go :
- 1.3
- arch : ppc64le
go :
- 1.4

@ -1,19 +0,0 @@
Copyright (C) 2014 by Oleku Konko
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

@ -1,431 +0,0 @@
ASCII Table Writer
=========
[![Build Status](https://travis-ci.org/olekukonko/tablewriter.png?branch=master)](https://travis-ci.org/olekukonko/tablewriter)
[![Total views](https://img.shields.io/sourcegraph/rrc/github.com/olekukonko/tablewriter.svg)](https://sourcegraph.com/github.com/olekukonko/tablewriter)
[![Godoc](https://godoc.org/github.com/olekukonko/tablewriter?status.svg)](https://godoc.org/github.com/olekukonko/tablewriter)
Generate ASCII table on the fly ... Installation is as simple as
go get github.com/olekukonko/tablewriter
#### Features
- Automatic Padding
- Support Multiple Lines
- Supports Alignment
- Support Custom Separators
- Automatic Alignment of numbers & percentage
- Write directly to http , file etc via `io.Writer`
- Read directly from CSV file
- Optional row line via `SetRowLine`
- Normalise table header
- Make CSV Headers optional
- Enable or disable table border
- Set custom footer support
- Optional identical cells merging
- Set custom caption
- Optional reflowing of paragraphs in multi-line cells.
#### Example 1 - Basic
```go
data := [][]string{
[]string{"A", "The Good", "500"},
[]string{"B", "The Very very Bad Man", "288"},
[]string{"C", "The Ugly", "120"},
[]string{"D", "The Gopher", "800"},
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"Name", "Sign", "Rating"})
for _, v := range data {
table.Append(v)
}
table.Render() // Send output
```
##### Output 1
```
+------+-----------------------+--------+
| NAME | SIGN | RATING |
+------+-----------------------+--------+
| A | The Good | 500 |
| B | The Very very Bad Man | 288 |
| C | The Ugly | 120 |
| D | The Gopher | 800 |
+------+-----------------------+--------+
```
#### Example 2 - Without Border / Footer / Bulk Append
```go
data := [][]string{
[]string{"1/1/2014", "Domain name", "2233", "$10.98"},
[]string{"1/1/2014", "January Hosting", "2233", "$54.95"},
[]string{"1/4/2014", "February Hosting", "2233", "$51.00"},
[]string{"1/4/2014", "February Extra Bandwidth", "2233", "$30.00"},
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"Date", "Description", "CV2", "Amount"})
table.SetFooter([]string{"", "", "Total", "$146.93"}) // Add Footer
table.SetBorder(false) // Set Border to false
table.AppendBulk(data) // Add Bulk Data
table.Render()
```
##### Output 2
```
DATE | DESCRIPTION | CV2 | AMOUNT
-----------+--------------------------+-------+----------
1/1/2014 | Domain name | 2233 | $10.98
1/1/2014 | January Hosting | 2233 | $54.95
1/4/2014 | February Hosting | 2233 | $51.00
1/4/2014 | February Extra Bandwidth | 2233 | $30.00
-----------+--------------------------+-------+----------
TOTAL | $146 93
--------+----------
```
#### Example 3 - CSV
```go
table, _ := tablewriter.NewCSV(os.Stdout, "testdata/test_info.csv", true)
table.SetAlignment(tablewriter.ALIGN_LEFT) // Set Alignment
table.Render()
```
##### Output 3
```
+----------+--------------+------+-----+---------+----------------+
| FIELD | TYPE | NULL | KEY | DEFAULT | EXTRA |
+----------+--------------+------+-----+---------+----------------+
| user_id | smallint(5) | NO | PRI | NULL | auto_increment |
| username | varchar(10) | NO | | NULL | |
| password | varchar(100) | NO | | NULL | |
+----------+--------------+------+-----+---------+----------------+
```
#### Example 4 - Custom Separator
```go
table, _ := tablewriter.NewCSV(os.Stdout, "testdata/test.csv", true)
table.SetRowLine(true) // Enable row line
// Change table lines
table.SetCenterSeparator("*")
table.SetColumnSeparator("╪")
table.SetRowSeparator("-")
table.SetAlignment(tablewriter.ALIGN_LEFT)
table.Render()
```
##### Output 4
```
*------------*-----------*---------*
╪ FIRST NAME ╪ LAST NAME ╪ SSN ╪
*------------*-----------*---------*
╪ John ╪ Barry ╪ 123456 ╪
*------------*-----------*---------*
╪ Kathy ╪ Smith ╪ 687987 ╪
*------------*-----------*---------*
╪ Bob ╪ McCornick ╪ 3979870 ╪
*------------*-----------*---------*
```
#### Example 5 - Markdown Format
```go
data := [][]string{
[]string{"1/1/2014", "Domain name", "2233", "$10.98"},
[]string{"1/1/2014", "January Hosting", "2233", "$54.95"},
[]string{"1/4/2014", "February Hosting", "2233", "$51.00"},
[]string{"1/4/2014", "February Extra Bandwidth", "2233", "$30.00"},
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"Date", "Description", "CV2", "Amount"})
table.SetBorders(tablewriter.Border{Left: true, Top: false, Right: true, Bottom: false})
table.SetCenterSeparator("|")
table.AppendBulk(data) // Add Bulk Data
table.Render()
```
##### Output 5
```
| DATE | DESCRIPTION | CV2 | AMOUNT |
|----------|--------------------------|------|--------|
| 1/1/2014 | Domain name | 2233 | $10.98 |
| 1/1/2014 | January Hosting | 2233 | $54.95 |
| 1/4/2014 | February Hosting | 2233 | $51.00 |
| 1/4/2014 | February Extra Bandwidth | 2233 | $30.00 |
```
#### Example 6 - Identical cells merging
```go
data := [][]string{
[]string{"1/1/2014", "Domain name", "1234", "$10.98"},
[]string{"1/1/2014", "January Hosting", "2345", "$54.95"},
[]string{"1/4/2014", "February Hosting", "3456", "$51.00"},
[]string{"1/4/2014", "February Extra Bandwidth", "4567", "$30.00"},
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"Date", "Description", "CV2", "Amount"})
table.SetFooter([]string{"", "", "Total", "$146.93"})
table.SetAutoMergeCells(true)
table.SetRowLine(true)
table.AppendBulk(data)
table.Render()
```
##### Output 6
```
+----------+--------------------------+-------+---------+
| DATE | DESCRIPTION | CV2 | AMOUNT |
+----------+--------------------------+-------+---------+
| 1/1/2014 | Domain name | 1234 | $10.98 |
+ +--------------------------+-------+---------+
| | January Hosting | 2345 | $54.95 |
+----------+--------------------------+-------+---------+
| 1/4/2014 | February Hosting | 3456 | $51.00 |
+ +--------------------------+-------+---------+
| | February Extra Bandwidth | 4567 | $30.00 |
+----------+--------------------------+-------+---------+
| TOTAL | $146 93 |
+----------+--------------------------+-------+---------+
```
#### Example 7 - Identical cells merging (specify the column index to merge)
```go
data := [][]string{
[]string{"1/1/2014", "Domain name", "1234", "$10.98"},
[]string{"1/1/2014", "January Hosting", "1234", "$10.98"},
[]string{"1/4/2014", "February Hosting", "3456", "$51.00"},
[]string{"1/4/2014", "February Extra Bandwidth", "4567", "$30.00"},
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"Date", "Description", "CV2", "Amount"})
table.SetFooter([]string{"", "", "Total", "$146.93"})
table.SetAutoMergeCellsByColumnIndex([]int{2, 3})
table.SetRowLine(true)
table.AppendBulk(data)
table.Render()
```
##### Output 7
```
+----------+--------------------------+-------+---------+
| DATE | DESCRIPTION | CV2 | AMOUNT |
+----------+--------------------------+-------+---------+
| 1/1/2014 | Domain name | 1234 | $10.98 |
+----------+--------------------------+ + +
| 1/1/2014 | January Hosting | | |
+----------+--------------------------+-------+---------+
| 1/4/2014 | February Hosting | 3456 | $51.00 |
+----------+--------------------------+-------+---------+
| 1/4/2014 | February Extra Bandwidth | 4567 | $30.00 |
+----------+--------------------------+-------+---------+
| TOTAL | $146.93 |
+----------+--------------------------+-------+---------+
```
#### Table with color
```go
data := [][]string{
[]string{"1/1/2014", "Domain name", "2233", "$10.98"},
[]string{"1/1/2014", "January Hosting", "2233", "$54.95"},
[]string{"1/4/2014", "February Hosting", "2233", "$51.00"},
[]string{"1/4/2014", "February Extra Bandwidth", "2233", "$30.00"},
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"Date", "Description", "CV2", "Amount"})
table.SetFooter([]string{"", "", "Total", "$146.93"}) // Add Footer
table.SetBorder(false) // Set Border to false
table.SetHeaderColor(tablewriter.Colors{tablewriter.Bold, tablewriter.BgGreenColor},
tablewriter.Colors{tablewriter.FgHiRedColor, tablewriter.Bold, tablewriter.BgBlackColor},
tablewriter.Colors{tablewriter.BgRedColor, tablewriter.FgWhiteColor},
tablewriter.Colors{tablewriter.BgCyanColor, tablewriter.FgWhiteColor})
table.SetColumnColor(tablewriter.Colors{tablewriter.Bold, tablewriter.FgHiBlackColor},
tablewriter.Colors{tablewriter.Bold, tablewriter.FgHiRedColor},
tablewriter.Colors{tablewriter.Bold, tablewriter.FgHiBlackColor},
tablewriter.Colors{tablewriter.Bold, tablewriter.FgBlackColor})
table.SetFooterColor(tablewriter.Colors{}, tablewriter.Colors{},
tablewriter.Colors{tablewriter.Bold},
tablewriter.Colors{tablewriter.FgHiRedColor})
table.AppendBulk(data)
table.Render()
```
#### Table with color Output
![Table with Color](https://cloud.githubusercontent.com/assets/6460392/21101956/bbc7b356-c0a1-11e6-9f36-dba694746efc.png)
#### Example - 8 Table Cells with Color
Individual Cell Colors from `func Rich` take precedence over Column Colors
```go
data := [][]string{
[]string{"Test1Merge", "HelloCol2 - 1", "HelloCol3 - 1", "HelloCol4 - 1"},
[]string{"Test1Merge", "HelloCol2 - 2", "HelloCol3 - 2", "HelloCol4 - 2"},
[]string{"Test1Merge", "HelloCol2 - 3", "HelloCol3 - 3", "HelloCol4 - 3"},
[]string{"Test2Merge", "HelloCol2 - 4", "HelloCol3 - 4", "HelloCol4 - 4"},
[]string{"Test2Merge", "HelloCol2 - 5", "HelloCol3 - 5", "HelloCol4 - 5"},
[]string{"Test2Merge", "HelloCol2 - 6", "HelloCol3 - 6", "HelloCol4 - 6"},
[]string{"Test2Merge", "HelloCol2 - 7", "HelloCol3 - 7", "HelloCol4 - 7"},
[]string{"Test3Merge", "HelloCol2 - 8", "HelloCol3 - 8", "HelloCol4 - 8"},
[]string{"Test3Merge", "HelloCol2 - 9", "HelloCol3 - 9", "HelloCol4 - 9"},
[]string{"Test3Merge", "HelloCol2 - 10", "HelloCol3 -10", "HelloCol4 - 10"},
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"Col1", "Col2", "Col3", "Col4"})
table.SetFooter([]string{"", "", "Footer3", "Footer4"})
table.SetBorder(false)
table.SetHeaderColor(tablewriter.Colors{tablewriter.Bold, tablewriter.BgGreenColor},
tablewriter.Colors{tablewriter.FgHiRedColor, tablewriter.Bold, tablewriter.BgBlackColor},
tablewriter.Colors{tablewriter.BgRedColor, tablewriter.FgWhiteColor},
tablewriter.Colors{tablewriter.BgCyanColor, tablewriter.FgWhiteColor})
table.SetColumnColor(tablewriter.Colors{tablewriter.Bold, tablewriter.FgHiBlackColor},
tablewriter.Colors{tablewriter.Bold, tablewriter.FgHiRedColor},
tablewriter.Colors{tablewriter.Bold, tablewriter.FgHiBlackColor},
tablewriter.Colors{tablewriter.Bold, tablewriter.FgBlackColor})
table.SetFooterColor(tablewriter.Colors{}, tablewriter.Colors{},
tablewriter.Colors{tablewriter.Bold},
tablewriter.Colors{tablewriter.FgHiRedColor})
colorData1 := []string{"TestCOLOR1Merge", "HelloCol2 - COLOR1", "HelloCol3 - COLOR1", "HelloCol4 - COLOR1"}
colorData2 := []string{"TestCOLOR2Merge", "HelloCol2 - COLOR2", "HelloCol3 - COLOR2", "HelloCol4 - COLOR2"}
for i, row := range data {
if i == 4 {
table.Rich(colorData1, []tablewriter.Colors{tablewriter.Colors{}, tablewriter.Colors{tablewriter.Normal, tablewriter.FgCyanColor}, tablewriter.Colors{tablewriter.Bold, tablewriter.FgWhiteColor}, tablewriter.Colors{}})
table.Rich(colorData2, []tablewriter.Colors{tablewriter.Colors{tablewriter.Normal, tablewriter.FgMagentaColor}, tablewriter.Colors{}, tablewriter.Colors{tablewriter.Bold, tablewriter.BgRedColor}, tablewriter.Colors{tablewriter.FgHiGreenColor, tablewriter.Italic, tablewriter.BgHiCyanColor}})
}
table.Append(row)
}
table.SetAutoMergeCells(true)
table.Render()
```
##### Table cells with color Output
![Table cells with Color](https://user-images.githubusercontent.com/9064687/63969376-bcd88d80-ca6f-11e9-9466-c3d954700b25.png)
#### Example 9 - Set table caption
```go
data := [][]string{
[]string{"A", "The Good", "500"},
[]string{"B", "The Very very Bad Man", "288"},
[]string{"C", "The Ugly", "120"},
[]string{"D", "The Gopher", "800"},
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"Name", "Sign", "Rating"})
table.SetCaption(true, "Movie ratings.")
for _, v := range data {
table.Append(v)
}
table.Render() // Send output
```
Note: Caption text will wrap with total width of rendered table.
##### Output 9
```
+------+-----------------------+--------+
| NAME | SIGN | RATING |
+------+-----------------------+--------+
| A | The Good | 500 |
| B | The Very very Bad Man | 288 |
| C | The Ugly | 120 |
| D | The Gopher | 800 |
+------+-----------------------+--------+
Movie ratings.
```
#### Example 10 - Set NoWhiteSpace and TablePadding option
```go
data := [][]string{
{"node1.example.com", "Ready", "compute", "1.11"},
{"node2.example.com", "Ready", "compute", "1.11"},
{"node3.example.com", "Ready", "compute", "1.11"},
{"node4.example.com", "NotReady", "compute", "1.11"},
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"Name", "Status", "Role", "Version"})
table.SetAutoWrapText(false)
table.SetAutoFormatHeaders(true)
table.SetHeaderAlignment(ALIGN_LEFT)
table.SetAlignment(ALIGN_LEFT)
table.SetCenterSeparator("")
table.SetColumnSeparator("")
table.SetRowSeparator("")
table.SetHeaderLine(false)
table.SetBorder(false)
table.SetTablePadding("\t") // pad with tabs
table.SetNoWhiteSpace(true)
table.AppendBulk(data) // Add Bulk Data
table.Render()
```
##### Output 10
```
NAME STATUS ROLE VERSION
node1.example.com Ready compute 1.11
node2.example.com Ready compute 1.11
node3.example.com Ready compute 1.11
node4.example.com NotReady compute 1.11
```
#### Render table into a string
Instead of rendering the table to `io.Stdout` you can also render it into a string. Go 1.10 introduced the `strings.Builder` type which implements the `io.Writer` interface and can therefore be used for this task. Example:
```go
package main
import (
"strings"
"fmt"
"github.com/olekukonko/tablewriter"
)
func main() {
tableString := &strings.Builder{}
table := tablewriter.NewWriter(tableString)
/*
* Code to fill the table
*/
table.Render()
fmt.Println(tableString.String())
}
```
#### TODO
- ~~Import Directly from CSV~~ - `done`
- ~~Support for `SetFooter`~~ - `done`
- ~~Support for `SetBorder`~~ - `done`
- ~~Support table with uneven rows~~ - `done`
- ~~Support custom alignment~~
- General Improvement & Optimisation
- `NewHTML` Parse table from HTML

@ -1,52 +0,0 @@
// Copyright 2014 Oleku Konko All rights reserved.
// Use of this source code is governed by a MIT
// license that can be found in the LICENSE file.
// This module is a Table Writer API for the Go Programming Language.
// The protocols were written in pure Go and works on windows and unix systems
package tablewriter
import (
"encoding/csv"
"io"
"os"
)
// NewCSV starts a new table by importing rows from the named CSV file.
// The rendered table is written to writer; hasHeader controls whether the
// file's first record becomes the table header.
func NewCSV(writer io.Writer, fileName string, hasHeader bool) (*Table, error) {
	f, err := os.Open(fileName)
	if err != nil {
		return &Table{}, err
	}
	defer f.Close()
	return NewCSVReader(writer, csv.NewReader(f), hasHeader)
}
// NewCSVReader starts a new table fed from an existing csv.Reader, which
// allows customisation such as reader.Comma = ';' before parsing.
// When hasHeader is true the first record becomes the table header.
func NewCSVReader(writer io.Writer, csvReader *csv.Reader, hasHeader bool) (*Table, error) {
	t := NewWriter(writer)
	if hasHeader {
		// Consume the first record and promote it to the header row.
		headers, err := csvReader.Read()
		if err != nil {
			return &Table{}, err
		}
		t.SetHeader(headers)
	}
	for {
		record, err := csvReader.Read()
		switch {
		case err == io.EOF:
			return t, nil
		case err != nil:
			return &Table{}, err
		}
		t.Append(record)
	}
}

@ -1,967 +0,0 @@
// Copyright 2014 Oleku Konko All rights reserved.
// Use of this source code is governed by a MIT
// license that can be found in the LICENSE file.
// This module is a Table Writer API for the Go Programming Language.
// The protocols were written in pure Go and works on windows and unix systems
// Create & Generate text based table
package tablewriter
import (
"bytes"
"fmt"
"io"
"regexp"
"strings"
)
const (
	// MAX_ROW_WIDTH is the default maximum column width (in characters)
	// before cell text is wrapped.
	MAX_ROW_WIDTH = 30
)

const (
	CENTER  = "+" // default junction/corner glyph
	ROW     = "-" // default horizontal line glyph
	COLUMN  = "|" // default vertical separator glyph
	SPACE   = " "
	NEWLINE = "\n"
)

// Alignment constants accepted by the Set*Alignment methods.
const (
	ALIGN_DEFAULT = iota
	ALIGN_CENTER
	ALIGN_RIGHT
	ALIGN_LEFT
)

var (
	// decimal matches plain or comma-grouped decimal numbers, e.g. "1,234.56".
	decimal = regexp.MustCompile(`^-?(?:\d{1,3}(?:,\d{3})*|\d+)(?:\.\d+)?$`)
	// percent matches a decimal number followed by a percent sign, e.g. "12.5%".
	// The previous pattern was `^-?\d+\.?\d*$%$`: the stray `$` before `%`
	// anchors at end-of-text in RE2 (no `m` flag), so the expression could
	// never match and percentage values were never detected.
	percent = regexp.MustCompile(`^-?\d+\.?\d*%$`)
)
// Border controls which of the four outer edges of the table are drawn.
type Border struct {
	Left   bool
	Right  bool
	Top    bool
	Bottom bool
}
// Table accumulates rows and rendering options, and writes a formatted
// text table to out when Render is called.
type Table struct {
	out         io.Writer    // destination writer for the rendered table
	rows        [][]string   // raw rows as appended
	lines       [][][]string // rows broken into wrapped lines, per cell
	cs          map[int]int  // column index -> computed column width
	rs          map[int]int  // row index -> computed row height in lines
	headers     [][]string   // header cells, wrapped into lines
	footers     [][]string   // footer cells, wrapped into lines
	caption     bool         // whether to print a caption under the table
	captionText string       // caption text (defaults to "Table caption.")
	autoFmt     bool         // title-case header text automatically
	autoWrap    bool         // wrap long cell text
	reflowText  bool         // reflow paragraphs when rewrapping
	mW          int          // maximum column width (defaults to MAX_ROW_WIDTH)
	pCenter     string       // junction glyph (default CENTER, "+")
	pRow        string       // horizontal line glyph (default ROW, "-")
	pColumn     string       // vertical separator glyph (default COLUMN, "|")
	tColumn     int          // NOTE(review): usage not visible in this chunk; initialised to -1
	tRow        int          // NOTE(review): usage not visible in this chunk; initialised to -1
	hAlign      int          // header alignment (ALIGN_* constant)
	fAlign      int          // footer alignment (ALIGN_* constant)
	align       int          // body cell alignment (ALIGN_* constant)
	newLine     string       // line terminator (default NEWLINE)
	rowLine     bool         // draw a separator line after each row
	autoMergeCells          bool         // vertically merge identical neighbouring cells
	columnsToAutoMergeCells map[int]bool // restrict cell merging to these columns
	noWhiteSpace            bool         // suppress padding whitespace around cells
	tablePadding            string       // padding used between cells when noWhiteSpace is set
	hdrLine                 bool         // draw a separator line under the header
	borders                 Border       // which outer edges to draw
	colSize                 int          // number of columns
	headerParams            []string     // per-column ANSI colour parameters for the header
	columnsParams           []string     // per-column ANSI colour parameters for body cells
	footerParams            []string     // per-column ANSI colour parameters for the footer
	columnsAlign            []int        // per-column alignment overrides
}
// NewWriter starts a new table that renders to writer, initialised with
// the package defaults: ASCII borders on all sides, auto-formatting and
// auto-wrapping enabled, and default alignment everywhere.
func NewWriter(writer io.Writer) *Table {
	t := new(Table)
	t.out = writer
	t.rows = [][]string{}
	t.lines = [][][]string{}
	t.cs = make(map[int]int)
	t.rs = make(map[int]int)
	t.headers = [][]string{}
	t.footers = [][]string{}
	t.caption = false
	t.captionText = "Table caption."
	t.autoFmt = true
	t.autoWrap = true
	t.reflowText = true
	t.mW = MAX_ROW_WIDTH
	t.pCenter = CENTER
	t.pRow = ROW
	t.pColumn = COLUMN
	t.tColumn = -1
	t.tRow = -1
	t.hAlign = ALIGN_DEFAULT
	t.fAlign = ALIGN_DEFAULT
	t.align = ALIGN_DEFAULT
	t.newLine = NEWLINE
	t.rowLine = false
	t.hdrLine = true
	t.borders = Border{Left: true, Right: true, Bottom: true, Top: true}
	t.colSize = -1
	t.headerParams = []string{}
	t.columnsParams = []string{}
	t.footerParams = []string{}
	t.columnsAlign = []int{}
	return t
}
// Render writes the fully formatted table to t.out: optional top border,
// heading, body rows (merged or plain), optional bottom border, footer
// and caption, in that order.
func (t *Table) Render() {
	if t.borders.Top {
		t.printLine(true)
	}
	t.printHeading()
	if t.autoMergeCells {
		t.printRowsMergeCells()
	} else {
		t.printRows()
	}
	// With rowLine enabled each row already prints its own separator, so
	// the closing border is only drawn here when row lines are off.
	if !t.rowLine && t.borders.Bottom {
		t.printLine(true)
	}
	t.printFooter()
	if t.caption {
		t.printCaption()
	}
}
// Sentinel row indexes passed to parseDimension by SetHeader/SetFooter and
// used to look up header/footer heights in t.rs, keeping them distinct
// from ordinary (non-negative) body row indexes.
const (
	headerRowIdx = -1
	footerRowIdx = -2
)
// SetHeader sets the table header from keys, one entry per column, and
// fixes the table's column count to len(keys).
func (t *Table) SetHeader(keys []string) {
	t.colSize = len(keys)
	for i, key := range keys {
		t.headers = append(t.headers, t.parseDimension(key, i, headerRowIdx))
	}
}
// SetFooter sets the table footer from keys, one entry per column.
// Unlike SetHeader, it does not change the table's column count.
func (t *Table) SetFooter(keys []string) {
	for i, key := range keys {
		t.footers = append(t.footers, t.parseDimension(key, i, footerRowIdx))
	}
}
// SetCaption enables or disables the caption printed under the table.
// An optional single string argument replaces the default caption text;
// any other number of variadic arguments leaves the text unchanged.
func (t *Table) SetCaption(caption bool, captionText ...string) {
	t.caption = caption
	if len(captionText) != 1 {
		return
	}
	t.captionText = captionText[0]
}
// SetAutoFormatHeaders turns header auto-formatting (title-casing) on or
// off. Default is on (true).
func (t *Table) SetAutoFormatHeaders(auto bool) {
	t.autoFmt = auto
}

// SetAutoWrapText turns automatic multiline text adjustment on or off.
// Default is on (true).
func (t *Table) SetAutoWrapText(auto bool) {
	t.autoWrap = auto
}

// SetReflowDuringAutoWrap turns automatic reflowing of multiline text when
// rewrapping on or off. Default is on (true).
func (t *Table) SetReflowDuringAutoWrap(auto bool) {
	t.reflowText = auto
}

// SetColWidth sets the default column width used when wrapping cell text.
func (t *Table) SetColWidth(width int) {
	t.mW = width
}

// SetColMinWidth sets the minimal width for the given column.
func (t *Table) SetColMinWidth(column int, width int) {
	t.cs[column] = width
}

// SetColumnSeparator sets the glyph drawn between columns.
func (t *Table) SetColumnSeparator(sep string) {
	t.pColumn = sep
}

// SetRowSeparator sets the glyph used for horizontal separator lines.
func (t *Table) SetRowSeparator(sep string) {
	t.pRow = sep
}

// SetCenterSeparator sets the glyph drawn at line junctions.
func (t *Table) SetCenterSeparator(sep string) {
	t.pCenter = sep
}

// SetHeaderAlignment sets the alignment (ALIGN_* constant) for header cells.
func (t *Table) SetHeaderAlignment(hAlign int) {
	t.hAlign = hAlign
}

// SetFooterAlignment sets the alignment (ALIGN_* constant) for footer cells.
func (t *Table) SetFooterAlignment(fAlign int) {
	t.fAlign = fAlign
}

// SetAlignment sets the alignment (ALIGN_* constant) for body cells.
func (t *Table) SetAlignment(align int) {
	t.align = align
}

// SetNoWhiteSpace suppresses the padding whitespace around cells when true.
func (t *Table) SetNoWhiteSpace(allow bool) {
	t.noWhiteSpace = allow
}

// SetTablePadding sets the padding string placed between cells when
// noWhiteSpace is enabled.
func (t *Table) SetTablePadding(padding string) {
	t.tablePadding = padding
}
// SetColumnAlignment sets a per-column alignment override, one entry per
// column in order. Unrecognised values are coerced to ALIGN_DEFAULT.
// (The previous implementation used redundant `break` statements; Go
// switch cases do not fall through, so a multi-value case is equivalent.)
func (t *Table) SetColumnAlignment(keys []int) {
	for _, v := range keys {
		switch v {
		case ALIGN_CENTER, ALIGN_LEFT, ALIGN_RIGHT:
			// Valid alignment: keep as-is.
		default:
			v = ALIGN_DEFAULT
		}
		t.columnsAlign = append(t.columnsAlign, v)
	}
}
// SetNewLine sets the line terminator used when rendering.
func (t *Table) SetNewLine(nl string) {
	t.newLine = nl
}

// SetHeaderLine enables / disables the separator line drawn after the
// header.
func (t *Table) SetHeaderLine(line bool) {
	t.hdrLine = line
}

// SetRowLine enables / disables a separator line after each row of the
// table.
func (t *Table) SetRowLine(line bool) {
	t.rowLine = line
}

// SetAutoMergeCells enables / disables the vertical merging of cells with
// identical values.
func (t *Table) SetAutoMergeCells(auto bool) {
	t.autoMergeCells = auto
}

// SetAutoMergeCellsByColumnIndex enables the merging of cells with
// identical values, restricted to the given column indexes.
// If cols is empty, it is the same as SetAutoMergeCells(true).
func (t *Table) SetAutoMergeCellsByColumnIndex(cols []int) {
	t.autoMergeCells = true
	if len(cols) > 0 {
		m := make(map[int]bool)
		for _, col := range cols {
			m[col] = true
		}
		t.columnsToAutoMergeCells = m
	}
}

// SetBorder enables / disables all four outer borders at once.
func (t *Table) SetBorder(border bool) {
	t.SetBorders(Border{border, border, border, border})
}

// SetBorders sets each outer border edge individually.
func (t *Table) SetBorders(border Border) {
	t.borders = border
}
// Append adds a single row to the table. Each cell is measured and broken
// into wrapped lines (via parseDimension) before being stored.
func (t *Table) Append(row []string) {
	// The column count grows from the header width, mirroring the
	// original implementation.
	if headerSize := len(t.headers); headerSize > t.colSize {
		t.colSize = headerSize
	}
	n := len(t.lines)
	line := make([][]string, 0, len(row))
	for i, cell := range row {
		// Detect width/height and break the cell text into words.
		line = append(line, t.parseDimension(cell, i, n))
	}
	t.lines = append(t.lines, line)
}
// Rich adds a row whose cells carry per-cell colour attributes.
// colors[i], when present, is applied to the first wrapped line of cell i.
func (t *Table) Rich(row []string, colors []Colors) {
	if headerSize := len(t.headers); headerSize > t.colSize {
		t.colSize = headerSize
	}
	n := len(t.lines)
	line := make([][]string, 0, len(row))
	for i, cell := range row {
		// Detect width/height and break the cell text into words.
		out := t.parseDimension(cell, i, n)
		if i < len(colors) {
			out[0] = format(out[0], colors[i])
		}
		line = append(line, out)
	}
	t.lines = append(t.lines, line)
}
// AppendBulk appends several rows at once, eliminating repeated loops at
// the call site.
func (t *Table) AppendBulk(rows [][]string) {
	for _, row := range rows {
		t.Append(row)
	}
}

// NumLines returns the number of rows appended so far.
func (t *Table) NumLines() int {
	return len(t.lines)
}

// ClearRows removes all appended rows, keeping headers and footers.
func (t *Table) ClearRows() {
	t.lines = [][][]string{}
}

// ClearFooter removes the footer.
func (t *Table) ClearFooter() {
	t.footers = [][]string{}
}
// center returns the glyph used at junction position i (-1 denotes the
// leading edge), degrading to the horizontal glyph when the matching
// left/right border is disabled.
func (t *Table) center(i int) string {
	switch {
	case i == -1 && !t.borders.Left:
		return t.pRow
	case i == len(t.cs)-1 && !t.borders.Right:
		return t.pRow
	default:
		return t.pCenter
	}
}
// printLine draws a horizontal separator sized to the current column
// widths (t.cs); nl appends the table's line terminator afterwards.
func (t *Table) printLine(nl bool) {
	fmt.Fprint(t.out, t.center(-1))
	for i := 0; i < len(t.cs); i++ {
		v := t.cs[i]
		// t.pRow is already a string; the previous code wrapped it in a
		// redundant string(...) conversion.
		fmt.Fprintf(t.out, "%s%s%s%s",
			t.pRow,
			strings.Repeat(t.pRow, v),
			t.pRow,
			t.center(i))
	}
	if nl {
		fmt.Fprint(t.out, t.newLine)
	}
}
// printLineOptionalCellSeparators draws a horizontal separator like
// printLine, but lets the caller suppress the dashes for individual cells
// (used when vertically merging identical cells). displayCellSeparator[i]
// says whether column i gets a separator; columns beyond the slice always do.
func (t *Table) printLineOptionalCellSeparators(nl bool, displayCellSeparator []bool) {
	fmt.Fprint(t.out, t.pCenter)
	for i := 0; i < len(t.cs); i++ {
		v := t.cs[i]
		// The previous condition used `i > len(displayCellSeparator)`, which
		// evaluated displayCellSeparator[i] and panicked with an index out of
		// range when i == len(displayCellSeparator); `>=` short-circuits safely.
		if i >= len(displayCellSeparator) || displayCellSeparator[i] {
			// Display the cell separator.
			fmt.Fprintf(t.out, "%s%s%s%s",
				t.pRow,
				strings.Repeat(t.pRow, v),
				t.pRow,
				t.pCenter)
		} else {
			// Suppress the separator for this cell: pad with spaces instead.
			fmt.Fprintf(t.out, "%s%s",
				strings.Repeat(" ", v+2),
				t.pCenter)
		}
	}
	if nl {
		fmt.Fprint(t.out, t.newLine)
	}
}
// Return the PadRight function if align is left, PadLeft if align is right,
// and Pad by default
func pad(align int) func(string, string, int) string {
padFunc := Pad
switch align {
case ALIGN_LEFT:
padFunc = PadRight
case ALIGN_RIGHT:
padFunc = PadLeft
}
return padFunc
}
// printHeading renders the header rows: one pass per display line of the
// tallest header cell, applying optional auto-formatting (Title), the
// header alignment's padding, and optional ANSI colors, followed by an
// optional separator rule below the header.
func (t *Table) printHeading() {
	// Nothing to render when no headers were set.
	if len(t.headers) < 1 {
		return
	}

	// Identify last column
	end := len(t.cs) - 1

	// Get pad function
	padFunc := pad(t.hAlign)

	// Header cells carry ANSI escape parameters only when header colors
	// were configured (t.headerParams non-empty).
	is_esc_seq := false
	if len(t.headerParams) > 0 {
		is_esc_seq = true
	}

	// Maximum height (number of display lines) of the header row.
	max := t.rs[headerRowIdx]

	// Print Heading
	for x := 0; x < max; x++ {
		// Check if border is set
		// Replace with space if not set
		if !t.noWhiteSpace {
			fmt.Fprint(t.out, ConditionString(t.borders.Left, t.pColumn, SPACE))
		}

		for y := 0; y <= end; y++ {
			v := t.cs[y]
			h := ""

			// Guard against ragged headers: a column may have fewer
			// display lines than max.
			if y < len(t.headers) && x < len(t.headers[y]) {
				h = t.headers[y][x]
			}
			if t.autoFmt {
				h = Title(h)
			}
			pad := ConditionString((y == end && !t.borders.Left), SPACE, t.pColumn)
			if t.noWhiteSpace {
				pad = ConditionString((y == end && !t.borders.Left), SPACE, t.tablePadding)
			}
			if is_esc_seq {
				// Colored cell: wrap the padded text in its escape codes.
				if !t.noWhiteSpace {
					fmt.Fprintf(t.out, " %s %s",
						format(padFunc(h, SPACE, v),
							t.headerParams[y]), pad)
				} else {
					fmt.Fprintf(t.out, "%s %s",
						format(padFunc(h, SPACE, v),
							t.headerParams[y]), pad)
				}
			} else {
				if !t.noWhiteSpace {
					fmt.Fprintf(t.out, " %s %s",
						padFunc(h, SPACE, v),
						pad)
				} else {
					// the spaces between breaks the kube formatting
					fmt.Fprintf(t.out, "%s%s",
						padFunc(h, SPACE, v),
						pad)
				}
			}
		}
		// Next line
		fmt.Fprint(t.out, t.newLine)
	}

	// Optional rule separating the header from the body.
	if t.hdrLine {
		t.printLine(true)
	}
}
// printFooter renders the footer block: the footer cells (padded, optionally
// auto-formatted and colored), then a custom bottom rule that omits the
// segments under empty footer columns. Multi-line footer cells are printed
// one display line per pass, like printHeading.
func (t *Table) printFooter() {
	// Nothing to render without footer cells.
	if len(t.footers) < 1 {
		return
	}

	// Only print line if border is not set
	if !t.borders.Bottom {
		t.printLine(true)
	}

	// Identify last column
	end := len(t.cs) - 1

	// Get pad function
	padFunc := pad(t.fAlign)

	// Footer cells carry ANSI escape parameters only when footer colors
	// were configured (t.footerParams non-empty).
	is_esc_seq := false
	if len(t.footerParams) > 0 {
		is_esc_seq = true
	}

	// Maximum height (display lines) of the footer row.
	max := t.rs[footerRowIdx]

	// erasePad[y] latches once column y's first display line is empty, so
	// the separator after that column stays blank on subsequent lines.
	erasePad := make([]bool, len(t.footers))

	// Print Footer
	for x := 0; x < max; x++ {
		// Check if border is set
		// Replace with space if not set
		fmt.Fprint(t.out, ConditionString(t.borders.Bottom, t.pColumn, SPACE))

		for y := 0; y <= end; y++ {
			v := t.cs[y]
			f := ""
			// Guard against ragged footers: fewer display lines than max.
			if y < len(t.footers) && x < len(t.footers[y]) {
				f = t.footers[y][x]
			}
			if t.autoFmt {
				f = Title(f)
			}
			pad := ConditionString((y == end && !t.borders.Top), SPACE, t.pColumn)

			if erasePad[y] || (x == 0 && len(f) == 0) {
				pad = SPACE
				erasePad[y] = true
			}

			if is_esc_seq {
				fmt.Fprintf(t.out, " %s %s",
					format(padFunc(f, SPACE, v),
						t.footerParams[y]), pad)
			} else {
				fmt.Fprintf(t.out, " %s %s",
					padFunc(f, SPACE, v),
					pad)
			}

			//fmt.Fprintf(t.out, " %s %s",
			//	padFunc(f, SPACE, v),
			//	pad)
		}
		// Next line
		fmt.Fprint(t.out, t.newLine)
		//t.printLine(true)
	}

	// Draw the bottom rule segment by segment: columns whose footer is
	// empty get blank space instead of dashes, and junction characters are
	// adjusted so the rule starts and stops cleanly around those gaps.
	hasPrinted := false

	for i := 0; i <= end; i++ {
		v := t.cs[i]
		pad := t.pRow
		center := t.pCenter
		// Width of this column's first footer display line decides
		// whether the segment is drawn or blanked.
		length := len(t.footers[i][0])

		if length > 0 {
			hasPrinted = true
		}

		// Set center to be space if length is 0
		if length == 0 && !t.borders.Right {
			center = SPACE
		}

		// Print first junction
		if i == 0 {
			if length > 0 && !t.borders.Left {
				center = t.pRow
			}
			fmt.Fprint(t.out, center)
		}

		// Pad With space of length is 0
		if length == 0 {
			pad = SPACE
		}
		// Ignore left space as it has printed before
		if hasPrinted || t.borders.Left {
			pad = t.pRow
			center = t.pCenter
		}

		// Change Center end position
		if center != SPACE {
			if i == end && !t.borders.Right {
				center = t.pRow
			}
		}

		// Change Center start position
		if center == SPACE {
			if i < end && len(t.footers[i+1][0]) != 0 {
				if !t.borders.Left {
					center = t.pRow
				} else {
					center = t.pCenter
				}
			}
		}

		// Print the footer
		fmt.Fprintf(t.out, "%s%s%s%s",
			pad,
			strings.Repeat(string(pad), v),
			pad,
			center)
	}

	fmt.Fprint(t.out, t.newLine)
}
// printCaption wraps the caption text to the table's rendered width and
// writes each wrapped line beneath the table.
func (t Table) printCaption() {
	wrapped, _ := WrapString(t.captionText, t.getTableWidth())
	for _, line := range wrapped {
		fmt.Fprintln(t.out, line)
	}
}
// getTableWidth returns the total printed width of one row: the sum of all
// column widths plus, per column, two padding spaces and one separator
// (3 * colSize), plus the two outer border characters.
func (t Table) getTableWidth() int {
	total := 0
	for _, width := range t.cs {
		total += width
	}
	return total + 3*t.colSize + 2
}
// printRows renders every buffered row in insertion order.
func (t Table) printRows() {
	for i := range t.lines {
		t.printRow(t.lines[i], i)
	}
}
// fillAlignment guarantees per-column alignment settings for at least num
// columns, seeding newly created slots with the table-wide default.
func (t *Table) fillAlignment(num int) {
	if len(t.columnsAlign) >= num {
		return
	}
	t.columnsAlign = make([]int, num)
	for i := 0; i < num; i++ {
		t.columnsAlign[i] = t.align
	}
}
// printRow renders one logical row (cells already split into display lines)
// at row index rowIdx. Cells shorter than the row's maximum height are
// padded with blank lines; with no explicit alignment, values that look
// numeric or like percentages are right-aligned and everything else is
// left-aligned.
func (t *Table) printRow(columns [][]string, rowIdx int) {
	// Get Maximum Height
	max := t.rs[rowIdx]
	total := len(columns)

	// TODO Fix uneven col size
	// if total < t.colSize {
	//	for n := t.colSize - total; n < t.colSize ; n++ {
	//		columns = append(columns, []string{SPACE})
	//		t.cs[n] = t.mW
	//	}
	//}

	// Pad Each Height
	pads := []int{}

	// Cells carry ANSI escape parameters only when column colors were set.
	is_esc_seq := false
	if len(t.columnsParams) > 0 {
		is_esc_seq = true
	}
	t.fillAlignment(total)

	for i, line := range columns {
		length := len(line)
		pad := max - length
		pads = append(pads, pad)
		// Fill short cells with blank display lines up to the row height.
		for n := 0; n < pad; n++ {
			columns[i] = append(columns[i], " ")
		}
	}
	//fmt.Println(max, "\n")
	for x := 0; x < max; x++ {
		for y := 0; y < total; y++ {
			// Check if border is set
			if !t.noWhiteSpace {
				fmt.Fprint(t.out, ConditionString((!t.borders.Left && y == 0), SPACE, t.pColumn))
				fmt.Fprintf(t.out, SPACE)
			}

			str := columns[y][x]

			// Embedding escape sequence with column value
			if is_esc_seq {
				str = format(str, t.columnsParams[y])
			}

			// This would print alignment
			// Default alignment would use multiple configuration
			switch t.columnsAlign[y] {
			case ALIGN_CENTER: //
				fmt.Fprintf(t.out, "%s", Pad(str, SPACE, t.cs[y]))
			case ALIGN_RIGHT:
				fmt.Fprintf(t.out, "%s", PadLeft(str, SPACE, t.cs[y]))
			case ALIGN_LEFT:
				fmt.Fprintf(t.out, "%s", PadRight(str, SPACE, t.cs[y]))
			default:
				// Heuristic: right-align numeric/percent-looking values.
				if decimal.MatchString(strings.TrimSpace(str)) || percent.MatchString(strings.TrimSpace(str)) {
					fmt.Fprintf(t.out, "%s", PadLeft(str, SPACE, t.cs[y]))
				} else {
					fmt.Fprintf(t.out, "%s", PadRight(str, SPACE, t.cs[y]))

					// TODO Custom alignment per column
					//if max == 1 || pads[y] > 0 {
					//	fmt.Fprintf(t.out, "%s", Pad(str, SPACE, t.cs[y]))
					//} else {
					//	fmt.Fprintf(t.out, "%s", PadRight(str, SPACE, t.cs[y]))
					//}

				}
			}
			if !t.noWhiteSpace {
				fmt.Fprintf(t.out, SPACE)
			} else {
				fmt.Fprintf(t.out, t.tablePadding)
			}
		}
		// Check if border is set
		// Replace with space if not set
		if !t.noWhiteSpace {
			fmt.Fprint(t.out, ConditionString(t.borders.Left, t.pColumn, SPACE))
		}
		fmt.Fprint(t.out, t.newLine)
	}

	if t.rowLine {
		t.printLine(true)
	}
}
// printRowsMergeCells renders all rows in merge-cells mode. Each row is
// first written to a temporary buffer because the separator line ABOVE a
// row must omit segments where cells merged with the previous row — which
// is only known after rendering the row itself.
func (t *Table) printRowsMergeCells() {
	var previousLine []string
	var displayCellBorder []bool
	var tmpWriter bytes.Buffer
	for i, lines := range t.lines {
		// We store the display of the current line in a tmp writer, as we need to know which border needs to be print above
		previousLine, displayCellBorder = t.printRowMergeCells(&tmpWriter, lines, i, previousLine)
		if i > 0 { //We don't need to print borders above first line
			if t.rowLine {
				t.printLineOptionalCellSeparators(true, displayCellBorder)
			}
		}
		tmpWriter.WriteTo(t.out)
	}
	//Print the end of the table
	if t.rowLine {
		t.printLine(true)
	}
}
// printRowMergeCells renders one row into writer, blanking cells whose full
// content equals the corresponding cell of previousLine so consecutive
// identical cells appear merged. It returns this row's full cell contents
// (for the next row's comparison) and, per cell, whether the separator
// above it should be drawn.
func (t *Table) printRowMergeCells(writer io.Writer, columns [][]string, rowIdx int, previousLine []string) ([]string, []bool) {
	// Get Maximum Height
	max := t.rs[rowIdx]
	total := len(columns)

	// Pad Each Height
	pads := []int{}

	// Cells carry ANSI escape parameters only when column colors were set.
	is_esc_seq := false
	if len(t.columnsParams) > 0 {
		is_esc_seq = true
	}
	for i, line := range columns {
		length := len(line)
		pad := max - length
		pads = append(pads, pad)
		// Fill short cells with blank display lines up to the row height.
		for n := 0; n < pad; n++ {
			columns[i] = append(columns[i], " ")
		}
	}

	var displayCellBorder []bool
	t.fillAlignment(total)
	for x := 0; x < max; x++ {
		for y := 0; y < total; y++ {
			// Check if border is set
			fmt.Fprint(writer, ConditionString((!t.borders.Left && y == 0), SPACE, t.pColumn))

			fmt.Fprintf(writer, SPACE)

			str := columns[y][x]

			// Embedding escape sequence with column value
			if is_esc_seq {
				str = format(str, t.columnsParams[y])
			}

			if t.autoMergeCells {
				var mergeCell bool
				if t.columnsToAutoMergeCells != nil {
					// Check to see if the column index is in columnsToAutoMergeCells.
					if t.columnsToAutoMergeCells[y] {
						mergeCell = true
					}
				} else {
					// columnsToAutoMergeCells was not set.
					mergeCell = true
				}
				//Store the full line to merge mutli-lines cells
				fullLine := strings.TrimRight(strings.Join(columns[y], " "), " ")
				if len(previousLine) > y && fullLine == previousLine[y] && fullLine != "" && mergeCell {
					// If this cell is identical to the one above but not empty, we don't display the border and keep the cell empty.
					displayCellBorder = append(displayCellBorder, false)
					str = ""
				} else {
					// First line or different content, keep the content and print the cell border
					displayCellBorder = append(displayCellBorder, true)
				}
			}

			// This would print alignment
			// Default alignment would use multiple configuration
			switch t.columnsAlign[y] {
			case ALIGN_CENTER: //
				fmt.Fprintf(writer, "%s", Pad(str, SPACE, t.cs[y]))
			case ALIGN_RIGHT:
				fmt.Fprintf(writer, "%s", PadLeft(str, SPACE, t.cs[y]))
			case ALIGN_LEFT:
				fmt.Fprintf(writer, "%s", PadRight(str, SPACE, t.cs[y]))
			default:
				// Heuristic: right-align numeric/percent-looking values.
				if decimal.MatchString(strings.TrimSpace(str)) || percent.MatchString(strings.TrimSpace(str)) {
					fmt.Fprintf(writer, "%s", PadLeft(str, SPACE, t.cs[y]))
				} else {
					fmt.Fprintf(writer, "%s", PadRight(str, SPACE, t.cs[y]))
				}
			}
			fmt.Fprintf(writer, SPACE)
		}
		// Check if border is set
		// Replace with space if not set
		fmt.Fprint(writer, ConditionString(t.borders.Left, t.pColumn, SPACE))
		fmt.Fprint(writer, t.newLine)
	}

	//The new previous line is the current one
	previousLine = make([]string, total)
	for y := 0; y < total; y++ {
		previousLine[y] = strings.TrimRight(strings.Join(columns[y], " "), " ") //Store the full line for multi-lines cells
	}

	//Returns the newly added line and wether or not a border should be displayed above.
	return previousLine, displayCellBorder
}
// parseDimension measures and splits one cell value: it breaks str into
// display lines, optionally re-wraps them to the table's maximum cell width
// (t.mW), and records the widest line in t.cs[colKey] and the line count in
// t.rs[rowKey] for later rendering. It returns the cell's display lines.
func (t *Table) parseDimension(str string, colKey, rowKey int) []string {
	var (
		raw      []string
		maxWidth int
	)

	raw = getLines(str)
	maxWidth = 0
	for _, line := range raw {
		if w := DisplayWidth(line); w > maxWidth {
			maxWidth = w
		}
	}

	// If wrapping, ensure that all paragraphs in the cell fit in the
	// specified width.
	if t.autoWrap {
		// If there's a maximum allowed width for wrapping, use that.
		if maxWidth > t.mW {
			maxWidth = t.mW
		}

		// In the process of doing so, we need to recompute maxWidth. This
		// is because perhaps a word in the cell is longer than the
		// allowed maximum width in t.mW.
		newMaxWidth := maxWidth
		newRaw := make([]string, 0, len(raw))

		if t.reflowText {
			// Make a single paragraph of everything.
			raw = []string{strings.Join(raw, " ")}
		}
		for i, para := range raw {
			paraLines, _ := WrapString(para, maxWidth)
			for _, line := range paraLines {
				if w := DisplayWidth(line); w > newMaxWidth {
					newMaxWidth = w
				}
			}
			if i > 0 {
				// Blank spacer line between paragraphs.
				newRaw = append(newRaw, " ")
			}
			newRaw = append(newRaw, paraLines...)
		}
		raw = newRaw
		maxWidth = newMaxWidth
	}

	// Store the new known maximum width.
	v, ok := t.cs[colKey]
	if !ok || v < maxWidth || v == 0 {
		t.cs[colKey] = maxWidth
	}

	// Remember the number of lines for the row printer.
	h := len(raw)
	v, ok = t.rs[rowKey]
	if !ok || v < h || v == 0 {
		t.rs[rowKey] = h
	}

	//fmt.Printf("Raw %+v %d\n", raw, len(raw))
	return raw
}

@ -1,136 +0,0 @@
package tablewriter
import (
"fmt"
"strconv"
"strings"
)
// ESC is the ANSI escape character; SEP separates SGR codes in a sequence.
const ESC = "\033"
const SEP = ";"

// Standard background colors (SGR codes 40-47).
const (
	BgBlackColor int = iota + 40
	BgRedColor
	BgGreenColor
	BgYellowColor
	BgBlueColor
	BgMagentaColor
	BgCyanColor
	BgWhiteColor
)

// Standard foreground colors (SGR codes 30-37).
const (
	FgBlackColor int = iota + 30
	FgRedColor
	FgGreenColor
	FgYellowColor
	FgBlueColor
	FgMagentaColor
	FgCyanColor
	FgWhiteColor
)

// High-intensity background colors (SGR codes 100-107).
const (
	BgHiBlackColor int = iota + 100
	BgHiRedColor
	BgHiGreenColor
	BgHiYellowColor
	BgHiBlueColor
	BgHiMagentaColor
	BgHiCyanColor
	BgHiWhiteColor
)

// High-intensity foreground colors (SGR codes 90-97).
const (
	FgHiBlackColor int = iota + 90
	FgHiRedColor
	FgHiGreenColor
	FgHiYellowColor
	FgHiBlueColor
	FgHiMagentaColor
	FgHiCyanColor
	FgHiWhiteColor
)

// Text attribute codes.
const (
	Normal          = 0
	Bold            = 1
	UnderlineSingle = 4
	// NOTE(review): Italic has no explicit value, so Go repeats the
	// previous constant expression and Italic == 4, identical to
	// UnderlineSingle — confirm whether 3 (the SGR italic code) was
	// intended before relying on it.
	Italic
)

// Colors is a set of SGR codes applied together to one cell.
type Colors []int
// startFormat builds the opening ANSI escape sequence for the given
// ";"-joined SGR parameter string, e.g. "1;31" -> "\033[1;31m".
func startFormat(seq string) string {
	return fmt.Sprintf("%s[%sm", ESC, seq)
}
// stopFormat returns the ANSI reset sequence that restores Normal rendering.
func stopFormat() string {
	return fmt.Sprintf("%s[%dm", ESC, Normal)
}
// makeSequence joins a list of SGR (Select Graphic Rendition) codes into
// the parameter portion of an ANSI escape sequence, e.g. [1, 31] -> "1;31".
func makeSequence(codes []int) string {
	parts := make([]string, len(codes))
	for i, code := range codes {
		parts[i] = strconv.Itoa(code)
	}
	return strings.Join(parts, SEP)
}
// format brackets s with ANSI escape sequences derived from codes, which
// may be a pre-built sequence string, a []int of SGR codes, or a Colors
// value. Any other type — or an empty sequence — returns s unchanged.
func format(s string, codes interface{}) string {
	var seq string

	switch v := codes.(type) {

	case string:
		seq = v
	case []int:
		seq = makeSequence(v)
	case Colors:
		seq = makeSequence(v)
	default:
		return s
	}

	if len(seq) == 0 {
		return s
	}
	return startFormat(seq) + s + stopFormat()
}
// SetHeaderColor assigns one ANSI color set per header cell. It panics
// unless exactly one Colors value is supplied per column (t.colSize).
func (t *Table) SetHeaderColor(colors ...Colors) {
	if t.colSize != len(colors) {
		panic("Number of header colors must be equal to number of headers.")
	}
	for _, c := range colors {
		t.headerParams = append(t.headerParams, makeSequence(c))
	}
}
// SetColumnColor assigns one ANSI color set per body column. It panics
// unless exactly one Colors value is supplied per column (t.colSize).
func (t *Table) SetColumnColor(colors ...Colors) {
	if t.colSize != len(colors) {
		panic("Number of column colors must be equal to number of headers.")
	}
	for i := 0; i < len(colors); i++ {
		t.columnsParams = append(t.columnsParams, makeSequence(colors[i]))
	}
}
// SetFooterColor assigns one ANSI color set per footer cell. It panics
// unless exactly one Colors value is supplied per footer column.
func (t *Table) SetFooterColor(colors ...Colors) {
	if len(t.footers) != len(colors) {
		panic("Number of footer colors must be equal to number of footer.")
	}
	for i := 0; i < len(colors); i++ {
		t.footerParams = append(t.footerParams, makeSequence(colors[i]))
	}
}
// Color is a convenience helper that packs a variadic list of SGR codes
// into a []int, suitable for building Colors values.
func Color(colors ...int) []int {
	return colors
}

@ -1,93 +0,0 @@
// Copyright 2014 Oleku Konko All rights reserved.
// Use of this source code is governed by a MIT
// license that can be found in the LICENSE file.
// This module is a Table Writer API for the Go Programming Language.
// The protocols were written in pure Go and works on windows and unix systems
package tablewriter
import (
"math"
"regexp"
"strings"
"github.com/mattn/go-runewidth"
)
// ansi matches ANSI escape sequences (SGR color and erase-line codes) so
// they can be stripped before measuring display width.
var ansi = regexp.MustCompile("\033\\[(?:[0-9]{1,3}(?:;[0-9]{1,3})*)?[m|K]")

// DisplayWidth returns the on-screen column width of str, ignoring embedded
// ANSI escape sequences and accounting for wide (e.g. East Asian) runes.
func DisplayWidth(str string) int {
	return runewidth.StringWidth(ansi.ReplaceAllLiteralString(str, ""))
}
// ConditionString is a tiny string ternary: it returns valid when cond is
// true and inValid otherwise.
func ConditionString(cond bool, valid, inValid string) string {
	if !cond {
		return inValid
	}
	return valid
}
// isNumOrSpace reports whether r is an ASCII digit or a space; Title uses
// it to decide when a '.' looks like part of a number.
func isNumOrSpace(r rune) bool {
	if r == ' ' {
		return true
	}
	return r >= '0' && r <= '9'
}
// Title normalizes a header/footer cell for display: underscores become
// spaces, dots become spaces unless they appear to sit inside a number
// (both neighbors digits or spaces, e.g. "0.0"), then the result is
// trimmed and upper-cased. A cell that trims to nothing but was originally
// non-empty yields a single space, preserving blank lines in multi-line
// headers/footers.
func Title(name string) string {
	origLen := len(name)
	rs := []rune(name)
	for i, r := range rs {
		switch r {
		case '_':
			rs[i] = ' '
		case '.':
			// ignore floating number 0.0: only replace the dot when a
			// neighboring rune is neither a digit nor a space.
			if (i != 0 && !isNumOrSpace(rs[i-1])) || (i != len(rs)-1 && !isNumOrSpace(rs[i+1])) {
				rs[i] = ' '
			}
		}
	}
	name = string(rs)
	name = strings.TrimSpace(name)
	if len(name) == 0 && origLen > 0 {
		// Keep at least one character. This is important to preserve
		// empty lines in multi-line headers/footers.
		name = " "
	}
	return strings.ToUpper(name)
}
// Pad centers s within width display columns. When the leftover gap is odd
// the larger half goes on the left, per the math.Ceil intent. Strings
// already at or beyond width are returned unchanged.
func Pad(s, pad string, width int) string {
	gap := width - DisplayWidth(s)
	if gap > 0 {
		// BUG FIX: the original computed math.Ceil(float64(gap / 2)) —
		// the integer division truncates before Ceil runs, so the Ceil
		// was a no-op. Dividing in floating point makes the ceiling
		// actually apply to odd gaps.
		gapLeft := int(math.Ceil(float64(gap) / 2))
		gapRight := gap - gapLeft
		return strings.Repeat(string(pad), gapLeft) + s + strings.Repeat(string(pad), gapRight)
	}
	return s
}
// PadRight left-justifies s by appending pad characters until it occupies
// width display columns. Strings already at or beyond width are returned
// unchanged.
func PadRight(s, pad string, width int) string {
	gap := width - DisplayWidth(s)
	if gap > 0 {
		return s + strings.Repeat(string(pad), gap)
	}
	return s
}
// PadLeft right-justifies s by prepending pad characters until it occupies
// width display columns. Strings already at or beyond width are returned
// unchanged.
func PadLeft(s, pad string, width int) string {
	gap := width - DisplayWidth(s)
	if gap > 0 {
		return strings.Repeat(string(pad), gap) + s
	}
	return s
}

@ -1,99 +0,0 @@
// Copyright 2014 Oleku Konko All rights reserved.
// Use of this source code is governed by a MIT
// license that can be found in the LICENSE file.
// This module is a Table Writer API for the Go Programming Language.
// The protocols were written in pure Go and works on windows and unix systems
package tablewriter
import (
"math"
"strings"
"github.com/mattn/go-runewidth"
)
var (
nl = "\n"
sp = " "
)
const defaultPenalty = 1e5
// WrapString wraps s into lines of at most lim display columns with minimal
// raggedness, first widening lim to the widest single word so that words
// are never split. It returns the wrapped lines and the (possibly widened)
// limit that was actually used.
func WrapString(s string, lim int) ([]string, int) {
	words := strings.Split(strings.Replace(s, nl, sp, -1), sp)

	var lines []string
	max := 0
	for _, v := range words {
		// max holds the current word's width; lim is raised to cover the
		// widest word seen so far.
		max = runewidth.StringWidth(v)
		if max > lim {
			lim = max
		}
	}
	for _, line := range WrapWords(words, 1, lim, defaultPenalty) {
		lines = append(lines, strings.Join(line, sp))
	}
	return lines, lim
}
// WrapWords is the low-level line-breaking algorithm, useful if you need more
// control over the details of the text wrapping process. For most uses,
// WrapString will be sufficient and more convenient.
//
// WrapWords splits a list of words into lines with minimal "raggedness",
// treating each rune as one unit, accounting for spc units between adjacent
// words on each line, and attempting to limit lines to lim units. Raggedness
// is the total error over all lines, where error is the square of the
// difference of the length of the line and lim. Too-long lines (which only
// happen when a single word is longer than lim units) have pen penalty units
// added to the error.
func WrapWords(words []string, spc, lim, pen int) [][]string {
	n := len(words)

	// length[i][j] is the display width of words i..j joined with
	// spc-wide gaps.
	length := make([][]int, n)
	for i := 0; i < n; i++ {
		length[i] = make([]int, n)
		length[i][i] = runewidth.StringWidth(words[i])
		for j := i + 1; j < n; j++ {
			length[i][j] = length[i][j-1] + spc + runewidth.StringWidth(words[j])
		}
	}

	// Dynamic program over suffixes: when a line starts at word i, nbrk[i]
	// is the index of the first word of the following line, and cost[i] is
	// the minimal total raggedness of wrapping words i..n-1.
	nbrk := make([]int, n)
	cost := make([]int, n)
	for i := range cost {
		cost[i] = math.MaxInt32
	}
	for i := n - 1; i >= 0; i-- {
		// If the remainder fits on one line, it costs nothing: the final
		// line's slack is free.
		if length[i][n-1] <= lim {
			cost[i] = 0
			nbrk[i] = n
		} else {
			for j := i + 1; j < n; j++ {
				d := lim - length[i][j-1]
				c := d*d + cost[j]
				if length[i][j-1] > lim {
					c += pen // too-long lines get a worse penalty
				}
				if c < cost[i] {
					cost[i] = c
					nbrk[i] = j
				}
			}
		}
	}

	// Walk the break table to materialize the lines.
	var lines [][]string
	i := 0
	for i < n {
		lines = append(lines, words[i:nbrk[i]])
		i = nbrk[i]
	}
	return lines
}
// getLines decomposes a multiline string into a slice of strings, splitting
// on the package's newline separator nl.
func getLines(s string) []string {
	return strings.Split(s, nl)
}

@ -4,9 +4,12 @@ AUTHORS.txt: .mailmap
go install github.com/kevinburke/write_mailmap@latest
write_mailmap > AUTHORS.txt
format:
go install github.com/kevinburke/differ@latest
differ gofmt -w .
tools:
go mod tidy -modfile go.tools.mod
format: tools
go fmt ./...
go tool -modfile go.tools.mod modernize -fix -test ./...
test:
go test -v -trimpath -race -cover -tags= ./...

@ -42,7 +42,7 @@ dependency on and install with the following command:
go get github.com/parquet-go/parquet-go
```
Go 1.21 or later is required to use the package.
Go 1.22 or later is required to use the package.
### Compatibility Guarantees

@ -4,6 +4,7 @@ import (
"log"
"reflect"
"runtime"
"slices"
"sort"
"sync"
"sync/atomic"
@ -226,6 +227,10 @@ func NewBuffer(options ...RowGroupOption) *Buffer {
return buf
}
// configure sets up the buffer's columns based on the provided schema.
// It also prepares the internal sorting logic by using only the requested sorting columns
// (from buf.config.Sorting.SortingColumns) that are actually found within the schema,
// preserving the requested order but ignoring missing columns.
func (buf *Buffer) configure(schema *Schema) {
if schema == nil {
return
@ -239,7 +244,7 @@ func (buf *Buffer) configure(schema *Schema) {
columnType := leaf.node.Type()
bufferCap := buf.config.ColumnBufferCapacity
dictionary := (Dictionary)(nil)
encoding := encodingOf(leaf.node)
encoding := encodingOf(leaf.node, nil)
if isDictionaryEncoding(encoding) {
estimatedDictBufferSize := columnType.EstimateSize(bufferCap)
@ -273,6 +278,8 @@ func (buf *Buffer) configure(schema *Schema) {
}
})
buf.sorted = slices.DeleteFunc(buf.sorted, func(cb ColumnBuffer) bool { return cb == nil })
buf.schema = schema
buf.rowbuf = make([]Row, 0, 1)
buf.colbuf = make([][]Value, len(buf.columns))
@ -360,7 +367,7 @@ func (buf *Buffer) Reset() {
}
// Write writes a row held in a Go value to the buffer.
func (buf *Buffer) Write(row interface{}) error {
func (buf *Buffer) Write(row any) error {
if buf.schema == nil {
buf.configure(SchemaOf(row))
}
@ -570,7 +577,7 @@ func bufferPoolNextSize(size int) int {
func bufferPoolBucketIndexAndSizeOfGet(size int) (int, int) {
limit := bufferPoolMinSize
for i := 0; i < bufferPoolBucketCount; i++ {
for i := range bufferPoolBucketCount {
if size <= limit {
return i, limit
}
@ -586,7 +593,7 @@ func bufferPoolBucketIndexAndSizeOfPut(size int) (int, int) {
// have to put the buffer is the highest bucket with a size less or equal
// to the buffer capacity.
if limit := bufferPoolMinSize; size >= limit {
for i := 0; i < bufferPoolBucketCount; i++ {
for i := range bufferPoolBucketCount {
n := bufferPoolNextSize(limit)
if size < n {
return i, limit

@ -2,11 +2,13 @@ package parquet
import (
"bytes"
"cmp"
"encoding/json"
"fmt"
"io"
"math/bits"
"reflect"
"slices"
"sort"
"time"
"unsafe"
@ -177,8 +179,8 @@ func (col *optionalColumnBuffer) Clone() ColumnBuffer {
base: col.base.Clone(),
reordered: col.reordered,
maxDefinitionLevel: col.maxDefinitionLevel,
rows: append([]int32{}, col.rows...),
definitionLevels: append([]byte{}, col.definitionLevels...),
rows: slices.Clone(col.rows),
definitionLevels: slices.Clone(col.definitionLevels),
nullOrdering: col.nullOrdering,
}
}
@ -466,9 +468,9 @@ func (col *repeatedColumnBuffer) Clone() ColumnBuffer {
reordered: col.reordered,
maxRepetitionLevel: col.maxRepetitionLevel,
maxDefinitionLevel: col.maxDefinitionLevel,
rows: append([]offsetMapping{}, col.rows...),
repetitionLevels: append([]byte{}, col.repetitionLevels...),
definitionLevels: append([]byte{}, col.definitionLevels...),
rows: slices.Clone(col.rows),
repetitionLevels: slices.Clone(col.repetitionLevels),
definitionLevels: slices.Clone(col.definitionLevels),
nullOrdering: col.nullOrdering,
}
}
@ -758,7 +760,7 @@ func (col *booleanColumnBuffer) Clone() ColumnBuffer {
return &booleanColumnBuffer{
booleanPage: booleanPage{
typ: col.typ,
bits: append([]byte{}, col.bits...),
bits: slices.Clone(col.bits),
offset: col.offset,
numValues: col.numValues,
columnIndex: col.columnIndex,
@ -919,7 +921,7 @@ func (col *int32ColumnBuffer) Clone() ColumnBuffer {
return &int32ColumnBuffer{
int32Page: int32Page{
typ: col.typ,
values: append([]int32{}, col.values...),
values: slices.Clone(col.values),
columnIndex: col.columnIndex,
},
}
@ -1017,7 +1019,7 @@ func (col *int64ColumnBuffer) Clone() ColumnBuffer {
return &int64ColumnBuffer{
int64Page: int64Page{
typ: col.typ,
values: append([]int64{}, col.values...),
values: slices.Clone(col.values),
columnIndex: col.columnIndex,
},
}
@ -1114,7 +1116,7 @@ func (col *int96ColumnBuffer) Clone() ColumnBuffer {
return &int96ColumnBuffer{
int96Page: int96Page{
typ: col.typ,
values: append([]deprecated.Int96{}, col.values...),
values: slices.Clone(col.values),
columnIndex: col.columnIndex,
},
}
@ -1169,7 +1171,7 @@ func (col *int96ColumnBuffer) WriteValues(values []Value) (int, error) {
}
func (col *int96ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
for i := 0; i < rows.Len(); i++ {
for i := range rows.Len() {
p := rows.Index(i)
col.values = append(col.values, *(*deprecated.Int96)(p))
}
@ -1211,7 +1213,7 @@ func (col *floatColumnBuffer) Clone() ColumnBuffer {
return &floatColumnBuffer{
floatPage: floatPage{
typ: col.typ,
values: append([]float32{}, col.values...),
values: slices.Clone(col.values),
columnIndex: col.columnIndex,
},
}
@ -1308,7 +1310,7 @@ func (col *doubleColumnBuffer) Clone() ColumnBuffer {
return &doubleColumnBuffer{
doublePage: doublePage{
typ: col.typ,
values: append([]float64{}, col.values...),
values: slices.Clone(col.values),
columnIndex: col.columnIndex,
},
}
@ -1515,7 +1517,7 @@ func (col *byteArrayColumnBuffer) WriteValues(values []Value) (int, error) {
}
func (col *byteArrayColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
for i := 0; i < rows.Len(); i++ {
for i := range rows.Len() {
p := rows.Index(i)
col.append(*(*string)(p))
}
@ -1577,7 +1579,7 @@ func (col *fixedLenByteArrayColumnBuffer) Clone() ColumnBuffer {
fixedLenByteArrayPage: fixedLenByteArrayPage{
typ: col.typ,
size: col.size,
data: append([]byte{}, col.data...),
data: slices.Clone(col.data),
columnIndex: col.columnIndex,
},
tmp: make([]byte, col.size),
@ -1662,7 +1664,7 @@ func (col *fixedLenByteArrayColumnBuffer) writeValues(rows sparse.Array, _ colum
col.data = col.data[:j]
newData := col.data[i:]
for i := 0; i < rows.Len(); i++ {
for i := range rows.Len() {
p := rows.Index(i)
copy(newData[i*col.size:], unsafe.Slice((*byte)(p), col.size))
}
@ -1704,7 +1706,7 @@ func (col *uint32ColumnBuffer) Clone() ColumnBuffer {
return &uint32ColumnBuffer{
uint32Page: uint32Page{
typ: col.typ,
values: append([]uint32{}, col.values...),
values: slices.Clone(col.values),
columnIndex: col.columnIndex,
},
}
@ -1801,7 +1803,7 @@ func (col *uint64ColumnBuffer) Clone() ColumnBuffer {
return &uint64ColumnBuffer{
uint64Page: uint64Page{
typ: col.typ,
values: append([]uint64{}, col.values...),
values: slices.Clone(col.values),
columnIndex: col.columnIndex,
},
}
@ -1898,7 +1900,7 @@ func (col *be128ColumnBuffer) Clone() ColumnBuffer {
return &be128ColumnBuffer{
be128Page: be128Page{
typ: col.typ,
values: append([][16]byte{}, col.values...),
values: slices.Clone(col.values),
columnIndex: col.columnIndex,
},
}
@ -2188,7 +2190,7 @@ func writeRowsFuncOfPointer(t reflect.Type, schema *Schema, path columnPath) wri
return writeRows(columns, rows, levels)
}
for i := 0; i < rows.Len(); i++ {
for i := range rows.Len() {
p := *(*unsafe.Pointer)(rows.Index(i))
a := sparse.Array{}
if p != nil {
@ -2208,7 +2210,7 @@ func writeRowsFuncOfPointer(t reflect.Type, schema *Schema, path columnPath) wri
return writeRows(columns, rows, levels)
}
for i := 0; i < rows.Len(); i++ {
for i := range rows.Len() {
p := *(*unsafe.Pointer)(rows.Index(i))
a := sparse.Array{}
elemLevels := levels
@ -2245,7 +2247,7 @@ func writeRowsFuncOfSlice(t reflect.Type, schema *Schema, path columnPath) write
levels.repetitionDepth++
for i := 0; i < rows.Len(); i++ {
for i := range rows.Len() {
p := (*sliceHeader)(rows.Index(i))
a := makeArray(p.base, p.len, elemSize)
b := sparse.Array{}
@ -2357,10 +2359,15 @@ func writeRowsFuncOfMap(t reflect.Type, schema *Schema, path columnPath) writeRo
}
levels.repetitionDepth++
mapKey := reflect.New(keyType).Elem()
mapValue := reflect.New(valueType).Elem()
mapKey := reflect.Value{}
mapValue := reflect.Value{}
compareKeys := compareFuncOf(keyType)
if compareKeys == nil {
mapKey = reflect.New(keyType).Elem()
mapValue = reflect.New(valueType).Elem()
}
for i := 0; i < rows.Len(); i++ {
for i := range rows.Len() {
m := reflect.NewAt(t, rows.Index(i)).Elem()
if m.Len() == 0 {
@ -2368,10 +2375,29 @@ func writeRowsFuncOfMap(t reflect.Type, schema *Schema, path columnPath) writeRo
if err := writeKeyValues(columns, empty, empty, levels); err != nil {
return err
}
} else {
elemLevels := levels
elemLevels.definitionLevel++
continue
}
elemLevels := levels
elemLevels.definitionLevel++
if compareKeys != nil {
keys := m.MapKeys()
slices.SortFunc(keys, compareKeys)
for _, key := range keys {
value := m.MapIndex(key)
k := makeArray(reflectValueData(key), 1, keySize)
v := makeArray(reflectValueData(value), 1, valueSize)
if err := writeKeyValues(columns, k, v, elemLevels); err != nil {
return err
}
elemLevels.repetitionLevel = elemLevels.repetitionDepth
}
} else {
for it := m.MapRange(); it.Next(); {
mapKey.SetIterKey(it)
mapValue.SetIterValue(it)
@ -2392,6 +2418,29 @@ func writeRowsFuncOfMap(t reflect.Type, schema *Schema, path columnPath) writeRo
}
}
func compareFuncOf(t reflect.Type) func(reflect.Value, reflect.Value) int {
switch t.Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return func(a, b reflect.Value) int {
return cmp.Compare(a.Int(), b.Int())
}
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
return func(a, b reflect.Value) int {
return cmp.Compare(a.Uint(), b.Uint())
}
case reflect.Float32, reflect.Float64:
return func(a, b reflect.Value) int {
return cmp.Compare(a.Float(), b.Float())
}
case reflect.String:
return func(a, b reflect.Value) int {
return cmp.Compare(a.String(), b.String())
}
default:
return nil
}
}
func writeRowsFuncOfJSON(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
// If this is a string or a byte array write directly.
switch t.Kind() {
@ -2411,7 +2460,7 @@ func writeRowsFuncOfJSON(t reflect.Type, schema *Schema, path columnPath) writeR
if rows.Len() == 0 {
return writer(columns, rows, levels)
}
for i := 0; i < rows.Len(); i++ {
for i := range rows.Len() {
val := reflect.NewAt(t, rows.Index(i))
asI := val.Interface()
@ -2448,7 +2497,7 @@ func writeRowsFuncOfTime(_ reflect.Type, schema *Schema, path columnPath) writeR
}
times := rows.TimeArray()
for i := 0; i < times.Len(); i++ {
for i := range times.Len() {
t := times.Index(i)
var val int64
switch {

@ -1,13 +1,14 @@
package parquet
import (
"slices"
"strings"
)
type columnPath []string
func (path columnPath) append(names ...string) columnPath {
return append(path[:len(path):len(path)], names...)
return slices.Concat(path, names)
}
func (path columnPath) equal(other columnPath) bool {
@ -37,13 +38,9 @@ func stringsAreEqual(strings1, strings2 []string) bool {
}
func stringsAreOrdered(strings1, strings2 []string) bool {
n := len(strings1)
n := min(len(strings1), len(strings2))
if n > len(strings2) {
n = len(strings2)
}
for i := 0; i < n; i++ {
for i := range n {
if strings1[i] >= strings2[i] {
return false
}

@ -7,7 +7,10 @@ import (
"strings"
"sync"
"slices"
"github.com/parquet-go/parquet-go/compress"
"github.com/parquet-go/parquet-go/encoding"
)
// ReadMode is an enum that is used to configure the way that a File reads pages.
@ -219,6 +222,7 @@ type WriterConfig struct {
Compression compress.Codec
Sorting SortingConfig
SkipPageBounds [][]string
Encodings map[Kind]encoding.Encoding
}
// DefaultWriterConfig returns a new WriterConfig value initialized with the
@ -269,6 +273,16 @@ func (c *WriterConfig) ConfigureWriter(config *WriterConfig) {
}
}
encodings := config.Encodings
if len(c.Encodings) > 0 {
if encodings == nil {
encodings = make(map[Kind]encoding.Encoding, len(c.Encodings))
}
for k, v := range c.Encodings {
encodings[k] = v
}
}
*config = WriterConfig{
CreatedBy: coalesceString(c.CreatedBy, config.CreatedBy),
ColumnPageBuffers: coalesceBufferPool(c.ColumnPageBuffers, config.ColumnPageBuffers),
@ -283,6 +297,8 @@ func (c *WriterConfig) ConfigureWriter(config *WriterConfig) {
BloomFilters: coalesceBloomFilters(c.BloomFilters, config.BloomFilters),
Compression: coalesceCompression(c.Compression, config.Compression),
Sorting: coalesceSortingConfig(c.Sorting, config.Sorting),
SkipPageBounds: coalesceSkipPageBounds(c.SkipPageBounds, config.SkipPageBounds),
Encodings: encodings,
}
}
@ -631,7 +647,7 @@ func KeyValueMetadata(key, value string) WriterOption {
// and applications need to explicitly declare the columns that they want to
// create filters for.
func BloomFilters(filters ...BloomFilterColumn) WriterOption {
filters = append([]BloomFilterColumn{}, filters...)
filters = slices.Clone(filters)
return writerOption(func(config *WriterConfig) { config.BloomFilters = filters })
}
@ -644,7 +660,7 @@ func Compression(codec compress.Codec) WriterOption {
// SortingWriterConfig is a writer option which applies configuration specific
// to sorting writers.
func SortingWriterConfig(options ...SortingOption) WriterOption {
options = append([]SortingOption{}, options...)
options = slices.Clone(options)
return writerOption(func(config *WriterConfig) { config.Sorting.Apply(options...) })
}
@ -657,6 +673,42 @@ func SkipPageBounds(path ...string) WriterOption {
return writerOption(func(config *WriterConfig) { config.SkipPageBounds = append(config.SkipPageBounds, path) })
}
// DefaultEncodingFor creates a configuration option which sets the default encoding
// used by a writer for columns with the specified primitive type where none were defined.
//
// It will fail (panic) if the specified encoding isn't compatible with the specified
// primitive type.
func DefaultEncodingFor(kind Kind, enc encoding.Encoding) WriterOption {
	return writerOption(func(config *WriterConfig) { defaultEncodingFor(config, kind, enc) })
}
// defaultEncodingFor records enc as the default encoding for columns of the
// given primitive kind in config.Encodings, allocating the map on first use.
//
// It panics if enc cannot encode values of that kind, mirroring the behavior
// of the exported DefaultEncodingFor option.
func defaultEncodingFor(config *WriterConfig, kind Kind, enc encoding.Encoding) {
	if !canEncode(enc, kind) {
		panic("cannot use encoding " + enc.Encoding().String() + " for kind " + kind.String())
	}
	if config.Encodings == nil {
		config.Encodings = make(map[Kind]encoding.Encoding)
	}
	config.Encodings[kind] = enc
}
// DefaultEncoding creates a configuration option which sets the default encoding
// used by a writer for columns of every primitive type where none were defined.
//
// It will fail if the specified encoding isn't compatible with any of the
// primitive types.
func DefaultEncoding(enc encoding.Encoding) WriterOption {
	return writerOption(func(config *WriterConfig) {
		// Apply the encoding to each primitive kind in turn; any
		// incompatible pairing panics inside defaultEncodingFor.
		for _, kind := range []Kind{
			Boolean,
			Int32,
			Int64,
			Int96,
			Float,
			Double,
			ByteArray,
			FixedLenByteArray,
		} {
			defaultEncodingFor(config, kind, enc)
		}
	})
}
// ColumnBufferCapacity creates a configuration option which defines the size of
// row group column buffers.
//
@ -668,7 +720,7 @@ func ColumnBufferCapacity(size int) RowGroupOption {
// SortingRowGroupConfig is a row group option which applies configuration
// specific sorting row groups.
func SortingRowGroupConfig(options ...SortingOption) RowGroupOption {
options = append([]SortingOption{}, options...)
options = slices.Clone(options)
return rowGroupOption(func(config *RowGroupConfig) { config.Sorting.Apply(options...) })
}
@ -683,7 +735,7 @@ func SortingColumns(columns ...SortingColumn) SortingOption {
// for the variable argument list, and also avoid having a nil slice when
// the option is passed with no sorting columns, so we can differentiate it
// from it not being passed.
columns = append([]SortingColumn{}, columns...)
columns = slices.Clone(columns)
return sortingOption(func(config *SortingConfig) { config.SortingColumns = columns })
}
@ -794,6 +846,13 @@ func coalesceBloomFilters(f1, f2 []BloomFilterColumn) []BloomFilterColumn {
return f2
}
// coalesceSkipPageBounds returns b1 when it is non-nil and b2 otherwise,
// following the same first-non-nil-wins convention as the other coalesce
// helpers used when merging writer configurations.
func coalesceSkipPageBounds(b1, b2 [][]string) [][]string {
	if b1 == nil {
		return b2
	}
	return b1
}
func coalesceCompression(c1, c2 compress.Codec) compress.Codec {
if c1 != nil {
return c1
@ -816,22 +875,20 @@ func validatePositiveInt64(optionName string, optionValue int64) error {
}
func validateOneOfInt(optionName string, optionValue int, supportedValues ...int) error {
for _, value := range supportedValues {
if value == optionValue {
return nil
}
if slices.Contains(supportedValues, optionValue) {
return nil
}
return errorInvalidOptionValue(optionName, optionValue)
}
func validateNotNil(optionName string, optionValue interface{}) error {
func validateNotNil(optionName string, optionValue any) error {
if optionValue != nil {
return nil
}
return errorInvalidOptionValue(optionName, optionValue)
}
func errorInvalidOptionValue(optionName string, optionValue interface{}) error {
func errorInvalidOptionValue(optionName string, optionValue any) error {
return fmt.Errorf("invalid option value: %s: %v", optionName, optionValue)
}

@ -280,7 +280,7 @@ func Convert(to, from Node) (conv Conversion, err error) {
targetNode := to
sourceNode := from
for j := 0; j < len(path); j++ {
for j := range path {
targetNode = fieldByName(targetNode, path[j])
sourceNode = fieldByName(sourceNode, path[j])
@ -432,7 +432,7 @@ func maskMissingRowGroupColumns(r RowGroup, numColumns int, conv Conversion) Row
columns[i] = &missing[i]
}
for i := 0; i < numColumns; i++ {
for i := range numColumns {
j := conv.Column(i)
if j >= 0 && j < len(columns) {
columns[j] = rowGroupColumns[j]

@ -12,6 +12,7 @@ import (
"github.com/parquet-go/parquet-go/internal/bitpack"
"github.com/parquet-go/parquet-go/internal/unsafecast"
"github.com/parquet-go/parquet-go/sparse"
"slices"
)
const (
@ -161,7 +162,7 @@ func (d *booleanDictionary) insert(indexes []int32, rows sparse.Array) {
values := rows.Uint8Array()
dict := d.table
for i := 0; i < rows.Len(); i++ {
for i := range rows.Len() {
v := values.Index(i) & 1
indexes[i] = dict[v]
}
@ -443,7 +444,7 @@ func (d *int96Dictionary) insertValues(indexes []int32, count int, valueAt func(
}
}
for i := 0; i < count; i++ {
for i := range count {
value := valueAt(i)
index, exists := d.hashmap[value]
@ -708,7 +709,7 @@ func (d *byteArrayDictionary) init() {
numValues := d.len()
d.table = make(map[string]int32, numValues)
for i := 0; i < numValues; i++ {
for i := range numValues {
d.table[string(d.index(i))] = int32(len(d.table))
}
}
@ -840,7 +841,7 @@ func (d *fixedLenByteArrayDictionary) insertValues(indexes []int32, count int, v
}
}
for i := 0; i < count; i++ {
for i := range count {
value := unsafe.Slice(valueAt(i), d.size)
index, exists := d.hashmap[string(value)]
@ -1347,7 +1348,7 @@ func (col *indexedColumnBuffer) Clone() ColumnBuffer {
return &indexedColumnBuffer{
indexedPage: indexedPage{
typ: col.typ,
values: append([]int32{}, col.values...),
values: slices.Clone(col.values),
columnIndex: col.columnIndex,
},
}

@ -77,7 +77,7 @@ func (d *byteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) {
//
// This command was used to trigger the problem:
//
// GOMAXPROCS=8 go test -run TestIssue368 -count 10
// GOMAXPROCS=8 go test -run TestIssueSegmentio368 -count 10
//
// https://github.com/segmentio/parquet-go/issues/368
//

@ -2,6 +2,7 @@ package parquet
import (
"math/bits"
"sync"
"github.com/parquet-go/parquet-go/encoding"
"github.com/parquet-go/parquet-go/encoding/bitpacked"
@ -83,6 +84,8 @@ var (
}
)
var extraEncodings sync.Map
func isDictionaryEncoding(encoding encoding.Encoding) bool {
return isDictionaryFormat(encoding.Encoding())
}
@ -91,6 +94,17 @@ func isDictionaryFormat(encoding format.Encoding) bool {
return encoding == format.PlainDictionary || encoding == format.RLEDictionary
}
// RegisterEncoding registers a non-standard parquet encoding implementation
// in the process-wide extraEncodings map so that LookupEncoding can resolve
// it by its format code.
//
// It panics if enc is the encoding.NotSupported sentinel, or if the format
// code reported by enc.Encoding() is already resolvable by LookupEncoding
// (i.e. it would shadow a standard encoding or a prior registration).
func RegisterEncoding(enc encoding.Encoding) {
	ns := encoding.NotSupported{}
	// Registering the not-supported sentinel itself would defeat the
	// fallback semantics of LookupEncoding.
	if enc == ns {
		panic("cannot register parquet encoding as not-supported")
	}
	// LookupEncoding returns NotSupported only for unknown codes; any
	// other result means this code is already taken.
	if LookupEncoding(enc.Encoding()) != ns {
		panic("cannot register parquet encoding that overrides the standard specification")
	}
	extraEncodings.Store(enc.Encoding(), enc)
}
// LookupEncoding returns the parquet encoding associated with the given code.
//
// The function never returns nil. If the encoding is not supported,
@ -101,6 +115,9 @@ func LookupEncoding(enc format.Encoding) encoding.Encoding {
return e
}
}
if enc, ok := extraEncodings.Load(enc); ok {
return enc.(encoding.Encoding)
}
return encoding.NotSupported{}
}

@ -193,7 +193,7 @@ func (e *ByteArrayEncoding) wrap(err error) error {
return err
}
func (e *ByteArrayEncoding) wrapf(msg string, args ...interface{}) error {
func (e *ByteArrayEncoding) wrapf(msg string, args ...any) error {
return encoding.Errorf(e, msg, args...)
}
@ -205,10 +205,7 @@ func linearSearchPrefixLength(base, data []byte) (n int) {
}
func binarySearchPrefixLength(base, data []byte) int {
n := len(base)
if n > len(data) {
n = len(data)
}
n := min(len(base), len(data))
return sort.Search(n, func(i int) bool {
return !bytes.Equal(base[:i+1], data[:i+1])
})

@ -73,10 +73,7 @@ func resize(buf []byte, size int) []byte {
}
func grow(buf []byte, size int) []byte {
newCap := 2 * cap(buf)
if newCap < size {
newCap = size
}
newCap := max(2*cap(buf), size)
newBuf := make([]byte, size, newCap)
copy(newBuf, buf)
return newBuf

@ -34,7 +34,7 @@ func Error(e Encoding, err error) error {
// Errorf is like Error but constructs the error message from the given format
// and arguments.
func Errorf(e Encoding, msg string, args ...interface{}) error {
func Errorf(e Encoding, msg string, args ...any) error {
return Error(e, fmt.Errorf(msg, args...))
}

@ -509,10 +509,7 @@ func resize(buf []byte, size int) []byte {
}
func grow(buf []byte, size int) []byte {
newCap := 2 * cap(buf)
if newCap < size {
newCap = size
}
newCap := max(2*cap(buf), size)
newBuf := make([]byte, size, newCap)
copy(newBuf, buf)
return newBuf

@ -24,7 +24,7 @@ type debugReader struct {
l *log.Logger
}
func (d *debugReader) log(method string, res interface{}, err error) {
func (d *debugReader) log(method string, res any, err error) {
if err != nil {
d.l.Printf("(%T).%s() → ERROR: %v", d.r, method, err)
} else {
@ -129,7 +129,7 @@ type debugWriter struct {
l *log.Logger
}
func (d *debugWriter) log(method string, arg interface{}, err error) {
func (d *debugWriter) log(method string, arg any, err error) {
if err != nil {
d.l.Printf("(%T).%s(%#v) → ERROR: %v", d.w, method, arg, err)
} else {

@ -19,7 +19,7 @@ import (
// to Unmarshal, allowing the function to reuse objects referenced by pointer
// fields of struct values. When reusing objects, the application is responsible
// for resetting the state of v before calling Unmarshal again.
func Unmarshal(p Protocol, b []byte, v interface{}) error {
func Unmarshal(p Protocol, b []byte, v any) error {
br := bytes.NewReader(b)
pr := p.NewReader(br)
@ -43,7 +43,7 @@ func NewDecoder(r Reader) *Decoder {
return &Decoder{r: r, f: decoderFlags(r)}
}
func (d *Decoder) Decode(v interface{}) error {
func (d *Decoder) Decode(v any) error {
t := reflect.TypeOf(v)
p := reflect.ValueOf(v)
@ -237,7 +237,7 @@ func decodeFuncSliceOf(t reflect.Type, seen decodeFuncCache) decodeFunc {
v.Set(reflect.MakeSlice(t, int(l.Size), int(l.Size)))
flags = flags.only(decodeFlags)
for i := 0; i < int(l.Size); i++ {
for i := range int(l.Size) {
if err := dec(r, v.Index(i), flags); err != nil {
return with(dontExpectEOF(err), &decodeErrorList{cause: l, index: i})
}
@ -292,7 +292,7 @@ func decodeFuncMapOf(t reflect.Type, seen decodeFuncCache) decodeFunc {
tmpElem := reflect.New(elem).Elem()
flags = flags.only(decodeFlags)
for i := 0; i < int(m.Size); i++ {
for i := range int(m.Size) {
if err := decodeKey(r, tmpKey, flags); err != nil {
return with(dontExpectEOF(err), &decodeErrorMap{cause: m, index: i})
}
@ -345,7 +345,7 @@ func decodeFuncMapAsSetOf(t reflect.Type, seen decodeFuncCache) decodeFunc {
tmp := reflect.New(key).Elem()
flags = flags.only(decodeFlags)
for i := 0; i < int(s.Size); i++ {
for i := range int(s.Size) {
if err := dec(r, tmp, flags); err != nil {
return with(dontExpectEOF(err), &decodeErrorSet{cause: s, index: i})
}
@ -542,7 +542,7 @@ func readList(r Reader, f func(Reader, Type) error) error {
return err
}
for i := 0; i < int(l.Size); i++ {
for i := range int(l.Size) {
if err := f(r, l.Type); err != nil {
return with(dontExpectEOF(err), &decodeErrorList{cause: l, index: i})
}
@ -557,7 +557,7 @@ func readSet(r Reader, f func(Reader, Type) error) error {
return err
}
for i := 0; i < int(s.Size); i++ {
for i := range int(s.Size) {
if err := f(r, s.Type); err != nil {
return with(dontExpectEOF(err), &decodeErrorSet{cause: s, index: i})
}
@ -572,7 +572,7 @@ func readMap(r Reader, f func(Reader, Type, Type) error) error {
return err
}
for i := 0; i < int(m.Size); i++ {
for i := range int(m.Size) {
if err := f(r, m.Key, m.Value); err != nil {
return with(dontExpectEOF(err), &decodeErrorMap{cause: m, index: i})
}

@ -14,7 +14,7 @@ import (
// protocol p.
//
// The function panics if v cannot be converted to a thrift representation.
func Marshal(p Protocol, v interface{}) ([]byte, error) {
func Marshal(p Protocol, v any) ([]byte, error) {
buf := new(bytes.Buffer)
enc := NewEncoder(p.NewWriter(buf))
err := enc.Encode(v)
@ -30,7 +30,7 @@ func NewEncoder(w Writer) *Encoder {
return &Encoder{w: w, f: encoderFlags(w)}
}
func (e *Encoder) Encode(v interface{}) error {
func (e *Encoder) Encode(v any) error {
t := reflect.TypeOf(v)
cache, _ := encoderCache.Load().(map[typeID]encodeFunc)
encode, _ := cache[makeTypeID(t)]
@ -155,7 +155,7 @@ func encodeFuncSliceOf(t reflect.Type, seen encodeFuncCache) encodeFunc {
return err
}
for i := 0; i < n; i++ {
for i := range n {
if err := enc(w, v.Index(i), flags); err != nil {
return err
}

@ -781,6 +781,9 @@ func (f *FilePages) ReadPage() (Page, error) {
return nil, io.EOF
}
// seekToRowStart indicates whether we are in the process of seeking to the start
// of requested row to read, as opposed to reading sequentially values and moving through pages
seekToRowStart := f.skip > 0
for {
// Instantiate a new format.PageHeader for each page.
//
@ -798,6 +801,14 @@ func (f *FilePages) ReadPage() (Page, error) {
if err := f.decoder.Decode(header); err != nil {
return nil, err
}
// if this is a dictionary page and we've already read and decoded the dictionary we can skip past it.
// call f.rbuf.Discard to skip the page data and realign f.rbuf with the next page header
if header.Type == format.DictionaryPage && f.dictionary != nil {
f.rbuf.Discard(int(header.CompressedPageSize))
continue
}
data, err := f.readPage(header, f.rbuf)
if err != nil {
return nil, err
@ -830,7 +841,30 @@ func (f *FilePages) ReadPage() (Page, error) {
f.index++
if f.skip == 0 {
return page, nil
// f.skip==0 can be true:
// (1) while reading a row of a column which has multiple values (ie. X.list.element) and values continue
// across pages. In that case we just want to keep reading without skipping any values.
// (2) when seeking to a specific row and trying to reach the start offset of the first
// row in a new page.
if !seekToRowStart || header.Type != format.DataPage {
// keep reading values from beginning of new page
return page, nil
}
// We need to seek to beginning of row.
// V1 data pages do not necessarily start at a row boundary.
if page.NumRows() == 0 {
// if current page does not have any rows, continue until a page with at least 1 row is reached
Release(page)
continue
}
repLvls := page.RepetitionLevels()
if len(repLvls) > 0 && repLvls[0] == 0 {
// avoid page slice if page starts at a row boundary
return page, nil
}
tail := page.Slice(0, page.NumRows())
Release(page)
return tail, nil
}
// TODO: what about pages that don't embed the number of rows?

@ -19,15 +19,11 @@ type filterRowReader struct {
func (f *filterRowReader) ReadRows(rows []Row) (n int, err error) {
for n < len(rows) {
r := len(rows) - n
if r > len(f.rows) {
r = len(f.rows)
}
r := min(len(rows)-n, len(f.rows))
r, err = f.reader.ReadRows(f.rows[:r])
for i := 0; i < r; i++ {
for i := range r {
if f.predicate(f.rows[i]) {
rows[n] = append(rows[n][:0], f.rows[i]...)
n++
@ -63,11 +59,7 @@ func (f *filterRowWriter) WriteRows(rows []Row) (n int, err error) {
for n < len(rows) {
i := 0
j := len(rows) - n
if j > len(f.rows) {
j = len(f.rows)
}
j := min(len(rows)-n, len(f.rows))
for _, row := range rows[n : n+j] {
if f.predicate(row) {

@ -0,0 +1,24 @@
module github.com/parquet-go/parquet-go
go 1.23.4
toolchain go1.24.0
tool golang.org/x/tools/gopls/internal/analysis/modernize/cmd/modernize
require (
github.com/andybalholm/brotli v1.1.1
github.com/google/uuid v1.6.0
github.com/hexops/gotextdiff v1.0.3
github.com/klauspost/compress v1.18.0
github.com/pierrec/lz4/v4 v4.1.22
golang.org/x/sys v0.30.0
google.golang.org/protobuf v1.36.5
)
require (
golang.org/x/mod v0.23.0 // indirect
golang.org/x/sync v0.11.0 // indirect
golang.org/x/tools v0.30.1-0.20250221230316-5055f70f240c // indirect
golang.org/x/tools/gopls v0.18.1 // indirect
)

@ -0,0 +1,26 @@
github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU=
github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
golang.org/x/mod v0.23.0 h1:Zb7khfcRGKk+kqfxFaP5tZqCnDZMjC5VtUBs87Hr6QM=
golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/tools v0.30.1-0.20250221230316-5055f70f240c h1:Ja/5gV5a9Vvho3p2NC/T2TtxhHjrWS/2DvCKMvA0a+Y=
golang.org/x/tools v0.30.1-0.20250221230316-5055f70f240c/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY=
golang.org/x/tools/gopls v0.18.1 h1:2xJBNzdImS5u/kV/ZzqDLSvlBSeZX+pWY9uKVP7Pask=
golang.org/x/tools/gopls v0.18.1/go.mod h1:UdNu0zeGjkmjL9L20QDszXu9tP2798pUIHC980kOBrI=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=

@ -4,4 +4,4 @@ package debug
import "runtime"
func SetFinalizer(obj, finalizer interface{}) { runtime.SetFinalizer(obj, finalizer) }
func SetFinalizer(obj, finalizer any) { runtime.SetFinalizer(obj, finalizer) }

@ -139,10 +139,7 @@ func (r *mergedRowGroupRows) Close() (lastErr error) {
func (r *mergedRowGroupRows) ReadRows(rows []Row) (int, error) {
for r.rowIndex < r.seekToRow {
n := int(r.seekToRow - r.rowIndex)
if n > len(rows) {
n = len(rows)
}
n := min(int(r.seekToRow-r.rowIndex), len(rows))
n, err := r.readInternal(rows[:n])
if err != nil {
return 0, err
@ -284,11 +281,11 @@ func (m *mergedRowReader) Swap(i, j int) {
m.readers[i], m.readers[j] = m.readers[j], m.readers[i]
}
func (m *mergedRowReader) Push(x interface{}) {
func (m *mergedRowReader) Push(x any) {
panic("NOT IMPLEMENTED")
}
func (m *mergedRowReader) Pop() interface{} {
func (m *mergedRowReader) Pop() any {
i := len(m.readers) - 1
r := m.readers[i]
m.readers = m.readers[:i]
@ -424,7 +421,7 @@ func (m *mergeBuffer) Less(i, j int) bool {
return m.compare(x[m.head[i]], y[m.head[j]]) == -1
}
func (m *mergeBuffer) Pop() interface{} {
func (m *mergeBuffer) Pop() any {
m.len--
// We don't use the popped value.
return nil
@ -439,7 +436,7 @@ func (m *mergeBuffer) Swap(i, j int) {
m.head[i], m.head[j] = m.head[j], m.head[i]
}
func (m *mergeBuffer) Push(x interface{}) {
func (m *mergeBuffer) Push(x any) {
panic("NOT IMPLEMENTED")
}
@ -468,7 +465,7 @@ func (m *mergeBuffer) WriteRowsTo(w RowWriter) (n int64, err error) {
}
func (m *mergeBuffer) left() bool {
for i := 0; i < m.len; i++ {
for i := range m.len {
if m.head[i] < len(m.buffer[i]) {
return true
}

@ -452,8 +452,12 @@ func mapKeyValueOf(node Node) Node {
panic("node with logical type MAP is not composed of a repeated .key_value group (or .map group) with key and value fields")
}
func encodingOf(node Node) encoding.Encoding {
func encodingOf(node Node, defaultEncodings map[Kind]encoding.Encoding) encoding.Encoding {
encoding := node.Encoding()
kind := node.Type().Kind()
if encoding == nil && defaultEncodings != nil {
encoding = defaultEncodings[kind]
}
// The parquet-format documentation states that the
// DELTA_LENGTH_BYTE_ARRAY is always preferred to PLAIN when
// encoding BYTE_ARRAY values. We apply it as a default if
@ -461,7 +465,7 @@ func encodingOf(node Node) encoding.Encoding {
// the opportunity to override this behavior if needed.
//
// https://github.com/apache/parquet-format/blob/master/Encodings.md#delta-length-byte-array-delta_length_byte_array--6
if node.Type().Kind() == ByteArray && encoding == nil {
if kind == ByteArray && encoding == nil {
encoding = &DeltaLengthByteArray
}
if encoding == nil {

@ -21,7 +21,7 @@ type nullIndexFunc func(bits []uint64, rows sparse.Array)
func nullIndex[T comparable](bits []uint64, rows sparse.Array) {
var zero T
for i := 0; i < rows.Len(); i++ {
for i := range rows.Len() {
v := *(*T)(rows.Index(i))
if v != zero {
x := uint(i) / 64
@ -100,7 +100,7 @@ func nullIndexFuncOf(t reflect.Type) nullIndexFunc {
func nullIndexFuncOfByteArray(n int) nullIndexFunc {
return func(bits []uint64, rows sparse.Array) {
for i := 0; i < rows.Len(); i++ {
for i := range rows.Len() {
p := (*byte)(rows.Index(i))
b := unsafe.Slice(p, n)
if !isZero(b) {

@ -49,7 +49,7 @@ func nullIndexString(bits []uint64, rows sparse.Array) {
}
func nullIndexSlice(bits []uint64, rows sparse.Array) {
for i := 0; i < rows.Len(); i++ {
for i := range rows.Len() {
p := *(**struct{})(rows.Index(i))
b := uint64(0)
if p != nil {

@ -2,6 +2,7 @@ package parquet
import (
"bytes"
"errors"
"fmt"
"io"
@ -171,6 +172,8 @@ func (pages *asyncPages) Close() (err error) {
pages.done = nil
}
for p := range pages.read {
Release(p.page)
// Capture the last error, which is the value returned from closing the
// underlying Pages instance.
err = p.err
@ -196,6 +199,9 @@ func (pages *asyncPages) ReadPage() (Page, error) {
if p.version == pages.version {
return p.page, p.err
}
// the page is being dropped here b/c it was the wrong version
Release(p.page)
}
}
@ -255,18 +261,23 @@ func readPages(pages Pages, read chan<- asyncPage, seek <-chan asyncSeek, init,
seekTo.rowIndex = -1
}
var err error
for {
var page Page
var err error
if seekTo.rowIndex >= 0 {
err = pages.SeekToRow(seekTo.rowIndex)
if err == nil {
seekTo.rowIndex = -1
continue
// if err is not fatal we consider the underlying pages object to be in an unknown state
// and we only repeatedly return that error
if !isFatalError(err) {
if seekTo.rowIndex >= 0 {
err = pages.SeekToRow(seekTo.rowIndex)
if err == nil {
seekTo.rowIndex = -1
continue
}
} else {
page, err = pages.ReadPage()
}
} else {
page, err = pages.ReadPage()
}
select {
@ -284,6 +295,10 @@ func readPages(pages Pages, read chan<- asyncPage, seek <-chan asyncSeek, init,
}
}
// isFatalError reports whether err leaves the underlying Pages object in an
// unknown state. nil and io.EOF are not errors in that sense, and
// ErrSeekOutOfRange — which FilePages can return — is recoverable, so none of
// those are treated as fatal.
func isFatalError(err error) bool {
	if err == nil || err == io.EOF {
		return false
	}
	return !errors.Is(err, ErrSeekOutOfRange)
}
type singlePage struct {
page Page
seek int64
@ -354,7 +369,7 @@ type errorPage struct {
columnIndex int
}
func newErrorPage(typ Type, columnIndex int, msg string, args ...interface{}) *errorPage {
func newErrorPage(typ Type, columnIndex int, msg string, args ...any) *errorPage {
return &errorPage{
typ: typ,
err: fmt.Errorf(msg, args...),
@ -595,7 +610,7 @@ func (page *booleanPage) valueAt(i int) bool {
}
func (page *booleanPage) min() bool {
for i := 0; i < int(page.numValues); i++ {
for i := range int(page.numValues) {
if !page.valueAt(i) {
return false
}
@ -604,7 +619,7 @@ func (page *booleanPage) min() bool {
}
func (page *booleanPage) max() bool {
for i := 0; i < int(page.numValues); i++ {
for i := range int(page.numValues) {
if page.valueAt(i) {
return true
}
@ -615,7 +630,7 @@ func (page *booleanPage) max() bool {
func (page *booleanPage) bounds() (min, max bool) {
hasFalse, hasTrue := false, false
for i := 0; i < int(page.numValues); i++ {
for i := range int(page.numValues) {
v := page.valueAt(i)
if v {
hasTrue = true

@ -6,8 +6,7 @@ import (
"io"
"strconv"
"strings"
"github.com/olekukonko/tablewriter"
"text/tabwriter"
)
func PrintSchema(w io.Writer, name string, node Node) error {
@ -209,12 +208,11 @@ func sprint(name string, node Node) string {
func PrintRowGroup(w io.Writer, rowGroup RowGroup) error {
schema := rowGroup.Schema()
pw := &printWriter{writer: w}
tw := tablewriter.NewWriter(pw)
tw := tabwriter.NewWriter(pw, 0, 0, 2, ' ', 0)
columns := schema.Columns()
header := make([]string, len(columns))
footer := make([]string, len(columns))
alignment := make([]int, len(columns))
for i, column := range columns {
leaf, _ := schema.Lookup(column...)
@ -222,14 +220,25 @@ func PrintRowGroup(w io.Writer, rowGroup RowGroup) error {
header[i] = strings.Join(column, ".")
footer[i] = columnType.String()
}
switch columnType.Kind() {
case ByteArray:
alignment[i] = tablewriter.ALIGN_LEFT
default:
alignment[i] = tablewriter.ALIGN_RIGHT
// Print header
for i, h := range header {
if i > 0 {
pw.WriteString("\t")
}
pw.WriteString(h)
}
pw.WriteString("\n")
// Print separator line
for i := range header {
if i > 0 {
pw.WriteString("\t")
}
pw.WriteString(strings.Repeat("-", len(header[i])))
}
pw.WriteString("\n")
rowbuf := make([]Row, defaultRowBufferSize)
cells := make([]string, 0, len(columns))
@ -253,11 +262,17 @@ func PrintRowGroup(w io.Writer, rowGroup RowGroup) error {
cells[columnIndex] = value.String()
} else {
cells[columnIndex] += "," + value.String()
alignment[columnIndex] = tablewriter.ALIGN_LEFT
}
}
tw.Append(cells)
// Print row
for i, cell := range cells {
if i > 0 {
tw.Write([]byte("\t"))
}
tw.Write([]byte(cell))
}
tw.Write([]byte("\n"))
}
if err != nil {
@ -268,14 +283,23 @@ func PrintRowGroup(w io.Writer, rowGroup RowGroup) error {
}
}
tw.SetAutoFormatHeaders(false)
tw.SetColumnAlignment(alignment)
tw.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
tw.SetFooterAlignment(tablewriter.ALIGN_LEFT)
tw.SetHeader(header)
tw.SetFooter(footer)
tw.Render()
// Print footer
for i := range header {
if i > 0 {
pw.WriteString("\t")
}
pw.WriteString(strings.Repeat("-", len(header[i])))
}
pw.WriteString("\n")
for i, f := range footer {
if i > 0 {
pw.WriteString("\t")
}
pw.WriteString(f)
}
pw.WriteString("\n")
tw.Flush()
fmt.Fprintf(pw, "%d rows\n\n", rowGroup.NumRows())
return pw.err
}

@ -391,7 +391,7 @@ func (r *Reader) Reset() {
// of the underlying parquet file or an error will be returned.
//
// The method returns io.EOF when no more rows can be read from r.
func (r *Reader) Read(row interface{}) error {
func (r *Reader) Read(row any) error {
if rowType := dereference(reflect.TypeOf(row)); rowType.Kind() == reflect.Struct {
if r.seen != rowType {
if err := r.updateReadSchema(rowType); err != nil {

@ -661,7 +661,7 @@ func reconstructFuncOfRepeated(columnIndex int16, node Node) (int16, reconstruct
value = setMakeSlice(value, n)
for i := 0; i < n; i++ {
for i := range n {
for j, column := range values {
column = column[:cap(column)]
if len(column) == 0 {
@ -749,7 +749,7 @@ func reconstructFuncOfMap(columnIndex int16, node Node) (int16, reconstructFunc)
}
elem := reflect.New(keyValueElem).Elem()
for i := 0; i < n; i++ {
for range n {
for j, column := range values {
column = column[:cap(column)]
k := 1

@ -75,6 +75,8 @@ func (v *onceValue[T]) load(f func() *T) *T {
// delta | enables delta encoding on the parquet column
// list | for slice types, use the parquet LIST logical type
// enum | for string types, use the parquet ENUM logical type
// bytes | for string types, use no parquet logical type
// string | for []byte types, use the parquet STRING logical type
// uuid | for string and [16]byte types, use the parquet UUID logical type
// decimal | for int32, int64 and [n]byte types, use the parquet DECIMAL logical type
// date | for int32 types use the DATE logical type
@ -130,7 +132,7 @@ func (v *onceValue[T]) load(f func() *T) *T {
// }
//
// The schema name is the Go type name of the value.
func SchemaOf(model interface{}) *Schema {
func SchemaOf(model any) *Schema {
return schemaOf(dereference(reflect.TypeOf(model)))
}
@ -262,7 +264,7 @@ func (s *Schema) GoType() reflect.Type { return s.root.GoType() }
//
// The method panics is the structure of the go value does not match the
// parquet schema.
func (s *Schema) Deconstruct(row Row, value interface{}) Row {
func (s *Schema) Deconstruct(row Row, value any) Row {
state := s.lazyLoadState()
funcs := s.lazyLoadFuncs()
columns := make([][]Value, len(state.columns))
@ -291,7 +293,7 @@ func (s *Schema) Deconstruct(row Row, value interface{}) Row {
//
// The method panics if the structure of the go value and parquet row do not
// match.
func (s *Schema) Reconstruct(value interface{}, row Row) error {
func (s *Schema) Reconstruct(value any, row Row) error {
v := reflect.ValueOf(value)
if !v.IsValid() {
panic("cannot reconstruct row into go value of type <nil>")
@ -349,7 +351,7 @@ func (v *valuesSliceBuffer) release() {
}
var valuesSliceBufferPool = &sync.Pool{
New: func() interface{} {
New: func() any {
return &valuesSliceBuffer{
// use 64 as a cache friendly base estimate of max column numbers we will be
// reading.
@ -957,6 +959,22 @@ func makeNodeOf(t reflect.Type, name string, tag []string) Node {
}
setNode(Decimal(scale, precision, baseType))
case "string":
switch {
case t.Kind() == reflect.String:
case t.Kind() == reflect.Slice && t.Elem().Kind() == reflect.Uint8:
default:
throwInvalidTag(t, name, option)
}
setNode(String())
case "bytes":
switch {
case t.Kind() == reflect.String:
case t.Kind() == reflect.Slice && t.Elem().Kind() == reflect.Uint8:
default:
throwInvalidTag(t, name, option)
}
setNode(Leaf(ByteArrayType))
case "date":
switch t.Kind() {
case reflect.Int32:

@ -103,7 +103,7 @@ func binarySearch(index ColumnIndex, value Value, cmp func(Value, Value) int) in
func linearSearch(index ColumnIndex, value Value, cmp func(Value, Value) int) int {
n := index.NumPages()
for i := 0; i < n; i++ {
for i := range n {
min := index.MinValue(i)
max := index.MaxValue(i)

@ -60,6 +60,7 @@ func NewSortingWriter[T any](output io.Writer, sortRowCount int64, options ...Wr
Schema: config.Schema,
Compression: config.Compression,
Sorting: config.Sorting,
Encodings: config.Encodings,
}),
output: NewGenericWriter[T](output, config),
maxRows: sortRowCount,
@ -222,3 +223,9 @@ func (w *SortingWriter[T]) sortAndWriteBufferedRows() error {
w.numRows += n
return nil
}
// File returns a FileView of the written parquet file, delegating to the
// underlying output writer.
//
// Only available after Close is called.
func (w *SortingWriter[T]) File() FileView {
	return w.output.File()
}

@ -111,10 +111,7 @@ func (t *transformRowWriter) WriteRows(rows []Row) (n int, err error) {
}
for n < len(rows) {
numRows := len(rows) - n
if numRows > len(t.rows) {
numRows = len(t.rows)
}
numRows := min(len(rows)-n, len(t.rows))
if err := t.writeRows(rows[n : n+numRows]); err != nil {
return n, err
}

@ -348,14 +348,19 @@ func (t booleanType) ConvertValue(val Value, typ Type) (Value, error) {
type int32Type struct{}
func (t int32Type) String() string { return "INT32" }
func (t int32Type) Kind() Kind { return Int32 }
func (t int32Type) Length() int { return 32 }
func (t int32Type) EstimateSize(n int) int { return 4 * n }
func (t int32Type) EstimateNumValues(n int) int { return n / 4 }
func (t int32Type) Compare(a, b Value) int { return compareInt32(a.int32(), b.int32()) }
func (t int32Type) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }
func (t int32Type) LogicalType() *format.LogicalType { return nil }
func (t int32Type) String() string { return "INT32" }
func (t int32Type) Kind() Kind { return Int32 }
func (t int32Type) Length() int { return 32 }
func (t int32Type) EstimateSize(n int) int { return 4 * n }
func (t int32Type) EstimateNumValues(n int) int { return n / 4 }
func (t int32Type) Compare(a, b Value) int { return compareInt32(a.int32(), b.int32()) }
func (t int32Type) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }
func (t int32Type) LogicalType() *format.LogicalType {
return &format.LogicalType{Integer: &format.IntType{
BitWidth: 32,
IsSigned: true,
}}
}
func (t int32Type) ConvertedType() *deprecated.ConvertedType { return nil }
func (t int32Type) PhysicalType() *format.Type { return &physicalTypes[Int32] }
@ -431,14 +436,19 @@ func (t int32Type) ConvertValue(val Value, typ Type) (Value, error) {
type int64Type struct{}
func (t int64Type) String() string { return "INT64" }
func (t int64Type) Kind() Kind { return Int64 }
func (t int64Type) Length() int { return 64 }
func (t int64Type) EstimateSize(n int) int { return 8 * n }
func (t int64Type) EstimateNumValues(n int) int { return n / 8 }
func (t int64Type) Compare(a, b Value) int { return compareInt64(a.int64(), b.int64()) }
func (t int64Type) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }
func (t int64Type) LogicalType() *format.LogicalType { return nil }
func (t int64Type) String() string { return "INT64" }
func (t int64Type) Kind() Kind { return Int64 }
func (t int64Type) Length() int { return 64 }
func (t int64Type) EstimateSize(n int) int { return 8 * n }
func (t int64Type) EstimateNumValues(n int) int { return n / 8 }
func (t int64Type) Compare(a, b Value) int { return compareInt64(a.int64(), b.int64()) }
func (t int64Type) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }
func (t int64Type) LogicalType() *format.LogicalType {
return &format.LogicalType{Integer: &format.IntType{
BitWidth: 64,
IsSigned: true,
}}
}
func (t int64Type) ConvertedType() *deprecated.ConvertedType { return nil }
func (t int64Type) PhysicalType() *format.Type { return &physicalTypes[Int64] }

@ -173,7 +173,7 @@ func copyValues(dst ValueWriter, src ValueReader, buf []Value) (written int64, e
// The repetition and definition levels of the returned value are both zero.
//
// The function panics if the Go value cannot be represented in parquet.
func ValueOf(v interface{}) Value {
func ValueOf(v any) Value {
k := Kind(-1)
t := reflect.TypeOf(v)

@ -100,15 +100,28 @@ func NewGenericWriter[T any](output io.Writer, options ...WriterOption) *Generic
schema := config.Schema
t := typeOf[T]()
var genWriteErr error
if schema == nil && t != nil {
schema = schemaOf(dereference(t))
if len(schema.Columns()) == 0 {
genWriteErr = fmt.Errorf("cannot write %v: it has no columns (maybe it has no exported fields)", t)
}
config.Schema = schema
} else if schema != nil && len(schema.Columns()) == 0 {
genWriteErr = fmt.Errorf("cannot write %v: schema has no columns", t)
}
if config.Schema == nil {
panic("generic writer must be instantiated with schema or concrete type.")
}
var writeFn writeFunc[T]
if genWriteErr != nil {
writeFn = func(*GenericWriter[T], []T) (int, error) { return 0, genWriteErr }
} else {
writeFn = writeFuncOf[T](t, config.Schema)
}
return &GenericWriter[T]{
base: Writer{
output: output,
@ -116,7 +129,7 @@ func NewGenericWriter[T any](output io.Writer, options ...WriterOption) *Generic
schema: schema,
writer: newWriter(output, config),
},
write: writeFuncOf[T](t, config.Schema),
write: writeFn,
}
}
@ -182,7 +195,7 @@ func (w *GenericWriter[T]) Write(rows []T) (int, error) {
for _, c := range w.base.writer.columns {
if c.columnBuffer.Size() >= int64(c.bufferSize) {
if err := c.flush(); err != nil {
if err := c.Flush(); err != nil {
return n, err
}
}
@ -221,7 +234,7 @@ func (w *GenericWriter[T]) Schema() *Schema {
return w.base.Schema()
}
func (w *GenericWriter[T]) ColumnWriters() []ValueWriter {
func (w *GenericWriter[T]) ColumnWriters() []*ColumnWriter {
return w.base.ColumnWriters()
}
@ -385,7 +398,7 @@ func (w *Writer) Reset(output io.Writer) {
// and decompose it into a set of columns and values. If no schema were passed
// to NewWriter, it is deducted from the Go type of the row, which then have to
// be a struct or pointer to struct.
func (w *Writer) Write(row interface{}) error {
func (w *Writer) Write(row any) error {
if w.schema == nil {
w.configure(SchemaOf(row))
}
@ -491,7 +504,7 @@ func (w *Writer) SetKeyValueMetadata(key, value string) {
// ColumnWriters returns writers for each column. This allows applications to
// write values directly to each column instead of having to first assemble
// values into rows to use WriteRows.
func (w *Writer) ColumnWriters() []ValueWriter { return w.writer.valueWriters }
func (w *Writer) ColumnWriters() []*ColumnWriter { return w.writer.columns }
type writerFileView struct {
writer *writer
@ -572,11 +585,10 @@ type writer struct {
createdBy string
metadata []format.KeyValue
columns []*writerColumn
valueWriters []ValueWriter
columnChunk []format.ColumnChunk
columnIndex []format.ColumnIndex
offsetIndex []format.OffsetIndex
columns []*ColumnWriter
columnChunk []format.ColumnChunk
columnIndex []format.ColumnIndex
offsetIndex []format.OffsetIndex
columnOrders []format.ColumnOrder
schemaElements []format.SchemaElement
@ -658,7 +670,7 @@ func newWriter(output io.Writer, config *WriterConfig) *writer {
buffers := new(writerBuffers)
forEachLeafColumnOf(config.Schema, func(leaf leafColumn) {
encoding := encodingOf(leaf.node)
encoding := encodingOf(leaf.node, config.Encodings)
dictionary := Dictionary(nil)
columnType := leaf.node.Type()
columnIndex := int(leaf.columnIndex)
@ -677,7 +689,7 @@ func newWriter(output io.Writer, config *WriterConfig) *writer {
columnType = dictionary.Type()
}
c := &writerColumn{
c := &ColumnWriter{
buffers: buffers,
pool: config.ColumnPageBuffers,
columnPath: leaf.path,
@ -764,10 +776,6 @@ func newWriter(output io.Writer, config *WriterConfig) *writer {
for i, c := range w.columns {
w.columnOrders[i] = *c.columnType.ColumnOrder()
}
w.valueWriters = make([]ValueWriter, len(w.columns))
for i, c := range w.columns {
w.valueWriters[i] = c
}
return w
}
@ -853,7 +861,6 @@ func (w *writer) writeFileFooter() error {
protocol := new(thrift.CompactProtocol)
encoder := thrift.NewEncoder(protocol.NewWriter(&w.writer))
w.columnIndex = w.columnIndex[:0]
for i, columnIndexes := range w.columnIndexes {
rowGroup := &w.rowGroups[i]
for j := range columnIndexes {
@ -864,10 +871,8 @@ func (w *writer) writeFileFooter() error {
}
column.ColumnIndexLength = int32(w.writer.offset - column.ColumnIndexOffset)
}
w.columnIndex = append(w.columnIndex, columnIndexes...)
}
w.offsetIndex = w.offsetIndex[:0]
for i, offsetIndexes := range w.offsetIndexes {
rowGroup := &w.rowGroups[i]
for j := range offsetIndexes {
@ -878,7 +883,6 @@ func (w *writer) writeFileFooter() error {
}
column.OffsetIndexLength = int32(w.writer.offset - column.OffsetIndexOffset)
}
w.offsetIndex = append(w.offsetIndex, offsetIndexes...)
}
numRows := int64(0)
@ -917,6 +921,9 @@ func (w *writer) writeFileFooter() error {
}
func (w *writer) writeRowGroup(rowGroupSchema *Schema, rowGroupSortingColumns []SortingColumn) (int64, error) {
if len(w.columns) == 0 {
return 0, nil
}
numRows := w.columns[0].totalRowCount()
if numRows == 0 {
return 0, nil
@ -937,7 +944,7 @@ func (w *writer) writeRowGroup(rowGroupSchema *Schema, rowGroupSortingColumns []
}()
for _, c := range w.columns {
if err := c.flush(); err != nil {
if err := c.Flush(); err != nil {
return 0, err
}
if err := c.flushFilterPages(); err != nil {
@ -1067,7 +1074,7 @@ func (w *writer) WriteRows(rows []Row) (int, error) {
for i, values := range w.values {
if len(values) > 0 {
if err := w.columns[i].writeRows(values); err != nil {
if _, err := w.columns[i].WriteRowValues(values); err != nil {
return 0, err
}
}
@ -1123,7 +1130,7 @@ func (w *writer) writeRows(numRows int, write func(i, j int) (int, error)) (int,
// The WriteValues method is intended to work in pair with WritePage to allow
// programs to target writing values to specific columns of of the writer.
func (w *writer) WriteValues(values []Value) (numValues int, err error) {
return w.columns[values[0].Column()].WriteValues(values)
return w.columns[values[0].Column()].writeValues(values)
}
// One writerBuffers is used by each writer instance, the memory buffers here
@ -1221,7 +1228,8 @@ func (wb *writerBuffers) swapPageAndScratchBuffers() {
wb.page, wb.scratch = wb.scratch, wb.page[:0]
}
type writerColumn struct {
// ColumnWriter writes values for a single column to underlying medium.
type ColumnWriter struct {
pool BufferPool
pageBuffer io.ReadWriteSeeker
numPages int
@ -1259,7 +1267,7 @@ type writerColumn struct {
offsetIndex *format.OffsetIndex
}
func (c *writerColumn) reset() {
func (c *ColumnWriter) reset() {
if c.columnBuffer != nil {
c.columnBuffer.Reset()
}
@ -1291,7 +1299,7 @@ func (c *writerColumn) reset() {
c.offsetIndex.PageLocations = c.offsetIndex.PageLocations[:0]
}
func (c *writerColumn) totalRowCount() int64 {
func (c *ColumnWriter) totalRowCount() int64 {
n := c.numRows
if c.columnBuffer != nil {
n += int64(c.columnBuffer.Len())
@ -1299,7 +1307,11 @@ func (c *writerColumn) totalRowCount() int64 {
return n
}
func (c *writerColumn) flush() (err error) {
// Flush writes any buffered data to the underlying [io.Writer].
func (c *ColumnWriter) Flush() (err error) {
if c.columnBuffer == nil {
return nil
}
if c.columnBuffer.Len() > 0 {
defer c.columnBuffer.Reset()
_, err = c.writeDataPage(c.columnBuffer.Page())
@ -1307,7 +1319,7 @@ func (c *writerColumn) flush() (err error) {
return err
}
func (c *writerColumn) flushFilterPages() (err error) {
func (c *ColumnWriter) flushFilterPages() (err error) {
if c.columnFilter == nil {
return nil
}
@ -1375,7 +1387,7 @@ func (c *writerColumn) flushFilterPages() (err error) {
decoder := thrift.NewDecoder(c.header.protocol.NewReader(pageReader))
for i := 0; i < c.numPages; i++ {
for range c.numPages {
header := new(format.PageHeader)
if err := decoder.Decode(header); err != nil {
return err
@ -1409,7 +1421,7 @@ func (c *writerColumn) flushFilterPages() (err error) {
return nil
}
func (c *writerColumn) resizeBloomFilter(numValues int64) {
func (c *ColumnWriter) resizeBloomFilter(numValues int64) {
filterSize := c.columnFilter.Size(numValues)
if cap(c.filter) < filterSize {
c.filter = make([]byte, filterSize)
@ -1421,7 +1433,7 @@ func (c *writerColumn) resizeBloomFilter(numValues int64) {
}
}
func (c *writerColumn) newColumnBuffer() ColumnBuffer {
func (c *ColumnWriter) newColumnBuffer() ColumnBuffer {
column := c.columnType.NewColumnBuffer(int(c.bufferIndex), c.columnType.EstimateNumValues(int(c.bufferSize)))
switch {
case c.maxRepetitionLevel > 0:
@ -1432,29 +1444,54 @@ func (c *writerColumn) newColumnBuffer() ColumnBuffer {
return column
}
func (c *writerColumn) writeRows(rows []Value) error {
// WriteRowValues writes entire rows to the column. On success, this returns the
// number of rows written (not the number of values).
//
// Unlike ValueWriter, where arbitrary values may be written regardless of row
// boundaries, this method requires whole rows. This is because the written
// values may be automatically flushed to a data page, based on the writer's
// configured page buffer size, and a single row is not permitted to span two
// pages.
func (c *ColumnWriter) WriteRowValues(rows []Value) (int, error) {
var startingRows int64
if c.columnBuffer == nil {
// Lazily create the row group column so we don't need to allocate it if
// rows are not written individually to the column.
c.columnBuffer = c.newColumnBuffer()
} else {
startingRows = int64(c.columnBuffer.Len())
}
if _, err := c.columnBuffer.WriteValues(rows); err != nil {
return err
return 0, err
}
numRows := int(int64(c.columnBuffer.Len()) - startingRows)
if c.columnBuffer.Size() >= int64(c.bufferSize) {
return c.flush()
return numRows, c.Flush()
}
return numRows, nil
}
// Close closes the column writer and releases all dependent resources.
// New values should not be written after the ColumnWriter is closed.
func (c *ColumnWriter) Close() (err error) {
if c.columnBuffer == nil {
return nil
}
if err := c.Flush(); err != nil {
return err
}
c.columnBuffer = nil
return nil
}
func (c *writerColumn) WriteValues(values []Value) (numValues int, err error) {
func (c *ColumnWriter) writeValues(values []Value) (numValues int, err error) {
if c.columnBuffer == nil {
c.columnBuffer = c.newColumnBuffer()
}
return c.columnBuffer.WriteValues(values)
}
func (c *writerColumn) writeBloomFilter(w io.Writer) error {
func (c *ColumnWriter) writeBloomFilter(w io.Writer) error {
e := thrift.NewEncoder(c.header.protocol.NewWriter(w))
h := bloomFilterHeader(c.columnFilter)
h.NumBytes = int32(len(c.filter))
@ -1465,7 +1502,7 @@ func (c *writerColumn) writeBloomFilter(w io.Writer) error {
return err
}
func (c *writerColumn) writeDataPage(page Page) (int64, error) {
func (c *ColumnWriter) writeDataPage(page Page) (int64, error) {
numValues := page.NumValues()
if numValues == 0 {
return 0, nil
@ -1578,7 +1615,7 @@ func (c *writerColumn) writeDataPage(page Page) (int64, error) {
return numValues, nil
}
func (c *writerColumn) writeDictionaryPage(output io.Writer, dict Dictionary) (err error) {
func (c *ColumnWriter) writeDictionaryPage(output io.Writer, dict Dictionary) (err error) {
buf := c.buffers
buf.reset()
@ -1623,14 +1660,14 @@ func (c *writerColumn) writeDictionaryPage(output io.Writer, dict Dictionary) (e
return nil
}
func (w *writerColumn) writePageToFilter(page Page) (err error) {
func (c *ColumnWriter) writePageToFilter(page Page) (err error) {
pageType := page.Type()
pageData := page.Data()
w.filter, err = pageType.Encode(w.filter, pageData, w.columnFilter.Encoding())
c.filter, err = pageType.Encode(c.filter, pageData, c.columnFilter.Encoding())
return err
}
func (c *writerColumn) writePageTo(size int64, writeTo func(io.Writer) (int64, error)) (err error) {
func (c *ColumnWriter) writePageTo(size int64, writeTo func(io.Writer) (int64, error)) (err error) {
if c.pageBuffer == nil {
c.pageBuffer = c.pool.GetBuffer()
defer func() {
@ -1654,7 +1691,7 @@ func (c *writerColumn) writePageTo(size int64, writeTo func(io.Writer) (int64, e
return nil
}
func (c *writerColumn) makePageStatistics(page Page) format.Statistics {
func (c *ColumnWriter) makePageStatistics(page Page) format.Statistics {
numNulls := page.NumNulls()
minValue, maxValue, _ := page.Bounds()
minValueBytes := minValue.Bytes()
@ -1668,7 +1705,7 @@ func (c *writerColumn) makePageStatistics(page Page) format.Statistics {
}
}
func (c *writerColumn) recordPageStats(headerSize int32, header *format.PageHeader, page Page) {
func (c *ColumnWriter) recordPageStats(headerSize int32, header *format.PageHeader, page Page) {
uncompressedSize := headerSize + header.UncompressedPageSize
compressedSize := headerSize + header.CompressedPageSize
@ -1743,10 +1780,8 @@ func (c *writerColumn) recordPageStats(headerSize int32, header *format.PageHead
}
func addEncoding(encodings []format.Encoding, add format.Encoding) []format.Encoding {
for _, enc := range encodings {
if enc == add {
return encodings
}
if slices.Contains(encodings, add) {
return encodings
}
return append(encodings, add)
}
@ -1815,8 +1850,6 @@ var (
_ RowWriter = (*writer)(nil)
_ ValueWriter = (*writer)(nil)
_ ValueWriter = (*writerColumn)(nil)
_ io.ReaderFrom = (*offsetTrackingWriter)(nil)
_ io.StringWriter = (*offsetTrackingWriter)(nil)
)

@ -1466,9 +1466,6 @@ github.com/oklog/run
# github.com/oklog/ulid v1.3.1
## explicit
github.com/oklog/ulid
# github.com/olekukonko/tablewriter v0.0.5
## explicit; go 1.12
github.com/olekukonko/tablewriter
# github.com/open-telemetry/opentelemetry-collector-contrib/internal/exp/metrics v0.116.0
## explicit; go 1.22.0
github.com/open-telemetry/opentelemetry-collector-contrib/internal/exp/metrics/identity
@ -1511,7 +1508,7 @@ github.com/oschwald/geoip2-golang
# github.com/oschwald/maxminddb-golang v1.13.0
## explicit; go 1.21
github.com/oschwald/maxminddb-golang
# github.com/parquet-go/parquet-go v0.25.0
# github.com/parquet-go/parquet-go v0.25.1
## explicit; go 1.22
github.com/parquet-go/parquet-go
github.com/parquet-go/parquet-go/bloom

Loading…
Cancel
Save