diff --git a/go.mod b/go.mod index db248e87179..2f11e0515b0 100644 --- a/go.mod +++ b/go.mod @@ -42,6 +42,7 @@ require ( github.com/klauspost/compress v1.4.1 // indirect github.com/klauspost/cpuid v1.2.0 // indirect github.com/lib/pq v1.2.0 + github.com/linkedin/goavro v2.1.0+incompatible github.com/mattn/go-isatty v0.0.10 github.com/mattn/go-sqlite3 v1.11.0 github.com/opentracing/opentracing-go v1.1.0 @@ -75,6 +76,7 @@ require ( gopkg.in/asn1-ber.v1 v1.0.0-20181015200546-f715ec2f112d // indirect gopkg.in/ini.v1 v1.46.0 gopkg.in/ldap.v3 v3.0.2 + gopkg.in/linkedin/goavro.v1 v1.0.5 // indirect gopkg.in/macaron.v1 v1.3.4 gopkg.in/mail.v2 v2.3.1 gopkg.in/redis.v5 v5.2.9 diff --git a/go.sum b/go.sum index 62fb51b3dbe..5b5c1fefe0a 100644 --- a/go.sum +++ b/go.sum @@ -97,6 +97,7 @@ github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/flatbuffers v1.11.0 h1:O7CEyB8Cb3/DmtxODGtLHcEvpr81Jm5qLg/hsHnxA2A= github.com/google/flatbuffers v1.11.0/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= @@ -113,10 +114,6 @@ github.com/gosimple/slug v1.4.2 h1:jDmprx3q/9Lfk4FkGZtvzDQ9Cj9eAmsjzeQGp24PeiQ= github.com/gosimple/slug v1.4.2/go.mod h1:ER78kgg1Mv0NQGlXiDe57DpCyfbNywXXZ9mIorhxAf0= github.com/grafana/grafana-plugin-model v0.0.0-20190930120109-1fc953a61fb4 h1:SPdxCL9BChFTlyi0Khv64vdCW4TMna8+sxL7+Chx+Ag= github.com/grafana/grafana-plugin-model v0.0.0-20190930120109-1fc953a61fb4/go.mod h1:nc0XxBzjeGcrMltCDw269LoWF9S8ibhgxolCdA1R8To= -github.com/grafana/grafana-plugin-sdk-go v0.2.0 h1:MgcTjCuzIkZcjb/2vCPK1RvLEHfRnQtFK7AF0W3SQm0= -github.com/grafana/grafana-plugin-sdk-go v0.2.0/go.mod h1:yA268OaX+C71ubT39tyACEfFwyhEzS1kbEVHUCgkKS8= -github.com/grafana/grafana-plugin-sdk-go v0.3.1-0.20191125180836-d77f6ffe8e05 h1:COdehD2bs2CJ3zrGAOueGrqCOaCG/M9aYiO4y+J4MUk= -github.com/grafana/grafana-plugin-sdk-go v0.3.1-0.20191125180836-d77f6ffe8e05/go.mod h1:yA268OaX+C71ubT39tyACEfFwyhEzS1kbEVHUCgkKS8= github.com/grafana/grafana-plugin-sdk-go v0.4.0 h1:bypT7gwGL9i584JEUQ1twcLxoUPO/60XW3VM8VYndYI= github.com/grafana/grafana-plugin-sdk-go v0.4.0/go.mod h1:yA268OaX+C71ubT39tyACEfFwyhEzS1kbEVHUCgkKS8= github.com/hashicorp/go-hclog v0.0.0-20180709165350-ff2cf002a8dd/go.mod h1:9bjs9uLqI8l75knNv3lV1kA55veR+WUPSiKIWcQHudI= @@ -167,6 +164,8 @@ github.com/lib/pq v1.0.0 h1:X5PMW56eZitiTeO7tKzZxFCSpbFZJtkMMooicw2us9A= github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.2.0 h1:LXpIM/LZ5xGFhOpXAQUIMM1HdyqzVYM13zNdjCEEcA0= github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= +github.com/linkedin/goavro v2.1.0+incompatible h1:DV2aUlj2xZiuxQyvag8Dy7zjY69ENjS66bWkSfdpddY= +github.com/linkedin/goavro v2.1.0+incompatible/go.mod h1:bBCwI2eGYpUI/4820s67MElg9tdeLbINjLjiM2xZFYM= github.com/lunny/log v0.0.0-20160921050905-7887c61bf0de/go.mod h1:3q8WtuPQsoRbatJuy3nvq/hRSvuBJrHHr+ybPPiNvHQ= github.com/lunny/nodb v0.0.0-20160621015157-fc1ef06ad4af/go.mod h1:Cqz6pqow14VObJ7peltM+2n3PWOz7yTrfUuGbVFkzN0= github.com/mattetti/filebuffer v1.0.0 
h1:ixTvQ0JjBTwWbdpDZ98lLrydo7KRi8xNRIi5RFszsbY= @@ -391,6 +390,8 @@ gopkg.in/ini.v1 v1.46.0 h1:VeDZbLYGaupuvIrsYCEOe/L/2Pcs5n7hdO1ZTjporag= gopkg.in/ini.v1 v1.46.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/ldap.v3 v3.0.2 h1:R6RBtabK6e1GO0eQKtkyOFbAHO73QesLzI2w2DZ6b9w= gopkg.in/ldap.v3 v3.0.2/go.mod h1:oxD7NyBuxchC+SgJDE1Q5Od05eGt29SDQVBmV+HYbzw= +gopkg.in/linkedin/goavro.v1 v1.0.5 h1:BJa69CDh0awSsLUmZ9+BowBdokpduDZSM9Zk8oKHfN4= +gopkg.in/linkedin/goavro.v1 v1.0.5/go.mod h1:Aw5GdAbizjOEl0kAMHV9iHmA8reZzW/OKuJAl4Hb9F0= gopkg.in/macaron.v1 v1.3.4 h1:HvIscOwxhFhx3swWM/979wh2QMYyuXrNmrF9l+j3HZs= gopkg.in/macaron.v1 v1.3.4/go.mod h1:/RoHTdC8ALpyJ3+QR36mKjwnT1F1dyYtsGM9Ate6ZFI= gopkg.in/mail.v2 v2.3.1 h1:WYFn/oANrAGP2C0dcV6/pbkPzv8yGzqTjPmTeO7qoXk= diff --git a/pkg/extensions/main.go b/pkg/extensions/main.go index 865762a83dd..8e33b1c4362 100644 --- a/pkg/extensions/main.go +++ b/pkg/extensions/main.go @@ -9,6 +9,7 @@ import ( "github.com/grafana/grafana/pkg/models" "github.com/grafana/grafana/pkg/registry" _ "github.com/jung-kurt/gofpdf" + _ "github.com/linkedin/goavro" _ "github.com/pkg/errors" _ "github.com/robfig/cron" _ "github.com/robfig/cron/v3" diff --git a/vendor/github.com/golang/snappy/.gitignore b/vendor/github.com/golang/snappy/.gitignore new file mode 100644 index 00000000000..042091d9b3b --- /dev/null +++ b/vendor/github.com/golang/snappy/.gitignore @@ -0,0 +1,16 @@ +cmd/snappytool/snappytool +testdata/bench + +# These explicitly listed benchmark data files are for an obsolete version of +# snappy_test.go. +testdata/alice29.txt +testdata/asyoulik.txt +testdata/fireworks.jpeg +testdata/geo.protodata +testdata/html +testdata/html_x_4 +testdata/kppkn.gtb +testdata/lcet10.txt +testdata/paper-100k.pdf +testdata/plrabn12.txt +testdata/urls.10K diff --git a/vendor/github.com/golang/snappy/AUTHORS b/vendor/github.com/golang/snappy/AUTHORS new file mode 100644 index 00000000000..bcfa19520af --- /dev/null +++ b/vendor/github.com/golang/snappy/AUTHORS @@ -0,0 +1,15 @@ +# This is the official list of Snappy-Go authors for copyright purposes. +# This file is distinct from the CONTRIBUTORS files. +# See the latter for an explanation. + +# Names should be added to this file as +# Name or Organization +# The email address is not required for organizations. + +# Please keep the list sorted. + +Damian Gryski +Google Inc. +Jan Mercl <0xjnml@gmail.com> +Rodolfo Carvalho +Sebastien Binet diff --git a/vendor/github.com/golang/snappy/CONTRIBUTORS b/vendor/github.com/golang/snappy/CONTRIBUTORS new file mode 100644 index 00000000000..931ae31606f --- /dev/null +++ b/vendor/github.com/golang/snappy/CONTRIBUTORS @@ -0,0 +1,37 @@ +# This is the official list of people who can contribute +# (and typically have contributed) code to the Snappy-Go repository. +# The AUTHORS file lists the copyright holders; this file +# lists people. For example, Google employees are listed here +# but not in AUTHORS, because Google holds the copyright. +# +# The submission process automatically checks to make sure +# that people submitting code are listed in this file (by email address). +# +# Names should be added to this file only after verifying that +# the individual or the individual's organization has agreed to +# the appropriate Contributor License Agreement, found here: +# +# http://code.google.com/legal/individual-cla-v1.0.html +# http://code.google.com/legal/corporate-cla-v1.0.html +# +# The agreement for individuals can be filled out on the web. 
+# +# When adding J Random Contributor's name to this file, +# either J's name or J's organization's name should be +# added to the AUTHORS file, depending on whether the +# individual or corporate CLA was used. + +# Names should be added to this file like so: +# Name + +# Please keep the list sorted. + +Damian Gryski +Jan Mercl <0xjnml@gmail.com> +Kai Backman +Marc-Antoine Ruel +Nigel Tao +Rob Pike +Rodolfo Carvalho +Russ Cox +Sebastien Binet diff --git a/vendor/github.com/golang/snappy/LICENSE b/vendor/github.com/golang/snappy/LICENSE new file mode 100644 index 00000000000..6050c10f4c8 --- /dev/null +++ b/vendor/github.com/golang/snappy/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/golang/snappy/README b/vendor/github.com/golang/snappy/README new file mode 100644 index 00000000000..cea12879a0e --- /dev/null +++ b/vendor/github.com/golang/snappy/README @@ -0,0 +1,107 @@ +The Snappy compression format in the Go programming language. + +To download and install from source: +$ go get github.com/golang/snappy + +Unless otherwise noted, the Snappy-Go source files are distributed +under the BSD-style license found in the LICENSE file. + + + +Benchmarks. + +The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten +or so files, the same set used by the C++ Snappy code (github.com/google/snappy +and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @ +3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29: + +"go test -test.bench=." 
+ +_UFlat0-8 2.19GB/s ± 0% html +_UFlat1-8 1.41GB/s ± 0% urls +_UFlat2-8 23.5GB/s ± 2% jpg +_UFlat3-8 1.91GB/s ± 0% jpg_200 +_UFlat4-8 14.0GB/s ± 1% pdf +_UFlat5-8 1.97GB/s ± 0% html4 +_UFlat6-8 814MB/s ± 0% txt1 +_UFlat7-8 785MB/s ± 0% txt2 +_UFlat8-8 857MB/s ± 0% txt3 +_UFlat9-8 719MB/s ± 1% txt4 +_UFlat10-8 2.84GB/s ± 0% pb +_UFlat11-8 1.05GB/s ± 0% gaviota + +_ZFlat0-8 1.04GB/s ± 0% html +_ZFlat1-8 534MB/s ± 0% urls +_ZFlat2-8 15.7GB/s ± 1% jpg +_ZFlat3-8 740MB/s ± 3% jpg_200 +_ZFlat4-8 9.20GB/s ± 1% pdf +_ZFlat5-8 991MB/s ± 0% html4 +_ZFlat6-8 379MB/s ± 0% txt1 +_ZFlat7-8 352MB/s ± 0% txt2 +_ZFlat8-8 396MB/s ± 1% txt3 +_ZFlat9-8 327MB/s ± 1% txt4 +_ZFlat10-8 1.33GB/s ± 1% pb +_ZFlat11-8 605MB/s ± 1% gaviota + + + +"go test -test.bench=. -tags=noasm" + +_UFlat0-8 621MB/s ± 2% html +_UFlat1-8 494MB/s ± 1% urls +_UFlat2-8 23.2GB/s ± 1% jpg +_UFlat3-8 1.12GB/s ± 1% jpg_200 +_UFlat4-8 4.35GB/s ± 1% pdf +_UFlat5-8 609MB/s ± 0% html4 +_UFlat6-8 296MB/s ± 0% txt1 +_UFlat7-8 288MB/s ± 0% txt2 +_UFlat8-8 309MB/s ± 1% txt3 +_UFlat9-8 280MB/s ± 1% txt4 +_UFlat10-8 753MB/s ± 0% pb +_UFlat11-8 400MB/s ± 0% gaviota + +_ZFlat0-8 409MB/s ± 1% html +_ZFlat1-8 250MB/s ± 1% urls +_ZFlat2-8 12.3GB/s ± 1% jpg +_ZFlat3-8 132MB/s ± 0% jpg_200 +_ZFlat4-8 2.92GB/s ± 0% pdf +_ZFlat5-8 405MB/s ± 1% html4 +_ZFlat6-8 179MB/s ± 1% txt1 +_ZFlat7-8 170MB/s ± 1% txt2 +_ZFlat8-8 189MB/s ± 1% txt3 +_ZFlat9-8 164MB/s ± 1% txt4 +_ZFlat10-8 479MB/s ± 1% pb +_ZFlat11-8 270MB/s ± 1% gaviota + + + +For comparison (Go's encoded output is byte-for-byte identical to C++'s), here +are the numbers from C++ Snappy's + +make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log + +BM_UFlat/0 2.4GB/s html +BM_UFlat/1 1.4GB/s urls +BM_UFlat/2 21.8GB/s jpg +BM_UFlat/3 1.5GB/s jpg_200 +BM_UFlat/4 13.3GB/s pdf +BM_UFlat/5 2.1GB/s html4 +BM_UFlat/6 1.0GB/s txt1 +BM_UFlat/7 959.4MB/s txt2 +BM_UFlat/8 1.0GB/s txt3 +BM_UFlat/9 864.5MB/s txt4 +BM_UFlat/10 2.9GB/s pb +BM_UFlat/11 1.2GB/s gaviota + +BM_ZFlat/0 944.3MB/s html (22.31 %) +BM_ZFlat/1 501.6MB/s urls (47.78 %) +BM_ZFlat/2 14.3GB/s jpg (99.95 %) +BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %) +BM_ZFlat/4 8.3GB/s pdf (83.30 %) +BM_ZFlat/5 903.5MB/s html4 (22.52 %) +BM_ZFlat/6 336.0MB/s txt1 (57.88 %) +BM_ZFlat/7 312.3MB/s txt2 (61.91 %) +BM_ZFlat/8 353.1MB/s txt3 (54.99 %) +BM_ZFlat/9 289.9MB/s txt4 (66.26 %) +BM_ZFlat/10 1.2GB/s pb (19.68 %) +BM_ZFlat/11 527.4MB/s gaviota (37.72 %) diff --git a/vendor/github.com/golang/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go new file mode 100644 index 00000000000..72efb0353dd --- /dev/null +++ b/vendor/github.com/golang/snappy/decode.go @@ -0,0 +1,237 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" + "io" +) + +var ( + // ErrCorrupt reports that the input is invalid. + ErrCorrupt = errors.New("snappy: corrupt input") + // ErrTooLarge reports that the uncompressed length is too large. + ErrTooLarge = errors.New("snappy: decoded block is too large") + // ErrUnsupported reports that the input isn't supported. + ErrUnsupported = errors.New("snappy: unsupported input") + + errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") +) + +// DecodedLen returns the length of the decoded block. 
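+//
+// A hedged usage sketch (illustrative only, not upstream code; block is
+// assumed to hold one snappy-encoded block): DecodedLen lets a caller
+// pre-size the destination buffer for Decode:
+//
+//	n, err := snappy.DecodedLen(block)
+//	if err != nil {
+//		return nil, err
+//	}
+//	return snappy.Decode(make([]byte, n), block)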
+func DecodedLen(src []byte) (int, error) { + v, _, err := decodedLen(src) + return v, err +} + +// decodedLen returns the length of the decoded block and the number of bytes +// that the length header occupied. +func decodedLen(src []byte) (blockLen, headerLen int, err error) { + v, n := binary.Uvarint(src) + if n <= 0 || v > 0xffffffff { + return 0, 0, ErrCorrupt + } + + const wordSize = 32 << (^uint(0) >> 32 & 1) + if wordSize == 32 && v > 0x7fffffff { + return 0, 0, ErrTooLarge + } + return int(v), n, nil +} + +const ( + decodeErrCodeCorrupt = 1 + decodeErrCodeUnsupportedLiteralLength = 2 +) + +// Decode returns the decoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire decoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +func Decode(dst, src []byte) ([]byte, error) { + dLen, s, err := decodedLen(src) + if err != nil { + return nil, err + } + if dLen <= len(dst) { + dst = dst[:dLen] + } else { + dst = make([]byte, dLen) + } + switch decode(dst, src[s:]) { + case 0: + return dst, nil + case decodeErrCodeUnsupportedLiteralLength: + return nil, errUnsupportedLiteralLength + } + return nil, ErrCorrupt +} + +// NewReader returns a new Reader that decompresses from r, using the framing +// format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +func NewReader(r io.Reader) *Reader { + return &Reader{ + r: r, + decoded: make([]byte, maxBlockSize), + buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize), + } +} + +// Reader is an io.Reader that can read Snappy-compressed bytes. +type Reader struct { + r io.Reader + err error + decoded []byte + buf []byte + // decoded[i:j] contains decoded bytes that have not yet been passed on. + i, j int + readHeader bool +} + +// Reset discards any buffered data, resets all state, and switches the Snappy +// reader to read from r. This permits reusing a Reader rather than allocating +// a new one. +func (r *Reader) Reset(reader io.Reader) { + r.r = reader + r.err = nil + r.i = 0 + r.j = 0 + r.readHeader = false +} + +func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { + if _, r.err = io.ReadFull(r.r, p); r.err != nil { + if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { + r.err = ErrCorrupt + } + return false + } + return true +} + +// Read satisfies the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + if r.err != nil { + return 0, r.err + } + for { + if r.i < r.j { + n := copy(p, r.decoded[r.i:r.j]) + r.i += n + return n, nil + } + if !r.readFull(r.buf[:4], true) { + return 0, r.err + } + chunkType := r.buf[0] + if !r.readHeader { + if chunkType != chunkTypeStreamIdentifier { + r.err = ErrCorrupt + return 0, r.err + } + r.readHeader = true + } + chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 + if chunkLen > len(r.buf) { + r.err = ErrUnsupported + return 0, r.err + } + + // The chunk types are specified at + // https://github.com/google/snappy/blob/master/framing_format.txt + switch chunkType { + case chunkTypeCompressedData: + // Section 4.2. Compressed data (chunk type 0x00). 
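+			// Per the framing format linked above, the chunk body is a
+			// 4-byte little-endian checksum (a masked CRC-32C of the
+			// uncompressed data) followed by the compressed block:
+			//
+			//	buf[0:4] checksum, little-endian
+			//	buf[4:]  snappy-compressed block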
+ if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + buf := r.buf[:chunkLen] + if !r.readFull(buf, false) { + return 0, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + buf = buf[checksumSize:] + + n, err := DecodedLen(buf) + if err != nil { + r.err = err + return 0, r.err + } + if n > len(r.decoded) { + r.err = ErrCorrupt + return 0, r.err + } + if _, err := Decode(r.decoded, buf); err != nil { + r.err = err + return 0, r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return 0, r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeUncompressedData: + // Section 4.3. Uncompressed data (chunk type 0x01). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + buf := r.buf[:checksumSize] + if !r.readFull(buf, false) { + return 0, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + // Read directly into r.decoded instead of via r.buf. + n := chunkLen - checksumSize + if n > len(r.decoded) { + r.err = ErrCorrupt + return 0, r.err + } + if !r.readFull(r.decoded[:n], false) { + return 0, r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return 0, r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeStreamIdentifier: + // Section 4.1. Stream identifier (chunk type 0xff). + if chunkLen != len(magicBody) { + r.err = ErrCorrupt + return 0, r.err + } + if !r.readFull(r.buf[:len(magicBody)], false) { + return 0, r.err + } + for i := 0; i < len(magicBody); i++ { + if r.buf[i] != magicBody[i] { + r.err = ErrCorrupt + return 0, r.err + } + } + continue + } + + if chunkType <= 0x7f { + // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). + r.err = ErrUnsupported + return 0, r.err + } + // Section 4.4 Padding (chunk type 0xfe). + // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). + if !r.readFull(r.buf[:chunkLen], false) { + return 0, r.err + } + } +} diff --git a/vendor/github.com/golang/snappy/decode_amd64.go b/vendor/github.com/golang/snappy/decode_amd64.go new file mode 100644 index 00000000000..fcd192b849e --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_amd64.go @@ -0,0 +1,14 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +package snappy + +// decode has the same semantics as in decode_other.go. +// +//go:noescape +func decode(dst, src []byte) int diff --git a/vendor/github.com/golang/snappy/decode_amd64.s b/vendor/github.com/golang/snappy/decode_amd64.s new file mode 100644 index 00000000000..e6179f65e35 --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_amd64.s @@ -0,0 +1,490 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in decode_other.go, except +// where marked with a "!!!". + +// func decode(dst, src []byte) int +// +// All local variables fit into registers. The non-zero stack size is only to +// spill registers and push args when issuing a CALL. 
The register allocation: +// - AX scratch +// - BX scratch +// - CX length or x +// - DX offset +// - SI &src[s] +// - DI &dst[d] +// + R8 dst_base +// + R9 dst_len +// + R10 dst_base + dst_len +// + R11 src_base +// + R12 src_len +// + R13 src_base + src_len +// - R14 used by doCopy +// - R15 used by doCopy +// +// The registers R8-R13 (marked with a "+") are set at the start of the +// function, and after a CALL returns, and are not otherwise modified. +// +// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI. +// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI. +TEXT ·decode(SB), NOSPLIT, $48-56 + // Initialize SI, DI and R8-R13. + MOVQ dst_base+0(FP), R8 + MOVQ dst_len+8(FP), R9 + MOVQ R8, DI + MOVQ R8, R10 + ADDQ R9, R10 + MOVQ src_base+24(FP), R11 + MOVQ src_len+32(FP), R12 + MOVQ R11, SI + MOVQ R11, R13 + ADDQ R12, R13 + +loop: + // for s < len(src) + CMPQ SI, R13 + JEQ end + + // CX = uint32(src[s]) + // + // switch src[s] & 0x03 + MOVBLZX (SI), CX + MOVL CX, BX + ANDL $3, BX + CMPL BX, $1 + JAE tagCopy + + // ---------------------------------------- + // The code below handles literal tags. + + // case tagLiteral: + // x := uint32(src[s] >> 2) + // switch + SHRL $2, CX + CMPL CX, $60 + JAE tagLit60Plus + + // case x < 60: + // s++ + INCQ SI + +doLit: + // This is the end of the inner "switch", when we have a literal tag. + // + // We assume that CX == x and x fits in a uint32, where x is the variable + // used in the pure Go decode_other.go code. + + // length = int(x) + 1 + // + // Unlike the pure Go code, we don't need to check if length <= 0 because + // CX can hold 64 bits, so the increment cannot overflow. + INCQ CX + + // Prepare to check if copying length bytes will run past the end of dst or + // src. + // + // AX = len(dst) - d + // BX = len(src) - s + MOVQ R10, AX + SUBQ DI, AX + MOVQ R13, BX + SUBQ SI, BX + + // !!! Try a faster technique for short (16 or fewer bytes) copies. + // + // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { + // goto callMemmove // Fall back on calling runtime·memmove. + // } + // + // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s + // against 21 instead of 16, because it cannot assume that all of its input + // is contiguous in memory and so it needs to leave enough source bytes to + // read the next tag without refilling buffers, but Go's Decode assumes + // contiguousness (the src argument is a []byte). + CMPQ CX, $16 + JGT callMemmove + CMPQ AX, $16 + JLT callMemmove + CMPQ BX, $16 + JLT callMemmove + + // !!! Implement the copy from src to dst as a 16-byte load and store. + // (Decode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only length bytes, but that's + // OK. If the input is a valid Snappy encoding then subsequent iterations + // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a + // non-nil error), so the overrun will be ignored. + // + // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. 
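+	// In Go terms, the two instructions below are roughly
+	// copy(dst[d:d+16], src[s:s+16]), issued as a single unaligned
+	// 16-byte SSE load/store pair.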
+ MOVOU 0(SI), X0 + MOVOU X0, 0(DI) + + // d += length + // s += length + ADDQ CX, DI + ADDQ CX, SI + JMP loop + +callMemmove: + // if length > len(dst)-d || length > len(src)-s { etc } + CMPQ CX, AX + JGT errCorrupt + CMPQ CX, BX + JGT errCorrupt + + // copy(dst[d:], src[s:s+length]) + // + // This means calling runtime·memmove(&dst[d], &src[s], length), so we push + // DI, SI and CX as arguments. Coincidentally, we also need to spill those + // three registers to the stack, to save local variables across the CALL. + MOVQ DI, 0(SP) + MOVQ SI, 8(SP) + MOVQ CX, 16(SP) + MOVQ DI, 24(SP) + MOVQ SI, 32(SP) + MOVQ CX, 40(SP) + CALL runtime·memmove(SB) + + // Restore local variables: unspill registers from the stack and + // re-calculate R8-R13. + MOVQ 24(SP), DI + MOVQ 32(SP), SI + MOVQ 40(SP), CX + MOVQ dst_base+0(FP), R8 + MOVQ dst_len+8(FP), R9 + MOVQ R8, R10 + ADDQ R9, R10 + MOVQ src_base+24(FP), R11 + MOVQ src_len+32(FP), R12 + MOVQ R11, R13 + ADDQ R12, R13 + + // d += length + // s += length + ADDQ CX, DI + ADDQ CX, SI + JMP loop + +tagLit60Plus: + // !!! This fragment does the + // + // s += x - 58; if uint(s) > uint(len(src)) { etc } + // + // checks. In the asm version, we code it once instead of once per switch case. + ADDQ CX, SI + SUBQ $58, SI + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // case x == 60: + CMPL CX, $61 + JEQ tagLit61 + JA tagLit62Plus + + // x = uint32(src[s-1]) + MOVBLZX -1(SI), CX + JMP doLit + +tagLit61: + // case x == 61: + // x = uint32(src[s-2]) | uint32(src[s-1])<<8 + MOVWLZX -2(SI), CX + JMP doLit + +tagLit62Plus: + CMPL CX, $62 + JA tagLit63 + + // case x == 62: + // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + MOVWLZX -3(SI), CX + MOVBLZX -1(SI), BX + SHLL $16, BX + ORL BX, CX + JMP doLit + +tagLit63: + // case x == 63: + // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + MOVL -4(SI), CX + JMP doLit + +// The code above handles literal tags. +// ---------------------------------------- +// The code below handles copy tags. + +tagCopy4: + // case tagCopy4: + // s += 5 + ADDQ $5, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // length = 1 + int(src[s-5])>>2 + SHRQ $2, CX + INCQ CX + + // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + MOVLQZX -4(SI), DX + JMP doCopy + +tagCopy2: + // case tagCopy2: + // s += 3 + ADDQ $3, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // length = 1 + int(src[s-3])>>2 + SHRQ $2, CX + INCQ CX + + // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + MOVWQZX -2(SI), DX + JMP doCopy + +tagCopy: + // We have a copy tag. We assume that: + // - BX == src[s] & 0x03 + // - CX == src[s] + CMPQ BX, $2 + JEQ tagCopy2 + JA tagCopy4 + + // case tagCopy1: + // s += 2 + ADDQ $2, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + MOVQ CX, DX + ANDQ $0xe0, DX + SHLQ $3, DX + MOVBQZX -1(SI), BX + ORQ BX, DX + + // length = 4 + int(src[s-2])>>2&0x7 + SHRQ $2, CX + ANDQ $7, CX + ADDQ $4, CX + +doCopy: + // This is the end of the outer "switch", when we have a copy tag. 
+ // + // We assume that: + // - CX == length && CX > 0 + // - DX == offset + + // if offset <= 0 { etc } + CMPQ DX, $0 + JLE errCorrupt + + // if d < offset { etc } + MOVQ DI, BX + SUBQ R8, BX + CMPQ BX, DX + JLT errCorrupt + + // if length > len(dst)-d { etc } + MOVQ R10, BX + SUBQ DI, BX + CMPQ CX, BX + JGT errCorrupt + + // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length + // + // Set: + // - R14 = len(dst)-d + // - R15 = &dst[d-offset] + MOVQ R10, R14 + SUBQ DI, R14 + MOVQ DI, R15 + SUBQ DX, R15 + + // !!! Try a faster technique for short (16 or fewer bytes) forward copies. + // + // First, try using two 8-byte load/stores, similar to the doLit technique + // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is + // still OK if offset >= 8. Note that this has to be two 8-byte load/stores + // and not one 16-byte load/store, and the first store has to be before the + // second load, due to the overlap if offset is in the range [8, 16). + // + // if length > 16 || offset < 8 || len(dst)-d < 16 { + // goto slowForwardCopy + // } + // copy 16 bytes + // d += length + CMPQ CX, $16 + JGT slowForwardCopy + CMPQ DX, $8 + JLT slowForwardCopy + CMPQ R14, $16 + JLT slowForwardCopy + MOVQ 0(R15), AX + MOVQ AX, 0(DI) + MOVQ 8(R15), BX + MOVQ BX, 8(DI) + ADDQ CX, DI + JMP loop + +slowForwardCopy: + // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we + // can still try 8-byte load stores, provided we can overrun up to 10 extra + // bytes. As above, the overrun will be fixed up by subsequent iterations + // of the outermost loop. + // + // The C++ snappy code calls this technique IncrementalCopyFastPath. Its + // commentary says: + // + // ---- + // + // The main part of this loop is a simple copy of eight bytes at a time + // until we've copied (at least) the requested amount of bytes. However, + // if d and d-offset are less than eight bytes apart (indicating a + // repeating pattern of length < 8), we first need to expand the pattern in + // order to get the correct results. For instance, if the buffer looks like + // this, with the eight-byte and patterns marked as + // intervals: + // + // abxxxxxxxxxxxx + // [------] d-offset + // [------] d + // + // a single eight-byte copy from to will repeat the pattern + // once, after which we can move two bytes without moving : + // + // ababxxxxxxxxxx + // [------] d-offset + // [------] d + // + // and repeat the exercise until the two no longer overlap. + // + // This allows us to do very well in the special case of one single byte + // repeated many times, without taking a big hit for more general cases. + // + // The worst case of extra writing past the end of the match occurs when + // offset == 1 and length == 1; the last copy will read from byte positions + // [0..7] and write to [4..11], whereas it was only supposed to write to + // position 1. Thus, ten excess bytes. + // + // ---- + // + // That "10 byte overrun" worst case is confirmed by Go's + // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy + // and finishSlowForwardCopy algorithm. + // + // if length > len(dst)-d-10 { + // goto verySlowForwardCopy + // } + SUBQ $10, R14 + CMPQ CX, R14 + JGT verySlowForwardCopy + +makeOffsetAtLeast8: + // !!! As above, expand the pattern so that offset >= 8 and we can use + // 8-byte load/stores. 
+ // + // for offset < 8 { + // copy 8 bytes from dst[d-offset:] to dst[d:] + // length -= offset + // d += offset + // offset += offset + // // The two previous lines together means that d-offset, and therefore + // // R15, is unchanged. + // } + CMPQ DX, $8 + JGE fixUpSlowForwardCopy + MOVQ (R15), BX + MOVQ BX, (DI) + SUBQ DX, CX + ADDQ DX, DI + ADDQ DX, DX + JMP makeOffsetAtLeast8 + +fixUpSlowForwardCopy: + // !!! Add length (which might be negative now) to d (implied by DI being + // &dst[d]) so that d ends up at the right place when we jump back to the + // top of the loop. Before we do that, though, we save DI to AX so that, if + // length is positive, copying the remaining length bytes will write to the + // right place. + MOVQ DI, AX + ADDQ CX, DI + +finishSlowForwardCopy: + // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative + // length means that we overrun, but as above, that will be fixed up by + // subsequent iterations of the outermost loop. + CMPQ CX, $0 + JLE loop + MOVQ (R15), BX + MOVQ BX, (AX) + ADDQ $8, R15 + ADDQ $8, AX + SUBQ $8, CX + JMP finishSlowForwardCopy + +verySlowForwardCopy: + // verySlowForwardCopy is a simple implementation of forward copy. In C + // parlance, this is a do/while loop instead of a while loop, since we know + // that length > 0. In Go syntax: + // + // for { + // dst[d] = dst[d - offset] + // d++ + // length-- + // if length == 0 { + // break + // } + // } + MOVB (R15), BX + MOVB BX, (DI) + INCQ R15 + INCQ DI + DECQ CX + JNZ verySlowForwardCopy + JMP loop + +// The code above handles copy tags. +// ---------------------------------------- + +end: + // This is the end of the "for s < len(src)". + // + // if d != len(dst) { etc } + CMPQ DI, R10 + JNE errCorrupt + + // return 0 + MOVQ $0, ret+48(FP) + RET + +errCorrupt: + // return decodeErrCodeCorrupt + MOVQ $1, ret+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/decode_other.go b/vendor/github.com/golang/snappy/decode_other.go new file mode 100644 index 00000000000..8c9f2049bc7 --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_other.go @@ -0,0 +1,101 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine !gc noasm + +package snappy + +// decode writes the decoding of src to dst. It assumes that the varint-encoded +// length of the decompressed bytes has already been read, and that len(dst) +// equals that length. +// +// It returns 0 on success or a decodeErrCodeXxx error code on failure. +func decode(dst, src []byte) int { + var d, s, offset, length int + for s < len(src) { + switch src[s] & 0x03 { + case tagLiteral: + x := uint32(src[s] >> 2) + switch { + case x < 60: + s++ + case x == 60: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-1]) + case x == 61: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-2]) | uint32(src[s-1])<<8 + case x == 62: + s += 4 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + case x == 63: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. 
+ return decodeErrCodeCorrupt + } + x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + } + length = int(x) + 1 + if length <= 0 { + return decodeErrCodeUnsupportedLiteralLength + } + if length > len(dst)-d || length > len(src)-s { + return decodeErrCodeCorrupt + } + copy(dst[d:], src[s:s+length]) + d += length + s += length + continue + + case tagCopy1: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 4 + int(src[s-2])>>2&0x7 + offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + + case tagCopy2: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-3])>>2 + offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + + case tagCopy4: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-5])>>2 + offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + } + + if offset <= 0 || d < offset || length > len(dst)-d { + return decodeErrCodeCorrupt + } + // Copy from an earlier sub-slice of dst to a later sub-slice. Unlike + // the built-in copy function, this byte-by-byte copy always runs + // forwards, even if the slices overlap. Conceptually, this is: + // + // d += forwardCopy(dst[d:d+length], dst[d-offset:]) + for end := d + length; d != end; d++ { + dst[d] = dst[d-offset] + } + } + if d != len(dst) { + return decodeErrCodeCorrupt + } + return 0 +} diff --git a/vendor/github.com/golang/snappy/encode.go b/vendor/github.com/golang/snappy/encode.go new file mode 100644 index 00000000000..8d393e904bb --- /dev/null +++ b/vendor/github.com/golang/snappy/encode.go @@ -0,0 +1,285 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" + "io" +) + +// Encode returns the encoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire encoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +func Encode(dst, src []byte) []byte { + if n := MaxEncodedLen(len(src)); n < 0 { + panic(ErrTooLarge) + } else if len(dst) < n { + dst = make([]byte, n) + } + + // The block starts with the varint-encoded length of the decompressed bytes. + d := binary.PutUvarint(dst, uint64(len(src))) + + for len(src) > 0 { + p := src + src = nil + if len(p) > maxBlockSize { + p, src = p[:maxBlockSize], p[maxBlockSize:] + } + if len(p) < minNonLiteralBlockSize { + d += emitLiteral(dst[d:], p) + } else { + d += encodeBlock(dst[d:], p) + } + } + return dst[:d] +} + +// inputMargin is the minimum number of extra input bytes to keep, inside +// encodeBlock's inner loop. On some architectures, this margin lets us +// implement a fast path for emitLiteral, where the copy of short (<= 16 byte) +// literals can be implemented as a single load to and store from a 16-byte +// register. 
That literal's actual length can be as short as 1 byte, so this +// can copy up to 15 bytes too much, but that's OK as subsequent iterations of +// the encoding loop will fix up the copy overrun, and this inputMargin ensures +// that we don't overrun the dst and src buffers. +const inputMargin = 16 - 1 + +// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that +// could be encoded with a copy tag. This is the minimum with respect to the +// algorithm used by encodeBlock, not a minimum enforced by the file format. +// +// The encoded output must start with at least a 1 byte literal, as there are +// no previous bytes to copy. A minimal (1 byte) copy after that, generated +// from an emitCopy call in encodeBlock's main loop, would require at least +// another inputMargin bytes, for the reason above: we want any emitLiteral +// calls inside encodeBlock's main loop to use the fast path if possible, which +// requires being able to overrun by inputMargin bytes. Thus, +// minNonLiteralBlockSize equals 1 + 1 + inputMargin. +// +// The C++ code doesn't use this exact threshold, but it could, as discussed at +// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion +// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an +// optimization. It should not affect the encoded form. This is tested by +// TestSameEncodingAsCppShortCopies. +const minNonLiteralBlockSize = 1 + 1 + inputMargin + +// MaxEncodedLen returns the maximum length of a snappy block, given its +// uncompressed length. +// +// It will return a negative value if srcLen is too large to encode. +func MaxEncodedLen(srcLen int) int { + n := uint64(srcLen) + if n > 0xffffffff { + return -1 + } + // Compressed data can be defined as: + // compressed := item* literal* + // item := literal* copy + // + // The trailing literal sequence has a space blowup of at most 62/60 + // since a literal of length 60 needs one tag byte + one extra byte + // for length information. + // + // Item blowup is trickier to measure. Suppose the "copy" op copies + // 4 bytes of data. Because of a special check in the encoding code, + // we produce a 4-byte copy only if the offset is < 65536. Therefore + // the copy op takes 3 bytes to encode, and this type of item leads + // to at most the 62/60 blowup for representing literals. + // + // Suppose the "copy" op copies 5 bytes of data. If the offset is big + // enough, it will take 5 bytes to encode the copy op. Therefore the + // worst case here is a one-byte literal followed by a five-byte copy. + // That is, 6 bytes of input turn into 7 bytes of "compressed" data. + // + // This last factor dominates the blowup, so the final estimate is: + n = 32 + n + n/6 + if n > 0xffffffff { + return -1 + } + return int(n) +} + +var errClosed = errors.New("snappy: Writer is closed") + +// NewWriter returns a new Writer that compresses to w. +// +// The Writer returned does not buffer writes. There is no need to Flush or +// Close such a Writer. +// +// Deprecated: the Writer returned is not suitable for many small writes, only +// for few large writes. Use NewBufferedWriter instead, which is efficient +// regardless of the frequency and shape of the writes, and remember to Close +// that Writer when done. 
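+//
+// A hedged usage sketch of the recommended buffered form (illustrative
+// only; dst and data are placeholders for the caller's io.Writer and
+// input bytes):
+//
+//	w := snappy.NewBufferedWriter(dst)
+//	if _, err := w.Write(data); err != nil {
+//		return err
+//	}
+//	return w.Close() // Close flushes any buffered data to dst.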
+func NewWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + obuf: make([]byte, obufLen), + } +} + +// NewBufferedWriter returns a new Writer that compresses to w, using the +// framing format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +// +// The Writer returned buffers writes. Users must call Close to guarantee all +// data has been forwarded to the underlying io.Writer. They may also call +// Flush zero or more times before calling Close. +func NewBufferedWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + ibuf: make([]byte, 0, maxBlockSize), + obuf: make([]byte, obufLen), + } +} + +// Writer is an io.Writer that can write Snappy-compressed bytes. +type Writer struct { + w io.Writer + err error + + // ibuf is a buffer for the incoming (uncompressed) bytes. + // + // Its use is optional. For backwards compatibility, Writers created by the + // NewWriter function have ibuf == nil, do not buffer incoming bytes, and + // therefore do not need to be Flush'ed or Close'd. + ibuf []byte + + // obuf is a buffer for the outgoing (compressed) bytes. + obuf []byte + + // wroteStreamHeader is whether we have written the stream header. + wroteStreamHeader bool +} + +// Reset discards the writer's state and switches the Snappy writer to write to +// w. This permits reusing a Writer rather than allocating a new one. +func (w *Writer) Reset(writer io.Writer) { + w.w = writer + w.err = nil + if w.ibuf != nil { + w.ibuf = w.ibuf[:0] + } + w.wroteStreamHeader = false +} + +// Write satisfies the io.Writer interface. +func (w *Writer) Write(p []byte) (nRet int, errRet error) { + if w.ibuf == nil { + // Do not buffer incoming bytes. This does not perform or compress well + // if the caller of Writer.Write writes many small slices. This + // behavior is therefore deprecated, but still supported for backwards + // compatibility with code that doesn't explicitly Flush or Close. + return w.write(p) + } + + // The remainder of this method is based on bufio.Writer.Write from the + // standard library. + + for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil { + var n int + if len(w.ibuf) == 0 { + // Large write, empty buffer. + // Write directly from p to avoid copy. + n, _ = w.write(p) + } else { + n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + w.Flush() + } + nRet += n + p = p[n:] + } + if w.err != nil { + return nRet, w.err + } + n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + nRet += n + return nRet, nil +} + +func (w *Writer) write(p []byte) (nRet int, errRet error) { + if w.err != nil { + return 0, w.err + } + for len(p) > 0 { + obufStart := len(magicChunk) + if !w.wroteStreamHeader { + w.wroteStreamHeader = true + copy(w.obuf, magicChunk) + obufStart = 0 + } + + var uncompressed []byte + if len(p) > maxBlockSize { + uncompressed, p = p[:maxBlockSize], p[maxBlockSize:] + } else { + uncompressed, p = p, nil + } + checksum := crc(uncompressed) + + // Compress the buffer, discarding the result if the improvement + // isn't at least 12.5%. + compressed := Encode(w.obuf[obufHeaderLen:], uncompressed) + chunkType := uint8(chunkTypeCompressedData) + chunkLen := 4 + len(compressed) + obufEnd := obufHeaderLen + len(compressed) + if len(compressed) >= len(uncompressed)-len(uncompressed)/8 { + chunkType = chunkTypeUncompressedData + chunkLen = 4 + len(uncompressed) + obufEnd = obufHeaderLen + } + + // Fill in the per-chunk header that comes before the body. 
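+		// The eight header bytes below, at offsets len(magicChunk)+0
+		// through +7 of obuf, are: one chunk-type byte (0x00 compressed,
+		// 0x01 uncompressed), a 3-byte little-endian body length (chunkLen
+		// counts the checksum plus the payload), and the 4-byte
+		// little-endian checksum that begins the body.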
+ w.obuf[len(magicChunk)+0] = chunkType + w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0) + w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8) + w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16) + w.obuf[len(magicChunk)+4] = uint8(checksum >> 0) + w.obuf[len(magicChunk)+5] = uint8(checksum >> 8) + w.obuf[len(magicChunk)+6] = uint8(checksum >> 16) + w.obuf[len(magicChunk)+7] = uint8(checksum >> 24) + + if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil { + w.err = err + return nRet, err + } + if chunkType == chunkTypeUncompressedData { + if _, err := w.w.Write(uncompressed); err != nil { + w.err = err + return nRet, err + } + } + nRet += len(uncompressed) + } + return nRet, nil +} + +// Flush flushes the Writer to its underlying io.Writer. +func (w *Writer) Flush() error { + if w.err != nil { + return w.err + } + if len(w.ibuf) == 0 { + return nil + } + w.write(w.ibuf) + w.ibuf = w.ibuf[:0] + return w.err +} + +// Close calls Flush and then closes the Writer. +func (w *Writer) Close() error { + w.Flush() + ret := w.err + if w.err == nil { + w.err = errClosed + } + return ret +} diff --git a/vendor/github.com/golang/snappy/encode_amd64.go b/vendor/github.com/golang/snappy/encode_amd64.go new file mode 100644 index 00000000000..150d91bc8be --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_amd64.go @@ -0,0 +1,29 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +package snappy + +// emitLiteral has the same semantics as in encode_other.go. +// +//go:noescape +func emitLiteral(dst, lit []byte) int + +// emitCopy has the same semantics as in encode_other.go. +// +//go:noescape +func emitCopy(dst []byte, offset, length int) int + +// extendMatch has the same semantics as in encode_other.go. +// +//go:noescape +func extendMatch(src []byte, i, j int) int + +// encodeBlock has the same semantics as in encode_other.go. +// +//go:noescape +func encodeBlock(dst, src []byte) (d int) diff --git a/vendor/github.com/golang/snappy/encode_amd64.s b/vendor/github.com/golang/snappy/encode_amd64.s new file mode 100644 index 00000000000..adfd979fe27 --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_amd64.s @@ -0,0 +1,730 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a +// Go toolchain regression. See https://github.com/golang/go/issues/15426 and +// https://github.com/golang/snappy/issues/29 +// +// As a workaround, the package was built with a known good assembler, and +// those instructions were disassembled by "objdump -d" to yield the +// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 +// style comments, in AT&T asm syntax. Note that rsp here is a physical +// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm). +// The instructions were then encoded as "BYTE $0x.." sequences, which assemble +// fine on Go 1.6. + +// The asm code generally follows the pure Go code in encode_other.go, except +// where marked with a "!!!". + +// ---------------------------------------------------------------------------- + +// func emitLiteral(dst, lit []byte) int +// +// All local variables fit into registers. 
The register allocation: +// - AX len(lit) +// - BX n +// - DX return value +// - DI &dst[i] +// - R10 &lit[0] +// +// The 24 bytes of stack space is to call runtime·memmove. +// +// The unusual register allocation of local variables, such as R10 for the +// source pointer, matches the allocation used at the call site in encodeBlock, +// which makes it easier to manually inline this function. +TEXT ·emitLiteral(SB), NOSPLIT, $24-56 + MOVQ dst_base+0(FP), DI + MOVQ lit_base+24(FP), R10 + MOVQ lit_len+32(FP), AX + MOVQ AX, DX + MOVL AX, BX + SUBL $1, BX + + CMPL BX, $60 + JLT oneByte + CMPL BX, $256 + JLT twoBytes + +threeBytes: + MOVB $0xf4, 0(DI) + MOVW BX, 1(DI) + ADDQ $3, DI + ADDQ $3, DX + JMP memmove + +twoBytes: + MOVB $0xf0, 0(DI) + MOVB BX, 1(DI) + ADDQ $2, DI + ADDQ $2, DX + JMP memmove + +oneByte: + SHLB $2, BX + MOVB BX, 0(DI) + ADDQ $1, DI + ADDQ $1, DX + +memmove: + MOVQ DX, ret+48(FP) + + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // DI, R10 and AX as arguments. + MOVQ DI, 0(SP) + MOVQ R10, 8(SP) + MOVQ AX, 16(SP) + CALL runtime·memmove(SB) + RET + +// ---------------------------------------------------------------------------- + +// func emitCopy(dst []byte, offset, length int) int +// +// All local variables fit into registers. The register allocation: +// - AX length +// - SI &dst[0] +// - DI &dst[i] +// - R11 offset +// +// The unusual register allocation of local variables, such as R11 for the +// offset, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·emitCopy(SB), NOSPLIT, $0-48 + MOVQ dst_base+0(FP), DI + MOVQ DI, SI + MOVQ offset+24(FP), R11 + MOVQ length+32(FP), AX + +loop0: + // for length >= 68 { etc } + CMPL AX, $68 + JLT step1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVB $0xfe, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $64, AX + JMP loop0 + +step1: + // if length > 64 { etc } + CMPL AX, $64 + JLE step2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVB $0xee, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $60, AX + +step2: + // if length >= 12 || offset >= 2048 { goto step3 } + CMPL AX, $12 + JGE step3 + CMPL R11, $2048 + JGE step3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(DI) + SHRL $8, R11 + SHLB $5, R11 + SUBB $4, AX + SHLB $2, AX + ORB AX, R11 + ORB $1, R11 + MOVB R11, 0(DI) + ADDQ $2, DI + + // Return the number of bytes written. + SUBQ SI, DI + MOVQ DI, ret+40(FP) + RET + +step3: + // Emit the remaining copy, encoded as 3 bytes. + SUBL $1, AX + SHLB $2, AX + ORB $2, AX + MOVB AX, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + + // Return the number of bytes written. + SUBQ SI, DI + MOVQ DI, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func extendMatch(src []byte, i, j int) int +// +// All local variables fit into registers. The register allocation: +// - DX &src[0] +// - SI &src[j] +// - R13 &src[len(src) - 8] +// - R14 &src[len(src)] +// - R15 &src[i] +// +// The unusual register allocation of local variables, such as R15 for a source +// pointer, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. 
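+//
+// For reference, the pure Go version that this routine mirrors
+// (extendMatch in encode_other.go) is essentially:
+//
+//	func extendMatch(src []byte, i, j int) int {
+//		for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
+//		}
+//		return j
+//	}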
+TEXT ·extendMatch(SB), NOSPLIT, $0-48 + MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), R14 + MOVQ i+24(FP), R15 + MOVQ j+32(FP), SI + ADDQ DX, R14 + ADDQ DX, R15 + ADDQ DX, SI + MOVQ R14, R13 + SUBQ $8, R13 + +cmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMPQ SI, R13 + JA cmp1 + MOVQ (R15), AX + MOVQ (SI), BX + CMPQ AX, BX + JNE bsf + ADDQ $8, R15 + ADDQ $8, SI + JMP cmp8 + +bsf: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. The BSF instruction finds the + // least significant 1 bit, the amd64 architecture is little-endian, and + // the shift by 3 converts a bit index to a byte index. + XORQ AX, BX + BSFQ BX, BX + SHRQ $3, BX + ADDQ BX, SI + + // Convert from &src[ret] to ret. + SUBQ DX, SI + MOVQ SI, ret+40(FP) + RET + +cmp1: + // In src's tail, compare 1 byte at a time. + CMPQ SI, R14 + JAE extendMatchEnd + MOVB (R15), AX + MOVB (SI), BX + CMPB AX, BX + JNE extendMatchEnd + ADDQ $1, R15 + ADDQ $1, SI + JMP cmp1 + +extendMatchEnd: + // Convert from &src[ret] to ret. + SUBQ DX, SI + MOVQ SI, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func encodeBlock(dst, src []byte) (d int) +// +// All local variables fit into registers, other than "var table". The register +// allocation: +// - AX . . +// - BX . . +// - CX 56 shift (note that amd64 shifts by non-immediates must use CX). +// - DX 64 &src[0], tableSize +// - SI 72 &src[s] +// - DI 80 &dst[d] +// - R9 88 sLimit +// - R10 . &src[nextEmit] +// - R11 96 prevHash, currHash, nextHash, offset +// - R12 104 &src[base], skip +// - R13 . &src[nextS], &src[len(src) - 8] +// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x +// - R15 112 candidate +// +// The second column (56, 64, etc) is the stack offset to spill the registers +// when calling other functions. We could pack this slightly tighter, but it's +// simpler to have a dedicated spill map independent of the function called. +// +// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An +// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill +// local variables (registers) during calls gives 32768 + 56 + 64 = 32888. +TEXT ·encodeBlock(SB), 0, $32888-56 + MOVQ dst_base+0(FP), DI + MOVQ src_base+24(FP), SI + MOVQ src_len+32(FP), R14 + + // shift, tableSize := uint32(32-8), 1<<8 + MOVQ $24, CX + MOVQ $256, DX + +calcShift: + // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + // shift-- + // } + CMPQ DX, $16384 + JGE varTable + CMPQ DX, R14 + JGE varTable + SUBQ $1, CX + SHLQ $1, DX + JMP calcShift + +varTable: + // var table [maxTableSize]uint16 + // + // In the asm code, unlike the Go code, we can zero-initialize only the + // first tableSize elements. Each uint16 element is 2 bytes and each MOVOU + // writes 16 bytes, so we can do only tableSize/8 writes instead of the + // 2048 writes that would zero-initialize all of table's 32768 bytes. + SHRQ $3, DX + LEAQ table-32768(SP), BX + PXOR X0, X0 + +memclr: + MOVOU X0, 0(BX) + ADDQ $16, BX + SUBQ $1, DX + JNZ memclr + + // !!! DX = &src[0] + MOVQ SI, DX + + // sLimit := len(src) - inputMargin + MOVQ R14, R9 + SUBQ $15, R9 + + // !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't + // change for the rest of the function. 
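+	// (The IMULL $0x1e35a7bd / SHRL CX pairs used throughout this function
+	// inline the hash helper from encode_other.go, essentially
+	//
+	//	func hash(u, shift uint32) uint32 {
+	//		return (u * 0x1e35a7bd) >> shift
+	//	}
+	//
+	// with CX holding shift.)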
+ MOVQ CX, 56(SP) + MOVQ DX, 64(SP) + MOVQ R9, 88(SP) + + // nextEmit := 0 + MOVQ DX, R10 + + // s := 1 + ADDQ $1, SI + + // nextHash := hash(load32(src, s), shift) + MOVL 0(SI), R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + +outer: + // for { etc } + + // skip := 32 + MOVQ $32, R12 + + // nextS := s + MOVQ SI, R13 + + // candidate := 0 + MOVQ $0, R15 + +inner0: + // for { etc } + + // s := nextS + MOVQ R13, SI + + // bytesBetweenHashLookups := skip >> 5 + MOVQ R12, R14 + SHRQ $5, R14 + + // nextS = s + bytesBetweenHashLookups + ADDQ R14, R13 + + // skip += bytesBetweenHashLookups + ADDQ R14, R12 + + // if nextS > sLimit { goto emitRemainder } + MOVQ R13, AX + SUBQ DX, AX + CMPQ AX, R9 + JA emitRemainder + + // candidate = int(table[nextHash]) + // XXX: MOVWQZX table-32768(SP)(R11*2), R15 + // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 + BYTE $0x4e + BYTE $0x0f + BYTE $0xb7 + BYTE $0x7c + BYTE $0x5c + BYTE $0x78 + + // table[nextHash] = uint16(s) + MOVQ SI, AX + SUBQ DX, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // nextHash = hash(load32(src, nextS), shift) + MOVL 0(R13), R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // if load32(src, s) != load32(src, candidate) { continue } break + MOVL 0(SI), AX + MOVL (DX)(R15*1), BX + CMPL AX, BX + JNE inner0 + +fourByteMatch: + // As per the encode_other.go code: + // + // A 4-byte match has been found. We'll later see etc. + + // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment + // on inputMargin in encode.go. + MOVQ SI, AX + SUBQ R10, AX + CMPQ AX, $16 + JLE emitLiteralFastPath + + // ---------------------------------------- + // Begin inline of the emitLiteral call. + // + // d += emitLiteral(dst[d:], src[nextEmit:s]) + + MOVL AX, BX + SUBL $1, BX + + CMPL BX, $60 + JLT inlineEmitLiteralOneByte + CMPL BX, $256 + JLT inlineEmitLiteralTwoBytes + +inlineEmitLiteralThreeBytes: + MOVB $0xf4, 0(DI) + MOVW BX, 1(DI) + ADDQ $3, DI + JMP inlineEmitLiteralMemmove + +inlineEmitLiteralTwoBytes: + MOVB $0xf0, 0(DI) + MOVB BX, 1(DI) + ADDQ $2, DI + JMP inlineEmitLiteralMemmove + +inlineEmitLiteralOneByte: + SHLB $2, BX + MOVB BX, 0(DI) + ADDQ $1, DI + +inlineEmitLiteralMemmove: + // Spill local variables (registers) onto the stack; call; unspill. + // + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // DI, R10 and AX as arguments. + MOVQ DI, 0(SP) + MOVQ R10, 8(SP) + MOVQ AX, 16(SP) + ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)". + MOVQ SI, 72(SP) + MOVQ DI, 80(SP) + MOVQ R15, 112(SP) + CALL runtime·memmove(SB) + MOVQ 56(SP), CX + MOVQ 64(SP), DX + MOVQ 72(SP), SI + MOVQ 80(SP), DI + MOVQ 88(SP), R9 + MOVQ 112(SP), R15 + JMP inner1 + +inlineEmitLiteralEnd: + // End inline of the emitLiteral call. + // ---------------------------------------- + +emitLiteralFastPath: + // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". + MOVB AX, BX + SUBB $1, BX + SHLB $2, BX + MOVB BX, (DI) + ADDQ $1, DI + + // !!! Implement the copy from lit to dst as a 16-byte load and store. + // (Encode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only len(lit) bytes, but that's + // OK. Subsequent iterations will fix up the overrun. + // + // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. 
This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + MOVOU 0(R10), X0 + MOVOU X0, 0(DI) + ADDQ AX, DI + +inner1: + // for { etc } + + // base := s + MOVQ SI, R12 + + // !!! offset := base - candidate + MOVQ R12, R11 + SUBQ R15, R11 + SUBQ DX, R11 + + // ---------------------------------------- + // Begin inline of the extendMatch call. + // + // s = extendMatch(src, candidate+4, s+4) + + // !!! R14 = &src[len(src)] + MOVQ src_len+32(FP), R14 + ADDQ DX, R14 + + // !!! R13 = &src[len(src) - 8] + MOVQ R14, R13 + SUBQ $8, R13 + + // !!! R15 = &src[candidate + 4] + ADDQ $4, R15 + ADDQ DX, R15 + + // !!! s += 4 + ADDQ $4, SI + +inlineExtendMatchCmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMPQ SI, R13 + JA inlineExtendMatchCmp1 + MOVQ (R15), AX + MOVQ (SI), BX + CMPQ AX, BX + JNE inlineExtendMatchBSF + ADDQ $8, R15 + ADDQ $8, SI + JMP inlineExtendMatchCmp8 + +inlineExtendMatchBSF: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. The BSF instruction finds the + // least significant 1 bit, the amd64 architecture is little-endian, and + // the shift by 3 converts a bit index to a byte index. + XORQ AX, BX + BSFQ BX, BX + SHRQ $3, BX + ADDQ BX, SI + JMP inlineExtendMatchEnd + +inlineExtendMatchCmp1: + // In src's tail, compare 1 byte at a time. + CMPQ SI, R14 + JAE inlineExtendMatchEnd + MOVB (R15), AX + MOVB (SI), BX + CMPB AX, BX + JNE inlineExtendMatchEnd + ADDQ $1, R15 + ADDQ $1, SI + JMP inlineExtendMatchCmp1 + +inlineExtendMatchEnd: + // End inline of the extendMatch call. + // ---------------------------------------- + + // ---------------------------------------- + // Begin inline of the emitCopy call. + // + // d += emitCopy(dst[d:], base-candidate, s-base) + + // !!! length := s - base + MOVQ SI, AX + SUBQ R12, AX + +inlineEmitCopyLoop0: + // for length >= 68 { etc } + CMPL AX, $68 + JLT inlineEmitCopyStep1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVB $0xfe, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $64, AX + JMP inlineEmitCopyLoop0 + +inlineEmitCopyStep1: + // if length > 64 { etc } + CMPL AX, $64 + JLE inlineEmitCopyStep2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVB $0xee, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $60, AX + +inlineEmitCopyStep2: + // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } + CMPL AX, $12 + JGE inlineEmitCopyStep3 + CMPL R11, $2048 + JGE inlineEmitCopyStep3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(DI) + SHRL $8, R11 + SHLB $5, R11 + SUBB $4, AX + SHLB $2, AX + ORB AX, R11 + ORB $1, R11 + MOVB R11, 0(DI) + ADDQ $2, DI + JMP inlineEmitCopyEnd + +inlineEmitCopyStep3: + // Emit the remaining copy, encoded as 3 bytes. + SUBL $1, AX + SHLB $2, AX + ORB $2, AX + MOVB AX, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + +inlineEmitCopyEnd: + // End inline of the emitCopy call. + // ---------------------------------------- + + // nextEmit = s + MOVQ SI, R10 + + // if s >= sLimit { goto emitRemainder } + MOVQ SI, AX + SUBQ DX, AX + CMPQ AX, R9 + JAE emitRemainder + + // As per the encode_other.go code: + // + // We could immediately etc. 
+ + // x := load64(src, s-1) + MOVQ -1(SI), R14 + + // prevHash := hash(uint32(x>>0), shift) + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // table[prevHash] = uint16(s-1) + MOVQ SI, AX + SUBQ DX, AX + SUBQ $1, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // currHash := hash(uint32(x>>8), shift) + SHRQ $8, R14 + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // candidate = int(table[currHash]) + // XXX: MOVWQZX table-32768(SP)(R11*2), R15 + // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 + BYTE $0x4e + BYTE $0x0f + BYTE $0xb7 + BYTE $0x7c + BYTE $0x5c + BYTE $0x78 + + // table[currHash] = uint16(s) + ADDQ $1, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // if uint32(x>>8) == load32(src, candidate) { continue } + MOVL (DX)(R15*1), BX + CMPL R14, BX + JEQ inner1 + + // nextHash = hash(uint32(x>>16), shift) + SHRQ $8, R14 + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // s++ + ADDQ $1, SI + + // break out of the inner1 for loop, i.e. continue the outer loop. + JMP outer + +emitRemainder: + // if nextEmit < len(src) { etc } + MOVQ src_len+32(FP), AX + ADDQ DX, AX + CMPQ R10, AX + JEQ encodeBlockEnd + + // d += emitLiteral(dst[d:], src[nextEmit:]) + // + // Push args. + MOVQ DI, 0(SP) + MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative. + MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative. + MOVQ R10, 24(SP) + SUBQ R10, AX + MOVQ AX, 32(SP) + MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative. + + // Spill local variables (registers) onto the stack; call; unspill. + MOVQ DI, 80(SP) + CALL ·emitLiteral(SB) + MOVQ 80(SP), DI + + // Finish the "d +=" part of "d += emitLiteral(etc)". + ADDQ 48(SP), DI + +encodeBlockEnd: + MOVQ dst_base+0(FP), AX + SUBQ AX, DI + MOVQ DI, d+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/encode_other.go b/vendor/github.com/golang/snappy/encode_other.go new file mode 100644 index 00000000000..dbcae905e6e --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_other.go @@ -0,0 +1,238 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine !gc noasm + +package snappy + +func load32(b []byte, i int) uint32 { + b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load64(b []byte, i int) uint64 { + b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. 
+//
+// It assumes that:
+//	dst is long enough to hold the encoded bytes
+//	1 <= len(lit) && len(lit) <= 65536
+func emitLiteral(dst, lit []byte) int {
+	i, n := 0, uint(len(lit)-1)
+	switch {
+	case n < 60:
+		dst[0] = uint8(n)<<2 | tagLiteral
+		i = 1
+	case n < 1<<8:
+		dst[0] = 60<<2 | tagLiteral
+		dst[1] = uint8(n)
+		i = 2
+	default:
+		dst[0] = 61<<2 | tagLiteral
+		dst[1] = uint8(n)
+		dst[2] = uint8(n >> 8)
+		i = 3
+	}
+	return i + copy(dst[i:], lit)
+}
+
+// emitCopy writes a copy chunk and returns the number of bytes written.
+//
+// It assumes that:
+//	dst is long enough to hold the encoded bytes
+//	1 <= offset && offset <= 65535
+//	4 <= length && length <= 65535
+func emitCopy(dst []byte, offset, length int) int {
+	i := 0
+	// The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
+	// threshold for this loop is a little higher (at 68 = 64 + 4), and the
+	// length emitted down below is a little lower (at 60 = 64 - 4), because
+	// it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
+	// by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
+	// a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as
+	// 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a
+	// tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an
+	// encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1.
+	for length >= 68 {
+		// Emit a length 64 copy, encoded as 3 bytes.
+		dst[i+0] = 63<<2 | tagCopy2
+		dst[i+1] = uint8(offset)
+		dst[i+2] = uint8(offset >> 8)
+		i += 3
+		length -= 64
+	}
+	if length > 64 {
+		// Emit a length 60 copy, encoded as 3 bytes.
+		dst[i+0] = 59<<2 | tagCopy2
+		dst[i+1] = uint8(offset)
+		dst[i+2] = uint8(offset >> 8)
+		i += 3
+		length -= 60
+	}
+	if length >= 12 || offset >= 2048 {
+		// Emit the remaining copy, encoded as 3 bytes.
+		dst[i+0] = uint8(length-1)<<2 | tagCopy2
+		dst[i+1] = uint8(offset)
+		dst[i+2] = uint8(offset >> 8)
+		return i + 3
+	}
+	// Emit the remaining copy, encoded as 2 bytes.
+	dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
+	dst[i+1] = uint8(offset)
+	return i + 2
+}
+
+// extendMatch returns the largest k such that k <= len(src) and that
+// src[i:i+k-j] and src[j:k] have the same contents.
+//
+// It assumes that:
+//	0 <= i && i < j && j <= len(src)
+func extendMatch(src []byte, i, j int) int {
+	for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
+	}
+	return j
+}
+
+func hash(u, shift uint32) uint32 {
+	return (u * 0x1e35a7bd) >> shift
+}
+
+// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
+// assumes that the varint-encoded length of the decompressed bytes has already
+// been written.
+//
+// It also assumes that:
+//	len(dst) >= MaxEncodedLen(len(src)) &&
+//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+func encodeBlock(dst, src []byte) (d int) {
+	// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
+	// The table element type is uint16, as s < sLimit and sLimit < len(src)
+	// and len(src) <= maxBlockSize and maxBlockSize == 65536.
+	const (
+		maxTableSize = 1 << 14
+		// tableMask is redundant, but helps the compiler eliminate bounds
+		// checks.
+		tableMask = maxTableSize - 1
+	)
+	shift := uint32(32 - 8)
+	for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
+		shift--
+	}
+	// In Go, all array elements are zero-initialized, so there is no advantage
+	// to a smaller tableSize per se. However, it matches the C++ algorithm,
+	// and in the asm versions of this code, we can get away with zeroing only
+	// the first tableSize elements.
+	var table [maxTableSize]uint16
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := len(src) - inputMargin
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := 0
+
+	// The encoded form must start with a literal, as there are no previous
+	// bytes to copy, so we start looking for hash matches at s == 1.
+	s := 1
+	nextHash := hash(load32(src, s), shift)
+
+	for {
+		// Copied from the C++ snappy implementation:
+		//
+		// Heuristic match skipping: If 32 bytes are scanned with no matches
+		// found, start looking only at every other byte. If 32 more bytes are
+		// scanned (or skipped), look at every third byte, etc. When a match
+		// is found, immediately go back to looking at every byte. This is a
+		// small loss (~5% performance, ~0.1% density) for compressible data
+		// due to more bookkeeping, but for non-compressible data (such as
+		// JPEG) it's a huge win since the compressor quickly "realizes" the
+		// data is incompressible and doesn't bother looking for matches
+		// everywhere.
+		//
+		// The "skip" variable keeps track of how many bytes there are since
+		// the last match; dividing it by 32 (ie. right-shifting by five) gives
+		// the number of bytes to move ahead for each iteration.
+		skip := 32
+
+		nextS := s
+		candidate := 0
+		for {
+			s = nextS
+			bytesBetweenHashLookups := skip >> 5
+			nextS = s + bytesBetweenHashLookups
+			skip += bytesBetweenHashLookups
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			candidate = int(table[nextHash&tableMask])
+			table[nextHash&tableMask] = uint16(s)
+			nextHash = hash(load32(src, nextS), shift)
+			if load32(src, s) == load32(src, candidate) {
+				break
+			}
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+		d += emitLiteral(dst[d:], src[nextEmit:s])
+
+		// Call emitCopy, and then see if another emitCopy could be our next
+		// move. Repeat until we find no match for the input immediately after
+		// what was consumed by the last emitCopy call.
+		//
+		// If we exit this loop normally then we need to call emitLiteral next,
+		// though we don't yet know how big the literal will be. We handle that
+		// by proceeding to the next iteration of the main loop. We also can
+		// exit this loop via goto if we get close to exhausting the input.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+			base := s
+
+			// Extend the 4-byte match as long as possible.
+			//
+			// This is an inlined version of:
+			//	s = extendMatch(src, candidate+4, s+4)
+			s += 4
+			for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 {
+			}
+
+			d += emitCopy(dst[d:], base-candidate, s-base)
+			nextEmit = s
+			if s >= sLimit {
+				goto emitRemainder
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-1 and at s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+ x := load64(src, s-1) + prevHash := hash(uint32(x>>0), shift) + table[prevHash&tableMask] = uint16(s - 1) + currHash := hash(uint32(x>>8), shift) + candidate = int(table[currHash&tableMask]) + table[currHash&tableMask] = uint16(s) + if uint32(x>>8) != load32(src, candidate) { + nextHash = hash(uint32(x>>16), shift) + s++ + break + } + } + } + +emitRemainder: + if nextEmit < len(src) { + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} diff --git a/vendor/github.com/golang/snappy/go.mod b/vendor/github.com/golang/snappy/go.mod new file mode 100644 index 00000000000..f6406bb2c76 --- /dev/null +++ b/vendor/github.com/golang/snappy/go.mod @@ -0,0 +1 @@ +module github.com/golang/snappy diff --git a/vendor/github.com/golang/snappy/snappy.go b/vendor/github.com/golang/snappy/snappy.go new file mode 100644 index 00000000000..ece692ea461 --- /dev/null +++ b/vendor/github.com/golang/snappy/snappy.go @@ -0,0 +1,98 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package snappy implements the Snappy compression format. It aims for very +// high speeds and reasonable compression. +// +// There are actually two Snappy formats: block and stream. They are related, +// but different: trying to decompress block-compressed data as a Snappy stream +// will fail, and vice versa. The block format is the Decode and Encode +// functions and the stream format is the Reader and Writer types. +// +// The block format, the more common case, is used when the complete size (the +// number of bytes) of the original data is known upfront, at the time +// compression starts. The stream format, also known as the framing format, is +// for when that isn't always true. +// +// The canonical, C++ implementation is at https://github.com/google/snappy and +// it only implements the block format. +package snappy // import "github.com/golang/snappy" + +import ( + "hash/crc32" +) + +/* +Each encoded block begins with the varint-encoded length of the decoded data, +followed by a sequence of chunks. Chunks begin and end on byte boundaries. The +first byte of each chunk is broken into its 2 least and 6 most significant bits +called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. +Zero means a literal tag. All other values mean a copy tag. + +For literal tags: + - If m < 60, the next 1 + m bytes are literal bytes. + - Otherwise, let n be the little-endian unsigned integer denoted by the next + m - 59 bytes. The next 1 + n bytes after that are literal bytes. + +For copy tags, length bytes are copied from offset bytes ago, in the style of +Lempel-Ziv compression algorithms. In particular: + - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). + The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 + of the offset. The next byte is bits 0-7 of the offset. + - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). + The length is 1 + m. The offset is the little-endian unsigned integer + denoted by the next 2 bytes. + - For l == 3, this tag is a legacy format that is no longer issued by most + encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in + [1, 65). The length is 1 + m. The offset is the little-endian unsigned + integer denoted by the next 4 bytes. 
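+
+For example, the three bytes 0xbe 0x34 0x12 form a tagCopy2 chunk: the first
+byte is 0xbe, so l == 0xbe&3 == 2 and m == 0xbe>>2 == 47, giving a length of
+1 + m == 48; the next two bytes hold the little-endian offset, 0x1234 == 4660.
+The chunk therefore means "copy 48 bytes starting 4660 bytes back".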
+*/ +const ( + tagLiteral = 0x00 + tagCopy1 = 0x01 + tagCopy2 = 0x02 + tagCopy4 = 0x03 +) + +const ( + checksumSize = 4 + chunkHeaderSize = 4 + magicChunk = "\xff\x06\x00\x00" + magicBody + magicBody = "sNaPpY" + + // maxBlockSize is the maximum size of the input to encodeBlock. It is not + // part of the wire format per se, but some parts of the encoder assume + // that an offset fits into a uint16. + // + // Also, for the framing format (Writer type instead of Encode function), + // https://github.com/google/snappy/blob/master/framing_format.txt says + // that "the uncompressed data in a chunk must be no longer than 65536 + // bytes". + maxBlockSize = 65536 + + // maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is + // hard coded to be a const instead of a variable, so that obufLen can also + // be a const. Their equivalence is confirmed by + // TestMaxEncodedLenOfMaxBlockSize. + maxEncodedLenOfMaxBlockSize = 76490 + + obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize + obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize +) + +const ( + chunkTypeCompressedData = 0x00 + chunkTypeUncompressedData = 0x01 + chunkTypePadding = 0xfe + chunkTypeStreamIdentifier = 0xff +) + +var crcTable = crc32.MakeTable(crc32.Castagnoli) + +// crc implements the checksum specified in section 3 of +// https://github.com/google/snappy/blob/master/framing_format.txt +func crc(b []byte) uint32 { + c := crc32.Update(0, crcTable, b) + return uint32(c>>15|c<<17) + 0xa282ead8 +} diff --git a/vendor/github.com/linkedin/goavro/.gitignore b/vendor/github.com/linkedin/goavro/.gitignore new file mode 100644 index 00000000000..9ed3b07cefe --- /dev/null +++ b/vendor/github.com/linkedin/goavro/.gitignore @@ -0,0 +1 @@ +*.test diff --git a/vendor/github.com/linkedin/goavro/AUTHORS b/vendor/github.com/linkedin/goavro/AUTHORS new file mode 100644 index 00000000000..3889c343d08 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/AUTHORS @@ -0,0 +1,38 @@ +Goavro was originally created during the Fall of 2014 at LinkedIn, +Corp., in New York City, New York, USA. + +The following persons, listed in alphabetical order, have participated +with goavro development by contributing code and test cases. + + Alan Gardner + Billy Hand + Christian Blades + Corey Scott + Darshan Shaligram + Dylan Wen + Enrico Candino + Fellyn Silliman + James Crasta + Jeff Haynie + Joe Roth + Karrick S. McDermott + Kasey Klipsch + Michael Johnson + Murray Resinski + Nicolas Kaiser + Sebastien Launay + Thomas Desrosiers + kklipsch + seborama + +A big thank you to these persons who provided testing and amazing +feedback to goavro during its initial implementation: + + Dennis Ordanov + Thomas Desrosiers + +Also a big thank you is extended to our supervisors who supported our +efforts to bring goavro to the open source community: + + Greg Leffler + Nick Berry diff --git a/vendor/github.com/linkedin/goavro/Gopkg.lock b/vendor/github.com/linkedin/goavro/Gopkg.lock new file mode 100644 index 00000000000..5eab0574c47 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/Gopkg.lock @@ -0,0 +1,21 @@ +# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. 
+ + +[[projects]] + branch = "master" + name = "github.com/golang/snappy" + packages = ["."] + revision = "553a641470496b2327abcac10b36396bd98e45c9" + +[[projects]] + name = "gopkg.in/linkedin/goavro.v1" + packages = ["."] + revision = "45b9a0062a837ab3214741a91bff89e2a2e4fae6" + version = "v1.0.5" + +[solve-meta] + analyzer-name = "dep" + analyzer-version = 1 + inputs-digest = "3badedae1f576bec3315e8c72e8e390df4ad16cdc4a5750c6468076ff5fba6c3" + solver-name = "gps-cdcl" + solver-version = 1 diff --git a/vendor/github.com/linkedin/goavro/Gopkg.toml b/vendor/github.com/linkedin/goavro/Gopkg.toml new file mode 100644 index 00000000000..08a65614215 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/Gopkg.toml @@ -0,0 +1,30 @@ + +# Gopkg.toml example +# +# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md +# for detailed Gopkg.toml documentation. +# +# required = ["github.com/user/thing/cmd/thing"] +# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"] +# +# [[constraint]] +# name = "github.com/user/project" +# version = "1.0.0" +# +# [[constraint]] +# name = "github.com/user/project2" +# branch = "dev" +# source = "github.com/myfork/project2" +# +# [[override]] +# name = "github.com/x/y" +# version = "2.4.0" + + +[[constraint]] + branch = "master" + name = "github.com/golang/snappy" + +[[constraint]] + name = "gopkg.in/linkedin/goavro.v1" + version = "1.0.5" diff --git a/vendor/github.com/linkedin/goavro/LICENSE b/vendor/github.com/linkedin/goavro/LICENSE new file mode 100644 index 00000000000..261eeb9e9f8 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/linkedin/goavro/README.md b/vendor/github.com/linkedin/goavro/README.md new file mode 100644 index 00000000000..b54429fee34 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/README.md @@ -0,0 +1,424 @@ +# goavro + +Goavro is a library that encodes and decodes Avro data. + +## Description + +* Encodes to and decodes from both binary and textual JSON Avro data. +* `Codec` is stateless and is safe to use by multiple goroutines. + +With the exception of features not yet supported, goavro attempts to be +fully compliant with the most recent version of the +[Avro specification](http://avro.apache.org/docs/1.8.2/spec.html). 
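+
+For instance, because a `Codec` is stateless, one instance compiled from a
+schema can be shared by any number of goroutines. The following is a minimal
+illustrative sketch of that guarantee; the schema and values are placeholders:
+
+```Go
+package main
+
+import (
+	"fmt"
+	"sync"
+
+	"github.com/linkedin/goavro"
+)
+
+func main() {
+	// One Codec, compiled once, shared by every goroutine.
+	codec, err := goavro.NewCodec(`"string"`)
+	if err != nil {
+		panic(err)
+	}
+	var wg sync.WaitGroup
+	for i := 0; i < 4; i++ {
+		wg.Add(1)
+		go func(i int) {
+			defer wg.Done()
+			// Concurrent encodes against the same Codec are safe.
+			buf, err := codec.BinaryFromNative(nil, fmt.Sprintf("datum %d", i))
+			if err != nil {
+				panic(err)
+			}
+			fmt.Println(len(buf))
+		}(i)
+	}
+	wg.Wait()
+}
+```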
+
+## NOTICE
+
+This goavro library has been rewritten to correct a large number of
+shortcomings:
+
+* https://github.com/linkedin/goavro/issues/8
+* https://github.com/linkedin/goavro/issues/36
+* https://github.com/linkedin/goavro/issues/45
+* https://github.com/linkedin/goavro/issues/55
+* https://github.com/linkedin/goavro/issues/71
+* https://github.com/linkedin/goavro/issues/72
+* https://github.com/linkedin/goavro/issues/81
+
+As a consequence of the rewrite, the API has been significantly
+simplified, taking into account suggestions from users received during
+the past few years since its original release.
+
+The original version of this library is still available; however, the
+v1 branch does not support all the same features, has a number of
+outstanding bugs, and performs significantly slower than the v2
+branch. Users are highly encouraged to update their software to use
+the v2 branch, but until they do, they can continue to use the v1
+branch by modifying import statements:
+
+```Go
+import goavro "gopkg.in/linkedin/goavro.v1"
+```
+
+### Justification for API Change
+
+It was a very difficult decision to break the API when creating the
+new version, but in the end the benefits outweighed the consequences:
+
+1. Allowed proper handling of Avro namespaces.
+1. Eliminated the largest gripe of users: getting data into and out of
+   records.
+1. Provided a significant, 3x--4x speed improvement for all tasks.
+1. Allowed textual encoding to and decoding from Avro JSON.
+1. Better handling of record field default values.
+
+#### Avro namespaces
+
+The original version of this library was written prior to my really
+understanding how Avro namespaces ought to work. After using Avro for
+a long time now, and after a lot of research, I think I grok Avro
+namespaces properly, and the library now correctly handles every test
+case the Apache Avro distribution has for namespaces, including being
+able to refer to a previously defined data type later on in the same
+schema.
+
+#### Getting Data into and out of Records
+
+The original version of this library required creating `goavro.Record`
+instances and using getters and setters to access a record's fields.
+When schemas were complex, this required a lot of work to debug and
+get right. The original version also required users to break schemas
+into chunks, and have a different schema for each record type. This
+was cumbersome, annoying, and error prone.
+
+The new version of this library eliminates the `goavro.Record` type,
+and accepts a native Go map for all records to be encoded. Keys are
+the field names, and values are the field values. Nothing could be
+easier. Conversely, decoding Avro data yields a native Go map for the
+upstream client to pull data back out.
+
+Furthermore, there is never a reason to break your schema down into
+record schemas. Merely feed the entire schema into the `NewCodec`
+function once when you create the `Codec`, then use it. This library
+knows how to parse the data provided to it and ensure data values for
+records and their fields are properly encoded and decoded.
+
+#### 3x--4x Performance Improvement
+
+The original version of this library was truly written with Go's idea
+of `io.Reader` and `io.Writer` composition in mind. Although
+composition is a powerful tool, the original library had to pull bytes
+off the `io.Reader`--often one byte at a time--check for read errors,
+decode the bytes, and repeat.
+By operating on native Go byte slices instead, this version decodes
+and encodes complex Avro data between three and four times faster than
+before, as measured here at LinkedIn.
+
+#### Avro JSON Support
+
+The original version of this library did not support JSON encoding or
+decoding, because it wasn't deemed useful for our internal use at the
+time. When writing the new version of the library I decided to tackle
+this issue once and for all, because so many engineers needed this
+functionality for their work.
+
+#### Better Handling of Record Field Default Values
+
+The original version of this library did not handle default values for
+record fields well. This version of the library uses a record field's
+default value when encoding from native Go data to Avro data and the
+field is not specified. Additionally, when decoding from Avro JSON
+data to native Go data, if a field is not specified, its default value
+will be used to populate the field.
+
+## Contrast With Code Generation Tools
+
+If you have the ability to rebuild and redeploy your software whenever
+data schemas change, code generation tools might be the best solution
+for your application.
+
+There are numerous excellent tools for generating source code to
+translate data between native and Avro binary or textual data. One
+such tool is linked below. If a particular application is designed to
+work with a rarely changing schema, programs that use code-generated
+functions can potentially be more performant than a program that uses
+goavro to create a `Codec` dynamically at run time.
+
+* [gogen-avro](https://github.com/alanctgardner/gogen-avro)
+
+I recommend benchmarking the resultant programs on typical data, using
+both the code-generated functions and goavro, to see which performs
+better. Not all code-generated functions will outperform goavro for
+all data corpuses.
+
+If you don't have the ability to rebuild and redeploy software updates
+whenever a data schema change occurs, goavro could be a great fit for
+your needs. With goavro, your program can be given a new schema while
+running, compile it into a `Codec` on the fly, and immediately start
+encoding or decoding data using that `Codec`. Because Avro encoding
+specifies that encoded data always be accompanied by a schema, this is
+not usually a problem. If the schema change is backwards compatible,
+and the portion of your program that handles the decoded data can
+still reference the decoded fields, nothing needs to be done when your
+program detects the schema change.
+
+## Resources
+
+* [Avro CLI Examples](https://github.com/miguno/avro-cli-examples)
+* [Avro](https://avro.apache.org/)
+* [Google Snappy](https://google.github.io/snappy/)
+* [JavaScript Object Notation, JSON](https://www.json.org/)
+* [Kafka](https://kafka.apache.org)
+
+## Usage
+
+Documentation is available via
+[![GoDoc](https://godoc.org/github.com/linkedin/goavro?status.svg)](https://godoc.org/github.com/linkedin/goavro).
+
+```Go
+package main
+
+import (
+	"fmt"
+
+	"github.com/linkedin/goavro"
+)
+
+func main() {
+	codec, err := goavro.NewCodec(`
+		{
+		  "type": "record",
+		  "name": "LongList",
+		  "fields" : [
+		    {"name": "next", "type": ["null", "LongList"], "default": null}
+		  ]
+		}`)
+	if err != nil {
+		fmt.Println(err)
+	}
+
+	// NOTE: May omit fields when using default value
+	textual := []byte(`{"next":{"LongList":{}}}`)
+
+	// Convert textual Avro data (in Avro JSON format) to native Go form
+	native, _, err := codec.NativeFromTextual(textual)
+	if err != nil {
+		fmt.Println(err)
+	}
+
+	// Convert native Go form to binary Avro data
+	binary, err := codec.BinaryFromNative(nil, native)
+	if err != nil {
+		fmt.Println(err)
+	}
+
+	// Convert binary Avro data back to native Go form
+	native, _, err = codec.NativeFromBinary(binary)
+	if err != nil {
+		fmt.Println(err)
+	}
+
+	// Convert native Go form to textual Avro data
+	textual, err = codec.TextualFromNative(nil, native)
+	if err != nil {
+		fmt.Println(err)
+	}
+
+	// NOTE: Textual encoding will show all fields, even those with values that
+	// match their default values
+	fmt.Println(string(textual))
+	// Output: {"next":{"LongList":{"next":null}}}
+}
+```
+
+Also please see the example programs in the `examples` directory for
+reference.
+
+### ab2t
+
+The `ab2t` program is similar to the reference standard `avrocat`
+program and converts Avro OCF files to Avro JSON encoding.
+
+### arw
+
+The Avro-ReWrite program, `arw`, can be used to rewrite an Avro OCF
+file while optionally changing the block counts and the compression
+algorithm. `arw` can also upgrade the schema provided the existing
+datum values can be encoded with the newly provided schema.
+
+### avroheader
+
+The Avro Header program, `avroheader`, can be used to print various
+header information from an OCF file.
+
+### splice
+
+The `splice` program can be used to splice together an OCF file from
+an Avro schema file and a raw Avro binary data file.
+
+### Translating Data
+
+A `Codec` provides four methods for translating between a byte slice
+of either binary or textual Avro data and native Go data.
+
+The following methods convert data between native Go data and byte
+slices of the binary Avro representation:
+
+    BinaryFromNative
+    NativeFromBinary
+
+The following methods convert data between native Go data and byte
+slices of the textual Avro representation:
+
+    NativeFromTextual
+    TextualFromNative
+
+Each `Codec` also exposes the `Schema` method to return a simplified
+version of the JSON schema string used to create the `Codec`.
+
+#### Translating From Avro to Go Data
+
+Goavro does not use Go's structure tags to translate data between
+native Go types and Avro encoded data.
+
+When translating from either binary or textual Avro to native Go data,
+goavro returns primitive Go data values for corresponding Avro data
+values. The table below shows how goavro translates Avro types to Go
+types.
+
+| Avro               | Go                       |
+| ------------------ | ------------------------ |
+| `null`             | `nil`                    |
+| `boolean`          | `bool`                   |
+| `bytes`            | `[]byte`                 |
+| `float`            | `float32`                |
+| `double`           | `float64`                |
+| `long`             | `int64`                  |
+| `int`              | `int32`                  |
+| `string`           | `string`                 |
+| `array`            | `[]interface{}`          |
+| `enum`             | `string`                 |
+| `fixed`            | `[]byte`                 |
+| `map` and `record` | `map[string]interface{}` |
+| `union`            | *see below*              |
+
+Because of encoding rules for Avro unions, when a union's value is
+`null`, a simple Go `nil` is returned.
+However, when a union's value is non-`nil`, a Go
+`map[string]interface{}` with a single key is returned for the union.
+The map's single key is the Avro type name and its value is the
+datum's value.
+
+#### Translating From Go to Avro Data
+
+Goavro does not use Go's structure tags to translate data between
+native Go types and Avro encoded data.
+
+When translating from native Go to either binary or textual Avro data,
+goavro generally requires the same native Go data types as the decoder
+would provide, with some exceptions for programmer convenience. Goavro
+will accept any numerical data type provided there is no precision
+lost when encoding the value. For instance, providing `float64(3.0)`
+to an encoder expecting an Avro `int` would succeed, while sending
+`float64(3.5)` to the same encoder would return an error.
+
+When providing a slice of items for an encoder, the encoder will
+accept either `[]interface{}` or any slice of the required type. For
+instance, when the Avro schema specifies:
+`{"type":"array","items":"string"}`, the encoder will accept either
+`[]interface{}` or `[]string`. If given `[]int`, the encoder will
+return an error when it attempts to encode the first non-string array
+value using the string encoder.
+
+When providing a value for an Avro union, the encoder will accept
+`nil` for a `null` value. If the value is non-`nil`, it must be a
+`map[string]interface{}` with a single key-value pair, where the key
+is the Avro type name and the value is the datum's value. As a
+convenience, the `Union` function wraps any datum value in a map as
+specified above.
+
+```Go
+func ExampleUnion() {
+	codec, err := goavro.NewCodec(`["null","string","int"]`)
+	if err != nil {
+		fmt.Println(err)
+	}
+	buf, err := codec.TextualFromNative(nil, goavro.Union("string", "some string"))
+	if err != nil {
+		fmt.Println(err)
+	}
+	fmt.Println(string(buf))
+	// Output: {"string":"some string"}
+}
+```
+
+## Limitations
+
+Goavro is a fully featured encoder and decoder of binary and textual
+JSON Avro data. It fully supports recursive data structures, unions,
+and namespacing. It does have a few limitations, described below, that
+have yet to be addressed.
+
+### Aliases
+
+The Avro specification allows an implementation to optionally map a
+writer's schema to a reader's schema using aliases. Although goavro
+can compile schemas with aliases, it does not yet implement this
+feature.
+
+### Kafka Streams
+
+[Kafka](http://kafka.apache.org) is the reason goavro was
+written. Similar to Avro Object Container Files being a layer of
+abstraction above Avro Data Serialization format, Kafka's use of Avro
+is a layer of abstraction that also sits above Avro Data Serialization
+format, but has its own schema. Like Avro Object Container Files, this
+has been implemented but removed until the API can be improved.
+
+### Default Maximum Block Counts and Block Sizes
+
+When decoding arrays, maps, and OCF files, the Avro specification
+states that the binary includes block counts and block sizes that
+specify how many items are in the next block, and how many bytes are
+in the next block. To prevent possible denial-of-service attacks
+caused by attempting to decode maliciously crafted data, decoded block
+counts and sizes are compared against the public library variables
+MaxBlockCount and MaxBlockSize. When the decoded values exceed these
+limits, the decoder returns an error.
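+
+Since both limits are exported package variables, a client that
+decodes untrusted input can tighten them before use. The following is
+a minimal illustrative sketch; the particular limits chosen here are
+arbitrary examples, not recommendations:
+
+```Go
+import "github.com/linkedin/goavro"
+
+func init() {
+	// Cap block counts and sizes well below the library defaults
+	// before any decoding of untrusted data takes place.
+	goavro.MaxBlockCount = 1024
+	goavro.MaxBlockSize = 1 << 20 // 1 MiB
+}
+```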
+
+Because not every upstream client is the same, we've chosen some sane
+defaults for these values, but left them as mutable variables, so that
+clients may override them if deemed necessary for their purposes. Both
+variables initially default to `math.MaxInt32`, or roughly 2.1 billion
+items and bytes, respectively.
+
+## License
+
+### Goavro license
+
+Copyright 2017 LinkedIn Corp. Licensed under the Apache License,
+Version 2.0 (the "License"); you may not use this file except in
+compliance with the License. You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+implied.
+
+### Google Snappy license
+
+Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+## Third Party Dependencies
+
+### Google Snappy
+
+Goavro links with [Google Snappy](http://google.github.io/snappy/)
+to provide Snappy compression and decompression support.
diff --git a/vendor/github.com/linkedin/goavro/array.go b/vendor/github.com/linkedin/goavro/array.go
new file mode 100644
index 00000000000..b1ef69eadee
--- /dev/null
+++ b/vendor/github.com/linkedin/goavro/array.go
@@ -0,0 +1,226 @@
+// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version
+// 2.0 (the "License"); you may not use this file except in compliance with the
+// License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ +package goavro + +import ( + "fmt" + "io" + "math" + "reflect" +) + +func makeArrayCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}) (*Codec, error) { + // array type must have items + itemSchema, ok := schemaMap["items"] + if !ok { + return nil, fmt.Errorf("Array ought to have items key") + } + itemCodec, err := buildCodec(st, enclosingNamespace, itemSchema) + if err != nil { + return nil, fmt.Errorf("Array items ought to be valid Avro type: %s", err) + } + + return &Codec{ + typeName: &name{"array", nullNamespace}, + nativeFromBinary: func(buf []byte) (interface{}, []byte, error) { + var value interface{} + var err error + + // block count and block size + if value, buf, err = longNativeFromBinary(buf); err != nil { + return nil, nil, fmt.Errorf("cannot decode binary array block count: %s", err) + } + blockCount := value.(int64) + if blockCount < 0 { + // NOTE: A negative block count implies there is a long encoded + // block size following the negative block count. We have no use + // for the block size in this decoder, so we read and discard + // the value. + if blockCount == math.MinInt64 { + // The minimum number for any signed numerical type can never be made positive + return nil, nil, fmt.Errorf("cannot decode binary array with block count: %d", math.MinInt64) + } + blockCount = -blockCount // convert to its positive equivalent + if _, buf, err = longNativeFromBinary(buf); err != nil { + return nil, nil, fmt.Errorf("cannot decode binary array block size: %s", err) + } + } + // Ensure block count does not exceed some sane value. + if blockCount > MaxBlockCount { + return nil, nil, fmt.Errorf("cannot decode binary array when block count exceeds MaxBlockCount: %d > %d", blockCount, MaxBlockCount) + } + // NOTE: While the attempt of a RAM optimization shown below is not + // necessary, many encoders will encode all items in a single block. + // We can optimize amount of RAM allocated by runtime for the array + // by initializing the array for that number of items. + arrayValues := make([]interface{}, 0, blockCount) + + for blockCount != 0 { + // Decode `blockCount` datum values from buffer + for i := int64(0); i < blockCount; i++ { + if value, buf, err = itemCodec.nativeFromBinary(buf); err != nil { + return nil, nil, fmt.Errorf("cannot decode binary array item %d: %s", i+1, err) + } + arrayValues = append(arrayValues, value) + } + // Decode next blockCount from buffer, because there may be more blocks + if value, buf, err = longNativeFromBinary(buf); err != nil { + return nil, nil, fmt.Errorf("cannot decode binary array block count: %s", err) + } + blockCount = value.(int64) + if blockCount < 0 { + // NOTE: A negative block count implies there is a long + // encoded block size following the negative block count. We + // have no use for the block size in this decoder, so we + // read and discard the value. + if blockCount == math.MinInt64 { + // The minimum number for any signed numerical type can + // never be made positive + return nil, nil, fmt.Errorf("cannot decode binary array with block count: %d", math.MinInt64) + } + blockCount = -blockCount // convert to its positive equivalent + if _, buf, err = longNativeFromBinary(buf); err != nil { + return nil, nil, fmt.Errorf("cannot decode binary array block size: %s", err) + } + } + // Ensure block count does not exceed some sane value. 
+				if blockCount > MaxBlockCount {
+					return nil, nil, fmt.Errorf("cannot decode binary array when block count exceeds MaxBlockCount: %d > %d", blockCount, MaxBlockCount)
+				}
+			}
+			return arrayValues, buf, nil
+		},
+		binaryFromNative: func(buf []byte, datum interface{}) ([]byte, error) {
+			arrayValues, err := convertArray(datum)
+			if err != nil {
+				return nil, fmt.Errorf("cannot encode binary array: %s", err)
+			}
+
+			arrayLength := int64(len(arrayValues))
+			var alreadyEncoded, remainingInBlock int64
+
+			for i, item := range arrayValues {
+				if remainingInBlock == 0 { // start a new block
+					remainingInBlock = arrayLength - alreadyEncoded
+					if remainingInBlock > MaxBlockCount {
+						// limit block count to MaxBlockCount
+						remainingInBlock = MaxBlockCount
+					}
+					buf, _ = longBinaryFromNative(buf, remainingInBlock)
+				}
+
+				if buf, err = itemCodec.binaryFromNative(buf, item); err != nil {
+					return nil, fmt.Errorf("cannot encode binary array item %d: %v: %s", i+1, item, err)
+				}
+
+				remainingInBlock--
+				alreadyEncoded++
+			}
+
+			return longBinaryFromNative(buf, 0) // append trailing 0 block count to signal end of Array
+		},
+		nativeFromTextual: func(buf []byte) (interface{}, []byte, error) {
+			var arrayValues []interface{}
+			var value interface{}
+			var err error
+			var b byte
+
+			if buf, err = advanceAndConsume(buf, '['); err != nil {
+				return nil, nil, fmt.Errorf("cannot decode textual array: %s", err)
+			}
+			if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 {
+				return nil, nil, fmt.Errorf("cannot decode textual array: %s", io.ErrShortBuffer)
+			}
+			// NOTE: Special case for empty array
+			if buf[0] == ']' {
+				return arrayValues, buf[1:], nil
+			}
+
+			// NOTE: Also terminates when the ']' byte is read.
+			for len(buf) > 0 {
+				// decode value
+				value, buf, err = itemCodec.nativeFromTextual(buf)
+				if err != nil {
+					return nil, nil, fmt.Errorf("cannot decode textual array: %s", err)
+				}
+				arrayValues = append(arrayValues, value)
+				// either comma or closing square bracket
+				if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 {
+					return nil, nil, fmt.Errorf("cannot decode textual array: %s", io.ErrShortBuffer)
+				}
+				switch b = buf[0]; b {
+				case ']':
+					return arrayValues, buf[1:], nil
+				case ',':
+					// no-op
+				default:
+					return nil, nil, fmt.Errorf("cannot decode textual array: expected ',' or ']'; received: %q", b)
+				}
+				// NOTE: consume comma from above
+				if buf, _ = advanceToNonWhitespace(buf[1:]); len(buf) == 0 {
+					return nil, nil, fmt.Errorf("cannot decode textual array: %s", io.ErrShortBuffer)
+				}
+			}
+			return nil, buf, io.ErrShortBuffer
+		},
+		textualFromNative: func(buf []byte, datum interface{}) ([]byte, error) {
+			arrayValues, err := convertArray(datum)
+			if err != nil {
+				return nil, fmt.Errorf("cannot encode textual array: %s", err)
+			}
+
+			var atLeastOne bool
+
+			buf = append(buf, '[')
+
+			for i, item := range arrayValues {
+				atLeastOne = true
+
+				// Encode value
+				buf, err = itemCodec.textualFromNative(buf, item)
+				if err != nil {
+					// field was specified in datum; therefore its value was invalid
+					return nil, fmt.Errorf("cannot encode textual array item %d; %v: %s", i+1, item, err)
+				}
+				buf = append(buf, ',')
+			}
+
+			if atLeastOne {
+				return append(buf[:len(buf)-1], ']'), nil
+			}
+			return append(buf, ']'), nil
+		},
+	}, nil
+}
+
+// convertArray converts interface{} to []interface{} if possible.
+func convertArray(datum interface{}) ([]interface{}, error) { + arrayValues, ok := datum.([]interface{}) + if ok { + return arrayValues, nil + } + // NOTE: When given a slice of any other type, zip values to + // items as a convenience to client. + v := reflect.ValueOf(datum) + if v.Kind() != reflect.Slice { + return nil, fmt.Errorf("cannot create []interface{}: expected slice; received: %T", datum) + } + // NOTE: Two better alternatives to the current algorithm are: + // (1) mutate the reflection tuple underneath to convert the + // []int, for example, to []interface{}, with O(1) complexity + // (2) use copy builtin to zip the data items over with O(n) complexity, + // but more efficient than what's below. + // Suggestions? + arrayValues = make([]interface{}, v.Len()) + for idx := 0; idx < v.Len(); idx++ { + arrayValues[idx] = v.Index(idx).Interface() + } + return arrayValues, nil +} diff --git a/vendor/github.com/linkedin/goavro/binaryReader.go b/vendor/github.com/linkedin/goavro/binaryReader.go new file mode 100644 index 00000000000..f2c302bbfe7 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/binaryReader.go @@ -0,0 +1,160 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +package goavro + +import ( + "fmt" + "io" + "math" +) + +// bytesBinaryReader reads bytes from io.Reader and returns byte slice of +// specified size or the error encountered while trying to read those bytes. +func bytesBinaryReader(ior io.Reader) ([]byte, error) { + size, err := longBinaryReader(ior) + if err != nil { + return nil, fmt.Errorf("cannot read bytes: cannot read size: %s", err) + } + if size < 0 { + return nil, fmt.Errorf("cannot read bytes: size is negative: %d", size) + } + if size > MaxBlockSize { + return nil, fmt.Errorf("cannot read bytes: size exceeds MaxBlockSize: %d > %d", size, MaxBlockSize) + } + buf := make([]byte, size) + _, err = io.ReadAtLeast(ior, buf, int(size)) + if err != nil { + return nil, fmt.Errorf("cannot read bytes: %s", err) + } + return buf, nil +} + +// longBinaryReader reads bytes from io.Reader until has complete long value, or +// read error. +func longBinaryReader(ior io.Reader) (int64, error) { + var value uint64 + var shift uint + var err error + var b byte + + // NOTE: While benchmarks show it's more performant to invoke ReadByte when + // available, testing whether a variable's data type implements a particular + // method is quite slow too. So perform the test once, and branch to the + // appropriate loop based on the results. + + if byteReader, ok := ior.(io.ByteReader); ok { + for { + if b, err = byteReader.ReadByte(); err != nil { + return 0, err // NOTE: must send back unaltered error to detect io.EOF + } + value |= uint64(b&intMask) << shift + if b&intFlag == 0 { + return (int64(value>>1) ^ -int64(value&1)), nil + } + shift += 7 + } + } + + // NOTE: ior does not also implement io.ByteReader, so we must allocate a + // byte slice with a single byte, and read each byte into the slice. 
+ buf := make([]byte, 1) + for { + if _, err = ior.Read(buf); err != nil { + return 0, err // NOTE: must send back unaltered error to detect io.EOF + } + b = buf[0] + value |= uint64(b&intMask) << shift + if b&intFlag == 0 { + return (int64(value>>1) ^ -int64(value&1)), nil + } + shift += 7 + } +} + +// metadataBinaryReader reads bytes from io.Reader until has entire map value, +// or read error. +func metadataBinaryReader(ior io.Reader) (map[string][]byte, error) { + var err error + var value interface{} + + // block count and block size + if value, err = longBinaryReader(ior); err != nil { + return nil, fmt.Errorf("cannot read map block count: %s", err) + } + blockCount := value.(int64) + if blockCount < 0 { + if blockCount == math.MinInt64 { + // The minimum number for any signed numerical type can never be + // made positive + return nil, fmt.Errorf("cannot read map with block count: %d", math.MinInt64) + } + // NOTE: A negative block count implies there is a long encoded block + // size following the negative block count. We have no use for the block + // size in this decoder, so we read and discard the value. + blockCount = -blockCount // convert to its positive equivalent + if _, err = longBinaryReader(ior); err != nil { + return nil, fmt.Errorf("cannot read map block size: %s", err) + } + } + // Ensure block count does not exceed some sane value. + if blockCount > MaxBlockCount { + return nil, fmt.Errorf("cannot read map when block count exceeds MaxBlockCount: %d > %d", blockCount, MaxBlockCount) + } + // NOTE: While the attempt of a RAM optimization shown below is not + // necessary, many encoders will encode all items in a single block. We can + // optimize amount of RAM allocated by runtime for the array by initializing + // the array for that number of items. + mapValues := make(map[string][]byte, blockCount) + + for blockCount != 0 { + // Decode `blockCount` datum values from buffer + for i := int64(0); i < blockCount; i++ { + // first decode the key string + keyBytes, err := bytesBinaryReader(ior) + if err != nil { + return nil, fmt.Errorf("cannot read map key: %s", err) + } + key := string(keyBytes) + if _, ok := mapValues[key]; ok { + return nil, fmt.Errorf("cannot read map: duplicate key: %q", key) + } + // metadata values are always bytes + buf, err := bytesBinaryReader(ior) + if err != nil { + return nil, fmt.Errorf("cannot read map value for key %q: %s", key, err) + } + mapValues[key] = buf + } + // Decode next blockCount from buffer, because there may be more blocks + if value, err = longBinaryReader(ior); err != nil { + return nil, fmt.Errorf("cannot read map block count: %s", err) + } + blockCount = value.(int64) + if blockCount < 0 { + if blockCount == math.MinInt64 { + // The minimum number for any signed numerical type can never be + // made positive + return nil, fmt.Errorf("cannot read map with block count: %d", math.MinInt64) + } + // NOTE: A negative block count implies there is a long encoded + // block size following the negative block count. We have no use for + // the block size in this decoder, so we read and discard the value. + blockCount = -blockCount // convert to its positive equivalent + if _, err = longBinaryReader(ior); err != nil { + return nil, fmt.Errorf("cannot read map block size: %s", err) + } + } + // Ensure block count does not exceed some sane value. 
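// For illustration (an assumed wire layout, not upstream text): a metadata
// map written as a single block of two entries, followed by the
// terminating zero count, is framed as
//
//	0x04                    // block count 2, zig-zag varint
//	...2 key/value pairs... // each a length-prefixed key and value
//	0x00                    // block count 0: end of map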
+ if blockCount > MaxBlockCount { + return nil, fmt.Errorf("cannot read map when block count exceeds MaxBlockCount: %d > %d", blockCount, MaxBlockCount) + } + } + return mapValues, nil +} diff --git a/vendor/github.com/linkedin/goavro/boolean.go b/vendor/github.com/linkedin/goavro/boolean.go new file mode 100644 index 00000000000..2ae3ec3dca9 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/boolean.go @@ -0,0 +1,72 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +package goavro + +import ( + "bytes" + "errors" + "fmt" + "io" +) + +func booleanNativeFromBinary(buf []byte) (interface{}, []byte, error) { + if len(buf) < 1 { + return nil, nil, io.ErrShortBuffer + } + var b byte + b, buf = buf[0], buf[1:] + switch b { + case byte(0): + return false, buf, nil + case byte(1): + return true, buf, nil + default: + return nil, nil, fmt.Errorf("cannot decode binary boolean: expected: Go byte(0) or byte(1); received: byte(%d)", b) + } +} + +func booleanBinaryFromNative(buf []byte, datum interface{}) ([]byte, error) { + value, ok := datum.(bool) + if !ok { + return nil, fmt.Errorf("cannot encode binary boolean: expected: Go bool; received: %T", datum) + } + var b byte + if value { + b = 1 + } + return append(buf, b), nil +} + +func booleanNativeFromTextual(buf []byte) (interface{}, []byte, error) { + if len(buf) < 4 { + return nil, nil, fmt.Errorf("cannot decode textual boolean: %s", io.ErrShortBuffer) + } + if bytes.Equal(buf[:4], []byte("true")) { + return true, buf[4:], nil + } + if len(buf) < 5 { + return nil, nil, fmt.Errorf("cannot decode textual boolean: %s", io.ErrShortBuffer) + } + if bytes.Equal(buf[:5], []byte("false")) { + return false, buf[5:], nil + } + return nil, nil, errors.New("expected false or true") +} + +func booleanTextualFromNative(buf []byte, datum interface{}) ([]byte, error) { + value, ok := datum.(bool) + if !ok { + return nil, fmt.Errorf("boolean: expected: Go bool; received: %T", datum) + } + if value { + return append(buf, "true"...), nil + } + return append(buf, "false"...), nil +} diff --git a/vendor/github.com/linkedin/goavro/bytes.go b/vendor/github.com/linkedin/goavro/bytes.go new file mode 100644 index 00000000000..baddaae04da --- /dev/null +++ b/vendor/github.com/linkedin/goavro/bytes.go @@ -0,0 +1,369 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
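// For illustration (a hypothetical round trip, not part of this file): the
// boolean codec in boolean.go above maps a Go bool to a single Avro byte:
//
//	buf, _ := booleanBinaryFromNative(nil, true) // buf == []byte{0x01}
//	v, rest, _ := booleanNativeFromBinary(buf)   // v == true, len(rest) == 0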
+ +package goavro + +import ( + "encoding/hex" + "errors" + "fmt" + "io" + "unicode" + "unicode/utf16" + "unicode/utf8" +) + +//////////////////////////////////////// +// Binary Decode +//////////////////////////////////////// + +func bytesNativeFromBinary(buf []byte) (interface{}, []byte, error) { + if len(buf) < 1 { + return nil, nil, fmt.Errorf("cannot decode binary bytes: %s", io.ErrShortBuffer) + } + var decoded interface{} + var err error + if decoded, buf, err = longNativeFromBinary(buf); err != nil { + return nil, nil, fmt.Errorf("cannot decode binary bytes: %s", err) + } + size := decoded.(int64) // always returns int64 + if size < 0 { + return nil, nil, fmt.Errorf("cannot decode binary bytes: negative size: %d", size) + } + if size > int64(len(buf)) { + return nil, nil, fmt.Errorf("cannot decode binary bytes: %s", io.ErrShortBuffer) + } + return buf[:size], buf[size:], nil +} + +func stringNativeFromBinary(buf []byte) (interface{}, []byte, error) { + d, b, err := bytesNativeFromBinary(buf) + if err != nil { + return nil, nil, fmt.Errorf("cannot decode binary string: %s", err) + } + return string(d.([]byte)), b, nil +} + +//////////////////////////////////////// +// Binary Encode +//////////////////////////////////////// + +func bytesBinaryFromNative(buf []byte, datum interface{}) ([]byte, error) { + var d []byte + switch datum.(type) { + case []byte: + d = datum.([]byte) + case string: + d = []byte(datum.(string)) + default: + return nil, fmt.Errorf("cannot encode binary bytes: expected: []byte; received: %T", datum) + } + buf, _ = longBinaryFromNative(buf, len(d)) // only fails when given non integer + return append(buf, d...), nil // append datum bytes +} + +func stringBinaryFromNative(buf []byte, datum interface{}) ([]byte, error) { + someBytes, ok := datum.(string) + if !ok { + return nil, fmt.Errorf("cannot encode binary bytes: expected: string; received: %T", datum) + } + buf, _ = longBinaryFromNative(buf, len(someBytes)) // only fails when given non integer + return append(buf, someBytes...), nil // append datum bytes +} + +//////////////////////////////////////// +// Text Decode +//////////////////////////////////////// + +func bytesNativeFromTextual(buf []byte) (interface{}, []byte, error) { + buflen := len(buf) + if buflen < 2 { + return nil, nil, fmt.Errorf("cannot decode textual bytes: %s", io.ErrShortBuffer) + } + if buf[0] != '"' { + return nil, nil, fmt.Errorf("cannot decode textual bytes: expected initial \"; found: %#U", buf[0]) + } + var newBytes []byte + var escaped bool + // Loop through bytes following initial double quote, but note we will + // return immediately when find unescaped double quote. + for i := 1; i < buflen; i++ { + b := buf[i] + if escaped { + escaped = false + if b2, ok := unescapeSpecialJSON(b); ok { + newBytes = append(newBytes, b2) + continue + } + if b == 'u' { + // NOTE: Need at least 4 more bytes to read uint16, but subtract + // 1 because do not want to count the trailing quote and + // subtract another 1 because already consumed u but have yet to + // increment i. + if i > buflen-6 { + return nil, nil, fmt.Errorf("cannot decode textual bytes: %s", io.ErrShortBuffer) + } + // NOTE: Avro bytes represent binary data, and do not + // necessarily represent text. Therefore, Avro bytes are not + // encoded in UTF-16. Each \u is followed by 4 hexadecimal + // digits, the first and second of which must be 0. 
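// For example (illustrative, not upstream text): the escape \u00ff
// contributes the single byte 0xFF, which is why only the final two
// hexadecimal digits, buf[i+3:i+5], are parsed here.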
+ v, err := parseUint64FromHexSlice(buf[i+3 : i+5]) + if err != nil { + return nil, nil, fmt.Errorf("cannot decode textual bytes: %s", err) + } + i += 4 // absorb 4 characters: one 'u' and three of the digits + newBytes = append(newBytes, byte(v)) + continue + } + newBytes = append(newBytes, b) + continue + } + if b == '\\' { + escaped = true + continue + } + if b == '"' { + return newBytes, buf[i+1:], nil + } + newBytes = append(newBytes, b) + } + return nil, nil, fmt.Errorf("cannot decode textual bytes: expected final \"; found: %#U", buf[buflen-1]) +} + +func stringNativeFromTextual(buf []byte) (interface{}, []byte, error) { + buflen := len(buf) + if buflen < 2 { + return nil, nil, fmt.Errorf("cannot decode textual string: %s", io.ErrShortBuffer) + } + if buf[0] != '"' { + return nil, nil, fmt.Errorf("cannot decode textual string: expected initial \"; found: %#U", buf[0]) + } + var newBytes []byte + var escaped bool + // Loop through bytes following initial double quote, but note we will + // return immediately when find unescaped double quote. + for i := 1; i < buflen; i++ { + b := buf[i] + if escaped { + escaped = false + if b2, ok := unescapeSpecialJSON(b); ok { + newBytes = append(newBytes, b2) + continue + } + if b == 'u' { + // NOTE: Need at least 4 more bytes to read uint16, but subtract + // 1 because do not want to count the trailing quote and + // subtract another 1 because already consumed u but have yet to + // increment i. + if i > buflen-6 { + return nil, nil, fmt.Errorf("cannot decode textual string: %s", io.ErrShortBuffer) + } + v, err := parseUint64FromHexSlice(buf[i+1 : i+5]) + if err != nil { + return nil, nil, fmt.Errorf("cannot decode textual string: %s", err) + } + i += 4 // absorb 4 characters: one 'u' and three of the digits + + nbl := len(newBytes) + newBytes = append(newBytes, []byte{0, 0, 0, 0}...) 
// grow to make room for UTF-8 encoded rune + + r := rune(v) + if utf16.IsSurrogate(r) { + i++ // absorb final hexadecimal digit from previous value + + // Expect second half of surrogate pair + if i > buflen-6 || buf[i] != '\\' || buf[i+1] != 'u' { + return nil, nil, errors.New("cannot decode textual string: missing second half of surrogate pair") + } + + v, err = parseUint64FromHexSlice(buf[i+2 : i+6]) + if err != nil { + return nil, nil, fmt.Errorf("cannot decode textual string: %s", err) + } + i += 5 // absorb 5 characters: two for '\u', and 3 of the 4 digits + + // Get code point by combining high and low surrogate bits + r = utf16.DecodeRune(r, rune(v)) + } + + width := utf8.EncodeRune(newBytes[nbl:], r) // append UTF-8 encoded version of code point + newBytes = newBytes[:nbl+width] // trim off excess bytes + continue + } + newBytes = append(newBytes, b) + continue + } + if b == '\\' { + escaped = true + continue + } + if b == '"' { + return string(newBytes), buf[i+1:], nil + } + newBytes = append(newBytes, b) + } + return nil, nil, fmt.Errorf("cannot decode textual string: expected final \"; found: %x", buf[buflen-1]) +} + +func parseUint64FromHexSlice(buf []byte) (uint64, error) { + var value uint64 + for _, b := range buf { + diff := uint64(b - '0') + if diff < 10 { + value = (value << 4) | diff + continue + } + b10 := b + 10 + diff = uint64(b10 - 'A') + if diff < 10 { + return 0, hex.InvalidByteError(b) + } + if diff < 16 { + value = (value << 4) | diff + continue + } + diff = uint64(b10 - 'a') + if diff < 10 { + return 0, hex.InvalidByteError(b) + } + if diff < 16 { + value = (value << 4) | diff + continue + } + return 0, hex.InvalidByteError(b) + } + return value, nil +} + +func unescapeSpecialJSON(b byte) (byte, bool) { + // NOTE: The following 8 special JSON characters must be escaped: + switch b { + case '"', '\\', '/': + return b, true + case 'b': + return '\b', true + case 'f': + return '\f', true + case 'n': + return '\n', true + case 'r': + return '\r', true + case 't': + return '\t', true + } + return b, false +} + +//////////////////////////////////////// +// Text Encode +//////////////////////////////////////// + +func bytesTextualFromNative(buf []byte, datum interface{}) ([]byte, error) { + someBytes, ok := datum.([]byte) + if !ok { + return nil, fmt.Errorf("cannot encode textual bytes: expected: []byte; received: %T", datum) + } + buf = append(buf, '"') // prefix buffer with double quote + for _, b := range someBytes { + if escaped, ok := escapeSpecialJSON(b); ok { + buf = append(buf, escaped...) + continue + } + if r := rune(b); r < utf8.RuneSelf && unicode.IsPrint(r) { + buf = append(buf, b) + continue + } + // This Code Point _could_ be encoded as a single byte, however, it's + // above standard ASCII range (b > 127), therefore must encode using its + // four-byte hexadecimal equivalent, which will always start with the + // high byte 00 + buf = appendUnicodeHex(buf, uint16(b)) + } + return append(buf, '"'), nil // postfix buffer with double quote +} + +func stringTextualFromNative(buf []byte, datum interface{}) ([]byte, error) { + someString, ok := datum.(string) + if !ok { + return nil, fmt.Errorf("cannot encode textual string: expected: string; received: %T", datum) + } + buf = append(buf, '"') // prefix buffer with double quote + for _, r := range someString { + if escaped, ok := escapeSpecialJSON(byte(r)); ok { + buf = append(buf, escaped...) 
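// Illustration (hedged, not part of the upstream file): for a rune outside
// the Basic Multilingual Plane such as U+1F600, utf16.EncodeRune below
// yields the surrogate halves 0xD83D and 0xDE00, which appendUnicodeHex
// emits as \uD83D\uDE00.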
+ continue + } + if r < utf8.RuneSelf && unicode.IsPrint(r) { + buf = append(buf, byte(r)) + continue + } + // NOTE: Attempt to encode code point as UTF-16 surrogate pair + r1, r2 := utf16.EncodeRune(r) + if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar { + // code point does require surrogate pair, and thus two uint16 values + buf = appendUnicodeHex(buf, uint16(r1)) + buf = appendUnicodeHex(buf, uint16(r2)) + continue + } + // Code Point does not require surrogate pair. + buf = appendUnicodeHex(buf, uint16(r)) + } + return append(buf, '"'), nil // postfix buffer with double quote +} + +func appendUnicodeHex(buf []byte, v uint16) []byte { + // Start with '\u' prefix: + buf = append(buf, sliceUnicode...) + // And tack on 4 hexadecimal digits: + buf = append(buf, hexDigits[(v&0xF000)>>12]) + buf = append(buf, hexDigits[(v&0xF00)>>8]) + buf = append(buf, hexDigits[(v&0xF0)>>4]) + buf = append(buf, hexDigits[(v&0xF)]) + return buf +} + +const hexDigits = "0123456789ABCDEF" + +func escapeSpecialJSON(b byte) ([]byte, bool) { + // NOTE: The following 8 special JSON characters must be escaped: + switch b { + case '"': + return sliceQuote, true + case '\\': + return sliceBackslash, true + case '/': + return sliceSlash, true + case '\b': + return sliceBackspace, true + case '\f': + return sliceFormfeed, true + case '\n': + return sliceNewline, true + case '\r': + return sliceCarriageReturn, true + case '\t': + return sliceTab, true + } + return nil, false +} + +// While slices in Go are never constants, we can initialize them once and reuse +// them many times. We define these slices at library load time and reuse them +// when encoding JSON. +var ( + sliceQuote = []byte("\\\"") + sliceBackslash = []byte("\\\\") + sliceSlash = []byte("\\/") + sliceBackspace = []byte("\\b") + sliceFormfeed = []byte("\\f") + sliceNewline = []byte("\\n") + sliceCarriageReturn = []byte("\\r") + sliceTab = []byte("\\t") + sliceUnicode = []byte("\\u") +) diff --git a/vendor/github.com/linkedin/goavro/codec.go b/vendor/github.com/linkedin/goavro/codec.go new file mode 100644 index 00000000000..5a1dff6414a --- /dev/null +++ b/vendor/github.com/linkedin/goavro/codec.go @@ -0,0 +1,442 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +package goavro + +import ( + "encoding/json" + "fmt" + "math" +) + +var ( + // MaxBlockCount is the maximum number of data items allowed in a single + // block that will be decoded from a binary stream, whether when reading + // blocks to decode an array or a map, or when reading blocks from an OCF + // stream. This check is to ensure decoding binary data will not cause the + // library to over allocate RAM, potentially creating a denial of service on + // the system. + // + // If a particular application needs to decode binary Avro data that + // potentially has more data items in a single block, then this variable may + // be modified at your discretion. + MaxBlockCount = int64(math.MaxInt32) + + // MaxBlockSize is the maximum number of bytes that will be allocated for a + // single block of data items when decoding from a binary stream. 
This check + // is to ensure decoding binary data will not cause the library to over + // allocate RAM, potentially creating a denial of service on the system. + // + // If a particular application needs to decode binary Avro data that + // potentially has more bytes in a single block, then this variable may be + // modified at your discretion. + MaxBlockSize = int64(math.MaxInt32) +) + +// Codec supports decoding binary and text Avro data to Go native data types, +// and conversely encoding Go native data types to binary or text Avro data. A +// Codec is created as a stateless structure that can be safely used in multiple +// go routines simultaneously. +type Codec struct { + typeName *name + schema string + + nativeFromTextual func([]byte) (interface{}, []byte, error) + binaryFromNative func([]byte, interface{}) ([]byte, error) + nativeFromBinary func([]byte) (interface{}, []byte, error) + textualFromNative func([]byte, interface{}) ([]byte, error) +} + +func newSymbolTable() map[string]*Codec { + return map[string]*Codec{ + "boolean": { + typeName: &name{"boolean", nullNamespace}, + binaryFromNative: booleanBinaryFromNative, + nativeFromBinary: booleanNativeFromBinary, + nativeFromTextual: booleanNativeFromTextual, + textualFromNative: booleanTextualFromNative, + }, + "bytes": { + typeName: &name{"bytes", nullNamespace}, + binaryFromNative: bytesBinaryFromNative, + nativeFromBinary: bytesNativeFromBinary, + nativeFromTextual: bytesNativeFromTextual, + textualFromNative: bytesTextualFromNative, + }, + "double": { + typeName: &name{"double", nullNamespace}, + binaryFromNative: doubleBinaryFromNative, + nativeFromBinary: doubleNativeFromBinary, + nativeFromTextual: doubleNativeFromTextual, + textualFromNative: doubleTextualFromNative, + }, + "float": { + typeName: &name{"float", nullNamespace}, + binaryFromNative: floatBinaryFromNative, + nativeFromBinary: floatNativeFromBinary, + nativeFromTextual: floatNativeFromTextual, + textualFromNative: floatTextualFromNative, + }, + "int": { + + typeName: &name{"int", nullNamespace}, + binaryFromNative: intBinaryFromNative, + nativeFromBinary: intNativeFromBinary, + nativeFromTextual: intNativeFromTextual, + textualFromNative: intTextualFromNative, + }, + "long": { + typeName: &name{"long", nullNamespace}, + binaryFromNative: longBinaryFromNative, + nativeFromBinary: longNativeFromBinary, + nativeFromTextual: longNativeFromTextual, + textualFromNative: longTextualFromNative, + }, + "null": { + typeName: &name{"null", nullNamespace}, + binaryFromNative: nullBinaryFromNative, + nativeFromBinary: nullNativeFromBinary, + nativeFromTextual: nullNativeFromTextual, + textualFromNative: nullTextualFromNative, + }, + "string": { + typeName: &name{"string", nullNamespace}, + binaryFromNative: stringBinaryFromNative, + nativeFromBinary: stringNativeFromBinary, + nativeFromTextual: stringNativeFromTextual, + textualFromNative: stringTextualFromNative, + }, + } +} + +// NewCodec returns a Codec used to translate between a byte slice of either +// binary or textual Avro data and native Go data. +// +// Creating a `Codec` is fast, but ought to be performed exactly once per Avro +// schema to process. Once a `Codec` is created, it may be used multiple times +// to convert data between native form and binary Avro representation, or +// between native form and textual Avro representation. +// +// A particular `Codec` can work with only one Avro schema. However, +// there is no practical limit to how many `Codec`s may be created and +// used in a program. 
Internally a `Codec` is merely a named tuple of +// four function pointers, and maintains no runtime state that is mutated +// after instantiation. In other words, `Codec`s may be safely used by +// many go routines simultaneously, as your program requires. +// +// codec, err := goavro.NewCodec(` +// { +// "type": "record", +// "name": "LongList", +// "fields" : [ +// {"name": "next", "type": ["null", "LongList"], "default": null} +// ] +// }`) +// if err != nil { +// fmt.Println(err) +// } +func NewCodec(schemaSpecification string) (*Codec, error) { + // bootstrap a symbol table with primitive type codecs for the new codec + st := newSymbolTable() + + // NOTE: Some clients might give us unadorned primitive type name for the + // schema, e.g., "long". While it is not valid JSON, it is a valid schema. + // Provide special handling for primitive type names. + if c, ok := st[schemaSpecification]; ok { + c.schema = schemaSpecification + return c, nil + } + + // NOTE: At this point, schema should be valid JSON, otherwise it's an error + // condition. + var schema interface{} + if err := json.Unmarshal([]byte(schemaSpecification), &schema); err != nil { + return nil, fmt.Errorf("cannot unmarshal schema JSON: %s", err) + } + + c, err := buildCodec(st, nullNamespace, schema) + if err == nil { + // compact schema and save it + compact, err := json.Marshal(schema) + if err != nil { + return nil, fmt.Errorf("cannot remarshal schema: %s", err) + } + c.schema = string(compact) + } + return c, err +} + +// BinaryFromNative appends the binary encoded byte slice representation of the +// provided native datum value to the provided byte slice +// in accordance with the Avro schema supplied when +// creating the Codec. It is supplied a byte slice to which to append the binary +// encoded data along with the actual data to encode. On success, it returns a +// new byte slice with the encoded bytes appended, and a nil error value. On +// error, it returns the original byte slice, and the error message. +// +// func ExampleBinaryFromNative() { +// codec, err := goavro.NewCodec(` +// { +// "type": "record", +// "name": "LongList", +// "fields" : [ +// {"name": "next", "type": ["null", "LongList"], "default": null} +// ] +// }`) +// if err != nil { +// fmt.Println(err) +// } +// +// // Convert native Go form to binary Avro data +// binary, err := codec.BinaryFromNative(nil, map[string]interface{}{ +// "next": map[string]interface{}{ +// "LongList": map[string]interface{}{ +// "next": map[string]interface{}{ +// "LongList": map[string]interface{}{ +// // NOTE: May omit fields when using default value +// }, +// }, +// }, +// }, +// }) +// if err != nil { +// fmt.Println(err) +// } +// +// fmt.Printf("%#v", binary) +// // Output: []byte{0x2, 0x2, 0x0} +// } +func (c *Codec) BinaryFromNative(buf []byte, datum interface{}) ([]byte, error) { + newBuf, err := c.binaryFromNative(buf, datum) + if err != nil { + return buf, err // if error, return original byte slice + } + return newBuf, nil +} + +// NativeFromBinary returns a native datum value from the binary encoded byte +// slice in accordance with the Avro schema supplied when creating the Codec. On +// success, it returns the decoded datum, along with a new byte slice with the +// decoded bytes consumed, and a nil error value. On error, it returns nil for +// the datum value, the original byte slice, and the error message. 
+ // + // func ExampleNativeFromBinary() { + // codec, err := goavro.NewCodec(` + // { + // "type": "record", + // "name": "LongList", + // "fields" : [ + // {"name": "next", "type": ["null", "LongList"], "default": null} + // ] + // }`) + // if err != nil { + // fmt.Println(err) + // } + // + // // Convert binary Avro data back to native Go form + // binary := []byte{0x2, 0x2, 0x0} + // + // native, _, err := codec.NativeFromBinary(binary) + // if err != nil { + // fmt.Println(err) + // } + // + // fmt.Printf("%v", native) + // // Output: map[next:map[LongList:map[next:map[LongList:map[next:]]]]] + // } +func (c *Codec) NativeFromBinary(buf []byte) (interface{}, []byte, error) { + value, newBuf, err := c.nativeFromBinary(buf) + if err != nil { + return nil, buf, err // if error, return original byte slice + } + return value, newBuf, nil +} + +// NativeFromTextual converts Avro data in JSON text format from the provided byte +// slice to Go native data types in accordance with the Avro schema supplied +// when creating the Codec. On success, it returns the decoded datum, along with +// a new byte slice with the decoded bytes consumed, and a nil error value. On +// error, it returns nil for the datum value, the original byte slice, and the +// error message. +// +// func ExampleNativeFromTextual() { + // codec, err := goavro.NewCodec(` + // { + // "type": "record", + // "name": "LongList", + // "fields" : [ + // {"name": "next", "type": ["null", "LongList"], "default": null} + // ] + // }`) + // if err != nil { + // fmt.Println(err) + // } + // + // // Convert textual Avro data to native Go form + // text := []byte(`{"next":{"LongList":{"next":{"LongList":{"next":null}}}}}`) + // + // native, _, err := codec.NativeFromTextual(text) + // if err != nil { + // fmt.Println(err) + // } + // + // fmt.Printf("%v", native) + // // Output: map[next:map[LongList:map[next:map[LongList:map[next:]]]]] + // } +func (c *Codec) NativeFromTextual(buf []byte) (interface{}, []byte, error) { + value, newBuf, err := c.nativeFromTextual(buf) + if err != nil { + return nil, buf, err // if error, return original byte slice + } + return value, newBuf, nil +} + +// TextualFromNative converts Go native data types to Avro data in JSON text format in +// accordance with the Avro schema supplied when creating the Codec. It is +// supplied a byte slice to which to append the encoded data and the actual data +// to encode. On success, it returns a new byte slice with the encoded bytes +// appended, and a nil error value. On error, it returns the original byte +// slice, and the error message.
+// +// func ExampleTextualFromNative() { +// codec, err := goavro.NewCodec(` +// { +// "type": "record", +// "name": "LongList", +// "fields" : [ +// {"name": "next", "type": ["null", "LongList"], "default": null} +// ] +// }`) +// if err != nil { +// fmt.Println(err) +// } +// +// // Convert native Go form to text Avro data +// text, err := codec.TextualFromNative(nil, map[string]interface{}{ +// "next": map[string]interface{}{ +// "LongList": map[string]interface{}{ +// "next": map[string]interface{}{ +// "LongList": map[string]interface{}{ +// // NOTE: May omit fields when using default value +// }, +// }, +// }, +// }, +// }) +// if err != nil { +// fmt.Println(err) +// } +// +// fmt.Printf("%s", text) +// // Output: {"next":{"LongList":{"next":{"LongList":{"next":null}}}}} +// } +func (c *Codec) TextualFromNative(buf []byte, datum interface{}) ([]byte, error) { + newBuf, err := c.textualFromNative(buf, datum) + if err != nil { + return buf, err // if error, return original byte slice + } + return newBuf, nil +} + +// Schema returns the compact schema used to create the Codec. +// +// func ExampleCodecSchema() { +// schema := `{"type":"map","values":{"type":"enum","name":"foo","symbols":["alpha","bravo"]}}` +// codec, err := goavro.NewCodec(schema) +// if err != nil { +// fmt.Println(err) +// } +// fmt.Println(codec.Schema()) +// // Output: {"type":"map","values":{"name":"foo","type":"enum","symbols":["alpha","bravo"]}} +// } +func (c *Codec) Schema() string { + return c.schema +} + +// convert a schema data structure to a codec, prefixing with specified +// namespace +func buildCodec(st map[string]*Codec, enclosingNamespace string, schema interface{}) (*Codec, error) { + switch schemaType := schema.(type) { + case map[string]interface{}: + return buildCodecForTypeDescribedByMap(st, enclosingNamespace, schemaType) + case string: + return buildCodecForTypeDescribedByString(st, enclosingNamespace, schemaType, nil) + case []interface{}: + return buildCodecForTypeDescribedBySlice(st, enclosingNamespace, schemaType) + default: + return nil, fmt.Errorf("unknown schema type: %T", schema) + } +} + +// Reach into the map, grabbing its "type". Use that to create the codec. +func buildCodecForTypeDescribedByMap(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}) (*Codec, error) { + t, ok := schemaMap["type"] + if !ok { + return nil, fmt.Errorf("missing type: %v", schemaMap) + } + switch v := t.(type) { + case string: + // Already defined types may be abbreviated with its string name. + // EXAMPLE: "type":"array" + // EXAMPLE: "type":"enum" + // EXAMPLE: "type":"fixed" + // EXAMPLE: "type":"int" + // EXAMPLE: "type":"record" + // EXAMPLE: "type":"somePreviouslyDefinedCustomTypeString" + return buildCodecForTypeDescribedByString(st, enclosingNamespace, v, schemaMap) + case map[string]interface{}: + return buildCodecForTypeDescribedByMap(st, enclosingNamespace, v) + case []interface{}: + return buildCodecForTypeDescribedBySlice(st, enclosingNamespace, v) + default: + return nil, fmt.Errorf("type ought to be either string, map[string]interface{}, or []interface{}; received: %T", t) + } +} + +func buildCodecForTypeDescribedByString(st map[string]*Codec, enclosingNamespace string, typeName string, schemaMap map[string]interface{}) (*Codec, error) { + // NOTE: When codec already exists, return it. This includes both primitive + // type codecs added in NewCodec, and user-defined types, added while + // building the codec. 
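// For example (an illustrative schema, not taken from this change): after
// {"type":"record","name":"com.example.Foo",...} is built, the symbol
// table holds the key "com.example.Foo", so a later field declared simply
// as "type":"Foo" inside namespace "com.example" resolves through the
// namespace-qualified lookup below.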
+ if cd, ok := st[typeName]; ok { + return cd, nil + } + // NOTE: Sometimes schema may abbreviate type name inside a namespace. + if enclosingNamespace != "" { + if cd, ok := st[enclosingNamespace+"."+typeName]; ok { + return cd, nil + } + } + // There are only a small handful of complex Avro data types. + switch typeName { + case "array": + return makeArrayCodec(st, enclosingNamespace, schemaMap) + case "enum": + return makeEnumCodec(st, enclosingNamespace, schemaMap) + case "fixed": + return makeFixedCodec(st, enclosingNamespace, schemaMap) + case "map": + return makeMapCodec(st, enclosingNamespace, schemaMap) + case "record": + return makeRecordCodec(st, enclosingNamespace, schemaMap) + default: + return nil, fmt.Errorf("unknown type name: %q", typeName) + } +} + +// notion of enclosing namespace changes when record, enum, or fixed create a +// new namespace, for child objects. +func registerNewCodec(st map[string]*Codec, schemaMap map[string]interface{}, enclosingNamespace string) (*Codec, error) { + n, err := newNameFromSchemaMap(enclosingNamespace, schemaMap) + if err != nil { + return nil, err + } + c := &Codec{typeName: n} + st[n.fullName] = c + return c, nil +} diff --git a/vendor/github.com/linkedin/goavro/doc.go b/vendor/github.com/linkedin/goavro/doc.go new file mode 100644 index 00000000000..d6d73c01289 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/doc.go @@ -0,0 +1,68 @@ +/* +Package goavro is a library that encodes and decodes Avro data. + +Goavro provides methods to encode native Go data into both binary and textual +JSON Avro data, and methods to decode both binary and textual JSON Avro data to +native Go data. + +Goavro also provides methods to read and write Object Container File (OCF) +formatted files, and the library contains example programs to read and write OCF +files. + +Usage Example: + + package main + + import ( + "fmt" + + "github.com/linkedin/goavro" + ) + + func main() { + codec, err := goavro.NewCodec(` + { + "type": "record", + "name": "LongList", + "fields" : [ + {"name": "next", "type": ["null", "LongList"], "default": null} + ] + }`) + if err != nil { + fmt.Println(err) + } + + // NOTE: May omit fields when using default value + textual := []byte(`{"next":{"LongList":{}}}`) + + // Convert textual Avro data (in Avro JSON format) to native Go form + native, _, err := codec.NativeFromTextual(textual) + if err != nil { + fmt.Println(err) + } + + // Convert native Go form to binary Avro data + binary, err := codec.BinaryFromNative(nil, native) + if err != nil { + fmt.Println(err) + } + + // Convert binary Avro data back to native Go form + native, _, err = codec.NativeFromBinary(binary) + if err != nil { + fmt.Println(err) + } + + // Convert native Go form to textual Avro data + textual, err = codec.TextualFromNative(nil, native) + if err != nil { + fmt.Println(err) + } + + // NOTE: Textual encoding will show all fields, even those with values that + // match their default values + fmt.Println(string(textual)) + // Output: {"next":{"LongList":{"next":null}}} + } +*/ +package goavro diff --git a/vendor/github.com/linkedin/goavro/enum.go b/vendor/github.com/linkedin/goavro/enum.go new file mode 100644 index 00000000000..af9b6f389b4 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/enum.go @@ -0,0 +1,105 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +package goavro + +import ( + "fmt" + "io" +) + +// enum does not have child objects, therefore whatever namespace it defines is +// just to store its name in the symbol table. +func makeEnumCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}) (*Codec, error) { + c, err := registerNewCodec(st, schemaMap, enclosingNamespace) + if err != nil { + return nil, fmt.Errorf("Enum ought to have valid name: %s", err) + } + + // enum type must have symbols + s1, ok := schemaMap["symbols"] + if !ok { + return nil, fmt.Errorf("Enum %q ought to have symbols key", c.typeName) + } + s2, ok := s1.([]interface{}) + if !ok || len(s2) == 0 { + return nil, fmt.Errorf("Enum %q symbols ought to be non-empty array of strings: %v", c.typeName, s1) + } + symbols := make([]string, len(s2)) + for i, s := range s2 { + symbol, ok := s.(string) + if !ok { + return nil, fmt.Errorf("Enum %q symbol %d ought to be non-empty string; received: %T", c.typeName, i+1, symbol) + } + if err := checkString(symbol); err != nil { + return nil, fmt.Errorf("Enum %q symbol %d ought to %s", c.typeName, i+1, err) + } + symbols[i] = symbol + } + + c.nativeFromBinary = func(buf []byte) (interface{}, []byte, error) { + var value interface{} + var err error + var index int64 + + if value, buf, err = longNativeFromBinary(buf); err != nil { + return nil, nil, fmt.Errorf("cannot decode binary enum %q index: %s", c.typeName, err) + } + index = value.(int64) + if index < 0 || index >= int64(len(symbols)) { + return nil, nil, fmt.Errorf("cannot decode binary enum %q: index ought to be between 0 and %d; read index: %d", c.typeName, len(symbols)-1, index) + } + return symbols[index], buf, nil + } + c.binaryFromNative = func(buf []byte, datum interface{}) ([]byte, error) { + someString, ok := datum.(string) + if !ok { + return nil, fmt.Errorf("cannot encode binary enum %q: expected string; received: %T", c.typeName, datum) + } + for i, symbol := range symbols { + if symbol == someString { + return longBinaryFromNative(buf, i) + } + } + return nil, fmt.Errorf("cannot encode binary enum %q: value ought to be member of symbols: %v; %q", c.typeName, symbols, someString) + } + c.nativeFromTextual = func(buf []byte) (interface{}, []byte, error) { + if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { + return nil, nil, fmt.Errorf("cannot decode textual enum: %s", io.ErrShortBuffer) + } + // decode enum string + var value interface{} + var err error + value, buf, err = stringNativeFromTextual(buf) + if err != nil { + return nil, nil, fmt.Errorf("cannot decode textual enum: expected key: %s", err) + } + someString := value.(string) + for _, symbol := range symbols { + if symbol == someString { + return someString, buf, nil + } + } + return nil, nil, fmt.Errorf("cannot decode textual enum %q: value ought to be member of symbols: %v; %q", c.typeName, symbols, someString) + } + c.textualFromNative = func(buf []byte, datum interface{}) ([]byte, error) { + someString, ok := datum.(string) + if !ok { + return nil, fmt.Errorf("cannot encode textual enum %q: expected string; received: %T", c.typeName, datum) + } + for _, symbol := range symbols { + if symbol == someString { + return stringTextualFromNative(buf, 
someString) + } + } + return nil, fmt.Errorf("cannot encode textual enum %q: value ought to be member of symbols: %v; %q", c.typeName, symbols, someString) + } + + return c, nil +} diff --git a/vendor/github.com/linkedin/goavro/fixed.go b/vendor/github.com/linkedin/goavro/fixed.go new file mode 100644 index 00000000000..e63ebf02763 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/fixed.go @@ -0,0 +1,81 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +package goavro + +import ( + "fmt" +) + +// Fixed does not have child objects, therefore whatever namespace it defines is +// just to store its name in the symbol table. +func makeFixedCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}) (*Codec, error) { + c, err := registerNewCodec(st, schemaMap, enclosingNamespace) + if err != nil { + return nil, fmt.Errorf("Fixed ought to have valid name: %s", err) + } + // Fixed type must have size + s1, ok := schemaMap["size"] + if !ok { + return nil, fmt.Errorf("Fixed %q ought to have size key", c.typeName) + } + s2, ok := s1.(float64) + if !ok || s2 <= 0 { + return nil, fmt.Errorf("Fixed %q size ought to be number greater than zero: %v", c.typeName, s1) + } + size := uint(s2) + + c.nativeFromBinary = func(buf []byte) (interface{}, []byte, error) { + if buflen := uint(len(buf)); size > buflen { + return nil, nil, fmt.Errorf("cannot decode binary fixed %q: schema size exceeds remaining buffer size: %d > %d (short buffer)", c.typeName, size, buflen) + } + return buf[:size], buf[size:], nil + } + + c.binaryFromNative = func(buf []byte, datum interface{}) ([]byte, error) { + someBytes, ok := datum.([]byte) + if !ok { + return nil, fmt.Errorf("cannot encode binary fixed %q: expected []byte; received: %T", c.typeName, datum) + } + if count := uint(len(someBytes)); count != size { + return nil, fmt.Errorf("cannot encode binary fixed %q: datum size ought to equal schema size: %d != %d", c.typeName, count, size) + } + return append(buf, someBytes...), nil + } + + c.nativeFromTextual = func(buf []byte) (interface{}, []byte, error) { + if buflen := uint(len(buf)); size > buflen { + return nil, nil, fmt.Errorf("cannot decode textual fixed %q: schema size exceeds remaining buffer size: %d > %d (short buffer)", c.typeName, size, buflen) + } + var datum interface{} + var err error + datum, buf, err = bytesNativeFromTextual(buf) + if err != nil { + return nil, buf, err + } + datumBytes := datum.([]byte) + if count := uint(len(datumBytes)); count != size { + return nil, nil, fmt.Errorf("cannot decode textual fixed %q: datum size ought to equal schema size: %d != %d", c.typeName, count, size) + } + return datum, buf, err + } + + c.textualFromNative = func(buf []byte, datum interface{}) ([]byte, error) { + someBytes, ok := datum.([]byte) + if !ok { + return nil, fmt.Errorf("cannot encode textual fixed %q: expected []byte; received: %T", c.typeName, datum) + } + if count := uint(len(someBytes)); count != size { + return nil, fmt.Errorf("cannot encode textual fixed %q: datum size ought to equal schema size: %d != %d", c.typeName, count, size) + } + 
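// Illustration (an assumed schema, not from this file): given
// {"type":"fixed","name":"md5","size":16}, a 16-byte []byte datum passes
// the size check above and is rendered as a JSON string by
// bytesTextualFromNative below; in binary form it is appended verbatim.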
return bytesTextualFromNative(buf, someBytes) + } + + return c, nil +} diff --git a/vendor/github.com/linkedin/goavro/floatingPoint.go b/vendor/github.com/linkedin/goavro/floatingPoint.go new file mode 100644 index 00000000000..293967ba80f --- /dev/null +++ b/vendor/github.com/linkedin/goavro/floatingPoint.go @@ -0,0 +1,293 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +package goavro + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "math" + "strconv" +) + +const ( + doubleEncodedLength = 8 // double requires 8 bytes + floatEncodedLength = 4 // float requires 4 bytes +) + +//////////////////////////////////////// +// Binary Decode +//////////////////////////////////////// + +func doubleNativeFromBinary(buf []byte) (interface{}, []byte, error) { + if len(buf) < doubleEncodedLength { + return nil, nil, fmt.Errorf("cannot decode binary double: %s", io.ErrShortBuffer) + } + return math.Float64frombits(binary.LittleEndian.Uint64(buf[:doubleEncodedLength])), buf[doubleEncodedLength:], nil +} + +func floatNativeFromBinary(buf []byte) (interface{}, []byte, error) { + if len(buf) < floatEncodedLength { + return nil, nil, fmt.Errorf("cannot decode binary float: %s", io.ErrShortBuffer) + } + return math.Float32frombits(binary.LittleEndian.Uint32(buf[:floatEncodedLength])), buf[floatEncodedLength:], nil +} + +//////////////////////////////////////// +// Binary Encode +//////////////////////////////////////// + +func doubleBinaryFromNative(buf []byte, datum interface{}) ([]byte, error) { + var value float64 + switch v := datum.(type) { + case float64: + value = v + case float32: + value = float64(v) + case int: + if value = float64(v); int(value) != v { + return nil, fmt.Errorf("cannot encode binary double: provided Go int would lose precision: %d", v) + } + case int64: + if value = float64(v); int64(value) != v { + return nil, fmt.Errorf("cannot encode binary double: provided Go int64 would lose precision: %d", v) + } + case int32: + if value = float64(v); int32(value) != v { + return nil, fmt.Errorf("cannot encode binary double: provided Go int32 would lose precision: %d", v) + } + default: + return nil, fmt.Errorf("cannot encode binary double: expected: Go numeric; received: %T", datum) + } + buf = append(buf, 0, 0, 0, 0, 0, 0, 0, 0) + binary.LittleEndian.PutUint64(buf[len(buf)-doubleEncodedLength:], math.Float64bits(value)) + return buf, nil +} + +func floatBinaryFromNative(buf []byte, datum interface{}) ([]byte, error) { + var value float32 + switch v := datum.(type) { + case float32: + value = v + case float64: + // Assume runtime can cast special floats correctly, and if there is a + // loss of precision from float64 and float32, that should be expected + // or at least understood by the client. 
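// For illustration (not upstream text): binary floats and doubles are the
// raw IEEE 754 bits in little-endian byte order. For example,
//
//	math.Float64bits(1.0) == 0x3FF0000000000000
//
// which doubleBinaryFromNative above appends as the bytes
// {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x3F}.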
+ value = float32(v) + case int: + if value = float32(v); int(value) != v { + return nil, fmt.Errorf("cannot encode binary float: provided Go int would lose precision: %d", v) + } + case int64: + if value = float32(v); int64(value) != v { + return nil, fmt.Errorf("cannot encode binary float: provided Go int64 would lose precision: %d", v) + } + case int32: + if value = float32(v); int32(value) != v { + return nil, fmt.Errorf("cannot encode binary float: provided Go int32 would lose precision: %d", v) + } + default: + return nil, fmt.Errorf("cannot encode binary float: expected: Go numeric; received: %T", datum) + } + // return floatingBinaryEncoder(buf, uint64(math.Float32bits(value)), floatEncodedLength) + buf = append(buf, 0, 0, 0, 0) + binary.LittleEndian.PutUint32(buf[len(buf)-floatEncodedLength:], uint32(math.Float32bits(value))) + return buf, nil +} + +//////////////////////////////////////// +// Text Decode +//////////////////////////////////////// + +func doubleNativeFromTextual(buf []byte) (interface{}, []byte, error) { + return floatingTextDecoder(buf, 64) +} + +func floatNativeFromTextual(buf []byte) (interface{}, []byte, error) { + return floatingTextDecoder(buf, 32) +} + +func floatingTextDecoder(buf []byte, bitSize int) (interface{}, []byte, error) { + buflen := len(buf) + if buflen >= 4 { + if bytes.Equal(buf[:4], []byte("null")) { + return math.NaN(), buf[4:], nil + } + if buflen >= 5 { + if bytes.Equal(buf[:5], []byte("1e999")) { + return math.Inf(1), buf[5:], nil + } + if buflen >= 6 { + if bytes.Equal(buf[:6], []byte("-1e999")) { + return math.Inf(-1), buf[6:], nil + } + } + } + } + index, err := numberLength(buf, true) // NOTE: floatAllowed = true + if err != nil { + return nil, nil, err + } + datum, err := strconv.ParseFloat(string(buf[:index]), bitSize) + if err != nil { + return nil, nil, err + } + return datum, buf[index:], nil +} + +func numberLength(buf []byte, floatAllowed bool) (int, error) { + // ALGORITHM: increment index as long as bytes are valid for number state engine. + var index, buflen, count int + var b byte + + // STATE 0: begin, optional: - + if buflen = len(buf); index == buflen { + return 0, io.ErrShortBuffer + } + if buf[index] == '-' { + if index++; index == buflen { + return 0, io.ErrShortBuffer + } + } + // STATE 1: if 0, goto 2; otherwise if 1-9, goto 3; otherwise bail + if b = buf[index]; b == '0' { + if index++; index == buflen { + return index, nil // valid number + } + } else if b >= '1' && b <= '9' { + if index++; index == buflen { + return index, nil // valid number + } + // STATE 3: absorb zero or more digits + for { + if b = buf[index]; b < '0' || b > '9' { + break + } + if index++; index == buflen { + return index, nil // valid number + } + } + } else { + return 0, fmt.Errorf("unexpected byte: %q", b) + } + if floatAllowed { + // STATE 2: if ., goto 4; otherwise goto 5 + if buf[index] == '.' 
{ + if index++; index == buflen { + return 0, io.ErrShortBuffer + } + // STATE 4: absorb one or more digits + for { + if b = buf[index]; b < '0' || b > '9' { + break + } + count++ + if index++; index == buflen { + return index, nil // valid number + } + } + if count == 0 { + // did not get at least one digit + return 0, fmt.Errorf("unexpected byte: %q", b) + } + } + // STATE 5: if e|E, goto 6; otherwise goto 7 + if b = buf[index]; b == 'e' || b == 'E' { + if index++; index == buflen { + return 0, io.ErrShortBuffer + } + // STATE 6: if -|+, consume it; either way goto 8 + if b = buf[index]; b == '+' || b == '-' { + if index++; index == buflen { + return 0, io.ErrShortBuffer + } + } + // STATE 8: absorb one or more digits + count = 0 + for { + if b = buf[index]; b < '0' || b > '9' { + break + } + count++ + if index++; index == buflen { + return index, nil // valid number + } + } + if count == 0 { + // did not get at least one digit + return 0, fmt.Errorf("unexpected byte: %q", b) + } + } + } + // STATE 7: end + return index, nil +} + +//////////////////////////////////////// +// Text Encode +//////////////////////////////////////// + +func floatTextualFromNative(buf []byte, datum interface{}) ([]byte, error) { + return floatingTextEncoder(buf, datum, 32) +} + +func doubleTextualFromNative(buf []byte, datum interface{}) ([]byte, error) { + return floatingTextEncoder(buf, datum, 64) +} + +func floatingTextEncoder(buf []byte, datum interface{}, bitSize int) ([]byte, error) { + var isFloat bool + var someFloat64 float64 + var someInt64 int64 + switch v := datum.(type) { + case float32: + isFloat = true + someFloat64 = float64(v) + case float64: + isFloat = true + someFloat64 = v + case int: + if someInt64 = int64(v); int(someInt64) != v { + if bitSize == 64 { + return nil, fmt.Errorf("cannot encode textual double: provided Go int would lose precision: %d", v) + } + return nil, fmt.Errorf("cannot encode textual float: provided Go int would lose precision: %d", v) + } + case int64: + someInt64 = v + case int32: + if someInt64 = int64(v); int32(someInt64) != v { + if bitSize == 64 { + return nil, fmt.Errorf("cannot encode textual double: provided Go int32 would lose precision: %d", v) + } + return nil, fmt.Errorf("cannot encode textual float: provided Go int32 would lose precision: %d", v) + } + default: + if bitSize == 64 { + return nil, fmt.Errorf("cannot encode textual double: expected: Go numeric; received: %T", datum) + } + return nil, fmt.Errorf("cannot encode textual float: expected: Go numeric; received: %T", datum) + } + + if isFloat { + if math.IsNaN(someFloat64) { + return append(buf, "null"...), nil + } + if math.IsInf(someFloat64, 1) { + return append(buf, "1e999"...), nil + } + if math.IsInf(someFloat64, -1) { + return append(buf, "-1e999"...), nil + } + return strconv.AppendFloat(buf, someFloat64, 'g', -1, bitSize), nil + } + return strconv.AppendInt(buf, someInt64, 10), nil +} diff --git a/vendor/github.com/linkedin/goavro/integer.go b/vendor/github.com/linkedin/goavro/integer.go new file mode 100644 index 00000000000..0355036f459 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/integer.go @@ -0,0 +1,199 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License.
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +package goavro + +import ( + "fmt" + "io" + "strconv" +) + +const ( + intDownShift = uint32(31) + intFlag = byte(128) + intMask = byte(127) + longDownShift = uint32(63) +) + +//////////////////////////////////////// +// Binary Decode +//////////////////////////////////////// + +func intNativeFromBinary(buf []byte) (interface{}, []byte, error) { + var offset, value int + var shift uint + for offset = 0; offset < len(buf); offset++ { + b := buf[offset] + value |= int(b&intMask) << shift + if b&intFlag == 0 { + return (int32(value>>1) ^ -int32(value&1)), buf[offset+1:], nil + } + shift += 7 + } + return nil, nil, io.ErrShortBuffer +} + +func longNativeFromBinary(buf []byte) (interface{}, []byte, error) { + var offset int + var value uint64 + var shift uint + for offset = 0; offset < len(buf); offset++ { + b := buf[offset] + value |= uint64(b&intMask) << shift + if b&intFlag == 0 { + return (int64(value>>1) ^ -int64(value&1)), buf[offset+1:], nil + } + shift += 7 + } + return nil, nil, io.ErrShortBuffer +} + +//////////////////////////////////////// +// Binary Encode +//////////////////////////////////////// + +func intBinaryFromNative(buf []byte, datum interface{}) ([]byte, error) { + var value int32 + switch v := datum.(type) { + case int32: + value = v + case int: + if value = int32(v); int(value) != v { + return nil, fmt.Errorf("cannot encode binary int: provided Go int would lose precision: %d", v) + } + case int64: + if value = int32(v); int64(value) != v { + return nil, fmt.Errorf("cannot encode binary int: provided Go int64 would lose precision: %d", v) + } + case float64: + if value = int32(v); float64(value) != v { + return nil, fmt.Errorf("cannot encode binary int: provided Go float64 would lose precision: %f", v) + } + case float32: + if value = int32(v); float32(value) != v { + return nil, fmt.Errorf("cannot encode binary int: provided Go float32 would lose precision: %f", v) + } + default: + return nil, fmt.Errorf("cannot encode binary int: expected: Go numeric; received: %T", datum) + } + encoded := uint64((uint32(value) << 1) ^ uint32(value>>intDownShift)) + return integerBinaryEncoder(buf, encoded) +} + +func longBinaryFromNative(buf []byte, datum interface{}) ([]byte, error) { + var value int64 + switch v := datum.(type) { + case int64: + value = v + case int: + value = int64(v) + case int32: + value = int64(v) + case float64: + if value = int64(v); float64(value) != v { + return nil, fmt.Errorf("cannot encode binary long: provided Go float64 would lose precision: %f", v) + } + case float32: + if value = int64(v); float32(value) != v { + return nil, fmt.Errorf("cannot encode binary long: provided Go float32 would lose precision: %f", v) + } + default: + return nil, fmt.Errorf("long: expected: Go numeric; received: %T", datum) + } + encoded := (uint64(value) << 1) ^ uint64(value>>longDownShift) + return integerBinaryEncoder(buf, encoded) +} + +func integerBinaryEncoder(buf []byte, encoded uint64) ([]byte, error) { + // used by both intBinaryEncoder and longBinaryEncoder + if encoded == 0 { + return append(buf, 0), nil + } + for encoded > 0 { + b := byte(encoded) & intMask + encoded = encoded >> 7 + if encoded != 0 { + b |= intFlag // set high bit; we have more bytes + } 
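// Illustration (hedged, not upstream text): for the long 75 the caller
// passes encoded == (uint64(75)<<1)^uint64(75>>63) == 150, and this loop
// emits 0x96 then 0x01: seven bits at a time, low group first, with the
// high bit set on every byte except the last.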
+ buf = append(buf, b) + } + return buf, nil +} + +//////////////////////////////////////// +// Text Decode +//////////////////////////////////////// + +func longNativeFromTextual(buf []byte) (interface{}, []byte, error) { + return integerTextDecoder(buf, 64) +} + +func intNativeFromTextual(buf []byte) (interface{}, []byte, error) { + return integerTextDecoder(buf, 32) +} + +func integerTextDecoder(buf []byte, bitSize int) (interface{}, []byte, error) { + index, err := numberLength(buf, false) // NOTE: floatAllowed = false + if err != nil { + return nil, nil, err + } + datum, err := strconv.ParseInt(string(buf[:index]), 10, bitSize) + if err != nil { + return nil, nil, err + } + if bitSize == 32 { + return int32(datum), buf[index:], nil + } + return datum, buf[index:], nil +} + +//////////////////////////////////////// +// Text Encode +//////////////////////////////////////// + +func longTextualFromNative(buf []byte, datum interface{}) ([]byte, error) { + return integerTextEncoder(buf, datum, 64) +} + +func intTextualFromNative(buf []byte, datum interface{}) ([]byte, error) { + return integerTextEncoder(buf, datum, 32) +} + +func integerTextEncoder(buf []byte, datum interface{}, bitSize int) ([]byte, error) { + var someInt64 int64 + switch v := datum.(type) { + case int: + someInt64 = int64(v) + case int32: + someInt64 = int64(v) + case int64: + someInt64 = v + case float32: + if someInt64 = int64(v); float32(someInt64) != v { + if bitSize == 64 { + return nil, fmt.Errorf("cannot encode textual long: provided Go float32 would lose precision: %f", v) + } + return nil, fmt.Errorf("cannot encode textual int: provided Go float32 would lose precision: %f", v) + } + case float64: + if someInt64 = int64(v); float64(someInt64) != v { + if bitSize == 64 { + return nil, fmt.Errorf("cannot encode textual long: provided Go float64 would lose precision: %f", v) + } + return nil, fmt.Errorf("cannot encode textual int: provided Go float64 would lose precision: %f", v) + } + default: + if bitSize == 64 { + return nil, fmt.Errorf("cannot encode textual long: expected: Go numeric; received: %T", datum) + } + return nil, fmt.Errorf("cannot encode textual int: expected: Go numeric; received: %T", datum) + } + return strconv.AppendInt(buf, someInt64, 10), nil +} diff --git a/vendor/github.com/linkedin/goavro/map.go b/vendor/github.com/linkedin/goavro/map.go new file mode 100644 index 00000000000..5d330a1483c --- /dev/null +++ b/vendor/github.com/linkedin/goavro/map.go @@ -0,0 +1,307 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
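// For illustration (a hedged usage sketch; the schema below is assumed,
// not taken from this change): the map codec defined in this file lets a
// caller decode Avro maps into Go maps:
//
//	codec, _ := goavro.NewCodec(`{"type":"map","values":"long"}`)
//	native, _, _ := codec.NativeFromBinary(data) // map[string]interface{}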
+
+package goavro
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"math"
+	"reflect"
+)
+
+func makeMapCodec(st map[string]*Codec, namespace string, schemaMap map[string]interface{}) (*Codec, error) {
+	// map type must have values
+	valueSchema, ok := schemaMap["values"]
+	if !ok {
+		return nil, errors.New("Map ought to have values key")
+	}
+	valueCodec, err := buildCodec(st, namespace, valueSchema)
+	if err != nil {
+		return nil, fmt.Errorf("Map values ought to be valid Avro type: %s", err)
+	}
+
+	return &Codec{
+		typeName: &name{"map", nullNamespace},
+		nativeFromBinary: func(buf []byte) (interface{}, []byte, error) {
+			var err error
+			var value interface{}
+
+			// block count and block size
+			if value, buf, err = longNativeFromBinary(buf); err != nil {
+				return nil, nil, fmt.Errorf("cannot decode binary map block count: %s", err)
+			}
+			blockCount := value.(int64)
+			if blockCount < 0 {
+				// NOTE: A negative block count implies there is a long encoded
+				// block size following the negative block count. We have no use
+				// for the block size in this decoder, so we read and discard
+				// the value.
+				if blockCount == math.MinInt64 {
+					// The minimum number for any signed numerical type can
+					// never be made positive
+					return nil, nil, fmt.Errorf("cannot decode binary map with block count: %d", math.MinInt64)
+				}
+				blockCount = -blockCount // convert to its positive equivalent
+				if _, buf, err = longNativeFromBinary(buf); err != nil {
+					return nil, nil, fmt.Errorf("cannot decode binary map block size: %s", err)
+				}
+			}
+			// Ensure block count does not exceed some sane value.
+			if blockCount > MaxBlockCount {
+				return nil, nil, fmt.Errorf("cannot decode binary map when block count exceeds MaxBlockCount: %d > %d", blockCount, MaxBlockCount)
+			}
+			// NOTE: While the RAM optimization shown below is not strictly
+			// necessary, many encoders will encode all items in a single
+			// block. We can reduce the RAM the runtime allocates for the map
+			// by initializing it for that number of items.
+			mapValues := make(map[string]interface{}, blockCount)
+
+			for blockCount != 0 {
+				// Decode `blockCount` datum values from buffer
+				for i := int64(0); i < blockCount; i++ {
+					// first decode the key string
+					if value, buf, err = stringNativeFromBinary(buf); err != nil {
+						return nil, nil, fmt.Errorf("cannot decode binary map key: %s", err)
+					}
+					key := value.(string) // string decoder always returns a string
+					if _, ok := mapValues[key]; ok {
+						return nil, nil, fmt.Errorf("cannot decode binary map: duplicate key: %q", key)
+					}
+					// then decode the value
+					if value, buf, err = valueCodec.nativeFromBinary(buf); err != nil {
+						return nil, nil, fmt.Errorf("cannot decode binary map value for key %q: %s", key, err)
+					}
+					mapValues[key] = value
+				}
+				// Decode next blockCount from buffer, because there may be more blocks
+				if value, buf, err = longNativeFromBinary(buf); err != nil {
+					return nil, nil, fmt.Errorf("cannot decode binary map block count: %s", err)
+				}
+				blockCount = value.(int64)
+				if blockCount < 0 {
+					// NOTE: A negative block count implies there is a long
+					// encoded block size following the negative block count. We
+					// have no use for the block size in this decoder, so we
+					// read and discard the value.
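+					// For example, a writer may encode a two-entry map block
+					// either as block count 2, or as block count -2 followed
+					// by the block's size in bytes.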
+					if blockCount == math.MinInt64 {
+						// The minimum number for any signed numerical type can
+						// never be made positive
+						return nil, nil, fmt.Errorf("cannot decode binary map with block count: %d", math.MinInt64)
+					}
+					blockCount = -blockCount // convert to its positive equivalent
+					if _, buf, err = longNativeFromBinary(buf); err != nil {
+						return nil, nil, fmt.Errorf("cannot decode binary map block size: %s", err)
+					}
+				}
+				// Ensure block count does not exceed some sane value.
+				if blockCount > MaxBlockCount {
+					return nil, nil, fmt.Errorf("cannot decode binary map when block count exceeds MaxBlockCount: %d > %d", blockCount, MaxBlockCount)
+				}
+			}
+			return mapValues, buf, nil
+		},
+		binaryFromNative: func(buf []byte, datum interface{}) ([]byte, error) {
+			mapValues, err := convertMap(datum)
+			if err != nil {
+				return nil, fmt.Errorf("cannot encode binary map: %s", err)
+			}
+
+			keyCount := int64(len(mapValues))
+			var alreadyEncoded, remainingInBlock int64
+
+			for k, v := range mapValues {
+				if remainingInBlock == 0 { // start a new block
+					remainingInBlock = keyCount - alreadyEncoded
+					if remainingInBlock > MaxBlockCount {
+						// limit block count to MaxBlockCount
+						remainingInBlock = MaxBlockCount
+					}
+					buf, _ = longBinaryFromNative(buf, remainingInBlock)
+				}
+
+				// only fails when given non-string, so elide error checking
+				buf, _ = stringBinaryFromNative(buf, k)
+
+				// encode the value
+				if buf, err = valueCodec.binaryFromNative(buf, v); err != nil {
+					return nil, fmt.Errorf("cannot encode binary map value for key %q: %v: %s", k, v, err)
+				}
+
+				remainingInBlock--
+				alreadyEncoded++
+			}
+			return longBinaryFromNative(buf, 0) // append trailing 0 block count to signal end of Map
+		},
+		nativeFromTextual: func(buf []byte) (interface{}, []byte, error) {
+			return genericMapTextDecoder(buf, valueCodec, nil) // codecFromKey == nil
+		},
+		textualFromNative: func(buf []byte, datum interface{}) ([]byte, error) {
+			return genericMapTextEncoder(buf, datum, valueCodec, nil)
+		},
+	}, nil
+}
+
+// genericMapTextDecoder decodes a JSON text blob to a native Go map, using the
+// codecs from codecFromKey, and if a key is not found in that map, from
+// defaultCodec if provided. If defaultCodec is nil, this function returns an
+// error if it encounters a map key that is not present in codecFromKey. If
+// codecFromKey is nil, every map value will be decoded using defaultCodec, if
+// possible.
+func genericMapTextDecoder(buf []byte, defaultCodec *Codec, codecFromKey map[string]*Codec) (map[string]interface{}, []byte, error) {
+	var value interface{}
+	var err error
+	var b byte
+
+	lencodec := len(codecFromKey)
+	mapValues := make(map[string]interface{}, lencodec)
+
+	if buf, err = advanceAndConsume(buf, '{'); err != nil {
+		return nil, nil, err
+	}
+	if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 {
+		return nil, nil, io.ErrShortBuffer
+	}
+	// NOTE: Special case empty map
+	if buf[0] == '}' {
+		return mapValues, buf[1:], nil
+	}
+
+	// NOTE: Also terminates when '}' byte is read.
+	for len(buf) > 0 {
+		// decode key string
+		value, buf, err = stringNativeFromTextual(buf)
+		if err != nil {
+			return nil, nil, fmt.Errorf("cannot decode textual map: expected key: %s", err)
+		}
+		key := value.(string)
+		// Is key already used?
+ if _, ok := mapValues[key]; ok { + return nil, nil, fmt.Errorf("cannot decode textual map: duplicate key: %q", key) + } + // Find a codec for the key + fieldCodec := codecFromKey[key] + if fieldCodec == nil { + fieldCodec = defaultCodec + } + if fieldCodec == nil { + return nil, nil, fmt.Errorf("cannot decode textual map: cannot determine codec: %q", key) + } + // decode colon + if buf, err = advanceAndConsume(buf, ':'); err != nil { + return nil, nil, err + } + // decode value + if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { + return nil, nil, io.ErrShortBuffer + } + value, buf, err = fieldCodec.nativeFromTextual(buf) + if err != nil { + return nil, nil, err + } + // set map value for key + mapValues[key] = value + // either comma or closing curly brace + if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { + return nil, nil, io.ErrShortBuffer + } + switch b = buf[0]; b { + case '}': + return mapValues, buf[1:], nil + case ',': + // no-op + default: + return nil, nil, fmt.Errorf("cannot decode textual map: expected ',' or '}'; received: %q", b) + } + // NOTE: consume comma from above + if buf, _ = advanceToNonWhitespace(buf[1:]); len(buf) == 0 { + return nil, nil, io.ErrShortBuffer + } + } + return nil, nil, io.ErrShortBuffer +} + +// genericMapTextEncoder encodes a native Go map to a JSON text blob, using the +// codecs from codecFromKey, and if a key is not found in that map, from +// defaultCodec if provided. If defaultCodec is nil, this function returns an +// error if it encounters a map key that is not present in codecFromKey. If +// codecFromKey is nil, every map value will be encoded using defaultCodec, if +// possible. +func genericMapTextEncoder(buf []byte, datum interface{}, defaultCodec *Codec, codecFromKey map[string]*Codec) ([]byte, error) { + mapValues, err := convertMap(datum) + if err != nil { + return nil, fmt.Errorf("cannot encode textual map: %s", err) + } + + var atLeastOne bool + + buf = append(buf, '{') + + for key, value := range mapValues { + atLeastOne = true + + // Find a codec for the key + fieldCodec := codecFromKey[key] + if fieldCodec == nil { + fieldCodec = defaultCodec + } + if fieldCodec == nil { + return nil, fmt.Errorf("cannot encode textual map: cannot determine codec: %q", key) + } + // Encode key string + buf, err = stringTextualFromNative(buf, key) + if err != nil { + return nil, err + } + buf = append(buf, ':') + // Encode value + buf, err = fieldCodec.textualFromNative(buf, value) + if err != nil { + // field was specified in datum; therefore its value was invalid + return nil, fmt.Errorf("cannot encode textual map: value for %q does not match its schema: %s", key, err) + } + buf = append(buf, ',') + } + + if atLeastOne { + return append(buf[:len(buf)-1], '}'), nil + } + return append(buf, '}'), nil +} + +// convertMap converts datum to map[string]interface{} if possible. +func convertMap(datum interface{}) (map[string]interface{}, error) { + mapValues, ok := datum.(map[string]interface{}) + if ok { + return mapValues, nil + } + // NOTE: When given a map of any other type, zip values to items as a + // convenience to client. + v := reflect.ValueOf(datum) + if v.Kind() != reflect.Map { + return nil, fmt.Errorf("cannot create map[string]interface{}: expected map[string]...; received: %T", datum) + } + // NOTE: Two better alternatives to the current algorithm are: + // (1) mutate the reflection tuple underneath to convert the + // map[string]int, for example, to map[string]interface{}, with + // O(1) complexity. 
+	// (2) use copy builtin to zip the data items over with O(n) complexity,
+	// but more efficient than what's below.
+	mapValues = make(map[string]interface{}, v.Len())
+	for _, key := range v.MapKeys() {
+		k, ok := key.Interface().(string)
+		if !ok {
+			// bail when map key type is not string
+			return nil, fmt.Errorf("cannot create map[string]interface{}: expected map[string]...; received: %T", datum)
+		}
+		mapValues[string(k)] = v.MapIndex(key).Interface()
+	}
+	return mapValues, nil
+}
diff --git a/vendor/github.com/linkedin/goavro/name.go b/vendor/github.com/linkedin/goavro/name.go
new file mode 100644
index 00000000000..5ca19a4ca17
--- /dev/null
+++ b/vendor/github.com/linkedin/goavro/name.go
@@ -0,0 +1,143 @@
+// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version
+// 2.0 (the "License"); you may not use this file except in compliance with the
+// License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+
+package goavro
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+)
+
+const nullNamespace = ""
+
+// ErrInvalidName is the error returned when one or more parts of an Avro name
+// are invalid.
+type ErrInvalidName struct {
+	Message string
+}
+
+func (e ErrInvalidName) Error() string {
+	return "schema name ought to " + e.Message
+}
+
+// NOTE: This function is designed to work with name components, after they
+// have been split on the period rune.
+func isRuneInvalidForFirstCharacter(r rune) bool {
+	return (r < 'A' || r > 'Z') && (r < 'a' || r > 'z') && r != '_'
+}
+
+func isRuneInvalidForOtherCharacters(r rune) bool {
+	return isRuneInvalidForFirstCharacter(r) && (r < '0' || r > '9')
+}
+
+func checkNameComponent(s string) error {
+	err := checkString(s)
+	if err != nil {
+		return &ErrInvalidName{err.Error()}
+	}
+	return err
+}
+
+func checkString(s string) error {
+	if len(s) == 0 {
+		return errors.New("be non-empty string")
+	}
+	if strings.IndexFunc(s[:1], isRuneInvalidForFirstCharacter) != -1 {
+		return errors.New("start with [A-Za-z_]: " + s)
+	}
+	if strings.IndexFunc(s[1:], isRuneInvalidForOtherCharacters) != -1 {
+		return errors.New("have second and remaining characters contain only [A-Za-z0-9_]: " + s)
+	}
+	return nil
+}
+
+// name describes an Avro name in terms of its full name and namespace.
+type name struct {
+	fullName  string // the instance's Avro name
+	namespace string // for use when building new name from existing one
+}
+
+// newName returns a new Name instance after first ensuring the arguments do
+// not violate any of the Avro naming rules.
+func newName(n, ns, ens string) (*name, error) {
+	var nn name
+
+	if index := strings.LastIndexByte(n, '.'); index > -1 {
+		// the name contains a dot, so ignore everything else and use it as the full name
+		nn.fullName = n
+		nn.namespace = n[:index]
+	} else {
+		// the name does not contain a dot, therefore it is not the full name
+		if ns != nullNamespace {
+			// if a namespace is provided at the same schema level, use it
+			nn.fullName = ns + "." + n
+			nn.namespace = ns
+		} else if ens != nullNamespace {
+			// otherwise if an enclosing namespace is provided, use it
+			nn.fullName = ens + "."
+ n + nn.namespace = ens + } else { + // otherwise no namespace, so use null namespace, the empty string + nn.fullName = n + } + } + + // verify all components of the full name for adherence to Avro naming rules + for i, component := range strings.Split(nn.fullName, ".") { + if i == 0 && RelaxedNameValidation && component == "" { + continue + } + if err := checkNameComponent(component); err != nil { + return nil, err + } + } + + return &nn, nil +} + +var ( + // RelaxedNameValidation causes name validation to allow the first component + // of an Avro namespace to be the empty string. + RelaxedNameValidation bool +) + +func newNameFromSchemaMap(enclosingNamespace string, schemaMap map[string]interface{}) (*name, error) { + var nameString, namespaceString string + + name, ok := schemaMap["name"] + if !ok { + return nil, errors.New("schema ought to have name key") + } + nameString, ok = name.(string) + if !ok || nameString == nullNamespace { + return nil, fmt.Errorf("schema name ought to be non-empty string; received: %T", name) + } + namespace, ok := schemaMap["namespace"] + if ok { + namespaceString, ok = namespace.(string) + if !ok || namespaceString == nullNamespace { + return nil, fmt.Errorf("schema namespace, if provided, ought to be non-empty string; received: %T", namespace) + } + } + + return newName(nameString, namespaceString, enclosingNamespace) +} + +func (n *name) String() string { + return n.fullName +} + +// short returns the name without the prefixed namespace. +func (n *name) short() string { + if index := strings.LastIndexByte(n.fullName, '.'); index > -1 { + return n.fullName[index+1:] + } + return n.fullName +} diff --git a/vendor/github.com/linkedin/goavro/null.go b/vendor/github.com/linkedin/goavro/null.go new file mode 100644 index 00000000000..08328fc8d7b --- /dev/null +++ b/vendor/github.com/linkedin/goavro/null.go @@ -0,0 +1,45 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ +package goavro + +import ( + "bytes" + "errors" + "fmt" + "io" +) + +var nullBytes = []byte("null") + +func nullNativeFromBinary(buf []byte) (interface{}, []byte, error) { return nil, buf, nil } + +func nullBinaryFromNative(buf []byte, datum interface{}) ([]byte, error) { + if datum != nil { + return nil, fmt.Errorf("cannot encode binary null: expected: Go nil; received: %T", datum) + } + return buf, nil +} + +func nullNativeFromTextual(buf []byte) (interface{}, []byte, error) { + if len(buf) < 4 { + return nil, nil, fmt.Errorf("cannot decode textual null: %s", io.ErrShortBuffer) + } + if bytes.Equal(buf[:4], nullBytes) { + return nil, buf[4:], nil + } + return nil, nil, errors.New("cannot decode textual null: expected: null") +} + +func nullTextualFromNative(buf []byte, datum interface{}) ([]byte, error) { + if datum != nil { + return nil, fmt.Errorf("cannot encode textual null: expected: Go nil; received: %T", datum) + } + return append(buf, nullBytes...), nil +} diff --git a/vendor/github.com/linkedin/goavro/ocf.go b/vendor/github.com/linkedin/goavro/ocf.go new file mode 100644 index 00000000000..53fd0eeea69 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/ocf.go @@ -0,0 +1,240 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +package goavro + +import ( + "bytes" + "crypto/rand" + "errors" + "fmt" + "io" +) + +const ( + // CompressionNullLabel is used when OCF blocks are not compressed. + CompressionNullLabel = "null" + + // CompressionDeflateLabel is used when OCF blocks are compressed using the + // deflate algorithm. + CompressionDeflateLabel = "deflate" + + // CompressionSnappyLabel is used when OCF blocks are compressed using the + // snappy algorithm. + CompressionSnappyLabel = "snappy" +) + +// compressionID are values used to specify compression algorithm used to compress +// and decompress Avro Object Container File (OCF) streams. 
+type compressionID uint8
+
+const (
+	compressionNull compressionID = iota
+	compressionDeflate
+	compressionSnappy
+)
+
+const (
+	ocfBlockConst      = 24 // Each OCF block has two longs prefix, and sync marker suffix
+	ocfHeaderSizeConst = 48 // OCF header is usually about 48 bytes longer than its compressed schema
+	ocfMagicString     = "Obj\x01"
+	ocfMetadataSchema  = `{"type":"map","values":"bytes"}`
+	ocfSyncLength      = 16
+)
+
+var (
+	ocfMagicBytes    = []byte(ocfMagicString)
+	ocfMetadataCodec *Codec
+)
+
+func init() {
+	ocfMetadataCodec, _ = NewCodec(ocfMetadataSchema)
+}
+
+type ocfHeader struct {
+	codec         *Codec
+	compressionID compressionID
+	syncMarker    [ocfSyncLength]byte
+	metadata      map[string][]byte
+}
+
+func newOCFHeader(config OCFConfig) (*ocfHeader, error) {
+	var err error
+
+	header := new(ocfHeader)
+
+	//
+	// avro.codec
+	//
+	switch config.CompressionName {
+	case "":
+		header.compressionID = compressionNull
+	case CompressionNullLabel:
+		header.compressionID = compressionNull
+	case CompressionDeflateLabel:
+		header.compressionID = compressionDeflate
+	case CompressionSnappyLabel:
+		header.compressionID = compressionSnappy
+	default:
+		return nil, fmt.Errorf("cannot create OCF header using unrecognized compression algorithm: %q", config.CompressionName)
+	}
+
+	//
+	// avro.schema
+	//
+	if config.Codec != nil {
+		header.codec = config.Codec
+	} else if config.Schema == "" {
+		return nil, fmt.Errorf("cannot create OCF header without either Codec or Schema specified")
+	} else {
+		if header.codec, err = NewCodec(config.Schema); err != nil {
+			return nil, fmt.Errorf("cannot create OCF header: %s", err)
+		}
+	}
+
+	header.metadata = config.MetaData
+
+	//
+	// The 16-byte, randomly-generated sync marker for this file.
+	//
+	_, err = rand.Read(header.syncMarker[:])
+	if err != nil {
+		return nil, err
+	}
+
+	return header, nil
+}
+
+func readOCFHeader(ior io.Reader) (*ocfHeader, error) {
+	//
+	// magic bytes
+	//
+	magic := make([]byte, 4)
+	_, err := io.ReadFull(ior, magic)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read OCF header magic bytes: %s", err)
+	}
+	if !bytes.Equal(magic, ocfMagicBytes) {
+		return nil, fmt.Errorf("cannot read OCF header with invalid magic bytes: %#q", magic)
+	}
+
+	//
+	// metadata
+	//
+	metadata, err := metadataBinaryReader(ior)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read OCF header metadata: %s", err)
+	}
+
+	//
+	// avro.codec
+	//
+	// NOTE: The Avro specification states that the `null` codec is used by
+	// default when "avro.codec" was not included in the metadata header. The
+	// specification does not talk about the case when "avro.codec" was included
+	// with the empty string as its value. I believe it is an error for an OCF
+	// file to provide the empty string as the compression codec. While it
+	// is trivially easy to gracefully handle here, I'm not sure whether this
+	// happens a lot, and don't want to accept bad input unless we have
+	// significant reason to do so.
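+	// The zero value of compressionID is compressionNull, so leaving cID
+	// unset below implements the specified default.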
+ var cID compressionID + value, ok := metadata["avro.codec"] + if ok { + switch avroCodec := string(value); avroCodec { + case CompressionNullLabel: + cID = compressionNull + case CompressionDeflateLabel: + cID = compressionDeflate + case CompressionSnappyLabel: + cID = compressionSnappy + default: + return nil, fmt.Errorf("cannot read OCF header using unrecognized compression algorithm from avro.codec: %q", avroCodec) + } + } + + // + // create goavro.Codec from specified avro.schema + // + value, ok = metadata["avro.schema"] + if !ok { + return nil, errors.New("cannot read OCF header without avro.schema") + } + codec, err := NewCodec(string(value)) + if err != nil { + return nil, fmt.Errorf("cannot read OCF header with invalid avro.schema: %s", err) + } + + header := &ocfHeader{codec: codec, compressionID: cID, metadata: metadata} + + // + // read and store sync marker + // + if n, err := io.ReadFull(ior, header.syncMarker[:]); err != nil { + return nil, fmt.Errorf("cannot read OCF header without sync marker: only read %d of %d bytes: %s", n, ocfSyncLength, err) + } + + // + // header is valid + // + return header, nil +} + +func writeOCFHeader(header *ocfHeader, iow io.Writer) (err error) { + // + // avro.codec + // + var avroCodec string + switch header.compressionID { + case compressionNull: + avroCodec = CompressionNullLabel + case compressionDeflate: + avroCodec = CompressionDeflateLabel + case compressionSnappy: + avroCodec = CompressionSnappyLabel + default: + return fmt.Errorf("should not get here: cannot write OCF header using unrecognized compression algorithm: %d", header.compressionID) + } + + // + // avro.schema + // + // Create buffer for OCF header. The first four bytes are magic, and we'll + // use copy to fill them in, so initialize buffer's length with 4, and its + // capacity equal to length of avro schema plus a constant. + schema := header.codec.Schema() + buf := make([]byte, 4, len(schema)+ocfHeaderSizeConst) + _ = copy(buf, ocfMagicBytes) + + // + // file metadata, including the schema + // + meta := make(map[string]interface{}) + for k, v := range header.metadata { + meta[k] = v + } + meta["avro.schema"] = []byte(schema) + meta["avro.codec"] = []byte(avroCodec) + + buf, err = ocfMetadataCodec.BinaryFromNative(buf, meta) + if err != nil { + return fmt.Errorf("should not get here: cannot write OCF header: %s", err) + } + + // + // 16-byte sync marker + // + buf = append(buf, header.syncMarker[:]...) + + // emit OCF header + _, err = iow.Write(buf) + if err != nil { + return fmt.Errorf("cannot write OCF header: %s", err) + } + return nil +} diff --git a/vendor/github.com/linkedin/goavro/ocf_reader.go b/vendor/github.com/linkedin/goavro/ocf_reader.go new file mode 100644 index 00000000000..c2d4a3d29d7 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/ocf_reader.go @@ -0,0 +1,263 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+
+package goavro
+
+import (
+	"bytes"
+	"compress/flate"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"hash/crc32"
+	"io"
+	"io/ioutil"
+
+	"github.com/golang/snappy"
+)
+
+// OCFReader structure is used to read Object Container Files (OCF).
+type OCFReader struct {
+	header              *ocfHeader
+	block               []byte // buffer from which decoding takes place
+	rerr                error  // most recent error that took place while reading bytes (unrecoverable)
+	ior                 io.Reader
+	readReady           bool  // true after Scan and before Read
+	remainingBlockItems int64 // count of encoded data items remaining in block buffer to be decoded
+}
+
+// NewOCFReader initializes and returns a new structure used to read an Avro
+// Object Container File (OCF).
+//
+//     func example(ior io.Reader) error {
+//         // NOTE: Wrap provided io.Reader in a buffered reader, which improves the
+//         // performance of streaming file data.
+//         br := bufio.NewReader(ior)
+//         ocfr, err := goavro.NewOCFReader(br)
+//         if err != nil {
+//             return err
+//         }
+//         for ocfr.Scan() {
+//             datum, err := ocfr.Read()
+//             if err != nil {
+//                 return err
+//             }
+//             fmt.Println(datum)
+//         }
+//         return ocfr.Err()
+//     }
+func NewOCFReader(ior io.Reader) (*OCFReader, error) {
+	header, err := readOCFHeader(ior)
+	if err != nil {
+		return nil, fmt.Errorf("cannot create OCFReader: %s", err)
+	}
+	return &OCFReader{header: header, ior: ior}, nil
+}
+
+// MetaData returns the file metadata map found within the OCF file.
+func (ocfr *OCFReader) MetaData() map[string][]byte {
+	return ocfr.header.metadata
+}
+
+// Codec returns the codec found within the OCF file.
+func (ocfr *OCFReader) Codec() *Codec {
+	return ocfr.header.codec
+}
+
+// CompressionName returns the name of the compression algorithm found within
+// the OCF file.
+func (ocfr *OCFReader) CompressionName() string {
+	switch ocfr.header.compressionID {
+	case compressionNull:
+		return CompressionNullLabel
+	case compressionDeflate:
+		return CompressionDeflateLabel
+	case compressionSnappy:
+		return CompressionSnappyLabel
+	default:
+		return "should not get here: unrecognized compression algorithm"
+	}
+}
+
+// Err returns the last error encountered while reading the OCF file. See
+// `NewOCFReader` documentation for an example.
+func (ocfr *OCFReader) Err() error {
+	return ocfr.rerr
+}
+
+// Read consumes one datum value from the Avro OCF stream and returns it. Read
+// is designed to be called only once after each invocation of the Scan method.
+// See `NewOCFReader` documentation for an example.
+func (ocfr *OCFReader) Read() (interface{}, error) {
+	// NOTE: Test previous error before testing readReady to prevent overwriting
+	// previous error.
+	if ocfr.rerr != nil {
+		return nil, ocfr.rerr
+	}
+	if !ocfr.readReady {
+		ocfr.rerr = errors.New("Read called without successful Scan")
+		return nil, ocfr.rerr
+	}
+	ocfr.readReady = false
+
+	// decode one datum value from block
+	var datum interface{}
+	datum, ocfr.block, ocfr.rerr = ocfr.header.codec.NativeFromBinary(ocfr.block)
+	if ocfr.rerr != nil {
+		return nil, ocfr.rerr
+	}
+	ocfr.remainingBlockItems--
+
+	return datum, nil
+}
+
+// RemainingBlockItems returns the number of items remaining in the block being
+// processed.
+func (ocfr *OCFReader) RemainingBlockItems() int64 {
+	return ocfr.remainingBlockItems
+}
+
+// Scan returns true when there is at least one more data item to be read from
+// the Avro OCF. Scan ought to be called prior to each invocation of the Read
+// method. See `NewOCFReader` documentation for an example.
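+//
+// Each block that Scan consumes from the stream is framed as a long block
+// count, a long byte size, that many bytes of possibly compressed data, and
+// finally the 16-byte sync marker from the OCF header.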
+func (ocfr *OCFReader) Scan() bool {
+	ocfr.readReady = false
+
+	if ocfr.rerr != nil {
+		return false
+	}
+
+	// NOTE: If there are no more remaining data items from the existing block,
+	// then attempt to slurp in the next block.
+	if ocfr.remainingBlockItems <= 0 {
+		if count := len(ocfr.block); count != 0 {
+			ocfr.rerr = fmt.Errorf("extra bytes between final datum in previous block and block sync marker: %d", count)
+			return false
+		}
+
+		// Read the block count and update the number of remaining items for
+		// this block
+		ocfr.remainingBlockItems, ocfr.rerr = longBinaryReader(ocfr.ior)
+		if ocfr.rerr != nil {
+			if ocfr.rerr == io.EOF {
+				ocfr.rerr = nil // merely end of file, rather than error
+			} else {
+				ocfr.rerr = fmt.Errorf("cannot read block count: %s", ocfr.rerr)
+			}
+			return false
+		}
+		if ocfr.remainingBlockItems <= 0 {
+			ocfr.rerr = fmt.Errorf("cannot decode when block count is not greater than 0: %d", ocfr.remainingBlockItems)
+			return false
+		}
+		if ocfr.remainingBlockItems > MaxBlockCount {
+			ocfr.rerr = fmt.Errorf("cannot decode when block count exceeds MaxBlockCount: %d > %d", ocfr.remainingBlockItems, MaxBlockCount)
+			return false
+		}
+
+		var blockSize int64
+		blockSize, ocfr.rerr = longBinaryReader(ocfr.ior)
+		if ocfr.rerr != nil {
+			ocfr.rerr = fmt.Errorf("cannot read block size: %s", ocfr.rerr)
+			return false
+		}
+		if blockSize <= 0 {
+			ocfr.rerr = fmt.Errorf("cannot decode when block size is not greater than 0: %d", blockSize)
+			return false
+		}
+		if blockSize > MaxBlockSize {
+			ocfr.rerr = fmt.Errorf("cannot decode when block size exceeds MaxBlockSize: %d > %d", blockSize, MaxBlockSize)
+			return false
+		}
+
+		// read entire block into buffer
+		ocfr.block = make([]byte, blockSize)
+		_, ocfr.rerr = io.ReadFull(ocfr.ior, ocfr.block)
+		if ocfr.rerr != nil {
+			ocfr.rerr = fmt.Errorf("cannot read block: %s", ocfr.rerr)
+			return false
+		}
+
+		switch ocfr.header.compressionID {
+		case compressionNull:
+			// no-op
+
+		case compressionDeflate:
+			// NOTE: flate.NewReader wraps with io.ByteReader if argument does
+			// not implement that interface.
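+			// The whole compressed block is already in memory, so inflate it
+			// in one pass and replace the block buffer with the result.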
+			rc := flate.NewReader(bytes.NewBuffer(ocfr.block))
+			ocfr.block, ocfr.rerr = ioutil.ReadAll(rc)
+			if ocfr.rerr != nil {
+				_ = rc.Close()
+				return false
+			}
+			if ocfr.rerr = rc.Close(); ocfr.rerr != nil {
+				return false
+			}
+
+		case compressionSnappy:
+			index := len(ocfr.block) - 4 // last 4 bytes is crc32 of decoded block
+			if index <= 0 {
+				ocfr.rerr = fmt.Errorf("cannot decompress snappy without CRC32 checksum: %d", len(ocfr.block))
+				return false
+			}
+			decoded, err := snappy.Decode(nil, ocfr.block[:index])
+			if err != nil {
+				ocfr.rerr = fmt.Errorf("cannot decompress: %s", err)
+				return false
+			}
+			actualCRC := crc32.ChecksumIEEE(decoded)
+			expectedCRC := binary.BigEndian.Uint32(ocfr.block[index : index+4])
+			if actualCRC != expectedCRC {
+				ocfr.rerr = fmt.Errorf("snappy CRC32 checksum mismatch: %x != %x", actualCRC, expectedCRC)
+				return false
+			}
+			ocfr.block = decoded
+
+		default:
+			ocfr.rerr = fmt.Errorf("should not get here: cannot compress block using unrecognized compression: %d", ocfr.header.compressionID)
+			return false
+
+		}
+
+		// read and ensure sync marker matches
+		sync := make([]byte, ocfSyncLength)
+		var n int
+		if n, ocfr.rerr = io.ReadFull(ocfr.ior, sync); ocfr.rerr != nil {
+			ocfr.rerr = fmt.Errorf("cannot read sync marker: read %d out of %d bytes: %s", n, ocfSyncLength, ocfr.rerr)
+			return false
+		}
+		if !bytes.Equal(sync, ocfr.header.syncMarker[:]) {
+			ocfr.rerr = fmt.Errorf("sync marker mismatch: %v != %v", sync, ocfr.header.syncMarker)
+			return false
+		}
+	}
+
+	ocfr.readReady = true
+	return true
+}
+
+// SkipThisBlockAndReset can be called after an error occurs while reading or
+// decoding datum values from an OCF stream. OCF specifies that each OCF stream
+// contains one or more blocks of data. Each block consists of a block count,
+// the number of bytes for the block, followed by the possibly compressed
+// block. Inside each decompressed block, all of the binary encoded datum
+// values are concatenated together. In other words, OCF framing is at a block
+// level rather than a datum level. If there is an error while reading or
+// decoding a datum, the reader is not able to skip to the next datum value,
+// because OCF does not have any markers for where each datum ends and the next
+// one begins. Therefore, the reader is only able to skip this datum value and
+// all subsequent datum values in the current block, move to the next block and
+// start decoding datum values there.
+func (ocfr *OCFReader) SkipThisBlockAndReset() {
+	// NOTE: It remains an open question whether it ought to be an error to
+	// call this method unless the reader has had an error.
+	ocfr.remainingBlockItems = 0
+	ocfr.block = ocfr.block[:0]
+	ocfr.rerr = nil
+}
diff --git a/vendor/github.com/linkedin/goavro/ocf_writer.go b/vendor/github.com/linkedin/goavro/ocf_writer.go
new file mode 100644
index 00000000000..bb348a5af68
--- /dev/null
+++ b/vendor/github.com/linkedin/goavro/ocf_writer.go
@@ -0,0 +1,253 @@
+// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version
+// 2.0 (the "License"); you may not use this file except in compliance with the
+// License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+
+package goavro
+
+import (
+	"bytes"
+	"compress/flate"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"hash/crc32"
+	"io"
+	"io/ioutil"
+	"os"
+
+	"github.com/golang/snappy"
+)
+
+// OCFConfig is used to specify creation parameters for OCFWriter.
+type OCFConfig struct {
+	// W specifies the `io.Writer` to which to send the encoded data,
+	// (required). If W is `*os.File`, then creating an OCF for writing will
+	// attempt to read any existing OCF header and use the schema and
+	// compression codec specified by the existing header, then advance the file
+	// position to the tail end of the file for appending.
+	W io.Writer
+
+	// Codec specifies the Codec to use for the new OCFWriter, (optional). If
+	// the W parameter above is an `*os.File` which contains a Codec, the Codec
+	// in the existing file will be used instead. Otherwise if this Codec
+	// parameter is specified, it will be used. If neither the W parameter above
+	// is an `*os.File` with an existing Codec, nor this Codec parameter is
+	// specified, the OCFWriter will create a new Codec from the schema string
+	// specified by the Schema parameter below.
+	Codec *Codec
+
+	// Schema specifies the Avro schema for the data to be encoded, (optional).
+	// If neither the W parameter above is an `*os.File` with an existing Codec,
+	// nor the Codec parameter above is specified, the OCFWriter will create a
+	// new Codec from the schema string specified by this Schema parameter.
+	Schema string
+
+	// CompressionName specifies the compression codec used, (optional). If
+	// omitted, defaults to "null" codec. When appending to an existing OCF,
+	// this field is ignored.
+	CompressionName string
+
+	// MetaData specifies application-specific metadata to be added to the OCF
+	// file. When appending to an existing OCF, this field is ignored.
+	MetaData map[string][]byte
+}
+
+// OCFWriter is used to create a new or append to an existing Avro Object
+// Container File (OCF).
+type OCFWriter struct {
+	header *ocfHeader
+	iow    io.Writer
+}
+
+// NewOCFWriter returns a new OCFWriter instance that may be used for appending
+// binary Avro data, either by appending to an existing OCF file or creating a
+// new OCF file.
+func NewOCFWriter(config OCFConfig) (*OCFWriter, error) {
+	var err error
+	ocf := &OCFWriter{iow: config.W}
+
+	switch config.W.(type) {
+	case nil:
+		return nil, errors.New("cannot create OCFWriter when W is nil")
+	case *os.File:
+		file := config.W.(*os.File)
+		stat, err := file.Stat()
+		if err != nil {
+			return nil, fmt.Errorf("cannot create OCFWriter: %s", err)
+		}
+		// NOTE: When upstream provides a new file, it will already exist but
+		// have a size of 0 bytes.
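+		// A size of zero bytes means a fresh file: fall through and write a
+		// brand new OCF header below.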
+		if stat.Size() > 0 {
+			// attempt to read existing OCF header
+			if ocf.header, err = readOCFHeader(file); err != nil {
+				return nil, fmt.Errorf("cannot create OCFWriter: %s", err)
+			}
+			// prepare for appending data to existing OCF
+			if err = ocf.quickScanToTail(file); err != nil {
+				return nil, fmt.Errorf("cannot create OCFWriter: %s", err)
+			}
+			return ocf, nil // happy case for appending to existing OCF
+		}
+	}
+
+	// create new OCF header based on configuration parameters
+	if ocf.header, err = newOCFHeader(config); err != nil {
+		return nil, fmt.Errorf("cannot create OCFWriter: %s", err)
+	}
+	if err = writeOCFHeader(ocf.header, config.W); err != nil {
+		return nil, fmt.Errorf("cannot create OCFWriter: %s", err)
+	}
+	return ocf, nil // another happy case for creation of new OCF
+}
+
+// quickScanToTail advances the stream reader to the tail end of the
+// file. Rather than reading each encoded block, optionally decompressing it,
+// and then decoding it, this method reads the block count, ignoring it, then
+// reads the block size, then skips ahead to the following block. It does this
+// repeatedly until attempts to read the file return io.EOF.
+func (ocfw *OCFWriter) quickScanToTail(ior io.Reader) error {
+	sync := make([]byte, ocfSyncLength)
+	for {
+		// Read and validate block count
+		blockCount, err := longBinaryReader(ior)
+		if err != nil {
+			if err == io.EOF {
+				return nil // merely end of file, rather than error
+			}
+			return fmt.Errorf("cannot read block count: %s", err)
+		}
+		if blockCount <= 0 {
+			return fmt.Errorf("cannot read when block count is not greater than 0: %d", blockCount)
+		}
+		if blockCount > MaxBlockCount {
+			return fmt.Errorf("cannot read when block count exceeds MaxBlockCount: %d > %d", blockCount, MaxBlockCount)
+		}
+		// Read block size
+		blockSize, err := longBinaryReader(ior)
+		if err != nil {
+			return fmt.Errorf("cannot read block size: %s", err)
+		}
+		if blockSize <= 0 {
+			return fmt.Errorf("cannot read when block size is not greater than 0: %d", blockSize)
+		}
+		if blockSize > MaxBlockSize {
+			return fmt.Errorf("cannot read when block size exceeds MaxBlockSize: %d > %d", blockSize, MaxBlockSize)
+		}
+		// Advance reader to end of block
+		if _, err = io.CopyN(ioutil.Discard, ior, blockSize); err != nil {
+			return fmt.Errorf("cannot seek to next block: %s", err)
+		}
+		// Read and validate sync marker
+		var n int
+		if n, err = io.ReadFull(ior, sync); err != nil {
+			return fmt.Errorf("cannot read sync marker: read %d out of %d bytes: %s", n, ocfSyncLength, err)
+		}
+		if !bytes.Equal(sync, ocfw.header.syncMarker[:]) {
+			return fmt.Errorf("sync marker mismatch: %v != %v", sync, ocfw.header.syncMarker)
+		}
+	}
+}
+
+// Append appends one or more data items to an OCF file in a block. If there are
+// more data items in the slice than MaxBlockCount allows, the data slice will
+// be chunked into multiple blocks, each not having more than MaxBlockCount
+// items.
+func (ocfw *OCFWriter) Append(data interface{}) error {
+	arrayValues, err := convertArray(data)
+	if err != nil {
+		return err
+	}
+
+	// Chunk data so no block has more than MaxBlockCount items.
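+	// For example, if MaxBlockCount were 100 and the slice held 250 items,
+	// this would write blocks of 100, 100, and 50 items.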
+	for int64(len(arrayValues)) > MaxBlockCount {
+		if err := ocfw.appendDataIntoBlock(arrayValues[:MaxBlockCount]); err != nil {
+			return err
+		}
+		arrayValues = arrayValues[MaxBlockCount:]
+	}
+	return ocfw.appendDataIntoBlock(arrayValues)
+}
+
+func (ocfw *OCFWriter) appendDataIntoBlock(data []interface{}) error {
+	var block []byte // working buffer for encoding data values
+	var err error
+
+	// Encode and concatenate each data item into the block
+	for _, datum := range data {
+		if block, err = ocfw.header.codec.BinaryFromNative(block, datum); err != nil {
+			return fmt.Errorf("cannot translate datum to binary: %v; %s", datum, err)
+		}
+	}
+
+	switch ocfw.header.compressionID {
+	case compressionNull:
+		// no-op
+
+	case compressionDeflate:
+		// compress into new bytes buffer.
+		bb := bytes.NewBuffer(make([]byte, 0, len(block)))
+
+		cw, _ := flate.NewWriter(bb, flate.DefaultCompression)
+		// writing bytes to cw will compress bytes and send to bb.
+		if _, err := cw.Write(block); err != nil {
+			return err
+		}
+		if err := cw.Close(); err != nil {
+			return err
+		}
+		block = bb.Bytes()
+
+	case compressionSnappy:
+		compressed := snappy.Encode(nil, block)
+
+		// OCF requires snappy to have CRC32 checksum after each snappy block
+		compressed = append(compressed, 0, 0, 0, 0)                                           // expand slice by 4 bytes so checksum will fit
+		binary.BigEndian.PutUint32(compressed[len(compressed)-4:], crc32.ChecksumIEEE(block)) // checksum of decompressed block
+
+		block = compressed
+
+	default:
+		return fmt.Errorf("should not get here: cannot compress block using unrecognized compression: %d", ocfw.header.compressionID)
+
+	}
+
+	// create file data block
+	buf := make([]byte, 0, len(block)+ocfBlockConst) // pre-allocate block bytes
+	buf, _ = longBinaryFromNative(buf, len(data))    // block count (number of data items)
+	buf, _ = longBinaryFromNative(buf, len(block))   // block size (number of bytes in block)
+	buf = append(buf, block...)                      // serialized objects
+	buf = append(buf, ocfw.header.syncMarker[:]...)  // sync marker
+
+	_, err = ocfw.iow.Write(buf)
+	return err
+}
+
+// Codec returns the codec used by OCFWriter. This function is provided because
+// upstream may be appending to an existing OCF which uses a different schema
+// than requested during instantiation.
+func (ocfw *OCFWriter) Codec() *Codec {
+	return ocfw.header.codec
+}
+
+// CompressionName returns the name of the compression algorithm used by
+// OCFWriter. This function is provided because upstream may be appending to an
+// existing OCF which uses a different compression algorithm than requested
+// during instantiation.
+func (ocfw *OCFWriter) CompressionName() string {
+	switch ocfw.header.compressionID {
+	case compressionNull:
+		return CompressionNullLabel
+	case compressionDeflate:
+		return CompressionDeflateLabel
+	case compressionSnappy:
+		return CompressionSnappyLabel
+	default:
+		return "should not get here: unrecognized compression algorithm"
+	}
+}
diff --git a/vendor/github.com/linkedin/goavro/record.go b/vendor/github.com/linkedin/goavro/record.go
new file mode 100644
index 00000000000..5dd855357ac
--- /dev/null
+++ b/vendor/github.com/linkedin/goavro/record.go
@@ -0,0 +1,185 @@
+// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version
+// 2.0 (the "License"); you may not use this file except in compliance with the
+// License.
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +package goavro + +import ( + "fmt" +) + +func makeRecordCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}) (*Codec, error) { + // NOTE: To support recursive data types, create the codec and register it + // using the specified name, and fill in the codec functions later. + c, err := registerNewCodec(st, schemaMap, enclosingNamespace) + if err != nil { + return nil, fmt.Errorf("Record ought to have valid name: %s", err) + } + + fields, ok := schemaMap["fields"] + if !ok { + return nil, fmt.Errorf("Record %q ought to have fields key", c.typeName) + } + fieldSchemas, ok := fields.([]interface{}) + if !ok || len(fieldSchemas) == 0 { + return nil, fmt.Errorf("Record %q fields ought to be non-empty array: %v", c.typeName, fields) + } + + codecFromFieldName := make(map[string]*Codec) + codecFromIndex := make([]*Codec, len(fieldSchemas)) + nameFromIndex := make([]string, len(fieldSchemas)) + defaultValueFromName := make(map[string]interface{}, len(fieldSchemas)) + + for i, fieldSchema := range fieldSchemas { + fieldSchemaMap, ok := fieldSchema.(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("Record %q field %d ought to be valid Avro named type; received: %v", c.typeName, i+1, fieldSchema) + } + + // NOTE: field names are not registered in the symbol table, because + // field names are not individually addressable codecs. + + fieldCodec, err := buildCodecForTypeDescribedByMap(st, c.typeName.namespace, fieldSchemaMap) + if err != nil { + return nil, fmt.Errorf("Record %q field %d ought to be valid Avro named type: %s", c.typeName, i+1, err) + } + + // However, when creating a full name for the field name, be sure to use + // record's namespace + n, err := newNameFromSchemaMap(c.typeName.namespace, fieldSchemaMap) + if err != nil { + return nil, fmt.Errorf("Record %q field %d ought to have valid name: %v", c.typeName, i+1, fieldSchemaMap) + } + fieldName := n.short() + if _, ok := codecFromFieldName[fieldName]; ok { + return nil, fmt.Errorf("Record %q field %d ought to have unique name: %q", c.typeName, i+1, fieldName) + } + + if defaultValue, ok := fieldSchemaMap["default"]; ok { + // if codec is union, then default value ought to encode using first schema in union + if fieldCodec.typeName.short() == "union" { + // NOTE: To support a null default value, + // the string literal "null" must be coerced to a `nil` + if defaultValue == "null" { + defaultValue = nil + } + // NOTE: To support record field default values, union schema + // set to the type name of first member + defaultValue = Union(fieldCodec.schema, defaultValue) + } + // attempt to encode default value using codec + _, err = fieldCodec.binaryFromNative(nil, defaultValue) + if err != nil { + return nil, fmt.Errorf("Record %q field %q: default value ought to encode using field schema: %s", c.typeName, fieldName, err) + } + defaultValueFromName[fieldName] = defaultValue + } + + nameFromIndex[i] = fieldName + codecFromIndex[i] = fieldCodec + codecFromFieldName[fieldName] = fieldCodec + } + + c.binaryFromNative = func(buf []byte, datum interface{}) ([]byte, error) { + valueMap, ok := datum.(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("cannot encode binary record %q: 
expected map[string]interface{}; received: %T", c.typeName, datum) + } + + // records encoded in order fields were defined in schema + for i, fieldCodec := range codecFromIndex { + fieldName := nameFromIndex[i] + + // NOTE: If field value was not specified in map, then set + // fieldValue to its default value (which may or may not have been + // specified). + fieldValue, ok := valueMap[fieldName] + if !ok { + if fieldValue, ok = defaultValueFromName[fieldName]; !ok { + return nil, fmt.Errorf("cannot encode binary record %q field %q: schema does not specify default value and no value provided", c.typeName, fieldName) + } + } + + var err error + buf, err = fieldCodec.binaryFromNative(buf, fieldValue) + if err != nil { + return nil, fmt.Errorf("cannot encode binary record %q field %q: value does not match its schema: %s", c.typeName, fieldName, err) + } + } + return buf, nil + } + + c.nativeFromBinary = func(buf []byte) (interface{}, []byte, error) { + recordMap := make(map[string]interface{}, len(codecFromIndex)) + for i, fieldCodec := range codecFromIndex { + name := nameFromIndex[i] + var value interface{} + var err error + value, buf, err = fieldCodec.nativeFromBinary(buf) + if err != nil { + return nil, nil, fmt.Errorf("cannot decode binary record %q field %q: %s", c.typeName, name, err) + } + recordMap[name] = value + } + return recordMap, buf, nil + } + + c.nativeFromTextual = func(buf []byte) (interface{}, []byte, error) { + var mapValues map[string]interface{} + var err error + // NOTE: Setting `defaultCodec == nil` instructs genericMapTextDecoder + // to return an error when a field name is not found in the + // codecFromFieldName map. + mapValues, buf, err = genericMapTextDecoder(buf, nil, codecFromFieldName) + if err != nil { + return nil, nil, fmt.Errorf("cannot decode textual record %q: %s", c.typeName, err) + } + if actual, expected := len(mapValues), len(codecFromFieldName); actual != expected { + // set missing field keys to their respective default values, then + // re-check number of keys + for fieldName, defaultValue := range defaultValueFromName { + if _, ok := mapValues[fieldName]; !ok { + mapValues[fieldName] = defaultValue + } + } + if actual, expected = len(mapValues), len(codecFromFieldName); actual != expected { + return nil, nil, fmt.Errorf("cannot decode textual record %q: only found %d of %d fields", c.typeName, actual, expected) + } + } + return mapValues, buf, nil + } + + c.textualFromNative = func(buf []byte, datum interface{}) ([]byte, error) { + // NOTE: Ensure only schema defined field names are encoded; and if + // missing in datum, either use the provided field default value or + // return an error. + sourceMap, ok := datum.(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("cannot encode textual record %q: expected map[string]interface{}; received: %T", c.typeName, datum) + } + destMap := make(map[string]interface{}, len(codecFromIndex)) + for fieldName := range codecFromFieldName { + fieldValue, ok := sourceMap[fieldName] + if !ok { + defaultValue, ok := defaultValueFromName[fieldName] + if !ok { + return nil, fmt.Errorf("cannot encode textual record %q field %q: schema does not specify default value and no value provided", c.typeName, fieldName) + } + fieldValue = defaultValue + } + destMap[fieldName] = fieldValue + } + datum = destMap + // NOTE: Setting `defaultCodec == nil` instructs genericMapTextEncoder + // to return an error when a field name is not found in the + // codecFromFieldName map. 
+ return genericMapTextEncoder(buf, datum, nil, codecFromFieldName) + } + + return c, nil +} diff --git a/vendor/github.com/linkedin/goavro/text.go b/vendor/github.com/linkedin/goavro/text.go new file mode 100644 index 00000000000..199bbcb7d26 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/text.go @@ -0,0 +1,41 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +package goavro + +import ( + "fmt" + "io" + "unicode" +) + +// advanceAndConsume advances to non whitespace and returns an error if the next +// non whitespace byte is not what is expected. +func advanceAndConsume(buf []byte, expected byte) ([]byte, error) { + var err error + if buf, err = advanceToNonWhitespace(buf); err != nil { + return nil, err + } + if actual := buf[0]; actual != expected { + return nil, fmt.Errorf("expected: %q; actual: %q", expected, actual) + } + return buf[1:], nil +} + +// advanceToNonWhitespace consumes bytes from buf until non-whitespace character +// is found. It returns error when no more bytes remain, because its purpose is +// to scan ahead to the next non-whitespace character. +func advanceToNonWhitespace(buf []byte) ([]byte, error) { + for i, b := range buf { + if !unicode.IsSpace(rune(b)) { + return buf[i:], nil + } + } + return nil, io.ErrShortBuffer +} diff --git a/vendor/github.com/linkedin/goavro/union.go b/vendor/github.com/linkedin/goavro/union.go new file mode 100644 index 00000000000..1e300a2c1a6 --- /dev/null +++ b/vendor/github.com/linkedin/goavro/union.go @@ -0,0 +1,178 @@ +// Copyright [2017] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +package goavro + +import ( + "bytes" + "errors" + "fmt" +) + +// Union wraps a datum value in a map for encoding as a Union, as required by +// Union encoder. +// +// When providing a value for an Avro union, the encoder will accept `nil` for a +// `null` value. If the value is non-`nil`, it must be a +// `map[string]interface{}` with a single key-value pair, where the key is the +// Avro type name and the value is the datum's value. As a convenience, the +// `Union` function wraps any datum value in a map as specified above. 
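+// For example, Union("string", "some string") returns
+// map[string]interface{}{"string": "some string"}, and Union("null", nil)
+// returns nil.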
+// +// func ExampleUnion() { +// codec, err := goavro.NewCodec(`["null","string","int"]`) +// if err != nil { +// fmt.Println(err) +// } +// buf, err := codec.TextFromNative(nil, goavro.Union("string", "some string")) +// if err != nil { +// fmt.Println(err) +// } +// fmt.Println(string(buf)) +// // Output: {"string":"some string"} +// } +func Union(name string, datum interface{}) interface{} { + if datum == nil && name == "null" { + return nil + } + return map[string]interface{}{name: datum} +} + +func buildCodecForTypeDescribedBySlice(st map[string]*Codec, enclosingNamespace string, schemaArray []interface{}) (*Codec, error) { + if len(schemaArray) == 0 { + return nil, errors.New("Union ought to have one or more members") + } + + allowedTypes := make([]string, len(schemaArray)) // used for error reporting when encoder receives invalid datum type + codecFromIndex := make([]*Codec, len(schemaArray)) + codecFromName := make(map[string]*Codec, len(schemaArray)) + indexFromName := make(map[string]int, len(schemaArray)) + + for i, unionMemberSchema := range schemaArray { + unionMemberCodec, err := buildCodec(st, enclosingNamespace, unionMemberSchema) + if err != nil { + return nil, fmt.Errorf("Union item %d ought to be valid Avro type: %s", i+1, err) + } + fullName := unionMemberCodec.typeName.fullName + if _, ok := indexFromName[fullName]; ok { + return nil, fmt.Errorf("Union item %d ought to be unique type: %s", i+1, unionMemberCodec.typeName) + } + allowedTypes[i] = fullName + codecFromIndex[i] = unionMemberCodec + codecFromName[fullName] = unionMemberCodec + indexFromName[fullName] = i + } + + return &Codec{ + // NOTE: To support record field default values, union schema set to the + // type name of first member + schema: codecFromIndex[0].typeName.short(), + + typeName: &name{"union", nullNamespace}, + nativeFromBinary: func(buf []byte) (interface{}, []byte, error) { + var decoded interface{} + var err error + + decoded, buf, err = longNativeFromBinary(buf) + if err != nil { + return nil, nil, err + } + index := decoded.(int64) // longDecoder always returns int64, so elide error checking + if index < 0 || index >= int64(len(codecFromIndex)) { + return nil, nil, fmt.Errorf("cannot decode binary union: index ought to be between 0 and %d; read index: %d", len(codecFromIndex)-1, index) + } + c := codecFromIndex[index] + decoded, buf, err = c.nativeFromBinary(buf) + if err != nil { + return nil, nil, fmt.Errorf("cannot decode binary union item %d: %s", index+1, err) + } + if decoded == nil { + // do not wrap a nil value in a map + return nil, buf, nil + } + // Non-nil values are wrapped in a map with single key set to type name of value + return Union(allowedTypes[index], decoded), buf, nil + }, + binaryFromNative: func(buf []byte, datum interface{}) ([]byte, error) { + switch v := datum.(type) { + case nil: + index, ok := indexFromName["null"] + if !ok { + return nil, fmt.Errorf("cannot encode binary union: no member schema types support datum: allowed types: %v; received: %T", allowedTypes, datum) + } + return longBinaryFromNative(buf, index) + case map[string]interface{}: + if len(v) != 1 { + return nil, fmt.Errorf("cannot encode binary union: non-nil Union values ought to be specified with Go map[string]interface{}, with single key equal to type name, and value equal to datum value: %v; received: %T", allowedTypes, datum) + } + // will execute exactly once + for key, value := range v { + index, ok := indexFromName[key] + if !ok { + return nil, fmt.Errorf("cannot encode binary union: no 
member schema types support datum: allowed types: %v; received: %T", allowedTypes, datum) + } + c := codecFromIndex[index] + buf, _ = longBinaryFromNative(buf, index) + return c.binaryFromNative(buf, value) + } + } + return nil, fmt.Errorf("cannot encode binary union: non-nil Union values ought to be specified with Go map[string]interface{}, with single key equal to type name, and value equal to datum value: %v; received: %T", allowedTypes, datum) + }, + nativeFromTextual: func(buf []byte) (interface{}, []byte, error) { + if len(buf) >= 4 && bytes.Equal(buf[:4], []byte("null")) { + if _, ok := indexFromName["null"]; ok { + return nil, buf[4:], nil + } + } + + var datum interface{} + var err error + datum, buf, err = genericMapTextDecoder(buf, nil, codecFromName) + if err != nil { + return nil, nil, fmt.Errorf("cannot decode textual union: %s", err) + } + + return datum, buf, nil + }, + textualFromNative: func(buf []byte, datum interface{}) ([]byte, error) { + switch v := datum.(type) { + case nil: + _, ok := indexFromName["null"] + if !ok { + return nil, fmt.Errorf("cannot encode textual union: no member schema types support datum: allowed types: %v; received: %T", allowedTypes, datum) + } + return append(buf, "null"...), nil + case map[string]interface{}: + if len(v) != 1 { + return nil, fmt.Errorf("cannot encode textual union: non-nil Union values ought to be specified with Go map[string]interface{}, with single key equal to type name, and value equal to datum value: %v; received: %T", allowedTypes, datum) + } + // will execute exactly once + for key, value := range v { + index, ok := indexFromName[key] + if !ok { + return nil, fmt.Errorf("cannot encode textual union: no member schema types support datum: allowed types: %v; received: %T", allowedTypes, datum) + } + buf = append(buf, '{') + var err error + buf, err = stringTextualFromNative(buf, key) + if err != nil { + return nil, fmt.Errorf("cannot encode textual union: %s", err) + } + buf = append(buf, ':') + c := codecFromIndex[index] + buf, err = c.textualFromNative(buf, value) + if err != nil { + return nil, fmt.Errorf("cannot encode textual union: %s", err) + } + return append(buf, '}'), nil + } + } + return nil, fmt.Errorf("cannot encode textual union: non-nil values ought to be specified with Go map[string]interface{}, with single key equal to type name, and value equal to datum value: %v; received: %T", allowedTypes, datum) + }, + }, nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 302aee8f62d..061ab342cbe 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -120,6 +120,8 @@ github.com/golang/protobuf/ptypes github.com/golang/protobuf/ptypes/any github.com/golang/protobuf/ptypes/duration github.com/golang/protobuf/ptypes/timestamp +# github.com/golang/snappy v0.0.1 +github.com/golang/snappy # github.com/google/flatbuffers v1.11.0 github.com/google/flatbuffers/go # github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c @@ -165,6 +167,8 @@ github.com/klauspost/cpuid github.com/lib/pq github.com/lib/pq/oid github.com/lib/pq/scram +# github.com/linkedin/goavro v2.1.0+incompatible +github.com/linkedin/goavro # github.com/mattetti/filebuffer v1.0.0 github.com/mattetti/filebuffer # github.com/mattn/go-colorable v0.1.4