Move modules/gzip to gitea.com/macaron/gzip (#9058)
* Move modules/gzip to gitea.com/macaron/gzip * Fix vendor
This commit is contained in:
parent
ba4e8f221b
commit
9ff6312627
5
go.mod
5
go.mod
|
@ -9,6 +9,7 @@ require (
|
||||||
gitea.com/macaron/captcha v0.0.0-20190822015246-daa973478bae
|
gitea.com/macaron/captcha v0.0.0-20190822015246-daa973478bae
|
||||||
gitea.com/macaron/cors v0.0.0-20190821152825-7dcef4a17175
|
gitea.com/macaron/cors v0.0.0-20190821152825-7dcef4a17175
|
||||||
gitea.com/macaron/csrf v0.0.0-20190822024205-3dc5a4474439
|
gitea.com/macaron/csrf v0.0.0-20190822024205-3dc5a4474439
|
||||||
|
gitea.com/macaron/gzip v0.0.0-20191118033930-0c4c5566a0e5
|
||||||
gitea.com/macaron/i18n v0.0.0-20190822004228-474e714e2223
|
gitea.com/macaron/i18n v0.0.0-20190822004228-474e714e2223
|
||||||
gitea.com/macaron/inject v0.0.0-20190805023432-d4c86e31027a
|
gitea.com/macaron/inject v0.0.0-20190805023432-d4c86e31027a
|
||||||
gitea.com/macaron/macaron v1.3.3-0.20190821202302-9646c0587edb
|
gitea.com/macaron/macaron v1.3.3-0.20190821202302-9646c0587edb
|
||||||
|
@ -55,9 +56,7 @@ require (
|
||||||
github.com/joho/godotenv v1.3.0 // indirect
|
github.com/joho/godotenv v1.3.0 // indirect
|
||||||
github.com/kballard/go-shellquote v0.0.0-20170619183022-cd60e84ee657
|
github.com/kballard/go-shellquote v0.0.0-20170619183022-cd60e84ee657
|
||||||
github.com/keybase/go-crypto v0.0.0-20170605145657-00ac4db533f6
|
github.com/keybase/go-crypto v0.0.0-20170605145657-00ac4db533f6
|
||||||
github.com/klauspost/compress v0.0.0-20161025140425-8df558b6cb6f
|
github.com/klauspost/compress v1.9.2
|
||||||
github.com/klauspost/cpuid v0.0.0-20160302075316-09cded8978dc // indirect
|
|
||||||
github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6 // indirect
|
|
||||||
github.com/lafriks/xormstore v1.3.2
|
github.com/lafriks/xormstore v1.3.2
|
||||||
github.com/lib/pq v1.2.0
|
github.com/lib/pq v1.2.0
|
||||||
github.com/lunny/dingtalk_webhook v0.0.0-20171025031554-e3534c89ef96
|
github.com/lunny/dingtalk_webhook v0.0.0-20171025031554-e3534c89ef96
|
||||||
|
|
10
go.sum
10
go.sum
|
@ -20,6 +20,8 @@ gitea.com/macaron/cors v0.0.0-20190821152825-7dcef4a17175 h1:ikzdAGB6SsUGByW5wKl
|
||||||
gitea.com/macaron/cors v0.0.0-20190821152825-7dcef4a17175/go.mod h1:rtOK4J20kpMD9XcNsnO5YA843YSTe/MUMbDj/TJ/Q7A=
|
gitea.com/macaron/cors v0.0.0-20190821152825-7dcef4a17175/go.mod h1:rtOK4J20kpMD9XcNsnO5YA843YSTe/MUMbDj/TJ/Q7A=
|
||||||
gitea.com/macaron/csrf v0.0.0-20190822024205-3dc5a4474439 h1:88c34YM29a1GlWLrLBaG/GTT2htDdJz1u3n9+lmPolg=
|
gitea.com/macaron/csrf v0.0.0-20190822024205-3dc5a4474439 h1:88c34YM29a1GlWLrLBaG/GTT2htDdJz1u3n9+lmPolg=
|
||||||
gitea.com/macaron/csrf v0.0.0-20190822024205-3dc5a4474439/go.mod h1:IsQPHx73HnnqFBYiVHjg87q4XBZyGXXu77xANukvZuk=
|
gitea.com/macaron/csrf v0.0.0-20190822024205-3dc5a4474439/go.mod h1:IsQPHx73HnnqFBYiVHjg87q4XBZyGXXu77xANukvZuk=
|
||||||
|
gitea.com/macaron/gzip v0.0.0-20191118033930-0c4c5566a0e5 h1:G/a7r0r2jEelSynBlv1+PAEZQKfsdRHQUMb1PlNvemM=
|
||||||
|
gitea.com/macaron/gzip v0.0.0-20191118033930-0c4c5566a0e5/go.mod h1:jGHtoovArcQj+sw7NJxyPgjuRxOSG9a/oFu3VkLRTKQ=
|
||||||
gitea.com/macaron/i18n v0.0.0-20190822004228-474e714e2223 h1:iZWwQif/LHMjBgfY/ua8CFVa4XMDfbbs7EZ0Q1dYguU=
|
gitea.com/macaron/i18n v0.0.0-20190822004228-474e714e2223 h1:iZWwQif/LHMjBgfY/ua8CFVa4XMDfbbs7EZ0Q1dYguU=
|
||||||
gitea.com/macaron/i18n v0.0.0-20190822004228-474e714e2223/go.mod h1:+qsc10s4hBsHKU/9luGGumFh4m5FFVc7uih+8/mM1NY=
|
gitea.com/macaron/i18n v0.0.0-20190822004228-474e714e2223/go.mod h1:+qsc10s4hBsHKU/9luGGumFh4m5FFVc7uih+8/mM1NY=
|
||||||
gitea.com/macaron/inject v0.0.0-20190803172902-8375ba841591/go.mod h1:h6E4kLao1Yko6DOU6QDnQPcuoNzvbZqzj2mtPcEn1aM=
|
gitea.com/macaron/inject v0.0.0-20190803172902-8375ba841591/go.mod h1:h6E4kLao1Yko6DOU6QDnQPcuoNzvbZqzj2mtPcEn1aM=
|
||||||
|
@ -334,12 +336,8 @@ github.com/keybase/go-crypto v0.0.0-20170605145657-00ac4db533f6/go.mod h1:ghbZsc
|
||||||
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
|
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
|
||||||
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
|
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
|
||||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||||
github.com/klauspost/compress v0.0.0-20161025140425-8df558b6cb6f h1:tCnZKEmDovgV4jmsclh6CuKk9AMzTzyVWfejgkgccVg=
|
github.com/klauspost/compress v1.9.2 h1:LfVyl+ZlLlLDeQ/d2AqfGIIH4qEDu0Ed2S5GyhCWIWY=
|
||||||
github.com/klauspost/compress v0.0.0-20161025140425-8df558b6cb6f/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
github.com/klauspost/compress v1.9.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||||
github.com/klauspost/cpuid v0.0.0-20160302075316-09cded8978dc h1:WW8B7p7QBnFlqRVv/k6ro/S8Z7tCnYjJHcQNScx9YVs=
|
|
||||||
github.com/klauspost/cpuid v0.0.0-20160302075316-09cded8978dc/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
|
||||||
github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6 h1:KAZ1BW2TCmT6PRihDPpocIy1QTtsAsrx6TneU/4+CMg=
|
|
||||||
github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg=
|
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||||
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
|
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
|
||||||
|
|
|
@ -15,10 +15,10 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"code.gitea.io/gitea/models"
|
"code.gitea.io/gitea/models"
|
||||||
"code.gitea.io/gitea/modules/gzip"
|
|
||||||
"code.gitea.io/gitea/modules/lfs"
|
"code.gitea.io/gitea/modules/lfs"
|
||||||
"code.gitea.io/gitea/modules/setting"
|
"code.gitea.io/gitea/modules/setting"
|
||||||
|
|
||||||
|
"gitea.com/macaron/gzip"
|
||||||
gzipp "github.com/klauspost/compress/gzip"
|
gzipp "github.com/klauspost/compress/gzip"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,131 +0,0 @@
|
||||||
// Copyright 2019 The Gitea Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a MIT-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package gzip
|
|
||||||
|
|
||||||
import (
|
|
||||||
"archive/zip"
|
|
||||||
"bytes"
|
|
||||||
"io/ioutil"
|
|
||||||
"net/http"
|
|
||||||
"net/http/httptest"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"gitea.com/macaron/macaron"
|
|
||||||
gzipp "github.com/klauspost/compress/gzip"
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
)
|
|
||||||
|
|
||||||
func setup(sampleResponse []byte) (*macaron.Macaron, *[]byte) {
|
|
||||||
m := macaron.New()
|
|
||||||
m.Use(Middleware())
|
|
||||||
m.Get("/", func() *[]byte { return &sampleResponse })
|
|
||||||
return m, &sampleResponse
|
|
||||||
}
|
|
||||||
|
|
||||||
func reqNoAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte) {
|
|
||||||
// Request without accept gzip: Should not gzip
|
|
||||||
resp := httptest.NewRecorder()
|
|
||||||
req, err := http.NewRequest("GET", "/", nil)
|
|
||||||
assert.NoError(t, err)
|
|
||||||
m.ServeHTTP(resp, req)
|
|
||||||
|
|
||||||
_, ok := resp.HeaderMap[contentEncodingHeader]
|
|
||||||
assert.False(t, ok)
|
|
||||||
|
|
||||||
contentEncoding := resp.Header().Get(contentEncodingHeader)
|
|
||||||
assert.NotContains(t, contentEncoding, "gzip")
|
|
||||||
|
|
||||||
result := resp.Body.Bytes()
|
|
||||||
assert.Equal(t, *sampleResponse, result)
|
|
||||||
}
|
|
||||||
|
|
||||||
func reqAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte, expectGzip bool) {
|
|
||||||
// Request with accept gzip: should gzip only when expectGzip is true
|
|
||||||
resp := httptest.NewRecorder()
|
|
||||||
req, err := http.NewRequest("GET", "/", nil)
|
|
||||||
assert.NoError(t, err)
|
|
||||||
req.Header.Set(acceptEncodingHeader, "gzip")
|
|
||||||
m.ServeHTTP(resp, req)
|
|
||||||
|
|
||||||
_, ok := resp.HeaderMap[contentEncodingHeader]
|
|
||||||
assert.Equal(t, ok, expectGzip)
|
|
||||||
|
|
||||||
contentEncoding := resp.Header().Get(contentEncodingHeader)
|
|
||||||
if expectGzip {
|
|
||||||
assert.Contains(t, contentEncoding, "gzip")
|
|
||||||
gzippReader, err := gzipp.NewReader(resp.Body)
|
|
||||||
assert.NoError(t, err)
|
|
||||||
result, err := ioutil.ReadAll(gzippReader)
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, *sampleResponse, result)
|
|
||||||
} else {
|
|
||||||
assert.NotContains(t, contentEncoding, "gzip")
|
|
||||||
result := resp.Body.Bytes()
|
|
||||||
assert.Equal(t, *sampleResponse, result)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestMiddlewareSmall(t *testing.T) {
|
|
||||||
m, sampleResponse := setup([]byte("Small response"))
|
|
||||||
|
|
||||||
reqNoAcceptGzip(t, m, sampleResponse)
|
|
||||||
|
|
||||||
reqAcceptGzip(t, m, sampleResponse, false)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestMiddlewareLarge(t *testing.T) {
|
|
||||||
b := make([]byte, MinSize+1)
|
|
||||||
for i := range b {
|
|
||||||
b[i] = byte(i % 256)
|
|
||||||
}
|
|
||||||
m, sampleResponse := setup(b)
|
|
||||||
|
|
||||||
reqNoAcceptGzip(t, m, sampleResponse)
|
|
||||||
|
|
||||||
// This should be gzipped as we accept gzip
|
|
||||||
reqAcceptGzip(t, m, sampleResponse, true)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestMiddlewareGzip(t *testing.T) {
|
|
||||||
b := make([]byte, MinSize*10)
|
|
||||||
for i := range b {
|
|
||||||
b[i] = byte(i % 256)
|
|
||||||
}
|
|
||||||
outputBuffer := bytes.NewBuffer([]byte{})
|
|
||||||
gzippWriter := gzipp.NewWriter(outputBuffer)
|
|
||||||
gzippWriter.Write(b)
|
|
||||||
gzippWriter.Flush()
|
|
||||||
gzippWriter.Close()
|
|
||||||
output := outputBuffer.Bytes()
|
|
||||||
|
|
||||||
m, sampleResponse := setup(output)
|
|
||||||
|
|
||||||
reqNoAcceptGzip(t, m, sampleResponse)
|
|
||||||
|
|
||||||
// This should not be gzipped even though we accept gzip
|
|
||||||
reqAcceptGzip(t, m, sampleResponse, false)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestMiddlewareZip(t *testing.T) {
|
|
||||||
b := make([]byte, MinSize*10)
|
|
||||||
for i := range b {
|
|
||||||
b[i] = byte(i % 256)
|
|
||||||
}
|
|
||||||
outputBuffer := bytes.NewBuffer([]byte{})
|
|
||||||
zipWriter := zip.NewWriter(outputBuffer)
|
|
||||||
fileWriter, err := zipWriter.Create("default")
|
|
||||||
assert.NoError(t, err)
|
|
||||||
fileWriter.Write(b)
|
|
||||||
//fileWriter.Close()
|
|
||||||
zipWriter.Close()
|
|
||||||
output := outputBuffer.Bytes()
|
|
||||||
|
|
||||||
m, sampleResponse := setup(output)
|
|
||||||
|
|
||||||
reqNoAcceptGzip(t, m, sampleResponse)
|
|
||||||
|
|
||||||
// This should not be gzipped even though we accept gzip
|
|
||||||
reqAcceptGzip(t, m, sampleResponse, false)
|
|
||||||
}
|
|
|
@ -16,7 +16,6 @@ import (
|
||||||
"code.gitea.io/gitea/models"
|
"code.gitea.io/gitea/models"
|
||||||
"code.gitea.io/gitea/modules/auth"
|
"code.gitea.io/gitea/modules/auth"
|
||||||
"code.gitea.io/gitea/modules/context"
|
"code.gitea.io/gitea/modules/context"
|
||||||
"code.gitea.io/gitea/modules/gzip"
|
|
||||||
"code.gitea.io/gitea/modules/lfs"
|
"code.gitea.io/gitea/modules/lfs"
|
||||||
"code.gitea.io/gitea/modules/log"
|
"code.gitea.io/gitea/modules/log"
|
||||||
"code.gitea.io/gitea/modules/metrics"
|
"code.gitea.io/gitea/modules/metrics"
|
||||||
|
@ -44,6 +43,7 @@ import (
|
||||||
"gitea.com/macaron/captcha"
|
"gitea.com/macaron/captcha"
|
||||||
"gitea.com/macaron/cors"
|
"gitea.com/macaron/cors"
|
||||||
"gitea.com/macaron/csrf"
|
"gitea.com/macaron/csrf"
|
||||||
|
"gitea.com/macaron/gzip"
|
||||||
"gitea.com/macaron/i18n"
|
"gitea.com/macaron/i18n"
|
||||||
"gitea.com/macaron/macaron"
|
"gitea.com/macaron/macaron"
|
||||||
"gitea.com/macaron/session"
|
"gitea.com/macaron/session"
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
module gitea.com/macaron/gzip
|
||||||
|
|
||||||
|
go 1.12
|
||||||
|
|
||||||
|
require (
|
||||||
|
gitea.com/macaron/macaron v1.3.3-0.20190821202302-9646c0587edb
|
||||||
|
github.com/klauspost/compress v1.9.2
|
||||||
|
github.com/stretchr/testify v1.4.0
|
||||||
|
)
|
|
@ -0,0 +1,42 @@
|
||||||
|
gitea.com/macaron/inject v0.0.0-20190803172902-8375ba841591 h1:UbCTjPcLrNxR9LzKDjQBMT2zoxZuEnca1pZCpgeMuhQ=
|
||||||
|
gitea.com/macaron/inject v0.0.0-20190803172902-8375ba841591/go.mod h1:h6E4kLao1Yko6DOU6QDnQPcuoNzvbZqzj2mtPcEn1aM=
|
||||||
|
gitea.com/macaron/macaron v1.3.3-0.20190821202302-9646c0587edb h1:amL0md6orTj1tXY16ANzVU9FmzQB+W7aJwp8pVDbrmA=
|
||||||
|
gitea.com/macaron/macaron v1.3.3-0.20190821202302-9646c0587edb/go.mod h1:0coI+mSPSwbsyAbOuFllVS38awuk9mevhLD52l50Gjs=
|
||||||
|
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
||||||
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
|
||||||
|
github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e h1:JKmoR8x90Iww1ks85zJ1lfDGgIiMDuIptTOhJq+zKyg=
|
||||||
|
github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
|
||||||
|
github.com/jtolds/gls v4.2.1+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
|
||||||
|
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
|
||||||
|
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
|
||||||
|
github.com/klauspost/compress v1.9.2 h1:LfVyl+ZlLlLDeQ/d2AqfGIIH4qEDu0Ed2S5GyhCWIWY=
|
||||||
|
github.com/klauspost/compress v1.9.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
|
||||||
|
github.com/smartystreets/assertions v0.0.0-20190116191733-b6c0e53d7304 h1:Jpy1PXuP99tXNrhbq2BaPz9B+jNAvH1JPQQpG/9GCXY=
|
||||||
|
github.com/smartystreets/assertions v0.0.0-20190116191733-b6c0e53d7304/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
|
||||||
|
github.com/smartystreets/goconvey v0.0.0-20181108003508-044398e4856c/go.mod h1:XDJAKZRPZ1CvBcN2aX5YOUTYGHki24fSF0Iv48Ibg0s=
|
||||||
|
github.com/smartystreets/goconvey v0.0.0-20190731233626-505e41936337 h1:WN9BUFbdyOsSH/XohnWpXOlq9NBD5sGAB2FciQMUEe8=
|
||||||
|
github.com/smartystreets/goconvey v0.0.0-20190731233626-505e41936337/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
|
||||||
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
|
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
|
||||||
|
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||||
|
github.com/unknwon/com v0.0.0-20190804042917-757f69c95f3e h1:GSGeB9EAKY2spCABz6xOX5DbxZEXolK+nBSvmsQwRjM=
|
||||||
|
github.com/unknwon/com v0.0.0-20190804042917-757f69c95f3e/go.mod h1:tOOxU81rwgoCLoOVVPHb6T/wt8HZygqH5id+GNnlCXM=
|
||||||
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
|
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 h1:HuIa8hRrWRSrqYzx1qI49NNxhdi2PrY7gxVSq1JjLDc=
|
||||||
|
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||||
|
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||||
|
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||||
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
|
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
|
golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
|
||||||
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/ini.v1 v1.44.0 h1:YRJzTUp0kSYWUVFF5XAbDFfyiqwsl0Vb9R8TVP5eRi0=
|
||||||
|
gopkg.in/ini.v1 v1.44.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
|
||||||
|
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
|
||||||
|
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
|
@ -1,4 +1,5 @@
|
||||||
Copyright (c) 2012 The Go Authors. All rights reserved.
|
Copyright (c) 2012 The Go Authors. All rights reserved.
|
||||||
|
Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
modification, are permitted provided that the following conditions are
|
modification, are permitted provided that the following conditions are
|
||||||
|
|
|
@ -1,32 +0,0 @@
|
||||||
// Copyright 2012 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package flate
|
|
||||||
|
|
||||||
// forwardCopy is like the built-in copy function except that it always goes
|
|
||||||
// forward from the start, even if the dst and src overlap.
|
|
||||||
// It is equivalent to:
|
|
||||||
// for i := 0; i < n; i++ {
|
|
||||||
// mem[dst+i] = mem[src+i]
|
|
||||||
// }
|
|
||||||
func forwardCopy(mem []byte, dst, src, n int) {
|
|
||||||
if dst <= src {
|
|
||||||
copy(mem[dst:dst+n], mem[src:src+n])
|
|
||||||
return
|
|
||||||
}
|
|
||||||
for {
|
|
||||||
if dst >= src+n {
|
|
||||||
copy(mem[dst:dst+n], mem[src:src+n])
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// There is some forward overlap. The destination
|
|
||||||
// will be filled with a repeated pattern of mem[src:src+k].
|
|
||||||
// We copy one instance of the pattern here, then repeat.
|
|
||||||
// Each time around this loop k will double.
|
|
||||||
k := dst - src
|
|
||||||
copy(mem[dst:dst+k], mem[src:src+k])
|
|
||||||
n -= k
|
|
||||||
dst += k
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,41 +0,0 @@
|
||||||
//+build !noasm
|
|
||||||
//+build !appengine
|
|
||||||
|
|
||||||
// Copyright 2015, Klaus Post, see LICENSE for details.
|
|
||||||
|
|
||||||
package flate
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/klauspost/cpuid"
|
|
||||||
)
|
|
||||||
|
|
||||||
// crc32sse returns a hash for the first 4 bytes of the slice
|
|
||||||
// len(a) must be >= 4.
|
|
||||||
//go:noescape
|
|
||||||
func crc32sse(a []byte) uint32
|
|
||||||
|
|
||||||
// crc32sseAll calculates hashes for each 4-byte set in a.
|
|
||||||
// dst must be at least len(a) - 4 in size.
|
|
||||||
// The size is not checked by the assembly.
|
|
||||||
//go:noescape
|
|
||||||
func crc32sseAll(a []byte, dst []uint32)
|
|
||||||
|
|
||||||
// matchLenSSE4 returns the number of matching bytes in a and b
|
|
||||||
// up to length 'max'. Both slices must be at least 'max'
|
|
||||||
// bytes in size.
|
|
||||||
//
|
|
||||||
// TODO: drop the "SSE4" name, since it doesn't use any SSE instructions.
|
|
||||||
//
|
|
||||||
//go:noescape
|
|
||||||
func matchLenSSE4(a, b []byte, max int) int
|
|
||||||
|
|
||||||
// histogram accumulates a histogram of b in h.
|
|
||||||
// h must be at least 256 entries in length,
|
|
||||||
// and must be cleared before calling this function.
|
|
||||||
//go:noescape
|
|
||||||
func histogram(b []byte, h []int32)
|
|
||||||
|
|
||||||
// Detect SSE 4.2 feature.
|
|
||||||
func init() {
|
|
||||||
useSSE42 = cpuid.CPU.SSE42()
|
|
||||||
}
|
|
|
@ -1,213 +0,0 @@
|
||||||
//+build !noasm
|
|
||||||
//+build !appengine
|
|
||||||
|
|
||||||
// Copyright 2015, Klaus Post, see LICENSE for details.
|
|
||||||
|
|
||||||
// func crc32sse(a []byte) uint32
|
|
||||||
TEXT ·crc32sse(SB), 4, $0
|
|
||||||
MOVQ a+0(FP), R10
|
|
||||||
XORQ BX, BX
|
|
||||||
|
|
||||||
// CRC32 dword (R10), EBX
|
|
||||||
BYTE $0xF2; BYTE $0x41; BYTE $0x0f
|
|
||||||
BYTE $0x38; BYTE $0xf1; BYTE $0x1a
|
|
||||||
|
|
||||||
MOVL BX, ret+24(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
// func crc32sseAll(a []byte, dst []uint32)
|
|
||||||
TEXT ·crc32sseAll(SB), 4, $0
|
|
||||||
MOVQ a+0(FP), R8 // R8: src
|
|
||||||
MOVQ a_len+8(FP), R10 // input length
|
|
||||||
MOVQ dst+24(FP), R9 // R9: dst
|
|
||||||
SUBQ $4, R10
|
|
||||||
JS end
|
|
||||||
JZ one_crc
|
|
||||||
MOVQ R10, R13
|
|
||||||
SHRQ $2, R10 // len/4
|
|
||||||
ANDQ $3, R13 // len&3
|
|
||||||
XORQ BX, BX
|
|
||||||
ADDQ $1, R13
|
|
||||||
TESTQ R10, R10
|
|
||||||
JZ rem_loop
|
|
||||||
|
|
||||||
crc_loop:
|
|
||||||
MOVQ (R8), R11
|
|
||||||
XORQ BX, BX
|
|
||||||
XORQ DX, DX
|
|
||||||
XORQ DI, DI
|
|
||||||
MOVQ R11, R12
|
|
||||||
SHRQ $8, R11
|
|
||||||
MOVQ R12, AX
|
|
||||||
MOVQ R11, CX
|
|
||||||
SHRQ $16, R12
|
|
||||||
SHRQ $16, R11
|
|
||||||
MOVQ R12, SI
|
|
||||||
|
|
||||||
// CRC32 EAX, EBX
|
|
||||||
BYTE $0xF2; BYTE $0x0f
|
|
||||||
BYTE $0x38; BYTE $0xf1; BYTE $0xd8
|
|
||||||
|
|
||||||
// CRC32 ECX, EDX
|
|
||||||
BYTE $0xF2; BYTE $0x0f
|
|
||||||
BYTE $0x38; BYTE $0xf1; BYTE $0xd1
|
|
||||||
|
|
||||||
// CRC32 ESI, EDI
|
|
||||||
BYTE $0xF2; BYTE $0x0f
|
|
||||||
BYTE $0x38; BYTE $0xf1; BYTE $0xfe
|
|
||||||
MOVL BX, (R9)
|
|
||||||
MOVL DX, 4(R9)
|
|
||||||
MOVL DI, 8(R9)
|
|
||||||
|
|
||||||
XORQ BX, BX
|
|
||||||
MOVL R11, AX
|
|
||||||
|
|
||||||
// CRC32 EAX, EBX
|
|
||||||
BYTE $0xF2; BYTE $0x0f
|
|
||||||
BYTE $0x38; BYTE $0xf1; BYTE $0xd8
|
|
||||||
MOVL BX, 12(R9)
|
|
||||||
|
|
||||||
ADDQ $16, R9
|
|
||||||
ADDQ $4, R8
|
|
||||||
XORQ BX, BX
|
|
||||||
SUBQ $1, R10
|
|
||||||
JNZ crc_loop
|
|
||||||
|
|
||||||
rem_loop:
|
|
||||||
MOVL (R8), AX
|
|
||||||
|
|
||||||
// CRC32 EAX, EBX
|
|
||||||
BYTE $0xF2; BYTE $0x0f
|
|
||||||
BYTE $0x38; BYTE $0xf1; BYTE $0xd8
|
|
||||||
|
|
||||||
MOVL BX, (R9)
|
|
||||||
ADDQ $4, R9
|
|
||||||
ADDQ $1, R8
|
|
||||||
XORQ BX, BX
|
|
||||||
SUBQ $1, R13
|
|
||||||
JNZ rem_loop
|
|
||||||
|
|
||||||
end:
|
|
||||||
RET
|
|
||||||
|
|
||||||
one_crc:
|
|
||||||
MOVQ $1, R13
|
|
||||||
XORQ BX, BX
|
|
||||||
JMP rem_loop
|
|
||||||
|
|
||||||
// func matchLenSSE4(a, b []byte, max int) int
|
|
||||||
TEXT ·matchLenSSE4(SB), 4, $0
|
|
||||||
MOVQ a_base+0(FP), SI
|
|
||||||
MOVQ b_base+24(FP), DI
|
|
||||||
MOVQ DI, DX
|
|
||||||
MOVQ max+48(FP), CX
|
|
||||||
|
|
||||||
cmp8:
|
|
||||||
// As long as we are 8 or more bytes before the end of max, we can load and
|
|
||||||
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
|
|
||||||
CMPQ CX, $8
|
|
||||||
JLT cmp1
|
|
||||||
MOVQ (SI), AX
|
|
||||||
MOVQ (DI), BX
|
|
||||||
CMPQ AX, BX
|
|
||||||
JNE bsf
|
|
||||||
ADDQ $8, SI
|
|
||||||
ADDQ $8, DI
|
|
||||||
SUBQ $8, CX
|
|
||||||
JMP cmp8
|
|
||||||
|
|
||||||
bsf:
|
|
||||||
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
|
|
||||||
// the index of the first byte that differs. The BSF instruction finds the
|
|
||||||
// least significant 1 bit, the amd64 architecture is little-endian, and
|
|
||||||
// the shift by 3 converts a bit index to a byte index.
|
|
||||||
XORQ AX, BX
|
|
||||||
BSFQ BX, BX
|
|
||||||
SHRQ $3, BX
|
|
||||||
ADDQ BX, DI
|
|
||||||
|
|
||||||
// Subtract off &b[0] to convert from &b[ret] to ret, and return.
|
|
||||||
SUBQ DX, DI
|
|
||||||
MOVQ DI, ret+56(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
cmp1:
|
|
||||||
// In the slices' tail, compare 1 byte at a time.
|
|
||||||
CMPQ CX, $0
|
|
||||||
JEQ matchLenEnd
|
|
||||||
MOVB (SI), AX
|
|
||||||
MOVB (DI), BX
|
|
||||||
CMPB AX, BX
|
|
||||||
JNE matchLenEnd
|
|
||||||
ADDQ $1, SI
|
|
||||||
ADDQ $1, DI
|
|
||||||
SUBQ $1, CX
|
|
||||||
JMP cmp1
|
|
||||||
|
|
||||||
matchLenEnd:
|
|
||||||
// Subtract off &b[0] to convert from &b[ret] to ret, and return.
|
|
||||||
SUBQ DX, DI
|
|
||||||
MOVQ DI, ret+56(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
// func histogram(b []byte, h []int32)
|
|
||||||
TEXT ·histogram(SB), 4, $0
|
|
||||||
MOVQ b+0(FP), SI // SI: &b
|
|
||||||
MOVQ b_len+8(FP), R9 // R9: len(b)
|
|
||||||
MOVQ h+24(FP), DI // DI: Histogram
|
|
||||||
MOVQ R9, R8
|
|
||||||
SHRQ $3, R8
|
|
||||||
JZ hist1
|
|
||||||
XORQ R11, R11
|
|
||||||
|
|
||||||
loop_hist8:
|
|
||||||
MOVQ (SI), R10
|
|
||||||
|
|
||||||
MOVB R10, R11
|
|
||||||
INCL (DI)(R11*4)
|
|
||||||
SHRQ $8, R10
|
|
||||||
|
|
||||||
MOVB R10, R11
|
|
||||||
INCL (DI)(R11*4)
|
|
||||||
SHRQ $8, R10
|
|
||||||
|
|
||||||
MOVB R10, R11
|
|
||||||
INCL (DI)(R11*4)
|
|
||||||
SHRQ $8, R10
|
|
||||||
|
|
||||||
MOVB R10, R11
|
|
||||||
INCL (DI)(R11*4)
|
|
||||||
SHRQ $8, R10
|
|
||||||
|
|
||||||
MOVB R10, R11
|
|
||||||
INCL (DI)(R11*4)
|
|
||||||
SHRQ $8, R10
|
|
||||||
|
|
||||||
MOVB R10, R11
|
|
||||||
INCL (DI)(R11*4)
|
|
||||||
SHRQ $8, R10
|
|
||||||
|
|
||||||
MOVB R10, R11
|
|
||||||
INCL (DI)(R11*4)
|
|
||||||
SHRQ $8, R10
|
|
||||||
|
|
||||||
INCL (DI)(R10*4)
|
|
||||||
|
|
||||||
ADDQ $8, SI
|
|
||||||
DECQ R8
|
|
||||||
JNZ loop_hist8
|
|
||||||
|
|
||||||
hist1:
|
|
||||||
ANDQ $7, R9
|
|
||||||
JZ end_hist
|
|
||||||
XORQ R10, R10
|
|
||||||
|
|
||||||
loop_hist1:
|
|
||||||
MOVB (SI), R10
|
|
||||||
INCL (DI)(R10*4)
|
|
||||||
INCQ SI
|
|
||||||
DECQ R9
|
|
||||||
JNZ loop_hist1
|
|
||||||
|
|
||||||
end_hist:
|
|
||||||
RET
|
|
|
@ -1,35 +0,0 @@
|
||||||
//+build !amd64 noasm appengine
|
|
||||||
|
|
||||||
// Copyright 2015, Klaus Post, see LICENSE for details.
|
|
||||||
|
|
||||||
package flate
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
useSSE42 = false
|
|
||||||
}
|
|
||||||
|
|
||||||
// crc32sse should never be called.
|
|
||||||
func crc32sse(a []byte) uint32 {
|
|
||||||
panic("no assembler")
|
|
||||||
}
|
|
||||||
|
|
||||||
// crc32sseAll should never be called.
|
|
||||||
func crc32sseAll(a []byte, dst []uint32) {
|
|
||||||
panic("no assembler")
|
|
||||||
}
|
|
||||||
|
|
||||||
// matchLenSSE4 should never be called.
|
|
||||||
func matchLenSSE4(a, b []byte, max int) int {
|
|
||||||
panic("no assembler")
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// histogram accumulates a histogram of b in h.
|
|
||||||
//
|
|
||||||
// len(h) must be >= 256, and h's elements must be all zeroes.
|
|
||||||
func histogram(b []byte, h []int32) {
|
|
||||||
h = h[:256]
|
|
||||||
for _, t := range b {
|
|
||||||
h[t]++
|
|
||||||
}
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,257 @@
|
||||||
|
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
|
||||||
|
// Modified for deflate by Klaus Post (c) 2015.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package flate
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math/bits"
|
||||||
|
)
|
||||||
|
|
||||||
|
type fastEnc interface {
|
||||||
|
Encode(dst *tokens, src []byte)
|
||||||
|
Reset()
|
||||||
|
}
|
||||||
|
|
||||||
|
func newFastEnc(level int) fastEnc {
|
||||||
|
switch level {
|
||||||
|
case 1:
|
||||||
|
return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}}
|
||||||
|
case 2:
|
||||||
|
return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}}
|
||||||
|
case 3:
|
||||||
|
return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}}
|
||||||
|
case 4:
|
||||||
|
return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}}
|
||||||
|
case 5:
|
||||||
|
return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}}
|
||||||
|
case 6:
|
||||||
|
return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}}
|
||||||
|
default:
|
||||||
|
panic("invalid level specified")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
tableBits = 16 // Bits used in the table
|
||||||
|
tableSize = 1 << tableBits // Size of the table
|
||||||
|
tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
|
||||||
|
baseMatchOffset = 1 // The smallest match offset
|
||||||
|
baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
|
||||||
|
maxMatchOffset = 1 << 15 // The largest match offset
|
||||||
|
|
||||||
|
bTableBits = 18 // Bits used in the big tables
|
||||||
|
bTableSize = 1 << bTableBits // Size of the table
|
||||||
|
allocHistory = maxMatchOffset * 10 // Size to preallocate for history.
|
||||||
|
bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize // Reset the buffer offset when reaching this.
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
prime3bytes = 506832829
|
||||||
|
prime4bytes = 2654435761
|
||||||
|
prime5bytes = 889523592379
|
||||||
|
prime6bytes = 227718039650203
|
||||||
|
prime7bytes = 58295818150454627
|
||||||
|
prime8bytes = 0xcf1bbcdcb7a56463
|
||||||
|
)
|
||||||
|
|
||||||
|
func load32(b []byte, i int) uint32 {
|
||||||
|
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
|
||||||
|
b = b[i:]
|
||||||
|
b = b[:4]
|
||||||
|
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
|
||||||
|
}
|
||||||
|
|
||||||
|
func load64(b []byte, i int) uint64 {
|
||||||
|
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
|
||||||
|
b = b[i:]
|
||||||
|
b = b[:8]
|
||||||
|
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
|
||||||
|
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
|
||||||
|
}
|
||||||
|
|
||||||
|
func load3232(b []byte, i int32) uint32 {
|
||||||
|
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
|
||||||
|
b = b[i:]
|
||||||
|
b = b[:4]
|
||||||
|
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
|
||||||
|
}
|
||||||
|
|
||||||
|
func load6432(b []byte, i int32) uint64 {
|
||||||
|
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
|
||||||
|
b = b[i:]
|
||||||
|
b = b[:8]
|
||||||
|
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
|
||||||
|
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
|
||||||
|
}
|
||||||
|
|
||||||
|
func hash(u uint32) uint32 {
|
||||||
|
return (u * 0x1e35a7bd) >> tableShift
|
||||||
|
}
|
||||||
|
|
||||||
|
type tableEntry struct {
|
||||||
|
val uint32
|
||||||
|
offset int32
|
||||||
|
}
|
||||||
|
|
||||||
|
// fastGen maintains the table for matches,
|
||||||
|
// and the previous byte block for level 2.
|
||||||
|
// This is the generic implementation.
|
||||||
|
type fastGen struct {
|
||||||
|
hist []byte
|
||||||
|
cur int32
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *fastGen) addBlock(src []byte) int32 {
|
||||||
|
// check if we have space already
|
||||||
|
if len(e.hist)+len(src) > cap(e.hist) {
|
||||||
|
if cap(e.hist) == 0 {
|
||||||
|
e.hist = make([]byte, 0, allocHistory)
|
||||||
|
} else {
|
||||||
|
if cap(e.hist) < maxMatchOffset*2 {
|
||||||
|
panic("unexpected buffer size")
|
||||||
|
}
|
||||||
|
// Move down
|
||||||
|
offset := int32(len(e.hist)) - maxMatchOffset
|
||||||
|
copy(e.hist[0:maxMatchOffset], e.hist[offset:])
|
||||||
|
e.cur += offset
|
||||||
|
e.hist = e.hist[:maxMatchOffset]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s := int32(len(e.hist))
|
||||||
|
e.hist = append(e.hist, src...)
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// hash4 returns the hash of u to fit in a hash table with h bits.
|
||||||
|
// Preferably h should be a constant and should always be <32.
|
||||||
|
func hash4u(u uint32, h uint8) uint32 {
|
||||||
|
return (u * prime4bytes) >> ((32 - h) & 31)
|
||||||
|
}
|
||||||
|
|
||||||
|
type tableEntryPrev struct {
|
||||||
|
Cur tableEntry
|
||||||
|
Prev tableEntry
|
||||||
|
}
|
||||||
|
|
||||||
|
// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
|
||||||
|
// Preferably h should be a constant and should always be <32.
|
||||||
|
func hash4x64(u uint64, h uint8) uint32 {
|
||||||
|
return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
|
||||||
|
}
|
||||||
|
|
||||||
|
// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
|
||||||
|
// Preferably h should be a constant and should always be <64.
|
||||||
|
func hash7(u uint64, h uint8) uint32 {
|
||||||
|
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
|
||||||
|
}
|
||||||
|
|
||||||
|
// hash8 returns the hash of u to fit in a hash table with h bits.
|
||||||
|
// Preferably h should be a constant and should always be <64.
|
||||||
|
func hash8(u uint64, h uint8) uint32 {
|
||||||
|
return uint32((u * prime8bytes) >> ((64 - h) & 63))
|
||||||
|
}
|
||||||
|
|
||||||
|
// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
|
||||||
|
// Preferably h should be a constant and should always be <64.
|
||||||
|
func hash6(u uint64, h uint8) uint32 {
|
||||||
|
return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
|
||||||
|
}
|
||||||
|
|
||||||
|
// matchlen will return the match length between offsets and t in src.
|
||||||
|
// The maximum length returned is maxMatchLength - 4.
|
||||||
|
// It is assumed that s > t, that t >=0 and s < len(src).
|
||||||
|
func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
|
||||||
|
if debugDecode {
|
||||||
|
if t >= s {
|
||||||
|
panic(fmt.Sprint("t >=s:", t, s))
|
||||||
|
}
|
||||||
|
if int(s) >= len(src) {
|
||||||
|
panic(fmt.Sprint("s >= len(src):", s, len(src)))
|
||||||
|
}
|
||||||
|
if t < 0 {
|
||||||
|
panic(fmt.Sprint("t < 0:", t))
|
||||||
|
}
|
||||||
|
if s-t > maxMatchOffset {
|
||||||
|
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s1 := int(s) + maxMatchLength - 4
|
||||||
|
if s1 > len(src) {
|
||||||
|
s1 = len(src)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extend the match to be as long as possible.
|
||||||
|
return int32(matchLen(src[s:s1], src[t:]))
|
||||||
|
}
|
||||||
|
|
||||||
|
// matchlenLong will return the match length between offsets and t in src.
|
||||||
|
// It is assumed that s > t, that t >=0 and s < len(src).
|
||||||
|
func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
|
||||||
|
if debugDecode {
|
||||||
|
if t >= s {
|
||||||
|
panic(fmt.Sprint("t >=s:", t, s))
|
||||||
|
}
|
||||||
|
if int(s) >= len(src) {
|
||||||
|
panic(fmt.Sprint("s >= len(src):", s, len(src)))
|
||||||
|
}
|
||||||
|
if t < 0 {
|
||||||
|
panic(fmt.Sprint("t < 0:", t))
|
||||||
|
}
|
||||||
|
if s-t > maxMatchOffset {
|
||||||
|
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Extend the match to be as long as possible.
|
||||||
|
return int32(matchLen(src[s:], src[t:]))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset the encoding table.
|
||||||
|
func (e *fastGen) Reset() {
|
||||||
|
if cap(e.hist) < int(maxMatchOffset*8) {
|
||||||
|
l := maxMatchOffset * 8
|
||||||
|
// Make it at least 1MB.
|
||||||
|
if l < 1<<20 {
|
||||||
|
l = 1 << 20
|
||||||
|
}
|
||||||
|
e.hist = make([]byte, 0, l)
|
||||||
|
}
|
||||||
|
// We offset current position so everything will be out of reach
|
||||||
|
e.cur += maxMatchOffset + int32(len(e.hist))
|
||||||
|
e.hist = e.hist[:0]
|
||||||
|
}
|
||||||
|
|
||||||
|
// matchLen returns the maximum length.
|
||||||
|
// 'a' must be the shortest of the two.
|
||||||
|
func matchLen(a, b []byte) int {
|
||||||
|
b = b[:len(a)]
|
||||||
|
var checked int
|
||||||
|
if len(a) > 4 {
|
||||||
|
// Try 4 bytes first
|
||||||
|
if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
|
||||||
|
return bits.TrailingZeros32(diff) >> 3
|
||||||
|
}
|
||||||
|
// Switch to 8 byte matching.
|
||||||
|
checked = 4
|
||||||
|
a = a[4:]
|
||||||
|
b = b[4:]
|
||||||
|
for len(a) >= 8 {
|
||||||
|
b = b[:len(a)]
|
||||||
|
if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
|
||||||
|
return checked + (bits.TrailingZeros64(diff) >> 3)
|
||||||
|
}
|
||||||
|
checked += 8
|
||||||
|
a = a[8:]
|
||||||
|
b = b[8:]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b = b[:len(a)]
|
||||||
|
for i := range a {
|
||||||
|
if a[i] != b[i] {
|
||||||
|
return int(i) + checked
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return len(a) + checked
|
||||||
|
}
|
|
@ -35,7 +35,7 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
// The number of extra bits needed by length code X - LENGTH_CODES_START.
|
// The number of extra bits needed by length code X - LENGTH_CODES_START.
|
||||||
var lengthExtraBits = []int8{
|
var lengthExtraBits = [32]int8{
|
||||||
/* 257 */ 0, 0, 0,
|
/* 257 */ 0, 0, 0,
|
||||||
/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
|
/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
|
||||||
/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
|
/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
|
||||||
|
@ -43,14 +43,14 @@ var lengthExtraBits = []int8{
|
||||||
}
|
}
|
||||||
|
|
||||||
// The length indicated by length code X - LENGTH_CODES_START.
|
// The length indicated by length code X - LENGTH_CODES_START.
|
||||||
var lengthBase = []uint32{
|
var lengthBase = [32]uint8{
|
||||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
|
||||||
12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
|
12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
|
||||||
64, 80, 96, 112, 128, 160, 192, 224, 255,
|
64, 80, 96, 112, 128, 160, 192, 224, 255,
|
||||||
}
|
}
|
||||||
|
|
||||||
// offset code word extra bits.
|
// offset code word extra bits.
|
||||||
var offsetExtraBits = []int8{
|
var offsetExtraBits = [64]int8{
|
||||||
0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
|
0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
|
||||||
4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
|
4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
|
||||||
9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
|
9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
|
||||||
|
@ -58,7 +58,7 @@ var offsetExtraBits = []int8{
|
||||||
14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
|
14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
|
||||||
}
|
}
|
||||||
|
|
||||||
var offsetBase = []uint32{
|
var offsetBase = [64]uint32{
|
||||||
/* normal deflate */
|
/* normal deflate */
|
||||||
0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
|
0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
|
||||||
0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
|
0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
|
||||||
|
@ -85,26 +85,48 @@ type huffmanBitWriter struct {
|
||||||
// Data waiting to be written is bytes[0:nbytes]
|
// Data waiting to be written is bytes[0:nbytes]
|
||||||
// and then the low nbits of bits.
|
// and then the low nbits of bits.
|
||||||
bits uint64
|
bits uint64
|
||||||
nbits uint
|
nbits uint16
|
||||||
bytes [bufferSize]byte
|
nbytes uint8
|
||||||
codegenFreq [codegenCodeCount]int32
|
|
||||||
nbytes int
|
|
||||||
literalFreq []int32
|
|
||||||
offsetFreq []int32
|
|
||||||
codegen []uint8
|
|
||||||
literalEncoding *huffmanEncoder
|
literalEncoding *huffmanEncoder
|
||||||
offsetEncoding *huffmanEncoder
|
offsetEncoding *huffmanEncoder
|
||||||
codegenEncoding *huffmanEncoder
|
codegenEncoding *huffmanEncoder
|
||||||
err error
|
err error
|
||||||
|
lastHeader int
|
||||||
|
// Set between 0 (reused block can be up to 2x the size)
|
||||||
|
logReusePenalty uint
|
||||||
|
lastHuffMan bool
|
||||||
|
bytes [256]byte
|
||||||
|
literalFreq [lengthCodesStart + 32]uint16
|
||||||
|
offsetFreq [32]uint16
|
||||||
|
codegenFreq [codegenCodeCount]uint16
|
||||||
|
|
||||||
|
// codegen must have an extra space for the final symbol.
|
||||||
|
codegen [literalCount + offsetCodeCount + 1]uint8
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Huffman reuse.
|
||||||
|
//
|
||||||
|
// The huffmanBitWriter supports reusing huffman tables and thereby combining block sections.
|
||||||
|
//
|
||||||
|
// This is controlled by several variables:
|
||||||
|
//
|
||||||
|
// If lastHeader is non-zero the Huffman table can be reused.
|
||||||
|
// This also indicates that a Huffman table has been generated that can output all
|
||||||
|
// possible symbols.
|
||||||
|
// It also indicates that an EOB has not yet been emitted, so if a new table is generated
|
||||||
|
// an EOB with the previous table must be written.
|
||||||
|
//
|
||||||
|
// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid.
|
||||||
|
//
|
||||||
|
// An incoming block estimates the output size of a new ('fresh') table by calculating the
|
||||||
|
// optimal size and adding a penalty in 'logReusePenalty'.
|
||||||
|
// A Huffman table is not optimal, which is why we add a penalty, and generating a new table
|
||||||
|
// is slower both for compression and decompression.
|
||||||
|
|
||||||
func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
|
func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
|
||||||
return &huffmanBitWriter{
|
return &huffmanBitWriter{
|
||||||
writer: w,
|
writer: w,
|
||||||
literalFreq: make([]int32, maxNumLit),
|
literalEncoding: newHuffmanEncoder(literalCount),
|
||||||
offsetFreq: make([]int32, offsetCodeCount),
|
|
||||||
codegen: make([]uint8, maxNumLit+offsetCodeCount+1),
|
|
||||||
literalEncoding: newHuffmanEncoder(maxNumLit),
|
|
||||||
codegenEncoding: newHuffmanEncoder(codegenCodeCount),
|
codegenEncoding: newHuffmanEncoder(codegenCodeCount),
|
||||||
offsetEncoding: newHuffmanEncoder(offsetCodeCount),
|
offsetEncoding: newHuffmanEncoder(offsetCodeCount),
|
||||||
}
|
}
|
||||||
|
@ -113,7 +135,42 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
|
||||||
func (w *huffmanBitWriter) reset(writer io.Writer) {
|
func (w *huffmanBitWriter) reset(writer io.Writer) {
|
||||||
w.writer = writer
|
w.writer = writer
|
||||||
w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
|
w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
|
||||||
w.bytes = [bufferSize]byte{}
|
w.bytes = [256]byte{}
|
||||||
|
w.lastHeader = 0
|
||||||
|
w.lastHuffMan = false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *huffmanBitWriter) canReuse(t *tokens) (offsets, lits bool) {
|
||||||
|
offsets, lits = true, true
|
||||||
|
a := t.offHist[:offsetCodeCount]
|
||||||
|
b := w.offsetFreq[:len(a)]
|
||||||
|
for i := range a {
|
||||||
|
if b[i] == 0 && a[i] != 0 {
|
||||||
|
offsets = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
a = t.extraHist[:literalCount-256]
|
||||||
|
b = w.literalFreq[256:literalCount]
|
||||||
|
b = b[:len(a)]
|
||||||
|
for i := range a {
|
||||||
|
if b[i] == 0 && a[i] != 0 {
|
||||||
|
lits = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if lits {
|
||||||
|
a = t.litHist[:]
|
||||||
|
b = w.literalFreq[:len(a)]
|
||||||
|
for i := range a {
|
||||||
|
if b[i] == 0 && a[i] != 0 {
|
||||||
|
lits = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *huffmanBitWriter) flush() {
|
func (w *huffmanBitWriter) flush() {
|
||||||
|
@ -144,30 +201,11 @@ func (w *huffmanBitWriter) write(b []byte) {
|
||||||
_, w.err = w.writer.Write(b)
|
_, w.err = w.writer.Write(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
|
func (w *huffmanBitWriter) writeBits(b int32, nb uint16) {
|
||||||
if w.err != nil {
|
w.bits |= uint64(b) << (w.nbits & 63)
|
||||||
return
|
|
||||||
}
|
|
||||||
w.bits |= uint64(b) << w.nbits
|
|
||||||
w.nbits += nb
|
w.nbits += nb
|
||||||
if w.nbits >= 48 {
|
if w.nbits >= 48 {
|
||||||
bits := w.bits
|
w.writeOutBits()
|
||||||
w.bits >>= 48
|
|
||||||
w.nbits -= 48
|
|
||||||
n := w.nbytes
|
|
||||||
bytes := w.bytes[n : n+6]
|
|
||||||
bytes[0] = byte(bits)
|
|
||||||
bytes[1] = byte(bits >> 8)
|
|
||||||
bytes[2] = byte(bits >> 16)
|
|
||||||
bytes[3] = byte(bits >> 24)
|
|
||||||
bytes[4] = byte(bits >> 32)
|
|
||||||
bytes[5] = byte(bits >> 40)
|
|
||||||
n += 6
|
|
||||||
if n >= bufferFlushSize {
|
|
||||||
w.write(w.bytes[:n])
|
|
||||||
n = 0
|
|
||||||
}
|
|
||||||
w.nbytes = n
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -213,7 +251,7 @@ func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litE
|
||||||
// a copy of the frequencies, and as the place where we put the result.
|
// a copy of the frequencies, and as the place where we put the result.
|
||||||
// This is fine because the output is always shorter than the input used
|
// This is fine because the output is always shorter than the input used
|
||||||
// so far.
|
// so far.
|
||||||
codegen := w.codegen // cache
|
codegen := w.codegen[:] // cache
|
||||||
// Copy the concatenated code sizes to codegen. Put a marker at the end.
|
// Copy the concatenated code sizes to codegen. Put a marker at the end.
|
||||||
cgnl := codegen[:numLiterals]
|
cgnl := codegen[:numLiterals]
|
||||||
for i := range cgnl {
|
for i := range cgnl {
|
||||||
|
@ -292,30 +330,54 @@ func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litE
|
||||||
codegen[outIndex] = badCode
|
codegen[outIndex] = badCode
|
||||||
}
|
}
|
||||||
|
|
||||||
// dynamicSize returns the size of dynamically encoded data in bits.
|
func (w *huffmanBitWriter) codegens() int {
|
||||||
func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
|
numCodegens := len(w.codegenFreq)
|
||||||
|
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
|
||||||
|
numCodegens--
|
||||||
|
}
|
||||||
|
return numCodegens
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *huffmanBitWriter) headerSize() (size, numCodegens int) {
|
||||||
numCodegens = len(w.codegenFreq)
|
numCodegens = len(w.codegenFreq)
|
||||||
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
|
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
|
||||||
numCodegens--
|
numCodegens--
|
||||||
}
|
}
|
||||||
header := 3 + 5 + 5 + 4 + (3 * numCodegens) +
|
return 3 + 5 + 5 + 4 + (3 * numCodegens) +
|
||||||
w.codegenEncoding.bitLength(w.codegenFreq[:]) +
|
w.codegenEncoding.bitLength(w.codegenFreq[:]) +
|
||||||
int(w.codegenFreq[16])*2 +
|
int(w.codegenFreq[16])*2 +
|
||||||
int(w.codegenFreq[17])*3 +
|
int(w.codegenFreq[17])*3 +
|
||||||
int(w.codegenFreq[18])*7
|
int(w.codegenFreq[18])*7, numCodegens
|
||||||
size = header +
|
}
|
||||||
litEnc.bitLength(w.literalFreq) +
|
|
||||||
offEnc.bitLength(w.offsetFreq) +
|
|
||||||
extraBits
|
|
||||||
|
|
||||||
|
// dynamicSize returns the size of dynamically encoded data in bits.
|
||||||
|
func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
|
||||||
|
header, numCodegens := w.headerSize()
|
||||||
|
size = header +
|
||||||
|
litEnc.bitLength(w.literalFreq[:]) +
|
||||||
|
offEnc.bitLength(w.offsetFreq[:]) +
|
||||||
|
extraBits
|
||||||
return size, numCodegens
|
return size, numCodegens
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// extraBitSize will return the number of bits that will be written
|
||||||
|
// as "extra" bits on matches.
|
||||||
|
func (w *huffmanBitWriter) extraBitSize() int {
|
||||||
|
total := 0
|
||||||
|
for i, n := range w.literalFreq[257:literalCount] {
|
||||||
|
total += int(n) * int(lengthExtraBits[i&31])
|
||||||
|
}
|
||||||
|
for i, n := range w.offsetFreq[:offsetCodeCount] {
|
||||||
|
total += int(n) * int(offsetExtraBits[i&31])
|
||||||
|
}
|
||||||
|
return total
|
||||||
|
}
|
||||||
|
|
||||||
// fixedSize returns the size of dynamically encoded data in bits.
|
// fixedSize returns the size of dynamically encoded data in bits.
|
||||||
func (w *huffmanBitWriter) fixedSize(extraBits int) int {
|
func (w *huffmanBitWriter) fixedSize(extraBits int) int {
|
||||||
return 3 +
|
return 3 +
|
||||||
fixedLiteralEncoding.bitLength(w.literalFreq) +
|
fixedLiteralEncoding.bitLength(w.literalFreq[:]) +
|
||||||
fixedOffsetEncoding.bitLength(w.offsetFreq) +
|
fixedOffsetEncoding.bitLength(w.offsetFreq[:]) +
|
||||||
extraBits
|
extraBits
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -333,32 +395,38 @@ func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *huffmanBitWriter) writeCode(c hcode) {
|
func (w *huffmanBitWriter) writeCode(c hcode) {
|
||||||
if w.err != nil {
|
// The function does not get inlined if we "& 63" the shift.
|
||||||
return
|
|
||||||
}
|
|
||||||
w.bits |= uint64(c.code) << w.nbits
|
w.bits |= uint64(c.code) << w.nbits
|
||||||
w.nbits += uint(c.len)
|
w.nbits += c.len
|
||||||
if w.nbits >= 48 {
|
if w.nbits >= 48 {
|
||||||
bits := w.bits
|
w.writeOutBits()
|
||||||
w.bits >>= 48
|
|
||||||
w.nbits -= 48
|
|
||||||
n := w.nbytes
|
|
||||||
bytes := w.bytes[n : n+6]
|
|
||||||
bytes[0] = byte(bits)
|
|
||||||
bytes[1] = byte(bits >> 8)
|
|
||||||
bytes[2] = byte(bits >> 16)
|
|
||||||
bytes[3] = byte(bits >> 24)
|
|
||||||
bytes[4] = byte(bits >> 32)
|
|
||||||
bytes[5] = byte(bits >> 40)
|
|
||||||
n += 6
|
|
||||||
if n >= bufferFlushSize {
|
|
||||||
w.write(w.bytes[:n])
|
|
||||||
n = 0
|
|
||||||
}
|
|
||||||
w.nbytes = n
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// writeOutBits will write bits to the buffer.
|
||||||
|
func (w *huffmanBitWriter) writeOutBits() {
|
||||||
|
bits := w.bits
|
||||||
|
w.bits >>= 48
|
||||||
|
w.nbits -= 48
|
||||||
|
n := w.nbytes
|
||||||
|
w.bytes[n] = byte(bits)
|
||||||
|
w.bytes[n+1] = byte(bits >> 8)
|
||||||
|
w.bytes[n+2] = byte(bits >> 16)
|
||||||
|
w.bytes[n+3] = byte(bits >> 24)
|
||||||
|
w.bytes[n+4] = byte(bits >> 32)
|
||||||
|
w.bytes[n+5] = byte(bits >> 40)
|
||||||
|
n += 6
|
||||||
|
if n >= bufferFlushSize {
|
||||||
|
if w.err != nil {
|
||||||
|
n = 0
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.write(w.bytes[:n])
|
||||||
|
n = 0
|
||||||
|
}
|
||||||
|
w.nbytes = n
|
||||||
|
}
|
||||||
|
|
||||||
// Write the header of a dynamic Huffman block to the output stream.
|
// Write the header of a dynamic Huffman block to the output stream.
|
||||||
//
|
//
|
||||||
// numLiterals The number of literals specified in codegen
|
// numLiterals The number of literals specified in codegen
|
||||||
|
@ -412,6 +480,11 @@ func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
|
||||||
if w.err != nil {
|
if w.err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if w.lastHeader > 0 {
|
||||||
|
// We owe an EOB
|
||||||
|
w.writeCode(w.literalEncoding.codes[endBlockMarker])
|
||||||
|
w.lastHeader = 0
|
||||||
|
}
|
||||||
var flag int32
|
var flag int32
|
||||||
if isEof {
|
if isEof {
|
||||||
flag = 1
|
flag = 1
|
||||||
|
@ -426,6 +499,12 @@ func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
|
||||||
if w.err != nil {
|
if w.err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if w.lastHeader > 0 {
|
||||||
|
// We owe an EOB
|
||||||
|
w.writeCode(w.literalEncoding.codes[endBlockMarker])
|
||||||
|
w.lastHeader = 0
|
||||||
|
}
|
||||||
|
|
||||||
// Indicate that we are a fixed Huffman block
|
// Indicate that we are a fixed Huffman block
|
||||||
var value int32 = 2
|
var value int32 = 2
|
||||||
if isEof {
|
if isEof {
|
||||||
|
@ -439,29 +518,23 @@ func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
|
||||||
// is larger than the original bytes, the data will be written as a
|
// is larger than the original bytes, the data will be written as a
|
||||||
// stored block.
|
// stored block.
|
||||||
// If the input is nil, the tokens will always be Huffman encoded.
|
// If the input is nil, the tokens will always be Huffman encoded.
|
||||||
func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
|
func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
|
||||||
if w.err != nil {
|
if w.err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
tokens = append(tokens, endBlockMarker)
|
tokens.AddEOB()
|
||||||
numLiterals, numOffsets := w.indexTokens(tokens)
|
if w.lastHeader > 0 {
|
||||||
|
// We owe an EOB
|
||||||
|
w.writeCode(w.literalEncoding.codes[endBlockMarker])
|
||||||
|
w.lastHeader = 0
|
||||||
|
}
|
||||||
|
numLiterals, numOffsets := w.indexTokens(tokens, false)
|
||||||
|
w.generate(tokens)
|
||||||
var extraBits int
|
var extraBits int
|
||||||
storedSize, storable := w.storedSize(input)
|
storedSize, storable := w.storedSize(input)
|
||||||
if storable {
|
if storable {
|
||||||
// We only bother calculating the costs of the extra bits required by
|
extraBits = w.extraBitSize()
|
||||||
// the length of offset fields (which will be the same for both fixed
|
|
||||||
// and dynamic encoding), if we need to compare those two encodings
|
|
||||||
// against stored encoding.
|
|
||||||
for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
|
|
||||||
// First eight length codes have extra size = 0.
|
|
||||||
extraBits += int(w.literalFreq[lengthCode]) * int(lengthExtraBits[lengthCode-lengthCodesStart])
|
|
||||||
}
|
|
||||||
for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
|
|
||||||
// First four offset codes have extra size = 0.
|
|
||||||
extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode])
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Figure out smallest code.
|
// Figure out smallest code.
|
||||||
|
@ -500,7 +573,7 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write the tokens.
|
// Write the tokens.
|
||||||
w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes)
|
w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes)
|
||||||
}
|
}
|
||||||
|
|
||||||
// writeBlockDynamic encodes a block using a dynamic Huffman table.
|
// writeBlockDynamic encodes a block using a dynamic Huffman table.
|
||||||
|
@ -508,57 +581,103 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
|
||||||
// histogram distribution.
|
// histogram distribution.
|
||||||
// If input is supplied and the compression savings are below 1/16th of the
|
// If input is supplied and the compression savings are below 1/16th of the
|
||||||
// input size the block is stored.
|
// input size the block is stored.
|
||||||
func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) {
|
func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) {
|
||||||
if w.err != nil {
|
if w.err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
tokens = append(tokens, endBlockMarker)
|
sync = sync || eof
|
||||||
numLiterals, numOffsets := w.indexTokens(tokens)
|
if sync {
|
||||||
|
tokens.AddEOB()
|
||||||
// Generate codegen and codegenFrequencies, which indicates how to encode
|
|
||||||
// the literalEncoding and the offsetEncoding.
|
|
||||||
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
|
|
||||||
w.codegenEncoding.generate(w.codegenFreq[:], 7)
|
|
||||||
size, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, 0)
|
|
||||||
|
|
||||||
// Store bytes, if we don't get a reasonable improvement.
|
|
||||||
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
|
|
||||||
w.writeStoredHeader(len(input), eof)
|
|
||||||
w.writeBytes(input)
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write Huffman table.
|
// We cannot reuse pure huffman table.
|
||||||
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
|
if w.lastHuffMan && w.lastHeader > 0 {
|
||||||
|
// We will not try to reuse.
|
||||||
|
w.writeCode(w.literalEncoding.codes[endBlockMarker])
|
||||||
|
w.lastHeader = 0
|
||||||
|
w.lastHuffMan = false
|
||||||
|
}
|
||||||
|
if !sync {
|
||||||
|
tokens.Fill()
|
||||||
|
}
|
||||||
|
numLiterals, numOffsets := w.indexTokens(tokens, !sync)
|
||||||
|
|
||||||
|
var size int
|
||||||
|
// Check if we should reuse.
|
||||||
|
if w.lastHeader > 0 {
|
||||||
|
// Estimate size for using a new table
|
||||||
|
newSize := w.lastHeader + tokens.EstimatedBits()
|
||||||
|
|
||||||
|
// The estimated size is calculated as an optimal table.
|
||||||
|
// We add a penalty to make it more realistic and re-use a bit more.
|
||||||
|
newSize += newSize >> (w.logReusePenalty & 31)
|
||||||
|
extra := w.extraBitSize()
|
||||||
|
reuseSize, _ := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extra)
|
||||||
|
|
||||||
|
// Check if a new table is better.
|
||||||
|
if newSize < reuseSize {
|
||||||
|
// Write the EOB we owe.
|
||||||
|
w.writeCode(w.literalEncoding.codes[endBlockMarker])
|
||||||
|
size = newSize
|
||||||
|
w.lastHeader = 0
|
||||||
|
} else {
|
||||||
|
size = reuseSize
|
||||||
|
}
|
||||||
|
// Check if we get a reasonable size decrease.
|
||||||
|
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
|
||||||
|
w.writeStoredHeader(len(input), eof)
|
||||||
|
w.writeBytes(input)
|
||||||
|
w.lastHeader = 0
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We want a new block/table
|
||||||
|
if w.lastHeader == 0 {
|
||||||
|
w.generate(tokens)
|
||||||
|
// Generate codegen and codegenFrequencies, which indicates how to encode
|
||||||
|
// the literalEncoding and the offsetEncoding.
|
||||||
|
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
|
||||||
|
w.codegenEncoding.generate(w.codegenFreq[:], 7)
|
||||||
|
var numCodegens int
|
||||||
|
size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, w.extraBitSize())
|
||||||
|
// Store bytes, if we don't get a reasonable improvement.
|
||||||
|
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
|
||||||
|
w.writeStoredHeader(len(input), eof)
|
||||||
|
w.writeBytes(input)
|
||||||
|
w.lastHeader = 0
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write Huffman table.
|
||||||
|
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
|
||||||
|
w.lastHeader, _ = w.headerSize()
|
||||||
|
w.lastHuffMan = false
|
||||||
|
}
|
||||||
|
|
||||||
|
if sync {
|
||||||
|
w.lastHeader = 0
|
||||||
|
}
|
||||||
// Write the tokens.
|
// Write the tokens.
|
||||||
w.writeTokens(tokens, w.literalEncoding.codes, w.offsetEncoding.codes)
|
w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes)
|
||||||
}
|
}
|
||||||
|
|
||||||
// indexTokens indexes a slice of tokens, and updates
|
// indexTokens indexes a slice of tokens, and updates
|
||||||
// literalFreq and offsetFreq, and generates literalEncoding
|
// literalFreq and offsetFreq, and generates literalEncoding
|
||||||
// and offsetEncoding.
|
// and offsetEncoding.
|
||||||
// The number of literal and offset tokens is returned.
|
// The number of literal and offset tokens is returned.
|
||||||
func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets int) {
|
func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) {
|
||||||
for i := range w.literalFreq {
|
copy(w.literalFreq[:], t.litHist[:])
|
||||||
w.literalFreq[i] = 0
|
copy(w.literalFreq[256:], t.extraHist[:])
|
||||||
}
|
copy(w.offsetFreq[:], t.offHist[:offsetCodeCount])
|
||||||
for i := range w.offsetFreq {
|
|
||||||
w.offsetFreq[i] = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, t := range tokens {
|
if t.n == 0 {
|
||||||
if t < matchType {
|
return
|
||||||
w.literalFreq[t.literal()]++
|
}
|
||||||
continue
|
if filled {
|
||||||
}
|
return maxNumLit, maxNumDist
|
||||||
length := t.length()
|
|
||||||
offset := t.offset()
|
|
||||||
w.literalFreq[lengthCodesStart+lengthCode(length)]++
|
|
||||||
w.offsetFreq[offsetCode(offset)]++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// get the number of literals
|
// get the number of literals
|
||||||
numLiterals = len(w.literalFreq)
|
numLiterals = len(w.literalFreq)
|
||||||
for w.literalFreq[numLiterals-1] == 0 {
|
for w.literalFreq[numLiterals-1] == 0 {
|
||||||
|
@ -575,41 +694,85 @@ func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets
|
||||||
w.offsetFreq[0] = 1
|
w.offsetFreq[0] = 1
|
||||||
numOffsets = 1
|
numOffsets = 1
|
||||||
}
|
}
|
||||||
w.literalEncoding.generate(w.literalFreq, 15)
|
|
||||||
w.offsetEncoding.generate(w.offsetFreq, 15)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (w *huffmanBitWriter) generate(t *tokens) {
|
||||||
|
w.literalEncoding.generate(w.literalFreq[:literalCount], 15)
|
||||||
|
w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15)
|
||||||
|
}
|
||||||
|
|
||||||
// writeTokens writes a slice of tokens to the output.
|
// writeTokens writes a slice of tokens to the output.
|
||||||
// codes for literal and offset encoding must be supplied.
|
// codes for literal and offset encoding must be supplied.
|
||||||
func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
|
func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
|
||||||
if w.err != nil {
|
if w.err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if len(tokens) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only last token should be endBlockMarker.
|
||||||
|
var deferEOB bool
|
||||||
|
if tokens[len(tokens)-1] == endBlockMarker {
|
||||||
|
tokens = tokens[:len(tokens)-1]
|
||||||
|
deferEOB = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create slices up to the next power of two to avoid bounds checks.
|
||||||
|
lits := leCodes[:256]
|
||||||
|
offs := oeCodes[:32]
|
||||||
|
lengths := leCodes[lengthCodesStart:]
|
||||||
|
lengths = lengths[:32]
|
||||||
for _, t := range tokens {
|
for _, t := range tokens {
|
||||||
if t < matchType {
|
if t < matchType {
|
||||||
w.writeCode(leCodes[t.literal()])
|
w.writeCode(lits[t.literal()])
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write the length
|
// Write the length
|
||||||
length := t.length()
|
length := t.length()
|
||||||
lengthCode := lengthCode(length)
|
lengthCode := lengthCode(length)
|
||||||
w.writeCode(leCodes[lengthCode+lengthCodesStart])
|
if false {
|
||||||
extraLengthBits := uint(lengthExtraBits[lengthCode])
|
w.writeCode(lengths[lengthCode&31])
|
||||||
|
} else {
|
||||||
|
// inlined
|
||||||
|
c := lengths[lengthCode&31]
|
||||||
|
w.bits |= uint64(c.code) << (w.nbits & 63)
|
||||||
|
w.nbits += c.len
|
||||||
|
if w.nbits >= 48 {
|
||||||
|
w.writeOutBits()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
extraLengthBits := uint16(lengthExtraBits[lengthCode&31])
|
||||||
if extraLengthBits > 0 {
|
if extraLengthBits > 0 {
|
||||||
extraLength := int32(length - lengthBase[lengthCode])
|
extraLength := int32(length - lengthBase[lengthCode&31])
|
||||||
w.writeBits(extraLength, extraLengthBits)
|
w.writeBits(extraLength, extraLengthBits)
|
||||||
}
|
}
|
||||||
// Write the offset
|
// Write the offset
|
||||||
offset := t.offset()
|
offset := t.offset()
|
||||||
offsetCode := offsetCode(offset)
|
offsetCode := offsetCode(offset)
|
||||||
w.writeCode(oeCodes[offsetCode])
|
if false {
|
||||||
extraOffsetBits := uint(offsetExtraBits[offsetCode])
|
w.writeCode(offs[offsetCode&31])
|
||||||
|
} else {
|
||||||
|
// inlined
|
||||||
|
c := offs[offsetCode&31]
|
||||||
|
w.bits |= uint64(c.code) << (w.nbits & 63)
|
||||||
|
w.nbits += c.len
|
||||||
|
if w.nbits >= 48 {
|
||||||
|
w.writeOutBits()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
extraOffsetBits := uint16(offsetExtraBits[offsetCode&63])
|
||||||
if extraOffsetBits > 0 {
|
if extraOffsetBits > 0 {
|
||||||
extraOffset := int32(offset - offsetBase[offsetCode])
|
extraOffset := int32(offset - offsetBase[offsetCode&63])
|
||||||
w.writeBits(extraOffset, extraOffsetBits)
|
w.writeBits(extraOffset, extraOffsetBits)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if deferEOB {
|
||||||
|
w.writeCode(leCodes[endBlockMarker])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// huffOffset is a static offset encoder used for huffman only encoding.
|
// huffOffset is a static offset encoder used for huffman only encoding.
|
||||||
|
@ -620,82 +783,99 @@ func init() {
|
||||||
w := newHuffmanBitWriter(nil)
|
w := newHuffmanBitWriter(nil)
|
||||||
w.offsetFreq[0] = 1
|
w.offsetFreq[0] = 1
|
||||||
huffOffset = newHuffmanEncoder(offsetCodeCount)
|
huffOffset = newHuffmanEncoder(offsetCodeCount)
|
||||||
huffOffset.generate(w.offsetFreq, 15)
|
huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15)
|
||||||
}
|
}
|
||||||
|
|
||||||
// writeBlockHuff encodes a block of bytes as either
|
// writeBlockHuff encodes a block of bytes as either
|
||||||
// Huffman encoded literals or uncompressed bytes if the
|
// Huffman encoded literals or uncompressed bytes if the
|
||||||
// results only gains very little from compression.
|
// results only gains very little from compression.
|
||||||
func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
|
func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
|
||||||
if w.err != nil {
|
if w.err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear histogram
|
// Clear histogram
|
||||||
for i := range w.literalFreq {
|
for i := range w.literalFreq[:] {
|
||||||
w.literalFreq[i] = 0
|
w.literalFreq[i] = 0
|
||||||
}
|
}
|
||||||
|
if !w.lastHuffMan {
|
||||||
|
for i := range w.offsetFreq[:] {
|
||||||
|
w.offsetFreq[i] = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Add everything as literals
|
// Add everything as literals
|
||||||
histogram(input, w.literalFreq)
|
estBits := histogramSize(input, w.literalFreq[:], !eof && !sync) + 15
|
||||||
|
|
||||||
w.literalFreq[endBlockMarker] = 1
|
|
||||||
|
|
||||||
const numLiterals = endBlockMarker + 1
|
|
||||||
const numOffsets = 1
|
|
||||||
|
|
||||||
w.literalEncoding.generate(w.literalFreq, 15)
|
|
||||||
|
|
||||||
// Figure out smallest code.
|
|
||||||
// Always use dynamic Huffman or Store
|
|
||||||
var numCodegens int
|
|
||||||
|
|
||||||
// Generate codegen and codegenFrequencies, which indicates how to encode
|
|
||||||
// the literalEncoding and the offsetEncoding.
|
|
||||||
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
|
|
||||||
w.codegenEncoding.generate(w.codegenFreq[:], 7)
|
|
||||||
size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0)
|
|
||||||
|
|
||||||
// Store bytes, if we don't get a reasonable improvement.
|
// Store bytes, if we don't get a reasonable improvement.
|
||||||
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
|
ssize, storable := w.storedSize(input)
|
||||||
|
if storable && ssize < (estBits+estBits>>4) {
|
||||||
w.writeStoredHeader(len(input), eof)
|
w.writeStoredHeader(len(input), eof)
|
||||||
w.writeBytes(input)
|
w.writeBytes(input)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Huffman.
|
if w.lastHeader > 0 {
|
||||||
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
|
size, _ := w.dynamicSize(w.literalEncoding, huffOffset, w.lastHeader)
|
||||||
|
estBits += estBits >> (w.logReusePenalty)
|
||||||
|
|
||||||
|
if estBits < size {
|
||||||
|
// We owe an EOB
|
||||||
|
w.writeCode(w.literalEncoding.codes[endBlockMarker])
|
||||||
|
w.lastHeader = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const numLiterals = endBlockMarker + 1
|
||||||
|
const numOffsets = 1
|
||||||
|
if w.lastHeader == 0 {
|
||||||
|
w.literalFreq[endBlockMarker] = 1
|
||||||
|
w.literalEncoding.generate(w.literalFreq[:numLiterals], 15)
|
||||||
|
|
||||||
|
// Generate codegen and codegenFrequencies, which indicates how to encode
|
||||||
|
// the literalEncoding and the offsetEncoding.
|
||||||
|
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
|
||||||
|
w.codegenEncoding.generate(w.codegenFreq[:], 7)
|
||||||
|
numCodegens := w.codegens()
|
||||||
|
|
||||||
|
// Huffman.
|
||||||
|
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
|
||||||
|
w.lastHuffMan = true
|
||||||
|
w.lastHeader, _ = w.headerSize()
|
||||||
|
}
|
||||||
|
|
||||||
encoding := w.literalEncoding.codes[:257]
|
encoding := w.literalEncoding.codes[:257]
|
||||||
n := w.nbytes
|
|
||||||
for _, t := range input {
|
for _, t := range input {
|
||||||
// Bitwriting inlined, ~30% speedup
|
// Bitwriting inlined, ~30% speedup
|
||||||
c := encoding[t]
|
c := encoding[t]
|
||||||
w.bits |= uint64(c.code) << w.nbits
|
w.bits |= uint64(c.code) << ((w.nbits) & 63)
|
||||||
w.nbits += uint(c.len)
|
w.nbits += c.len
|
||||||
if w.nbits < 48 {
|
if w.nbits >= 48 {
|
||||||
continue
|
bits := w.bits
|
||||||
|
w.bits >>= 48
|
||||||
|
w.nbits -= 48
|
||||||
|
n := w.nbytes
|
||||||
|
w.bytes[n] = byte(bits)
|
||||||
|
w.bytes[n+1] = byte(bits >> 8)
|
||||||
|
w.bytes[n+2] = byte(bits >> 16)
|
||||||
|
w.bytes[n+3] = byte(bits >> 24)
|
||||||
|
w.bytes[n+4] = byte(bits >> 32)
|
||||||
|
w.bytes[n+5] = byte(bits >> 40)
|
||||||
|
n += 6
|
||||||
|
if n >= bufferFlushSize {
|
||||||
|
if w.err != nil {
|
||||||
|
n = 0
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.write(w.bytes[:n])
|
||||||
|
n = 0
|
||||||
|
}
|
||||||
|
w.nbytes = n
|
||||||
}
|
}
|
||||||
// Store 6 bytes
|
|
||||||
bits := w.bits
|
|
||||||
w.bits >>= 48
|
|
||||||
w.nbits -= 48
|
|
||||||
bytes := w.bytes[n : n+6]
|
|
||||||
bytes[0] = byte(bits)
|
|
||||||
bytes[1] = byte(bits >> 8)
|
|
||||||
bytes[2] = byte(bits >> 16)
|
|
||||||
bytes[3] = byte(bits >> 24)
|
|
||||||
bytes[4] = byte(bits >> 32)
|
|
||||||
bytes[5] = byte(bits >> 40)
|
|
||||||
n += 6
|
|
||||||
if n < bufferFlushSize {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
w.write(w.bytes[:n])
|
|
||||||
if w.err != nil {
|
|
||||||
return // Return early in the event of write failures
|
|
||||||
}
|
|
||||||
n = 0
|
|
||||||
}
|
}
|
||||||
w.nbytes = n
|
if eof || sync {
|
||||||
w.writeCode(encoding[endBlockMarker])
|
w.writeCode(encoding[endBlockMarker])
|
||||||
|
w.lastHeader = 0
|
||||||
|
w.lastHuffMan = false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,9 +6,16 @@ package flate
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"math"
|
"math"
|
||||||
|
"math/bits"
|
||||||
"sort"
|
"sort"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
maxBitsLimit = 16
|
||||||
|
// number of valid literals
|
||||||
|
literalCount = 286
|
||||||
|
)
|
||||||
|
|
||||||
// hcode is a huffman code with a bit code and bit length.
|
// hcode is a huffman code with a bit code and bit length.
|
||||||
type hcode struct {
|
type hcode struct {
|
||||||
code, len uint16
|
code, len uint16
|
||||||
|
@ -24,7 +31,7 @@ type huffmanEncoder struct {
|
||||||
|
|
||||||
type literalNode struct {
|
type literalNode struct {
|
||||||
literal uint16
|
literal uint16
|
||||||
freq int32
|
freq uint16
|
||||||
}
|
}
|
||||||
|
|
||||||
// A levelInfo describes the state of the constructed tree for a given depth.
|
// A levelInfo describes the state of the constructed tree for a given depth.
|
||||||
|
@ -53,18 +60,24 @@ func (h *hcode) set(code uint16, length uint16) {
|
||||||
h.code = code
|
h.code = code
|
||||||
}
|
}
|
||||||
|
|
||||||
func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
|
func reverseBits(number uint16, bitLength byte) uint16 {
|
||||||
|
return bits.Reverse16(number << ((16 - bitLength) & 15))
|
||||||
|
}
|
||||||
|
|
||||||
|
func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
|
||||||
|
|
||||||
func newHuffmanEncoder(size int) *huffmanEncoder {
|
func newHuffmanEncoder(size int) *huffmanEncoder {
|
||||||
return &huffmanEncoder{codes: make([]hcode, size)}
|
// Make capacity to next power of two.
|
||||||
|
c := uint(bits.Len32(uint32(size - 1)))
|
||||||
|
return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generates a HuffmanCode corresponding to the fixed literal table
|
// Generates a HuffmanCode corresponding to the fixed literal table
|
||||||
func generateFixedLiteralEncoding() *huffmanEncoder {
|
func generateFixedLiteralEncoding() *huffmanEncoder {
|
||||||
h := newHuffmanEncoder(maxNumLit)
|
h := newHuffmanEncoder(literalCount)
|
||||||
codes := h.codes
|
codes := h.codes
|
||||||
var ch uint16
|
var ch uint16
|
||||||
for ch = 0; ch < maxNumLit; ch++ {
|
for ch = 0; ch < literalCount; ch++ {
|
||||||
var bits uint16
|
var bits uint16
|
||||||
var size uint16
|
var size uint16
|
||||||
switch {
|
switch {
|
||||||
|
@ -105,7 +118,7 @@ func generateFixedOffsetEncoding() *huffmanEncoder {
|
||||||
var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
|
var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
|
||||||
var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
|
var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
|
||||||
|
|
||||||
func (h *huffmanEncoder) bitLength(freq []int32) int {
|
func (h *huffmanEncoder) bitLength(freq []uint16) int {
|
||||||
var total int
|
var total int
|
||||||
for i, f := range freq {
|
for i, f := range freq {
|
||||||
if f != 0 {
|
if f != 0 {
|
||||||
|
@ -115,8 +128,6 @@ func (h *huffmanEncoder) bitLength(freq []int32) int {
|
||||||
return total
|
return total
|
||||||
}
|
}
|
||||||
|
|
||||||
const maxBitsLimit = 16
|
|
||||||
|
|
||||||
// Return the number of literals assigned to each bit size in the Huffman encoding
|
// Return the number of literals assigned to each bit size in the Huffman encoding
|
||||||
//
|
//
|
||||||
// This method is only called when list.length >= 3
|
// This method is only called when list.length >= 3
|
||||||
|
@ -160,9 +171,9 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
|
||||||
// We initialize the levels as if we had already figured this out.
|
// We initialize the levels as if we had already figured this out.
|
||||||
levels[level] = levelInfo{
|
levels[level] = levelInfo{
|
||||||
level: level,
|
level: level,
|
||||||
lastFreq: list[1].freq,
|
lastFreq: int32(list[1].freq),
|
||||||
nextCharFreq: list[2].freq,
|
nextCharFreq: int32(list[2].freq),
|
||||||
nextPairFreq: list[0].freq + list[1].freq,
|
nextPairFreq: int32(list[0].freq) + int32(list[1].freq),
|
||||||
}
|
}
|
||||||
leafCounts[level][level] = 2
|
leafCounts[level][level] = 2
|
||||||
if level == 1 {
|
if level == 1 {
|
||||||
|
@ -194,7 +205,12 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
|
||||||
l.lastFreq = l.nextCharFreq
|
l.lastFreq = l.nextCharFreq
|
||||||
// Lower leafCounts are the same of the previous node.
|
// Lower leafCounts are the same of the previous node.
|
||||||
leafCounts[level][level] = n
|
leafCounts[level][level] = n
|
||||||
l.nextCharFreq = list[n].freq
|
e := list[n]
|
||||||
|
if e.literal < math.MaxUint16 {
|
||||||
|
l.nextCharFreq = int32(e.freq)
|
||||||
|
} else {
|
||||||
|
l.nextCharFreq = math.MaxInt32
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// The next item on this row is a pair from the previous row.
|
// The next item on this row is a pair from the previous row.
|
||||||
// nextPairFreq isn't valid until we generate two
|
// nextPairFreq isn't valid until we generate two
|
||||||
|
@ -270,12 +286,12 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
|
||||||
//
|
//
|
||||||
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
|
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
|
||||||
// maxBits The maximum number of bits to use for any literal.
|
// maxBits The maximum number of bits to use for any literal.
|
||||||
func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
|
func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
|
||||||
if h.freqcache == nil {
|
if h.freqcache == nil {
|
||||||
// Allocate a reusable buffer with the longest possible frequency table.
|
// Allocate a reusable buffer with the longest possible frequency table.
|
||||||
// Possible lengths are codegenCodeCount, offsetCodeCount and maxNumLit.
|
// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
|
||||||
// The largest of these is maxNumLit, so we allocate for that case.
|
// The largest of these is literalCount, so we allocate for that case.
|
||||||
h.freqcache = make([]literalNode, maxNumLit+1)
|
h.freqcache = make([]literalNode, literalCount+1)
|
||||||
}
|
}
|
||||||
list := h.freqcache[:len(freq)+1]
|
list := h.freqcache[:len(freq)+1]
|
||||||
// Number of non-zero literals
|
// Number of non-zero literals
|
||||||
|
@ -342,3 +358,27 @@ func (s byFreq) Less(i, j int) bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||||
|
|
||||||
|
// histogramSize accumulates a histogram of b in h.
|
||||||
|
// An estimated size in bits is returned.
|
||||||
|
// Unassigned values are assigned '1' in the histogram.
|
||||||
|
// len(h) must be >= 256, and h's elements must be all zeroes.
|
||||||
|
func histogramSize(b []byte, h []uint16, fill bool) int {
|
||||||
|
h = h[:256]
|
||||||
|
for _, t := range b {
|
||||||
|
h[t]++
|
||||||
|
}
|
||||||
|
invTotal := 1.0 / float64(len(b))
|
||||||
|
shannon := 0.0
|
||||||
|
single := math.Ceil(-math.Log2(invTotal))
|
||||||
|
for i, v := range h[:] {
|
||||||
|
if v > 0 {
|
||||||
|
n := float64(v)
|
||||||
|
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
|
||||||
|
} else if fill {
|
||||||
|
shannon += single
|
||||||
|
h[i] = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return int(shannon + 0.99)
|
||||||
|
}
|
||||||
|
|
|
@ -9,19 +9,24 @@ package flate
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"math/bits"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
maxCodeLen = 16 // max length of Huffman code
|
maxCodeLen = 16 // max length of Huffman code
|
||||||
|
maxCodeLenMask = 15 // mask for max length of Huffman code
|
||||||
// The next three numbers come from the RFC section 3.2.7, with the
|
// The next three numbers come from the RFC section 3.2.7, with the
|
||||||
// additional proviso in section 3.2.5 which implies that distance codes
|
// additional proviso in section 3.2.5 which implies that distance codes
|
||||||
// 30 and 31 should never occur in compressed data.
|
// 30 and 31 should never occur in compressed data.
|
||||||
maxNumLit = 286
|
maxNumLit = 286
|
||||||
maxNumDist = 30
|
maxNumDist = 30
|
||||||
numCodes = 19 // number of codes in Huffman meta-code
|
numCodes = 19 // number of codes in Huffman meta-code
|
||||||
|
|
||||||
|
debugDecode = false
|
||||||
)
|
)
|
||||||
|
|
||||||
// Initialize the fixedHuffmanDecoder only once upon first use.
|
// Initialize the fixedHuffmanDecoder only once upon first use.
|
||||||
|
@ -101,10 +106,10 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
type huffmanDecoder struct {
|
type huffmanDecoder struct {
|
||||||
min int // the minimum code length
|
min int // the minimum code length
|
||||||
chunks [huffmanNumChunks]uint32 // chunks as described above
|
chunks *[huffmanNumChunks]uint16 // chunks as described above
|
||||||
links [][]uint32 // overflow links
|
links [][]uint16 // overflow links
|
||||||
linkMask uint32 // mask the width of the link table
|
linkMask uint32 // mask the width of the link table
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize Huffman decoding tables from array of code lengths.
|
// Initialize Huffman decoding tables from array of code lengths.
|
||||||
|
@ -112,21 +117,24 @@ type huffmanDecoder struct {
|
||||||
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
|
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
|
||||||
// degenerate case where the tree has only a single symbol with length 1. Empty
|
// degenerate case where the tree has only a single symbol with length 1. Empty
|
||||||
// trees are permitted.
|
// trees are permitted.
|
||||||
func (h *huffmanDecoder) init(bits []int) bool {
|
func (h *huffmanDecoder) init(lengths []int) bool {
|
||||||
// Sanity enables additional runtime tests during Huffman
|
// Sanity enables additional runtime tests during Huffman
|
||||||
// table construction. It's intended to be used during
|
// table construction. It's intended to be used during
|
||||||
// development to supplement the currently ad-hoc unit tests.
|
// development to supplement the currently ad-hoc unit tests.
|
||||||
const sanity = false
|
const sanity = false
|
||||||
|
|
||||||
|
if h.chunks == nil {
|
||||||
|
h.chunks = &[huffmanNumChunks]uint16{}
|
||||||
|
}
|
||||||
if h.min != 0 {
|
if h.min != 0 {
|
||||||
*h = huffmanDecoder{}
|
*h = huffmanDecoder{chunks: h.chunks, links: h.links}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count number of codes of each length,
|
// Count number of codes of each length,
|
||||||
// compute min and max length.
|
// compute min and max length.
|
||||||
var count [maxCodeLen]int
|
var count [maxCodeLen]int
|
||||||
var min, max int
|
var min, max int
|
||||||
for _, n := range bits {
|
for _, n := range lengths {
|
||||||
if n == 0 {
|
if n == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -136,7 +144,7 @@ func (h *huffmanDecoder) init(bits []int) bool {
|
||||||
if n > max {
|
if n > max {
|
||||||
max = n
|
max = n
|
||||||
}
|
}
|
||||||
count[n]++
|
count[n&maxCodeLenMask]++
|
||||||
}
|
}
|
||||||
|
|
||||||
// Empty tree. The decompressor.huffSym function will fail later if the tree
|
// Empty tree. The decompressor.huffSym function will fail later if the tree
|
||||||
|
@ -154,8 +162,8 @@ func (h *huffmanDecoder) init(bits []int) bool {
|
||||||
var nextcode [maxCodeLen]int
|
var nextcode [maxCodeLen]int
|
||||||
for i := min; i <= max; i++ {
|
for i := min; i <= max; i++ {
|
||||||
code <<= 1
|
code <<= 1
|
||||||
nextcode[i] = code
|
nextcode[i&maxCodeLenMask] = code
|
||||||
code += count[i]
|
code += count[i&maxCodeLenMask]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check that the coding is complete (i.e., that we've
|
// Check that the coding is complete (i.e., that we've
|
||||||
|
@ -164,37 +172,56 @@ func (h *huffmanDecoder) init(bits []int) bool {
|
||||||
// accept degenerate single-code codings. See also
|
// accept degenerate single-code codings. See also
|
||||||
// TestDegenerateHuffmanCoding.
|
// TestDegenerateHuffmanCoding.
|
||||||
if code != 1<<uint(max) && !(code == 1 && max == 1) {
|
if code != 1<<uint(max) && !(code == 1 && max == 1) {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
|
||||||
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
h.min = min
|
h.min = min
|
||||||
|
chunks := h.chunks[:]
|
||||||
|
for i := range chunks {
|
||||||
|
chunks[i] = 0
|
||||||
|
}
|
||||||
|
|
||||||
if max > huffmanChunkBits {
|
if max > huffmanChunkBits {
|
||||||
numLinks := 1 << (uint(max) - huffmanChunkBits)
|
numLinks := 1 << (uint(max) - huffmanChunkBits)
|
||||||
h.linkMask = uint32(numLinks - 1)
|
h.linkMask = uint32(numLinks - 1)
|
||||||
|
|
||||||
// create link tables
|
// create link tables
|
||||||
link := nextcode[huffmanChunkBits+1] >> 1
|
link := nextcode[huffmanChunkBits+1] >> 1
|
||||||
h.links = make([][]uint32, huffmanNumChunks-link)
|
if cap(h.links) < huffmanNumChunks-link {
|
||||||
|
h.links = make([][]uint16, huffmanNumChunks-link)
|
||||||
|
} else {
|
||||||
|
h.links = h.links[:huffmanNumChunks-link]
|
||||||
|
}
|
||||||
for j := uint(link); j < huffmanNumChunks; j++ {
|
for j := uint(link); j < huffmanNumChunks; j++ {
|
||||||
reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
|
reverse := int(bits.Reverse16(uint16(j)))
|
||||||
reverse >>= uint(16 - huffmanChunkBits)
|
reverse >>= uint(16 - huffmanChunkBits)
|
||||||
off := j - uint(link)
|
off := j - uint(link)
|
||||||
if sanity && h.chunks[reverse] != 0 {
|
if sanity && h.chunks[reverse] != 0 {
|
||||||
panic("impossible: overwriting existing chunk")
|
panic("impossible: overwriting existing chunk")
|
||||||
}
|
}
|
||||||
h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
|
h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
|
||||||
h.links[off] = make([]uint32, numLinks)
|
if cap(h.links[off]) < numLinks {
|
||||||
|
h.links[off] = make([]uint16, numLinks)
|
||||||
|
} else {
|
||||||
|
links := h.links[off][:0]
|
||||||
|
h.links[off] = links[:numLinks]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
h.links = h.links[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
for i, n := range bits {
|
for i, n := range lengths {
|
||||||
if n == 0 {
|
if n == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
code := nextcode[n]
|
code := nextcode[n]
|
||||||
nextcode[n]++
|
nextcode[n]++
|
||||||
chunk := uint32(i<<huffmanValueShift | n)
|
chunk := uint16(i<<huffmanValueShift | n)
|
||||||
reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
|
reverse := int(bits.Reverse16(uint16(code)))
|
||||||
reverse >>= uint(16 - n)
|
reverse >>= uint(16 - n)
|
||||||
if n <= huffmanChunkBits {
|
if n <= huffmanChunkBits {
|
||||||
for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
|
for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
|
||||||
|
@ -326,6 +353,9 @@ func (f *decompressor) nextBlock() {
|
||||||
f.huffmanBlock()
|
f.huffmanBlock()
|
||||||
default:
|
default:
|
||||||
// 3 is reserved.
|
// 3 is reserved.
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("reserved data block encountered")
|
||||||
|
}
|
||||||
f.err = CorruptInputError(f.roffset)
|
f.err = CorruptInputError(f.roffset)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -404,11 +434,17 @@ func (f *decompressor) readHuffman() error {
|
||||||
}
|
}
|
||||||
nlit := int(f.b&0x1F) + 257
|
nlit := int(f.b&0x1F) + 257
|
||||||
if nlit > maxNumLit {
|
if nlit > maxNumLit {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("nlit > maxNumLit", nlit)
|
||||||
|
}
|
||||||
return CorruptInputError(f.roffset)
|
return CorruptInputError(f.roffset)
|
||||||
}
|
}
|
||||||
f.b >>= 5
|
f.b >>= 5
|
||||||
ndist := int(f.b&0x1F) + 1
|
ndist := int(f.b&0x1F) + 1
|
||||||
if ndist > maxNumDist {
|
if ndist > maxNumDist {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("ndist > maxNumDist", ndist)
|
||||||
|
}
|
||||||
return CorruptInputError(f.roffset)
|
return CorruptInputError(f.roffset)
|
||||||
}
|
}
|
||||||
f.b >>= 5
|
f.b >>= 5
|
||||||
|
@ -432,6 +468,9 @@ func (f *decompressor) readHuffman() error {
|
||||||
f.codebits[codeOrder[i]] = 0
|
f.codebits[codeOrder[i]] = 0
|
||||||
}
|
}
|
||||||
if !f.h1.init(f.codebits[0:]) {
|
if !f.h1.init(f.codebits[0:]) {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("init codebits failed")
|
||||||
|
}
|
||||||
return CorruptInputError(f.roffset)
|
return CorruptInputError(f.roffset)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -459,6 +498,9 @@ func (f *decompressor) readHuffman() error {
|
||||||
rep = 3
|
rep = 3
|
||||||
nb = 2
|
nb = 2
|
||||||
if i == 0 {
|
if i == 0 {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("i==0")
|
||||||
|
}
|
||||||
return CorruptInputError(f.roffset)
|
return CorruptInputError(f.roffset)
|
||||||
}
|
}
|
||||||
b = f.bits[i-1]
|
b = f.bits[i-1]
|
||||||
|
@ -473,6 +515,9 @@ func (f *decompressor) readHuffman() error {
|
||||||
}
|
}
|
||||||
for f.nb < nb {
|
for f.nb < nb {
|
||||||
if err := f.moreBits(); err != nil {
|
if err := f.moreBits(); err != nil {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("morebits:", err)
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -480,6 +525,9 @@ func (f *decompressor) readHuffman() error {
|
||||||
f.b >>= nb
|
f.b >>= nb
|
||||||
f.nb -= nb
|
f.nb -= nb
|
||||||
if i+rep > n {
|
if i+rep > n {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("i+rep > n", i, rep, n)
|
||||||
|
}
|
||||||
return CorruptInputError(f.roffset)
|
return CorruptInputError(f.roffset)
|
||||||
}
|
}
|
||||||
for j := 0; j < rep; j++ {
|
for j := 0; j < rep; j++ {
|
||||||
|
@ -489,6 +537,9 @@ func (f *decompressor) readHuffman() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
|
if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("init2 failed")
|
||||||
|
}
|
||||||
return CorruptInputError(f.roffset)
|
return CorruptInputError(f.roffset)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -566,12 +617,18 @@ readLiteral:
|
||||||
length = 258
|
length = 258
|
||||||
n = 0
|
n = 0
|
||||||
default:
|
default:
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println(v, ">= maxNumLit")
|
||||||
|
}
|
||||||
f.err = CorruptInputError(f.roffset)
|
f.err = CorruptInputError(f.roffset)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if n > 0 {
|
if n > 0 {
|
||||||
for f.nb < n {
|
for f.nb < n {
|
||||||
if err = f.moreBits(); err != nil {
|
if err = f.moreBits(); err != nil {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("morebits n>0:", err)
|
||||||
|
}
|
||||||
f.err = err
|
f.err = err
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -585,15 +642,21 @@ readLiteral:
|
||||||
if f.hd == nil {
|
if f.hd == nil {
|
||||||
for f.nb < 5 {
|
for f.nb < 5 {
|
||||||
if err = f.moreBits(); err != nil {
|
if err = f.moreBits(); err != nil {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("morebits f.nb<5:", err)
|
||||||
|
}
|
||||||
f.err = err
|
f.err = err
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
dist = int(reverseByte[(f.b&0x1F)<<3])
|
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
|
||||||
f.b >>= 5
|
f.b >>= 5
|
||||||
f.nb -= 5
|
f.nb -= 5
|
||||||
} else {
|
} else {
|
||||||
if dist, err = f.huffSym(f.hd); err != nil {
|
if dist, err = f.huffSym(f.hd); err != nil {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("huffsym:", err)
|
||||||
|
}
|
||||||
f.err = err
|
f.err = err
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -608,6 +671,9 @@ readLiteral:
|
||||||
extra := (dist & 1) << nb
|
extra := (dist & 1) << nb
|
||||||
for f.nb < nb {
|
for f.nb < nb {
|
||||||
if err = f.moreBits(); err != nil {
|
if err = f.moreBits(); err != nil {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("morebits f.nb<nb:", err)
|
||||||
|
}
|
||||||
f.err = err
|
f.err = err
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -617,12 +683,18 @@ readLiteral:
|
||||||
f.nb -= nb
|
f.nb -= nb
|
||||||
dist = 1<<(nb+1) + 1 + extra
|
dist = 1<<(nb+1) + 1 + extra
|
||||||
default:
|
default:
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("dist too big:", dist, maxNumDist)
|
||||||
|
}
|
||||||
f.err = CorruptInputError(f.roffset)
|
f.err = CorruptInputError(f.roffset)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// No check on length; encoding can be prescient.
|
// No check on length; encoding can be prescient.
|
||||||
if dist > f.dict.histSize() {
|
if dist > f.dict.histSize() {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
|
||||||
|
}
|
||||||
f.err = CorruptInputError(f.roffset)
|
f.err = CorruptInputError(f.roffset)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -661,15 +733,15 @@ func (f *decompressor) dataBlock() {
|
||||||
nr, err := io.ReadFull(f.r, f.buf[0:4])
|
nr, err := io.ReadFull(f.r, f.buf[0:4])
|
||||||
f.roffset += int64(nr)
|
f.roffset += int64(nr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == io.EOF {
|
f.err = noEOF(err)
|
||||||
err = io.ErrUnexpectedEOF
|
|
||||||
}
|
|
||||||
f.err = err
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
n := int(f.buf[0]) | int(f.buf[1])<<8
|
n := int(f.buf[0]) | int(f.buf[1])<<8
|
||||||
nn := int(f.buf[2]) | int(f.buf[3])<<8
|
nn := int(f.buf[2]) | int(f.buf[3])<<8
|
||||||
if uint16(nn) != uint16(^n) {
|
if uint16(nn) != uint16(^n) {
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("uint16(nn) != uint16(^n)", nn, ^n)
|
||||||
|
}
|
||||||
f.err = CorruptInputError(f.roffset)
|
f.err = CorruptInputError(f.roffset)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -697,10 +769,7 @@ func (f *decompressor) copyData() {
|
||||||
f.copyLen -= cnt
|
f.copyLen -= cnt
|
||||||
f.dict.writeMark(cnt)
|
f.dict.writeMark(cnt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == io.EOF {
|
f.err = noEOF(err)
|
||||||
err = io.ErrUnexpectedEOF
|
|
||||||
}
|
|
||||||
f.err = err
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -722,13 +791,18 @@ func (f *decompressor) finishBlock() {
|
||||||
f.step = (*decompressor).nextBlock
|
f.step = (*decompressor).nextBlock
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
|
||||||
|
func noEOF(e error) error {
|
||||||
|
if e == io.EOF {
|
||||||
|
return io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
return e
|
||||||
|
}
|
||||||
|
|
||||||
func (f *decompressor) moreBits() error {
|
func (f *decompressor) moreBits() error {
|
||||||
c, err := f.r.ReadByte()
|
c, err := f.r.ReadByte()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == io.EOF {
|
return noEOF(err)
|
||||||
err = io.ErrUnexpectedEOF
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
f.roffset++
|
f.roffset++
|
||||||
f.b |= uint32(c) << f.nb
|
f.b |= uint32(c) << f.nb
|
||||||
|
@ -743,25 +817,40 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
|
||||||
// cases, the chunks slice will be 0 for the invalid sequence, leading it
|
// cases, the chunks slice will be 0 for the invalid sequence, leading it
|
||||||
// satisfy the n == 0 check below.
|
// satisfy the n == 0 check below.
|
||||||
n := uint(h.min)
|
n := uint(h.min)
|
||||||
|
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
|
||||||
|
// but is smart enough to keep local variables in registers, so use nb and b,
|
||||||
|
// inline call to moreBits and reassign b,nb back to f on return.
|
||||||
|
nb, b := f.nb, f.b
|
||||||
for {
|
for {
|
||||||
for f.nb < n {
|
for nb < n {
|
||||||
if err := f.moreBits(); err != nil {
|
c, err := f.r.ReadByte()
|
||||||
return 0, err
|
if err != nil {
|
||||||
|
f.b = b
|
||||||
|
f.nb = nb
|
||||||
|
return 0, noEOF(err)
|
||||||
}
|
}
|
||||||
|
f.roffset++
|
||||||
|
b |= uint32(c) << (nb & 31)
|
||||||
|
nb += 8
|
||||||
}
|
}
|
||||||
chunk := h.chunks[f.b&(huffmanNumChunks-1)]
|
chunk := h.chunks[b&(huffmanNumChunks-1)]
|
||||||
n = uint(chunk & huffmanCountMask)
|
n = uint(chunk & huffmanCountMask)
|
||||||
if n > huffmanChunkBits {
|
if n > huffmanChunkBits {
|
||||||
chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask]
|
chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
|
||||||
n = uint(chunk & huffmanCountMask)
|
n = uint(chunk & huffmanCountMask)
|
||||||
}
|
}
|
||||||
if n <= f.nb {
|
if n <= nb {
|
||||||
if n == 0 {
|
if n == 0 {
|
||||||
|
f.b = b
|
||||||
|
f.nb = nb
|
||||||
|
if debugDecode {
|
||||||
|
fmt.Println("huffsym: n==0")
|
||||||
|
}
|
||||||
f.err = CorruptInputError(f.roffset)
|
f.err = CorruptInputError(f.roffset)
|
||||||
return 0, f.err
|
return 0, f.err
|
||||||
}
|
}
|
||||||
f.b >>= n
|
f.b = b >> (n & 31)
|
||||||
f.nb -= n
|
f.nb = nb - n
|
||||||
return int(chunk >> huffmanValueShift), nil
|
return int(chunk >> huffmanValueShift), nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -799,6 +888,8 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error {
|
||||||
r: makeReader(r),
|
r: makeReader(r),
|
||||||
bits: f.bits,
|
bits: f.bits,
|
||||||
codebits: f.codebits,
|
codebits: f.codebits,
|
||||||
|
h1: f.h1,
|
||||||
|
h2: f.h2,
|
||||||
dict: f.dict,
|
dict: f.dict,
|
||||||
step: (*decompressor).nextBlock,
|
step: (*decompressor).nextBlock,
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,174 @@
|
||||||
|
package flate
|
||||||
|
|
||||||
|
// fastEncL1 is the encoder state used by the level 1 Encode method.
// It embeds fastGen (declared elsewhere; Encode reads its cur and hist fields
// and calls its addBlock and matchlenLong methods) and adds a table of
// tableSize entries in which Encode stores, per hash value, the most recently
// seen position together with the 4 bytes found there.
// This is the generic implementation.
type fastEncL1 struct {
	fastGen
	table [tableSize]tableEntry
}
|
||||||
|
|
||||||
|
// Encode tokenizes src into dst using the level 1 matcher.
//
// src is first passed to e.addBlock, which returns the position s at which
// matching starts; from then on the function works on e.hist instead of the
// caller's slice (presumably addBlock appended src to e.hist - confirm against
// fastGen). Unmatched bytes are written with emitLiteral and matches with
// dst.AddMatchLong.
//
// Two cases produce no tokens and leave the block to the caller:
//   - len(src) < minNonLiteralBlockSize: dst.n is set to len(src) and the
//     function returns immediately.
//   - no match was found at all: dst.n is still 0 at emitRemainder and the
//     trailing literals are not emitted.
func (e *fastEncL1) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
	// Table offsets are stored as "position + e.cur", so once e.cur reaches
	// bufferReset every entry is rebased and e.cur restarts at maxMatchOffset.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			// No history to refer to: simply forget every entry.
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = maxMatchOffset
			break
		}
		// Shift down everything in the table that isn't already too far away.
		// Entries at or below minOff are zeroed instead of shifted.
		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
		for i := range e.table[:] {
			v := e.table[i].offset
			if v <= minOff {
				v = 0
			} else {
				v = v - e.cur + maxMatchOffset
			}
			e.table[i].offset = v
		}
		e.cur = maxMatchOffset
	}

	s := e.addBlock(src)

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = uint16(len(src))
		return
	}

	// Override src
	src = e.hist

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// cv holds the 4 bytes at the current position s.
	cv := load3232(src, s)

	for {
		// The distance to the next probe is doEvery plus the number of
		// bytes since the last emit shifted right by skipLog, so the step
		// grows the longer no match has been found.
		const skipLog = 5
		const doEvery = 2

		nextS := s
		var candidate tableEntry
		// Search loop: each iteration tests two positions (s, then nextS)
		// and leaves via break with candidate set once one of them matches.
		for {
			nextHash := hash(cv)
			candidate = e.table[nextHash]
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit {
				goto emitRemainder
			}

			// Load the bytes at nextS once; they serve the second probe
			// (low 32 bits) and the position after it (after the shift).
			now := load6432(src, nextS)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
			nextHash = hash(uint32(now))

			// Stored offsets include e.cur; subtracting it yields a
			// position in src, and offset becomes the match distance.
			offset := s - (candidate.offset - e.cur)
			if offset < maxMatchOffset && cv == candidate.val {
				// Match at s: still record nextS before leaving.
				e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
				break
			}

			// Do one right away...
			cv = uint32(now)
			s = nextS
			nextS++
			candidate = e.table[nextHash]
			now >>= 8
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}

			offset = s - (candidate.offset - e.cur)
			if offset < maxMatchOffset && cv == candidate.val {
				// NOTE(review): this store reuses nextHash (the hash of the
				// bytes at s) for the entry describing nextS, so it
				// overwrites the slot written just above - confirm intent.
				e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
				break
			}
			cv = uint32(now)
			s = nextS
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset - e.cur
			l := e.matchlenLong(s+4, t+4, src) + 4

			// Extend backwards
			// (never past nextEmit, so already emitted bytes are untouched).
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				emitLiteral(dst, src[nextEmit:s])
			}

			// Save the match found
			dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				// The match ended at or before the position that was
				// already indexed; continue just after it.
				s = nextS + 1
			}
			if s >= sLimit {
				// Index first pair after match end.
				if int(s+l+4) < len(src) {
					cv := load3232(src, s)
					e.table[hash(cv)] = tableEntry{offset: s + e.cur, val: cv}
				}
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6432(src, s-2)
			o := e.cur + s - 2
			prevHash := hash(uint32(x))
			e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
			x >>= 16
			currHash := hash(uint32(x))
			candidate = e.table[currHash]
			e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x)}

			offset := s - (candidate.offset - e.cur)
			if offset > maxMatchOffset || uint32(x) != candidate.val {
				// No immediate follow-up match: resume the search at s+1.
				cv = uint32(x >> 8)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}
		emitLiteral(dst, src[nextEmit:])
	}
}
|
|
@ -0,0 +1,199 @@
|
||||||
|
package flate
|
||||||
|
|
||||||
|
// fastEncL2 is the encoder state used by the level 2 Encode method.
// It embeds fastGen (declared elsewhere; Encode reads its cur and hist fields
// and calls its addBlock and matchlenLong methods) and adds a table of
// bTableSize entries, indexed in Encode by hash4u(..., bTableBits), holding
// the most recently seen position and the 4 bytes found there.
// This is the generic implementation.
type fastEncL2 struct {
	fastGen
	table [bTableSize]tableEntry
}
|
||||||
|
|
||||||
|
// Encode uses a similar algorithm to level 1, but is capable
// of matching across blocks giving better compression at a small slowdown.
//
// src is first passed to e.addBlock, which returns the position s at which
// matching starts; from then on the function works on e.hist instead of the
// caller's slice (presumably addBlock appended src to e.hist - confirm against
// fastGen). Unmatched bytes are written with emitLiteral and matches with
// dst.AddMatchLong.
//
// Two cases produce no tokens and leave the block to the caller:
//   - len(src) < minNonLiteralBlockSize: dst.n is set to len(src) and the
//     function returns immediately.
//   - no match was found at all: dst.n is still 0 at emitRemainder and the
//     trailing literals are not emitted.
func (e *fastEncL2) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
	// Table offsets are stored as "position + e.cur", so once e.cur reaches
	// bufferReset every entry is rebased and e.cur restarts at maxMatchOffset.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			// No history to refer to: simply forget every entry.
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = maxMatchOffset
			break
		}
		// Shift down everything in the table that isn't already too far away.
		// Entries at or below minOff are zeroed instead of shifted.
		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
		for i := range e.table[:] {
			v := e.table[i].offset
			if v <= minOff {
				v = 0
			} else {
				v = v - e.cur + maxMatchOffset
			}
			e.table[i].offset = v
		}
		e.cur = maxMatchOffset
	}

	s := e.addBlock(src)

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = uint16(len(src))
		return
	}

	// Override src
	src = e.hist

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// cv holds the 4 bytes at the current position s.
	cv := load3232(src, s)
	for {
		// When should we start skipping if we haven't found matches in a long while.
		// The step is doEvery plus the number of bytes since the last emit
		// shifted right by skipLog.
		const skipLog = 5
		const doEvery = 2

		nextS := s
		var candidate tableEntry
		// Search loop: each iteration tests two positions (s, then nextS)
		// and leaves via break with candidate set once one of them matches.
		for {
			nextHash := hash4u(cv, bTableBits)
			s = nextS
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit {
				goto emitRemainder
			}
			candidate = e.table[nextHash]
			// Load the bytes at nextS once; they serve the second probe
			// (low 32 bits) and the position after it (after the shift).
			now := load6432(src, nextS)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
			nextHash = hash4u(uint32(now), bTableBits)

			// Stored offsets include e.cur; subtracting it yields a
			// position in src, and offset becomes the match distance.
			offset := s - (candidate.offset - e.cur)
			if offset < maxMatchOffset && cv == candidate.val {
				// Match at s: still record nextS before leaving.
				e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
				break
			}

			// Do one right away...
			cv = uint32(now)
			s = nextS
			nextS++
			candidate = e.table[nextHash]
			now >>= 8
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}

			offset = s - (candidate.offset - e.cur)
			if offset < maxMatchOffset && cv == candidate.val {
				break
			}
			// cv now describes nextS; the top of the loop moves s there.
			cv = uint32(now)
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		// Emit the match (dst.AddMatchLong), and then see if another match
		// could be our next move. Repeat until we find no match for the input
		// immediately after what was consumed by the last match.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset - e.cur
			l := e.matchlenLong(s+4, t+4, src) + 4

			// Extend backwards
			// (never past nextEmit, so already emitted bytes are untouched).
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				emitLiteral(dst, src[nextEmit:s])
			}

			dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				// The match ended at or before the position that was
				// already indexed; continue just after it.
				s = nextS + 1
			}

			if s >= sLimit {
				// Index first pair after match end.
				if int(s+l+4) < len(src) {
					cv := load3232(src, s)
					e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur, val: cv}
				}
				goto emitRemainder
			}

			// Index part of the matched region: starting at s-l+2 and
			// advancing 7 bytes per step, store the positions i, i+2 and
			// i+4 from a single 8-byte load.
			for i := s - l + 2; i < s-5; i += 7 {
				x := load6432(src, int32(i))
				nextHash := hash4u(uint32(x), bTableBits)
				e.table[nextHash] = tableEntry{offset: e.cur + i, val: uint32(x)}
				// Skip one
				x >>= 16
				nextHash = hash4u(uint32(x), bTableBits)
				e.table[nextHash] = tableEntry{offset: e.cur + i + 2, val: uint32(x)}
				// Skip one
				x >>= 16
				nextHash = hash4u(uint32(x), bTableBits)
				e.table[nextHash] = tableEntry{offset: e.cur + i + 4, val: uint32(x)}
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 to s. If
			// another match is not our next move, cv is set up for s+1.
			// At least on GOARCH=amd64, these hash calculations are faster
			// as one load64 call (with some shifts) instead of several
			// load32 calls.
			x := load6432(src, s-2)
			o := e.cur + s - 2
			prevHash := hash4u(uint32(x), bTableBits)
			prevHash2 := hash4u(uint32(x>>8), bTableBits)
			e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
			e.table[prevHash2] = tableEntry{offset: o + 1, val: uint32(x >> 8)}
			currHash := hash4u(uint32(x>>16), bTableBits)
			candidate = e.table[currHash]
			e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x >> 16)}

			offset := s - (candidate.offset - e.cur)
			if offset > maxMatchOffset || uint32(x>>16) != candidate.val {
				// No immediate follow-up match: resume the search at s+1.
				cv = uint32(x >> 24)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}

		emitLiteral(dst, src[nextEmit:])
	}
}
|
|
@ -0,0 +1,225 @@
|
||||||
|
package flate
|
||||||
|
|
||||||
|
// fastEncL3 is the encoder state used by the level 3 Encode method.
// It embeds fastGen (declared elsewhere) and adds a table of tableSize
// tableEntryPrev slots. Encode treats each slot as a two-deep history: on
// every store the previous Cur entry is moved to Prev, so up to two
// candidates are available per hash value.
type fastEncL3 struct {
	fastGen
	table [tableSize]tableEntryPrev
}
|
||||||
|
|
||||||
|
// Encode uses a similar algorithm to level 2, will check up to two candidates.
//
// src is first passed to e.addBlock, which returns the position s at which
// matching starts; from then on the function works on e.hist instead of the
// caller's slice (presumably addBlock appended src to e.hist - confirm against
// fastGen). Unmatched bytes are written with emitLiteral and matches with
// dst.AddMatchLong.
//
// Two cases produce no tokens and leave the block to the caller:
//   - len(src) < minNonLiteralBlockSize: dst.n is set to len(src) and the
//     function returns immediately.
//   - no match was found at all: dst.n is still 0 at emitRemainder and the
//     trailing literals are not emitted.
func (e *fastEncL3) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 8 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
	// Table offsets are stored as "position + e.cur", so once e.cur reaches
	// bufferReset every entry is rebased and e.cur restarts at maxMatchOffset.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			// No history to refer to: simply forget every entry.
			for i := range e.table[:] {
				e.table[i] = tableEntryPrev{}
			}
			e.cur = maxMatchOffset
			break
		}
		// Shift down everything in the table that isn't already too far away.
		// Cur and Prev are rebased independently; entries at or below minOff
		// are zeroed instead of shifted.
		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
		for i := range e.table[:] {
			v := e.table[i]
			if v.Cur.offset <= minOff {
				v.Cur.offset = 0
			} else {
				v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
			}
			if v.Prev.offset <= minOff {
				v.Prev.offset = 0
			} else {
				v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
			}
			e.table[i] = v
		}
		e.cur = maxMatchOffset
	}

	s := e.addBlock(src)

	// Skip if too small.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = uint16(len(src))
		return
	}

	// Override src
	src = e.hist

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// cv holds the 4 bytes at the current position s.
	cv := load3232(src, s)
	for {
		// The step to the next probe is 1 plus the number of bytes since the
		// last emit shifted right by skipLog.
		const skipLog = 6
		nextS := s
		var candidate tableEntry
		// Search loop: leaves via break with candidate set to the entry to
		// use for the match at s.
		for {
			nextHash := hash(cv)
			s = nextS
			nextS = s + 1 + (s-nextEmit)>>skipLog
			if nextS > sLimit {
				goto emitRemainder
			}
			candidates := e.table[nextHash]
			now := load3232(src, nextS)
			// Push the current position; the old Cur becomes Prev.
			e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}

			// Check both candidates
			candidate = candidates.Cur
			offset := s - (candidate.offset - e.cur)
			if cv == candidate.val {
				if offset > maxMatchOffset {
					cv = now
					// Previous will also be invalid, we have nothing.
					continue
				}
				o2 := s - (candidates.Prev.offset - e.cur)
				if cv != candidates.Prev.val || o2 > maxMatchOffset {
					// Only Cur is usable.
					break
				}
				// Both match and are valid, pick longest.
				l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
				if l2 > l1 {
					candidate = candidates.Prev
				}
				break
			} else {
				// We only check if value mismatches.
				// Offset will always be invalid in other cases.
				candidate = candidates.Prev
				if cv == candidate.val {
					offset := s - (candidate.offset - e.cur)
					if offset <= maxMatchOffset {
						break
					}
				}
			}
			cv = now
		}

		// Emit the match (dst.AddMatchLong), and then see if another match
		// could be our next move. Repeat until we find no match for the input
		// immediately after what was consumed by the last match.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset - e.cur
			l := e.matchlenLong(s+4, t+4, src) + 4

			// Extend backwards
			// (never past nextEmit, so already emitted bytes are untouched).
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				emitLiteral(dst, src[nextEmit:s])
			}

			dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				// The match ended at or before the position that was
				// already indexed; continue just after it.
				s = nextS + 1
			}

			if s >= sLimit {
				// t now points just past the matched source bytes.
				t += l
				// Index first pair after match end.
				if int(t+4) < len(src) && t > 0 {
					cv := load3232(src, t)
					nextHash := hash(cv)
					e.table[nextHash] = tableEntryPrev{
						Prev: e.table[nextHash].Cur,
						Cur:  tableEntry{offset: e.cur + t, val: cv},
					}
				}
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-3 to s.
			// One 8-byte load at s-3 is shifted a byte at a time to obtain
			// the 4-byte values at s-3, s-2, s-1 and s.
			x := load6432(src, s-3)
			prevHash := hash(uint32(x))
			e.table[prevHash] = tableEntryPrev{
				Prev: e.table[prevHash].Cur,
				Cur:  tableEntry{offset: e.cur + s - 3, val: uint32(x)},
			}
			x >>= 8
			prevHash = hash(uint32(x))

			e.table[prevHash] = tableEntryPrev{
				Prev: e.table[prevHash].Cur,
				Cur:  tableEntry{offset: e.cur + s - 2, val: uint32(x)},
			}
			x >>= 8
			prevHash = hash(uint32(x))

			e.table[prevHash] = tableEntryPrev{
				Prev: e.table[prevHash].Cur,
				Cur:  tableEntry{offset: e.cur + s - 1, val: uint32(x)},
			}
			x >>= 8
			currHash := hash(uint32(x))
			candidates := e.table[currHash]
			cv = uint32(x)
			e.table[currHash] = tableEntryPrev{
				Prev: candidates.Cur,
				Cur:  tableEntry{offset: s + e.cur, val: cv},
			}

			// Check both candidates
			// (continue re-enters this loop with candidate as the new match).
			candidate = candidates.Cur
			if cv == candidate.val {
				offset := s - (candidate.offset - e.cur)
				if offset <= maxMatchOffset {
					continue
				}
			} else {
				// We only check if value mismatches.
				// Offset will always be invalid in other cases.
				candidate = candidates.Prev
				if cv == candidate.val {
					offset := s - (candidate.offset - e.cur)
					if offset <= maxMatchOffset {
						continue
					}
				}
			}
			// No immediate follow-up match: resume the search at s+1.
			cv = uint32(x >> 8)
			s++
			break
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}

		emitLiteral(dst, src[nextEmit:])
	}
}
|
|
@ -0,0 +1,210 @@
|
||||||
|
package flate
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// fastEncL4 is the encoder state used by the level 4 Encode method.
// It embeds fastGen (declared elsewhere) and keeps two tables of tableSize
// entries each: table is indexed in Encode by hash4x64/hash4u and provides the
// "short" candidate, bTable is indexed by hash7 and provides the "long"
// candidate. Both store the position (plus e.cur) and the low 4 bytes found
// there.
type fastEncL4 struct {
	fastGen
	table  [tableSize]tableEntry
	bTable [tableSize]tableEntry
}
|
||||||
|
|
||||||
|
// Encode tokenizes src into dst using the level 4 matcher, which probes a
// short-hash table (e.table) and a long-hash table (e.bTable) for every
// position and prefers the long candidate.
//
// src is first passed to e.addBlock, which returns the position s at which
// matching starts; from then on the function works on e.hist instead of the
// caller's slice (presumably addBlock appended src to e.hist - confirm against
// fastGen). Unmatched bytes are written with emitLiteral and matches with
// dst.AddMatchLong.
//
// Two cases produce no tokens and leave the block to the caller:
//   - len(src) < minNonLiteralBlockSize: dst.n is set to len(src) and the
//     function returns immediately.
//   - no match was found at all: dst.n is still 0 at emitRemainder and the
//     trailing literals are not emitted.
func (e *fastEncL4) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
	// Table offsets are stored as "position + e.cur", so once e.cur reaches
	// bufferReset every entry of both tables is rebased and e.cur restarts
	// at maxMatchOffset.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			// No history to refer to: simply forget every entry.
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.bTable[:] {
				e.bTable[i] = tableEntry{}
			}
			e.cur = maxMatchOffset
			break
		}
		// Shift down everything in the table that isn't already too far away.
		// Entries at or below minOff are zeroed instead of shifted.
		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
		for i := range e.table[:] {
			v := e.table[i].offset
			if v <= minOff {
				v = 0
			} else {
				v = v - e.cur + maxMatchOffset
			}
			e.table[i].offset = v
		}
		for i := range e.bTable[:] {
			v := e.bTable[i].offset
			if v <= minOff {
				v = 0
			} else {
				v = v - e.cur + maxMatchOffset
			}
			e.bTable[i].offset = v
		}
		e.cur = maxMatchOffset
	}

	s := e.addBlock(src)

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = uint16(len(src))
		return
	}

	// Override src
	src = e.hist

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// cv holds the 8 bytes at the current position s; its low 32 bits are
	// what gets stored in and compared against table entries.
	cv := load6432(src, s)
	for {
		// The step to the next probe is doEvery plus the number of bytes
		// since the last emit shifted right by skipLog.
		const skipLog = 6
		const doEvery = 1

		nextS := s
		// t is the position in src of the chosen match source.
		var t int32
		// Search loop: leaves via break with s and t describing a match.
		for {
			nextHashS := hash4x64(cv, tableBits)
			nextHashL := hash7(cv, tableBits)

			s = nextS
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit {
				goto emitRemainder
			}
			// Fetch a short+long candidate
			sCandidate := e.table[nextHashS]
			lCandidate := e.bTable[nextHashL]
			next := load6432(src, nextS)
			// Record the current position in both tables.
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHashS] = entry
			e.bTable[nextHashL] = entry

			t = lCandidate.offset - e.cur
			if s-t < maxMatchOffset && uint32(cv) == lCandidate.val {
				// We got a long match. Use that.
				break
			}

			t = sCandidate.offset - e.cur
			if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
				// Found a 4 match...
				lCandidate = e.bTable[hash7(next, tableBits)]

				// If the next long is a candidate, check if we should use that instead...
				lOff := nextS - (lCandidate.offset - e.cur)
				if lOff < maxMatchOffset && lCandidate.val == uint32(next) {
					l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:])
					if l2 > l1 {
						// The long match at nextS is longer: switch to it.
						s = nextS
						t = lCandidate.offset - e.cur
					}
				}
				break
			}
			cv = next
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		// Extend the 4-byte match as long as possible.
		l := e.matchlenLong(s+4, t+4, src) + 4

		// Extend backwards
		// (never past nextEmit, so already emitted bytes are untouched).
		for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}
		if nextEmit < s {
			emitLiteral(dst, src[nextEmit:s])
		}
		// Disabled sanity checks on the match about to be emitted; flip the
		// condition to enable them while debugging.
		if false {
			if t >= s {
				panic("s-t")
			}
			if (s - t) > maxMatchOffset {
				panic(fmt.Sprintln("mmo", t))
			}
			if l < baseMatchLength {
				panic("bml")
			}
		}

		dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
		s += l
		nextEmit = s
		if nextS >= s {
			// The match ended at or before the position that was already
			// indexed; continue just after it.
			s = nextS + 1
		}

		if s >= sLimit {
			// Index first pair after match end.
			if int(s+8) < len(src) {
				cv := load6432(src, s)
				e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			}
			goto emitRemainder
		}

		// Store every 3rd hash in-between
		// For i = nextS, nextS+3, ... below s-1: position i goes into the
		// long table, position i+1 into both the long and the short table.
		if true {
			i := nextS
			if i < s-1 {
				cv := load6432(src, i)
				t := tableEntry{offset: i + e.cur, val: uint32(cv)}
				t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
				e.bTable[hash7(cv, tableBits)] = t
				e.bTable[hash7(cv>>8, tableBits)] = t2
				e.table[hash4u(t2.val, tableBits)] = t2

				i += 3
				for ; i < s-1; i += 3 {
					cv := load6432(src, i)
					t := tableEntry{offset: i + e.cur, val: uint32(cv)}
					t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
					e.bTable[hash7(cv, tableBits)] = t
					e.bTable[hash7(cv>>8, tableBits)] = t2
					e.table[hash4u(t2.val, tableBits)] = t2
				}
			}
		}

		// We could immediately start working at s now, but to improve
		// compression we first update both hash tables at s-1. Position s
		// itself is stored by the next iteration of the search loop, for
		// which cv is prepared here.
		x := load6432(src, s-1)
		o := e.cur + s - 1
		prevHashS := hash4x64(x, tableBits)
		prevHashL := hash7(x, tableBits)
		e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)}
		e.bTable[prevHashL] = tableEntry{offset: o, val: uint32(x)}
		cv = x >> 8
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}

		emitLiteral(dst, src[nextEmit:])
	}
}
|
|
@ -0,0 +1,276 @@
|
||||||
|
package flate
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// fastEncL5 is the encoder state used by the level 5 Encode method.
// It embeds fastGen (declared elsewhere) and keeps two tables of tableSize
// slots each: table is indexed in Encode by hash4x64 and holds a single
// "short" candidate per slot, bTable is indexed by hash7 and holds a
// tableEntryPrev, i.e. the current and the previous "long" candidate.
type fastEncL5 struct {
	fastGen
	table  [tableSize]tableEntry
	bTable [tableSize]tableEntryPrev
}
|
||||||
|
|
||||||
|
// Encode appends literal and match tokens for src to dst.
//
// Candidates are looked up in two tables: e.table (short, hash4x64) and
// e.bTable (long, hash7, with a current and a previous entry per bucket).
// When src is shorter than minNonLiteralBlockSize no tokens are written;
// only dst.n is set to len(src) and the caller is expected to handle it.
// If no match is found at all, dst.n is left at 0 on return.
func (e *fastEncL5) Encode(dst *tokens, src []byte) {
|
||||||
|
const (
|
||||||
|
inputMargin = 12 - 1
|
||||||
|
minNonLiteralBlockSize = 1 + 1 + inputMargin
|
||||||
|
)
|
||||||
|
|
||||||
|
// Protect against e.cur wraparound.
|
||||||
|
for e.cur >= bufferReset {
|
||||||
|
// With no history there is nothing worth keeping: clear both tables.
if len(e.hist) == 0 {
|
||||||
|
for i := range e.table[:] {
|
||||||
|
e.table[i] = tableEntry{}
|
||||||
|
}
|
||||||
|
for i := range e.bTable[:] {
|
||||||
|
e.bTable[i] = tableEntryPrev{}
|
||||||
|
}
|
||||||
|
e.cur = maxMatchOffset
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// Shift down everything in the table that isn't already too far away.
|
||||||
|
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
|
||||||
|
for i := range e.table[:] {
|
||||||
|
v := e.table[i].offset
|
||||||
|
if v <= minOff {
|
||||||
|
v = 0
|
||||||
|
} else {
|
||||||
|
v = v - e.cur + maxMatchOffset
|
||||||
|
}
|
||||||
|
e.table[i].offset = v
|
||||||
|
}
|
||||||
|
for i := range e.bTable[:] {
|
||||||
|
v := e.bTable[i]
|
||||||
|
// If the current entry is too old, the previous one is dropped with it.
if v.Cur.offset <= minOff {
|
||||||
|
v.Cur.offset = 0
|
||||||
|
v.Prev.offset = 0
|
||||||
|
} else {
|
||||||
|
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
|
||||||
|
if v.Prev.offset <= minOff {
|
||||||
|
v.Prev.offset = 0
|
||||||
|
} else {
|
||||||
|
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
|
||||||
|
}
|
||||||
|
}
|
||||||
|
e.bTable[i] = v
|
||||||
|
}
|
||||||
|
e.cur = maxMatchOffset
|
||||||
|
}
|
||||||
|
|
||||||
|
// s is the position returned by addBlock; below it is used as the starting
// index into e.hist.
s := e.addBlock(src)
|
||||||
|
|
||||||
|
// This check isn't in the Snappy implementation, but there, the caller
|
||||||
|
// instead of the callee handles this case.
|
||||||
|
if len(src) < minNonLiteralBlockSize {
|
||||||
|
// We do not fill the token table.
|
||||||
|
// This will be picked up by caller.
|
||||||
|
dst.n = uint16(len(src))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Override src
|
||||||
|
src = e.hist
|
||||||
|
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
|
||||||
|
|
||||||
|
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
||||||
|
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||||
|
// looking for copies.
|
||||||
|
sLimit := int32(len(src) - inputMargin)
|
||||||
|
|
||||||
|
// cv caches the 8 bytes of src at s; both table hashes are derived from it.
|
||||||
|
cv := load6432(src, s)
|
||||||
|
for {
|
||||||
|
const skipLog = 6
|
||||||
|
const doEvery = 1
|
||||||
|
|
||||||
|
nextS := s
|
||||||
|
var l int32
|
||||||
|
var t int32
|
||||||
|
// Candidate search: leaves the loop with t set to the match position and
// l either 0 (length not measured yet) or the measured match length.
for {
|
||||||
|
nextHashS := hash4x64(cv, tableBits)
|
||||||
|
nextHashL := hash7(cv, tableBits)
|
||||||
|
|
||||||
|
s = nextS
|
||||||
|
// The step grows by one for every 1<<skipLog bytes since the last emit.
nextS = s + doEvery + (s-nextEmit)>>skipLog
|
||||||
|
if nextS > sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
// Fetch a short+long candidate
|
||||||
|
sCandidate := e.table[nextHashS]
|
||||||
|
lCandidate := e.bTable[nextHashL]
|
||||||
|
next := load6432(src, nextS)
|
||||||
|
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
|
||||||
|
e.table[nextHashS] = entry
|
||||||
|
eLong := &e.bTable[nextHashL]
|
||||||
|
eLong.Cur, eLong.Prev = entry, eLong.Cur
|
||||||
|
|
||||||
|
// From here on nextHashS/nextHashL refer to the position nextS.
nextHashS = hash4x64(next, tableBits)
|
||||||
|
nextHashL = hash7(next, tableBits)
|
||||||
|
|
||||||
|
t = lCandidate.Cur.offset - e.cur
|
||||||
|
if s-t < maxMatchOffset {
|
||||||
|
if uint32(cv) == lCandidate.Cur.val {
|
||||||
|
// Store the next match
|
||||||
|
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
|
||||||
|
eLong := &e.bTable[nextHashL]
|
||||||
|
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
|
||||||
|
|
||||||
|
// If the previous long entry matches too, keep whichever is longer.
t2 := lCandidate.Prev.offset - e.cur
|
||||||
|
if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
|
||||||
|
l = e.matchlen(s+4, t+4, src) + 4
|
||||||
|
ml1 := e.matchlen(s+4, t2+4, src) + 4
|
||||||
|
if ml1 > l {
|
||||||
|
t = t2
|
||||||
|
l = ml1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// The current long entry did not match; try the previous one.
t = lCandidate.Prev.offset - e.cur
|
||||||
|
if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
|
||||||
|
// Store the next match
|
||||||
|
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
|
||||||
|
eLong := &e.bTable[nextHashL]
|
||||||
|
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
t = sCandidate.offset - e.cur
|
||||||
|
if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
|
||||||
|
// Found a 4 match...
|
||||||
|
l = e.matchlen(s+4, t+4, src) + 4
|
||||||
|
// Read the long bucket for nextS before it is overwritten below.
lCandidate = e.bTable[nextHashL]
|
||||||
|
// Store the next match
|
||||||
|
|
||||||
|
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
|
||||||
|
eLong := &e.bTable[nextHashL]
|
||||||
|
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
|
||||||
|
|
||||||
|
// If the next long is a candidate, use that...
|
||||||
|
t2 := lCandidate.Cur.offset - e.cur
|
||||||
|
if nextS-t2 < maxMatchOffset {
|
||||||
|
if lCandidate.Cur.val == uint32(next) {
|
||||||
|
ml := e.matchlen(nextS+4, t2+4, src) + 4
|
||||||
|
if ml > l {
|
||||||
|
t = t2
|
||||||
|
s = nextS
|
||||||
|
l = ml
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If the previous long is a candidate, use that...
|
||||||
|
t2 = lCandidate.Prev.offset - e.cur
|
||||||
|
if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) {
|
||||||
|
ml := e.matchlen(nextS+4, t2+4, src) + 4
|
||||||
|
if ml > l {
|
||||||
|
t = t2
|
||||||
|
s = nextS
|
||||||
|
l = ml
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
cv = next
|
||||||
|
}
|
||||||
|
|
||||||
|
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
||||||
|
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
|
||||||
|
// them as literal bytes.
|
||||||
|
|
||||||
|
// Extend the 4-byte match as long as possible.
|
||||||
|
if l == 0 {
|
||||||
|
l = e.matchlenLong(s+4, t+4, src) + 4
|
||||||
|
} else if l == maxMatchLength {
|
||||||
|
l += e.matchlenLong(s+l, t+l, src)
|
||||||
|
}
|
||||||
|
// Extend backwards
|
||||||
|
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
|
||||||
|
s--
|
||||||
|
t--
|
||||||
|
l++
|
||||||
|
}
|
||||||
|
if nextEmit < s {
|
||||||
|
emitLiteral(dst, src[nextEmit:s])
|
||||||
|
}
|
||||||
|
// Debug-only sanity checks; the constant condition keeps them disabled.
if false {
|
||||||
|
if t >= s {
|
||||||
|
panic(fmt.Sprintln("s-t", s, t))
|
||||||
|
}
|
||||||
|
if (s - t) > maxMatchOffset {
|
||||||
|
panic(fmt.Sprintln("mmo", s-t))
|
||||||
|
}
|
||||||
|
if l < baseMatchLength {
|
||||||
|
panic("bml")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
|
||||||
|
s += l
|
||||||
|
nextEmit = s
|
||||||
|
// Never resume at or before nextS, which has already been indexed.
if nextS >= s {
|
||||||
|
s = nextS + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if s >= sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store every 3rd hash in-between.
|
||||||
|
if true {
|
||||||
|
const hashEvery = 3
|
||||||
|
i := s - l + 1
|
||||||
|
if i < s-1 {
|
||||||
|
cv := load6432(src, i)
|
||||||
|
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
|
||||||
|
e.table[hash4x64(cv, tableBits)] = t
|
||||||
|
eLong := &e.bTable[hash7(cv, tableBits)]
|
||||||
|
eLong.Cur, eLong.Prev = t, eLong.Cur
|
||||||
|
|
||||||
|
// Do a long at i+1
|
||||||
|
cv >>= 8
|
||||||
|
t = tableEntry{offset: t.offset + 1, val: uint32(cv)}
|
||||||
|
eLong = &e.bTable[hash7(cv, tableBits)]
|
||||||
|
eLong.Cur, eLong.Prev = t, eLong.Cur
|
||||||
|
|
||||||
|
// We only have enough bits for a short entry at i+2
|
||||||
|
cv >>= 8
|
||||||
|
t = tableEntry{offset: t.offset + 1, val: uint32(cv)}
|
||||||
|
e.table[hash4x64(cv, tableBits)] = t
|
||||||
|
|
||||||
|
// Skip one - otherwise we risk hitting 's'
|
||||||
|
i += 4
|
||||||
|
for ; i < s-1; i += hashEvery {
|
||||||
|
cv := load6432(src, i)
|
||||||
|
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
|
||||||
|
t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)}
|
||||||
|
eLong := &e.bTable[hash7(cv, tableBits)]
|
||||||
|
eLong.Cur, eLong.Prev = t, eLong.Cur
|
||||||
|
e.table[hash4u(t2.val, tableBits)] = t2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We could immediately start working at s now, but to improve
|
||||||
|
// compression we first update the hash table at s-1 and at s.
|
||||||
|
x := load6432(src, s-1)
|
||||||
|
o := e.cur + s - 1
|
||||||
|
prevHashS := hash4x64(x, tableBits)
|
||||||
|
prevHashL := hash7(x, tableBits)
|
||||||
|
e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)}
|
||||||
|
eLong := &e.bTable[prevHashL]
|
||||||
|
eLong.Cur, eLong.Prev = tableEntry{offset: o, val: uint32(x)}, eLong.Cur
|
||||||
|
// x was loaded at s-1, so dropping its low byte yields the bytes at s.
cv = x >> 8
|
||||||
|
}
|
||||||
|
|
||||||
|
emitRemainder:
|
||||||
|
if int(nextEmit) < len(src) {
|
||||||
|
// If nothing was added, don't encode literals.
|
||||||
|
if dst.n == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
emitLiteral(dst, src[nextEmit:])
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,279 @@
|
||||||
|
package flate
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// fastEncL6 is the level 6 encoder state. It combines the embedded fastGen
// with two hash tables that Encode consults for match candidates.
type fastEncL6 struct {
|
||||||
|
fastGen
|
||||||
|
// table is the short-match table; Encode indexes it with hash4x64.
table [tableSize]tableEntry
|
||||||
|
// bTable is the long-match table; Encode indexes it with hash7 and each
// bucket keeps both its current (Cur) and previous (Prev) entry.
bTable [tableSize]tableEntryPrev
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encode appends literal and match tokens for src to dst.
//
// Candidates are looked up in two tables: e.table (short, hash4x64) and
// e.bTable (long, hash7, with a current and a previous entry per bucket).
// After a short match it additionally probes the distance of the last
// emitted match (repeat). When src is shorter than minNonLiteralBlockSize
// no tokens are written; only dst.n is set to len(src) and the caller is
// expected to handle it. If no match is found at all, dst.n is left at 0.
func (e *fastEncL6) Encode(dst *tokens, src []byte) {
|
||||||
|
const (
|
||||||
|
inputMargin = 12 - 1
|
||||||
|
minNonLiteralBlockSize = 1 + 1 + inputMargin
|
||||||
|
)
|
||||||
|
|
||||||
|
// Protect against e.cur wraparound.
|
||||||
|
for e.cur >= bufferReset {
|
||||||
|
// With no history there is nothing worth keeping: clear both tables.
if len(e.hist) == 0 {
|
||||||
|
for i := range e.table[:] {
|
||||||
|
e.table[i] = tableEntry{}
|
||||||
|
}
|
||||||
|
for i := range e.bTable[:] {
|
||||||
|
e.bTable[i] = tableEntryPrev{}
|
||||||
|
}
|
||||||
|
e.cur = maxMatchOffset
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// Shift down everything in the table that isn't already too far away.
|
||||||
|
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
|
||||||
|
for i := range e.table[:] {
|
||||||
|
v := e.table[i].offset
|
||||||
|
if v <= minOff {
|
||||||
|
v = 0
|
||||||
|
} else {
|
||||||
|
v = v - e.cur + maxMatchOffset
|
||||||
|
}
|
||||||
|
e.table[i].offset = v
|
||||||
|
}
|
||||||
|
for i := range e.bTable[:] {
|
||||||
|
v := e.bTable[i]
|
||||||
|
// If the current entry is too old, the previous one is dropped with it.
if v.Cur.offset <= minOff {
|
||||||
|
v.Cur.offset = 0
|
||||||
|
v.Prev.offset = 0
|
||||||
|
} else {
|
||||||
|
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
|
||||||
|
if v.Prev.offset <= minOff {
|
||||||
|
v.Prev.offset = 0
|
||||||
|
} else {
|
||||||
|
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
|
||||||
|
}
|
||||||
|
}
|
||||||
|
e.bTable[i] = v
|
||||||
|
}
|
||||||
|
e.cur = maxMatchOffset
|
||||||
|
}
|
||||||
|
|
||||||
|
// s is the position returned by addBlock; below it is used as the starting
// index into e.hist.
s := e.addBlock(src)
|
||||||
|
|
||||||
|
// This check isn't in the Snappy implementation, but there, the caller
|
||||||
|
// instead of the callee handles this case.
|
||||||
|
if len(src) < minNonLiteralBlockSize {
|
||||||
|
// We do not fill the token table.
|
||||||
|
// This will be picked up by caller.
|
||||||
|
dst.n = uint16(len(src))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Override src
|
||||||
|
src = e.hist
|
||||||
|
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
|
||||||
|
|
||||||
|
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
||||||
|
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||||
|
// looking for copies.
|
||||||
|
sLimit := int32(len(src) - inputMargin)
|
||||||
|
|
||||||
|
// cv caches the 8 bytes of src at s; both table hashes are derived from it.
|
||||||
|
cv := load6432(src, s)
|
||||||
|
// Repeat MUST be > 1 and within range
// NOTE(review): the initial value below is 1, which does not satisfy the
// "> 1" stated above — confirm which of the two is intended.
|
||||||
|
repeat := int32(1)
|
||||||
|
for {
|
||||||
|
const skipLog = 7
|
||||||
|
const doEvery = 1
|
||||||
|
|
||||||
|
nextS := s
|
||||||
|
var l int32
|
||||||
|
var t int32
|
||||||
|
// Candidate search: leaves the loop with t set to the match position and
// l either 0 (length not measured yet) or the measured match length.
for {
|
||||||
|
nextHashS := hash4x64(cv, tableBits)
|
||||||
|
nextHashL := hash7(cv, tableBits)
|
||||||
|
s = nextS
|
||||||
|
// The step grows by one for every 1<<skipLog bytes since the last emit.
nextS = s + doEvery + (s-nextEmit)>>skipLog
|
||||||
|
if nextS > sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
// Fetch a short+long candidate
|
||||||
|
sCandidate := e.table[nextHashS]
|
||||||
|
lCandidate := e.bTable[nextHashL]
|
||||||
|
next := load6432(src, nextS)
|
||||||
|
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
|
||||||
|
e.table[nextHashS] = entry
|
||||||
|
eLong := &e.bTable[nextHashL]
|
||||||
|
eLong.Cur, eLong.Prev = entry, eLong.Cur
|
||||||
|
|
||||||
|
// Calculate hashes of 'next'
|
||||||
|
nextHashS = hash4x64(next, tableBits)
|
||||||
|
nextHashL = hash7(next, tableBits)
|
||||||
|
|
||||||
|
t = lCandidate.Cur.offset - e.cur
|
||||||
|
if s-t < maxMatchOffset {
|
||||||
|
if uint32(cv) == lCandidate.Cur.val {
|
||||||
|
// Long candidate matches at least 4 bytes.
|
||||||
|
|
||||||
|
// Store the next match
|
||||||
|
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
|
||||||
|
eLong := &e.bTable[nextHashL]
|
||||||
|
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
|
||||||
|
|
||||||
|
// Check the previous long candidate as well.
|
||||||
|
t2 := lCandidate.Prev.offset - e.cur
|
||||||
|
if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
|
||||||
|
l = e.matchlen(s+4, t+4, src) + 4
|
||||||
|
ml1 := e.matchlen(s+4, t2+4, src) + 4
|
||||||
|
if ml1 > l {
|
||||||
|
t = t2
|
||||||
|
l = ml1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// Current value did not match, but check if previous long value does.
|
||||||
|
t = lCandidate.Prev.offset - e.cur
|
||||||
|
if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
|
||||||
|
// Store the next match
|
||||||
|
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
|
||||||
|
eLong := &e.bTable[nextHashL]
|
||||||
|
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
t = sCandidate.offset - e.cur
|
||||||
|
if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
|
||||||
|
// Found a 4 match...
|
||||||
|
l = e.matchlen(s+4, t+4, src) + 4
|
||||||
|
|
||||||
|
// Look up next long candidate (at nextS)
|
||||||
|
lCandidate = e.bTable[nextHashL]
|
||||||
|
|
||||||
|
// Store the next match
|
||||||
|
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
|
||||||
|
eLong := &e.bTable[nextHashL]
|
||||||
|
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
|
||||||
|
|
||||||
|
// Check repeat at s + repOff
|
||||||
|
const repOff = 1
|
||||||
|
// t2 is the position `repeat` bytes before s+repOff.
t2 := s - repeat + repOff
|
||||||
|
if load3232(src, t2) == uint32(cv>>(8*repOff)) {
|
||||||
|
ml := e.matchlen(s+4+repOff, t2+4, src) + 4
|
||||||
|
if ml > l {
|
||||||
|
t = t2
|
||||||
|
l = ml
|
||||||
|
s += repOff
|
||||||
|
// Not worth checking more.
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the next long is a candidate, use that...
|
||||||
|
t2 = lCandidate.Cur.offset - e.cur
|
||||||
|
if nextS-t2 < maxMatchOffset {
|
||||||
|
if lCandidate.Cur.val == uint32(next) {
|
||||||
|
ml := e.matchlen(nextS+4, t2+4, src) + 4
|
||||||
|
if ml > l {
|
||||||
|
t = t2
|
||||||
|
s = nextS
|
||||||
|
l = ml
|
||||||
|
// This is ok, but check previous as well.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If the previous long is a candidate, use that...
|
||||||
|
t2 = lCandidate.Prev.offset - e.cur
|
||||||
|
if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) {
|
||||||
|
ml := e.matchlen(nextS+4, t2+4, src) + 4
|
||||||
|
if ml > l {
|
||||||
|
t = t2
|
||||||
|
s = nextS
|
||||||
|
l = ml
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
cv = next
|
||||||
|
}
|
||||||
|
|
||||||
|
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
||||||
|
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
|
||||||
|
// them as literal bytes.
|
||||||
|
|
||||||
|
// Extend the 4-byte match as long as possible.
|
||||||
|
if l == 0 {
|
||||||
|
l = e.matchlenLong(s+4, t+4, src) + 4
|
||||||
|
} else if l == maxMatchLength {
|
||||||
|
l += e.matchlenLong(s+l, t+l, src)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extend backwards
|
||||||
|
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
|
||||||
|
s--
|
||||||
|
t--
|
||||||
|
l++
|
||||||
|
}
|
||||||
|
if nextEmit < s {
|
||||||
|
emitLiteral(dst, src[nextEmit:s])
|
||||||
|
}
|
||||||
|
// Debug-only sanity checks; the constant condition keeps them disabled.
if false {
|
||||||
|
if t >= s {
|
||||||
|
panic(fmt.Sprintln("s-t", s, t))
|
||||||
|
}
|
||||||
|
if (s - t) > maxMatchOffset {
|
||||||
|
panic(fmt.Sprintln("mmo", s-t))
|
||||||
|
}
|
||||||
|
if l < baseMatchLength {
|
||||||
|
panic("bml")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
|
||||||
|
// Remember this match distance for the repeat probe in the next search.
repeat = s - t
|
||||||
|
s += l
|
||||||
|
nextEmit = s
|
||||||
|
// Never resume at or before nextS, which has already been indexed.
if nextS >= s {
|
||||||
|
s = nextS + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if s >= sLimit {
|
||||||
|
// Index after match end.
|
||||||
|
for i := nextS + 1; i < int32(len(src))-8; i += 2 {
|
||||||
|
cv := load6432(src, i)
|
||||||
|
e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur, val: uint32(cv)}
|
||||||
|
eLong := &e.bTable[hash7(cv, tableBits)]
|
||||||
|
eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur, val: uint32(cv)}, eLong.Cur
|
||||||
|
}
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store every long hash in-between and every second short.
|
||||||
|
if true {
|
||||||
|
for i := nextS + 1; i < s-1; i += 2 {
|
||||||
|
cv := load6432(src, i)
|
||||||
|
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
|
||||||
|
t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)}
|
||||||
|
eLong := &e.bTable[hash7(cv, tableBits)]
|
||||||
|
eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
|
||||||
|
e.table[hash4x64(cv, tableBits)] = t
|
||||||
|
eLong.Cur, eLong.Prev = t, eLong.Cur
|
||||||
|
eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We could immediately start working at s now, but to improve
|
||||||
|
// compression we first update the hash table at s-1 and at s.
// NOTE(review): no table update happens at this point in this encoder;
// only cv is reloaded at s. The comment above looks stale — confirm.
|
||||||
|
cv = load6432(src, s)
|
||||||
|
}
|
||||||
|
|
||||||
|
emitRemainder:
|
||||||
|
if int(nextEmit) < len(src) {
|
||||||
|
// If nothing was added, don't encode literals.
|
||||||
|
if dst.n == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
emitLiteral(dst, src[nextEmit:])
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,48 +0,0 @@
|
||||||
// Copyright 2009 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package flate
|
|
||||||
|
|
||||||
// reverseByte maps every byte value to the value with its eight bits in
// reverse order (bit 0 swaps with bit 7, bit 1 with bit 6, and so on).
// Each row below covers sixteen consecutive indices.
var reverseByte = [256]byte{
	0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
	0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
	0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
	0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
	0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
	0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
	0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
	0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
	0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
	0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
	0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
	0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
	0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
	0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
	0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
	0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
}
|
|
||||||
|
|
||||||
func reverseUint16(v uint16) uint16 {
|
|
||||||
return uint16(reverseByte[v>>8]) | uint16(reverseByte[v&0xFF])<<8
|
|
||||||
}
|
|
||||||
|
|
||||||
func reverseBits(number uint16, bitLength byte) uint16 {
|
|
||||||
return reverseUint16(number << uint8(16-bitLength))
|
|
||||||
}
|
|
|
@ -1,856 +0,0 @@
|
||||||
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
|
|
||||||
// Modified for deflate by Klaus Post (c) 2015.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package flate
|
|
||||||
|
|
||||||
// emitLiteral writes a literal chunk and returns the number of bytes written.
|
|
||||||
func emitLiteral(dst *tokens, lit []byte) {
|
|
||||||
ol := int(dst.n)
|
|
||||||
for i, v := range lit {
|
|
||||||
dst.tokens[(i+ol)&maxStoreBlockSize] = token(v)
|
|
||||||
}
|
|
||||||
dst.n += uint16(len(lit))
|
|
||||||
}
|
|
||||||
|
|
||||||
// emitCopy writes a copy chunk and returns the number of bytes written.
|
|
||||||
func emitCopy(dst *tokens, offset, length int) {
|
|
||||||
dst.tokens[dst.n] = matchToken(uint32(length-3), uint32(offset-minOffsetSize))
|
|
||||||
dst.n++
|
|
||||||
}
|
|
||||||
|
|
||||||
// snappyEnc is the encoder interface returned by newSnappy.
type snappyEnc interface {
|
|
||||||
// Encode tokenizes src into dst.
Encode(dst *tokens, src []byte)
|
|
||||||
// Reset takes no arguments and returns nothing; snappyL1 implements it
// as a no-op.
Reset()
|
|
||||||
}
|
|
||||||
|
|
||||||
func newSnappy(level int) snappyEnc {
|
|
||||||
switch level {
|
|
||||||
case 1:
|
|
||||||
return &snappyL1{}
|
|
||||||
case 2:
|
|
||||||
return &snappyL2{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}
|
|
||||||
case 3:
|
|
||||||
return &snappyL3{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}
|
|
||||||
case 4:
|
|
||||||
return &snappyL4{snappyL3{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}}
|
|
||||||
default:
|
|
||||||
panic("invalid level specified")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Hash-table geometry and match limits used by the encoders in this file.
const (
|
|
||||||
tableBits = 14 // Bits used in the table
|
|
||||||
tableSize = 1 << tableBits // Size of the table
|
|
||||||
tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
|
|
||||||
tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
|
|
||||||
baseMatchOffset = 1 // The smallest match offset
|
|
||||||
baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
|
|
||||||
maxMatchOffset = 1 << 15 // The largest match offset
|
|
||||||
)
|
|
||||||
|
|
||||||
// load32 reads the four bytes b[i:i+4] as a little-endian uint32.
// It panics if that range is out of bounds.
func load32(b []byte, i int) uint32 {
	// One up-front slice proves all four index accesses below are in range.
	w := b[i : i+4 : len(b)]
	lo := uint32(w[0]) | uint32(w[1])<<8
	hi := uint32(w[2]) | uint32(w[3])<<8
	return lo | hi<<16
}
|
|
||||||
|
|
||||||
// load64 reads the eight bytes b[i:i+8] as a little-endian uint64.
// It panics if that range is out of bounds.
func load64(b []byte, i int) uint64 {
	// One up-front slice proves all eight index accesses below are in range.
	w := b[i : i+8 : len(b)]
	lo := uint64(w[0]) | uint64(w[1])<<8 | uint64(w[2])<<16 | uint64(w[3])<<24
	hi := uint64(w[4]) | uint64(w[5])<<8 | uint64(w[6])<<16 | uint64(w[7])<<24
	return lo | hi<<32
}
|
|
||||||
|
|
||||||
func hash(u uint32) uint32 {
|
|
||||||
return (u * 0x1e35a7bd) >> tableShift
|
|
||||||
}
|
|
||||||
|
|
||||||
// snappyL1 encapsulates level 1 compression. It has no fields: Encode keeps
// its hash table in a local variable, so nothing is carried between calls.
|
|
||||||
type snappyL1 struct{}
|
|
||||||
|
|
||||||
// Reset is a no-op: snappyL1 has no fields to clear.
func (e *snappyL1) Reset() {}
|
|
||||||
|
|
||||||
// Encode appends literal and match tokens for src to dst, using a hash
// table that is local to this call (matches never reach outside src).
//
// When src is shorter than minNonLiteralBlockSize no tokens are written;
// only dst.n is set to len(src) and the caller is expected to handle it.
func (e *snappyL1) Encode(dst *tokens, src []byte) {
|
|
||||||
const (
|
|
||||||
inputMargin = 16 - 1
|
|
||||||
minNonLiteralBlockSize = 1 + 1 + inputMargin
|
|
||||||
)
|
|
||||||
|
|
||||||
// This check isn't in the Snappy implementation, but there, the caller
|
|
||||||
// instead of the callee handles this case.
|
|
||||||
if len(src) < minNonLiteralBlockSize {
|
|
||||||
// We do not fill the token table.
|
|
||||||
// This will be picked up by caller.
|
|
||||||
dst.n = uint16(len(src))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize the hash table.
|
|
||||||
//
|
|
||||||
// The table element type is uint16, as s < sLimit and sLimit < len(src)
|
|
||||||
// and len(src) <= maxStoreBlockSize and maxStoreBlockSize == 65535.
|
|
||||||
var table [tableSize]uint16
|
|
||||||
|
|
||||||
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
|
||||||
// lets us use a fast path for emitLiteral in the main loop, while we are
|
|
||||||
// looking for copies.
|
|
||||||
sLimit := len(src) - inputMargin
|
|
||||||
|
|
||||||
// nextEmit is where in src the next emitLiteral should start from.
|
|
||||||
nextEmit := 0
|
|
||||||
|
|
||||||
// The encoded form must start with a literal, as there are no previous
|
|
||||||
// bytes to copy, so we start looking for hash matches at s == 1.
|
|
||||||
s := 1
|
|
||||||
nextHash := hash(load32(src, s))
|
|
||||||
|
|
||||||
for {
|
|
||||||
// Copied from the C++ snappy implementation:
|
|
||||||
//
|
|
||||||
// Heuristic match skipping: If 32 bytes are scanned with no matches
|
|
||||||
// found, start looking only at every other byte. If 32 more bytes are
|
|
||||||
// scanned (or skipped), look at every third byte, etc.. When a match
|
|
||||||
// is found, immediately go back to looking at every byte. This is a
|
|
||||||
// small loss (~5% performance, ~0.1% density) for compressible data
|
|
||||||
// due to more bookkeeping, but for non-compressible data (such as
|
|
||||||
// JPEG) it's a huge win since the compressor quickly "realizes" the
|
|
||||||
// data is incompressible and doesn't bother looking for matches
|
|
||||||
// everywhere.
|
|
||||||
//
|
|
||||||
// The "skip" variable keeps track of how many bytes there are since
|
|
||||||
// the last match; dividing it by 32 (ie. right-shifting by five) gives
|
|
||||||
// the number of bytes to move ahead for each iteration.
|
|
||||||
skip := 32
|
|
||||||
|
|
||||||
nextS := s
|
|
||||||
candidate := 0
|
|
||||||
// Scan forward until the table yields a position whose four bytes equal
// the four bytes at s.
for {
|
|
||||||
s = nextS
|
|
||||||
bytesBetweenHashLookups := skip >> 5
|
|
||||||
nextS = s + bytesBetweenHashLookups
|
|
||||||
skip += bytesBetweenHashLookups
|
|
||||||
if nextS > sLimit {
|
|
||||||
goto emitRemainder
|
|
||||||
}
|
|
||||||
// Read the old bucket content before replacing it with s.
candidate = int(table[nextHash&tableMask])
|
|
||||||
table[nextHash&tableMask] = uint16(s)
|
|
||||||
nextHash = hash(load32(src, nextS))
|
|
||||||
// TODO: < should be <=, and add a test for that.
|
|
||||||
if s-candidate < maxMatchOffset && load32(src, s) == load32(src, candidate) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
|
||||||
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
|
|
||||||
// them as literal bytes.
|
|
||||||
emitLiteral(dst, src[nextEmit:s])
|
|
||||||
|
|
||||||
// Call emitCopy, and then see if another emitCopy could be our next
|
|
||||||
// move. Repeat until we find no match for the input immediately after
|
|
||||||
// what was consumed by the last emitCopy call.
|
|
||||||
//
|
|
||||||
// If we exit this loop normally then we need to call emitLiteral next,
|
|
||||||
// though we don't yet know how big the literal will be. We handle that
|
|
||||||
// by proceeding to the next iteration of the main loop. We also can
|
|
||||||
// exit this loop via goto if we get close to exhausting the input.
|
|
||||||
for {
|
|
||||||
// Invariant: we have a 4-byte match at s, and no need to emit any
|
|
||||||
// literal bytes prior to s.
|
|
||||||
base := s
|
|
||||||
|
|
||||||
// Extend the 4-byte match as long as possible.
|
|
||||||
//
|
|
||||||
// This is an inlined version of Snappy's:
|
|
||||||
// s = extendMatch(src, candidate+4, s+4)
|
|
||||||
s += 4
|
|
||||||
// s1 caps the match at maxMatchLength bytes and at the end of src.
s1 := base + maxMatchLength
|
|
||||||
if s1 > len(src) {
|
|
||||||
s1 = len(src)
|
|
||||||
}
|
|
||||||
a := src[s:s1]
|
|
||||||
b := src[candidate+4:]
|
|
||||||
b = b[:len(a)]
|
|
||||||
l := len(a)
|
|
||||||
for i := range a {
|
|
||||||
if a[i] != b[i] {
|
|
||||||
l = i
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s += l
|
|
||||||
|
|
||||||
// matchToken is flate's equivalent of Snappy's emitCopy.
|
|
||||||
dst.tokens[dst.n] = matchToken(uint32(s-base-baseMatchLength), uint32(base-candidate-baseMatchOffset))
|
|
||||||
dst.n++
|
|
||||||
nextEmit = s
|
|
||||||
if s >= sLimit {
|
|
||||||
goto emitRemainder
|
|
||||||
}
|
|
||||||
|
|
||||||
// We could immediately start working at s now, but to improve
|
|
||||||
// compression we first update the hash table at s-1 and at s. If
|
|
||||||
// another emitCopy is not our next move, also calculate nextHash
|
|
||||||
// at s+1. At least on GOARCH=amd64, these three hash calculations
|
|
||||||
// are faster as one load64 call (with some shifts) instead of
|
|
||||||
// three load32 calls.
|
|
||||||
x := load64(src, s-1)
|
|
||||||
prevHash := hash(uint32(x >> 0))
|
|
||||||
table[prevHash&tableMask] = uint16(s - 1)
|
|
||||||
currHash := hash(uint32(x >> 8))
|
|
||||||
candidate = int(table[currHash&tableMask])
|
|
||||||
table[currHash&tableMask] = uint16(s)
|
|
||||||
// TODO: >= should be >, and add a test for that.
|
|
||||||
if s-candidate >= maxMatchOffset || uint32(x>>8) != load32(src, candidate) {
|
|
||||||
nextHash = hash(uint32(x >> 16))
|
|
||||||
s++
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
emitRemainder:
|
|
||||||
// Whatever follows the last match is emitted as literals.
if nextEmit < len(src) {
|
|
||||||
emitLiteral(dst, src[nextEmit:])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// tableEntry is one hash-table slot as written by the encoders.
type tableEntry struct {
|
|
||||||
// val is the 32-bit value that was read at the recorded position.
val uint32
|
|
||||||
// offset is the recorded position plus the encoder's cur base.
offset int32
|
|
||||||
}
|
|
||||||
|
|
||||||
// load3232 reads the four bytes b[i:i+4] as a little-endian uint32, taking
// the index as an int32. It panics if that range is out of bounds.
func load3232(b []byte, i int32) uint32 {
	// One up-front slice proves all four index accesses below are in range.
	w := b[i : i+4 : len(b)]
	b0, b1, b2, b3 := uint32(w[0]), uint32(w[1]), uint32(w[2]), uint32(w[3])
	return b0 | b1<<8 | b2<<16 | b3<<24
}
|
|
||||||
|
|
||||||
// load6432 reads the eight bytes b[i:i+8] as a little-endian uint64, taking
// the index as an int32. It panics if that range is out of bounds.
func load6432(b []byte, i int32) uint64 {
	// One up-front slice proves all eight index accesses below are in range.
	w := b[i : i+8 : len(b)]
	lower := uint64(w[0]) | uint64(w[1])<<8 | uint64(w[2])<<16 | uint64(w[3])<<24
	upper := uint64(w[4]) | uint64(w[5])<<8 | uint64(w[6])<<16 | uint64(w[7])<<24
	return lower | upper<<32
}
|
|
||||||
|
|
||||||
// snappyGen holds the state that the level 2 and higher encoders embed:
|
|
||||||
// a byte buffer (prev) and a running offset base (cur).
|
|
||||||
// The hash table itself lives in the embedding encoder, not here.
|
|
||||||
type snappyGen struct {
|
|
||||||
// prev is created empty with capacity maxStoreBlockSize by newSnappy.
// presumably it holds the previous block's bytes — confirm against the
// encoders that fill it.
prev []byte
|
|
||||||
// cur is the base added to a position when it is stored as a table offset.
cur int32
|
|
||||||
}
|
|
||||||
|
|
||||||
// snappyL2 is the level 2 encoder returned by newSnappy.
|
|
||||||
// It embeds snappyGen for the state carried between blocks
|
|
||||||
// and adds the hash table of match candidates.
|
|
||||||
type snappyL2 struct {
|
|
||||||
snappyGen
|
|
||||||
// table is indexed by hash(...)&tableMask in Encode.
table [tableSize]tableEntry
|
|
||||||
}
|
|
||||||
|
|
||||||
// EncodeL2 uses a similar algorithm to level 1, but is capable
|
|
||||||
// of matching across blocks giving better compression at a small slowdown.
|
|
||||||
func (e *snappyL2) Encode(dst *tokens, src []byte) {
|
|
||||||
const (
|
|
||||||
inputMargin = 16 - 1
|
|
||||||
minNonLiteralBlockSize = 1 + 1 + inputMargin
|
|
||||||
)
|
|
||||||
|
|
||||||
// Ensure that e.cur doesn't wrap, mainly an issue on 32 bits.
|
|
||||||
if e.cur > 1<<30 {
|
|
||||||
for i := range e.table {
|
|
||||||
e.table[i] = tableEntry{}
|
|
||||||
}
|
|
||||||
e.cur = maxStoreBlockSize
|
|
||||||
}
|
|
||||||
|
|
||||||
// This check isn't in the Snappy implementation, but there, the caller
|
|
||||||
// instead of the callee handles this case.
|
|
||||||
if len(src) < minNonLiteralBlockSize {
|
|
||||||
// We do not fill the token table.
|
|
||||||
// This will be picked up by caller.
|
|
||||||
dst.n = uint16(len(src))
|
|
||||||
e.cur += maxStoreBlockSize
|
|
||||||
e.prev = e.prev[:0]
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
|
||||||
// lets us use a fast path for emitLiteral in the main loop, while we are
|
|
||||||
// looking for copies.
|
|
||||||
sLimit := int32(len(src) - inputMargin)
|
|
||||||
|
|
||||||
// nextEmit is where in src the next emitLiteral should start from.
|
|
||||||
nextEmit := int32(0)
|
|
||||||
s := int32(0)
|
|
||||||
cv := load3232(src, s)
|
|
||||||
nextHash := hash(cv)
|
|
||||||
|
|
||||||
for {
|
|
||||||
// Copied from the C++ snappy implementation:
|
|
||||||
//
|
|
||||||
// Heuristic match skipping: If 32 bytes are scanned with no matches
|
|
||||||
// found, start looking only at every other byte. If 32 more bytes are
|
|
||||||
// scanned (or skipped), look at every third byte, etc.. When a match
|
|
||||||
// is found, immediately go back to looking at every byte. This is a
|
|
||||||
// small loss (~5% performance, ~0.1% density) for compressible data
|
|
||||||
// due to more bookkeeping, but for non-compressible data (such as
|
|
||||||
// JPEG) it's a huge win since the compressor quickly "realizes" the
|
|
||||||
// data is incompressible and doesn't bother looking for matches
|
|
||||||
// everywhere.
|
|
||||||
//
|
|
||||||
// The "skip" variable keeps track of how many bytes there are since
|
|
||||||
// the last match; dividing it by 32 (ie. right-shifting by five) gives
|
|
||||||
// the number of bytes to move ahead for each iteration.
|
|
||||||
skip := int32(32)
|
|
||||||
|
|
||||||
nextS := s
|
|
||||||
var candidate tableEntry
|
|
||||||
for {
|
|
||||||
s = nextS
|
|
||||||
bytesBetweenHashLookups := skip >> 5
|
|
||||||
nextS = s + bytesBetweenHashLookups
|
|
||||||
skip += bytesBetweenHashLookups
|
|
||||||
if nextS > sLimit {
|
|
||||||
goto emitRemainder
|
|
||||||
}
|
|
||||||
candidate = e.table[nextHash&tableMask]
|
|
||||||
now := load3232(src, nextS)
|
|
||||||
e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: cv}
|
|
||||||
nextHash = hash(now)
|
|
||||||
|
|
||||||
offset := s - (candidate.offset - e.cur)
|
|
||||||
if offset >= maxMatchOffset || cv != candidate.val {
|
|
||||||
// Out of range or not matched.
|
|
||||||
cv = now
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
|
||||||
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
|
|
||||||
// them as literal bytes.
|
|
||||||
emitLiteral(dst, src[nextEmit:s])
|
|
||||||
|
|
||||||
// Call emitCopy, and then see if another emitCopy could be our next
|
|
||||||
// move. Repeat until we find no match for the input immediately after
|
|
||||||
// what was consumed by the last emitCopy call.
|
|
||||||
//
|
|
||||||
// If we exit this loop normally then we need to call emitLiteral next,
|
|
||||||
// though we don't yet know how big the literal will be. We handle that
|
|
||||||
// by proceeding to the next iteration of the main loop. We also can
|
|
||||||
// exit this loop via goto if we get close to exhausting the input.
|
|
||||||
for {
|
|
||||||
// Invariant: we have a 4-byte match at s, and no need to emit any
|
|
||||||
// literal bytes prior to s.
|
|
||||||
|
|
||||||
// Extend the 4-byte match as long as possible.
|
|
||||||
//
|
|
||||||
s += 4
|
|
||||||
t := candidate.offset - e.cur + 4
|
|
||||||
l := e.matchlen(s, t, src)
|
|
||||||
|
|
||||||
// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
|
|
||||||
dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
|
|
||||||
dst.n++
|
|
||||||
s += l
|
|
||||||
nextEmit = s
|
|
||||||
if s >= sLimit {
|
|
||||||
goto emitRemainder
|
|
||||||
}
|
|
||||||
|
|
||||||
// We could immediately start working at s now, but to improve
|
|
||||||
// compression we first update the hash table at s-1 and at s. If
|
|
||||||
// another emitCopy is not our next move, also calculate nextHash
|
|
||||||
// at s+1. At least on GOARCH=amd64, these three hash calculations
|
|
||||||
// are faster as one load64 call (with some shifts) instead of
|
|
||||||
// three load32 calls.
|
|
||||||
x := load6432(src, s-1)
|
|
||||||
prevHash := hash(uint32(x))
|
|
||||||
e.table[prevHash&tableMask] = tableEntry{offset: e.cur + s - 1, val: uint32(x)}
|
|
||||||
x >>= 8
|
|
||||||
currHash := hash(uint32(x))
|
|
||||||
candidate = e.table[currHash&tableMask]
|
|
||||||
e.table[currHash&tableMask] = tableEntry{offset: e.cur + s, val: uint32(x)}
|
|
||||||
|
|
||||||
offset := s - (candidate.offset - e.cur)
|
|
||||||
if offset >= maxMatchOffset || uint32(x) != candidate.val {
|
|
||||||
cv = uint32(x >> 8)
|
|
||||||
nextHash = hash(cv)
|
|
||||||
s++
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
emitRemainder:
|
|
||||||
if int(nextEmit) < len(src) {
|
|
||||||
emitLiteral(dst, src[nextEmit:])
|
|
||||||
}
|
|
||||||
e.cur += int32(len(src))
|
|
||||||
e.prev = e.prev[:len(src)]
|
|
||||||
copy(e.prev, src)
|
|
||||||
}
|
|
||||||
|
|
||||||
type tableEntryPrev struct {
|
|
||||||
Cur tableEntry
|
|
||||||
Prev tableEntry
|
|
||||||
}
|
|
||||||
|
|
||||||
// snappyL3
|
|
||||||
type snappyL3 struct {
|
|
||||||
snappyGen
|
|
||||||
table [tableSize]tableEntryPrev
|
|
||||||
}
|
|
||||||
|
|
||||||
// Encode uses a similar algorithm to level 2, will check up to two candidates.
|
|
||||||
func (e *snappyL3) Encode(dst *tokens, src []byte) {
|
|
||||||
const (
|
|
||||||
inputMargin = 16 - 1
|
|
||||||
minNonLiteralBlockSize = 1 + 1 + inputMargin
|
|
||||||
)
|
|
||||||
|
|
||||||
// Ensure that e.cur doesn't wrap, mainly an issue on 32 bits.
|
|
||||||
if e.cur > 1<<30 {
|
|
||||||
for i := range e.table {
|
|
||||||
e.table[i] = tableEntryPrev{}
|
|
||||||
}
|
|
||||||
e.cur = maxStoreBlockSize
|
|
||||||
}
|
|
||||||
|
|
||||||
// This check isn't in the Snappy implementation, but there, the caller
|
|
||||||
// instead of the callee handles this case.
|
|
||||||
if len(src) < minNonLiteralBlockSize {
|
|
||||||
// We do not fill the token table.
|
|
||||||
// This will be picked up by caller.
|
|
||||||
dst.n = uint16(len(src))
|
|
||||||
e.cur += maxStoreBlockSize
|
|
||||||
e.prev = e.prev[:0]
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
|
||||||
// lets us use a fast path for emitLiteral in the main loop, while we are
|
|
||||||
// looking for copies.
|
|
||||||
sLimit := int32(len(src) - inputMargin)
|
|
||||||
|
|
||||||
// nextEmit is where in src the next emitLiteral should start from.
|
|
||||||
nextEmit := int32(0)
|
|
||||||
s := int32(0)
|
|
||||||
cv := load3232(src, s)
|
|
||||||
nextHash := hash(cv)
|
|
||||||
|
|
||||||
for {
|
|
||||||
// Copied from the C++ snappy implementation:
|
|
||||||
//
|
|
||||||
// Heuristic match skipping: If 32 bytes are scanned with no matches
|
|
||||||
// found, start looking only at every other byte. If 32 more bytes are
|
|
||||||
// scanned (or skipped), look at every third byte, etc.. When a match
|
|
||||||
// is found, immediately go back to looking at every byte. This is a
|
|
||||||
// small loss (~5% performance, ~0.1% density) for compressible data
|
|
||||||
// due to more bookkeeping, but for non-compressible data (such as
|
|
||||||
// JPEG) it's a huge win since the compressor quickly "realizes" the
|
|
||||||
// data is incompressible and doesn't bother looking for matches
|
|
||||||
// everywhere.
|
|
||||||
//
|
|
||||||
// The "skip" variable keeps track of how many bytes there are since
|
|
||||||
// the last match; dividing it by 32 (ie. right-shifting by five) gives
|
|
||||||
// the number of bytes to move ahead for each iteration.
|
|
||||||
skip := int32(32)
|
|
||||||
|
|
||||||
nextS := s
|
|
||||||
var candidate tableEntry
|
|
||||||
for {
|
|
||||||
s = nextS
|
|
||||||
bytesBetweenHashLookups := skip >> 5
|
|
||||||
nextS = s + bytesBetweenHashLookups
|
|
||||||
skip += bytesBetweenHashLookups
|
|
||||||
if nextS > sLimit {
|
|
||||||
goto emitRemainder
|
|
||||||
}
|
|
||||||
candidates := e.table[nextHash&tableMask]
|
|
||||||
now := load3232(src, nextS)
|
|
||||||
e.table[nextHash&tableMask] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
|
|
||||||
nextHash = hash(now)
|
|
||||||
|
|
||||||
// Check both candidates
|
|
||||||
candidate = candidates.Cur
|
|
||||||
if cv == candidate.val {
|
|
||||||
offset := s - (candidate.offset - e.cur)
|
|
||||||
if offset < maxMatchOffset {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// We only check if value mismatches.
|
|
||||||
// Offset will always be invalid in other cases.
|
|
||||||
candidate = candidates.Prev
|
|
||||||
if cv == candidate.val {
|
|
||||||
offset := s - (candidate.offset - e.cur)
|
|
||||||
if offset < maxMatchOffset {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
cv = now
|
|
||||||
}
|
|
||||||
|
|
||||||
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
|
||||||
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
|
|
||||||
// them as literal bytes.
|
|
||||||
emitLiteral(dst, src[nextEmit:s])
|
|
||||||
|
|
||||||
// Call emitCopy, and then see if another emitCopy could be our next
|
|
||||||
// move. Repeat until we find no match for the input immediately after
|
|
||||||
// what was consumed by the last emitCopy call.
|
|
||||||
//
|
|
||||||
// If we exit this loop normally then we need to call emitLiteral next,
|
|
||||||
// though we don't yet know how big the literal will be. We handle that
|
|
||||||
// by proceeding to the next iteration of the main loop. We also can
|
|
||||||
// exit this loop via goto if we get close to exhausting the input.
|
|
||||||
for {
|
|
||||||
// Invariant: we have a 4-byte match at s, and no need to emit any
|
|
||||||
// literal bytes prior to s.
|
|
||||||
|
|
||||||
// Extend the 4-byte match as long as possible.
|
|
||||||
//
|
|
||||||
s += 4
|
|
||||||
t := candidate.offset - e.cur + 4
|
|
||||||
l := e.matchlen(s, t, src)
|
|
||||||
|
|
||||||
// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
|
|
||||||
dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
|
|
||||||
dst.n++
|
|
||||||
s += l
|
|
||||||
nextEmit = s
|
|
||||||
if s >= sLimit {
|
|
||||||
goto emitRemainder
|
|
||||||
}
|
|
||||||
|
|
||||||
// We could immediately start working at s now, but to improve
|
|
||||||
// compression we first update the hash table at s-2, s-1 and at s. If
|
|
||||||
// another emitCopy is not our next move, also calculate nextHash
|
|
||||||
// at s+1. At least on GOARCH=amd64, these three hash calculations
|
|
||||||
// are faster as one load64 call (with some shifts) instead of
|
|
||||||
// three load32 calls.
|
|
||||||
x := load6432(src, s-2)
|
|
||||||
prevHash := hash(uint32(x))
|
|
||||||
|
|
||||||
e.table[prevHash&tableMask] = tableEntryPrev{
|
|
||||||
Prev: e.table[prevHash&tableMask].Cur,
|
|
||||||
Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)},
|
|
||||||
}
|
|
||||||
x >>= 8
|
|
||||||
prevHash = hash(uint32(x))
|
|
||||||
|
|
||||||
e.table[prevHash&tableMask] = tableEntryPrev{
|
|
||||||
Prev: e.table[prevHash&tableMask].Cur,
|
|
||||||
Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)},
|
|
||||||
}
|
|
||||||
x >>= 8
|
|
||||||
currHash := hash(uint32(x))
|
|
||||||
candidates := e.table[currHash&tableMask]
|
|
||||||
cv = uint32(x)
|
|
||||||
e.table[currHash&tableMask] = tableEntryPrev{
|
|
||||||
Prev: candidates.Cur,
|
|
||||||
Cur: tableEntry{offset: s + e.cur, val: cv},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check both candidates
|
|
||||||
candidate = candidates.Cur
|
|
||||||
if cv == candidate.val {
|
|
||||||
offset := s - (candidate.offset - e.cur)
|
|
||||||
if offset < maxMatchOffset {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// We only check if value mismatches.
|
|
||||||
// Offset will always be invalid in other cases.
|
|
||||||
candidate = candidates.Prev
|
|
||||||
if cv == candidate.val {
|
|
||||||
offset := s - (candidate.offset - e.cur)
|
|
||||||
if offset < maxMatchOffset {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
cv = uint32(x >> 8)
|
|
||||||
nextHash = hash(cv)
|
|
||||||
s++
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
emitRemainder:
|
|
||||||
if int(nextEmit) < len(src) {
|
|
||||||
emitLiteral(dst, src[nextEmit:])
|
|
||||||
}
|
|
||||||
e.cur += int32(len(src))
|
|
||||||
e.prev = e.prev[:len(src)]
|
|
||||||
copy(e.prev, src)
|
|
||||||
}
|
|
||||||
|
|
||||||
// snappyL4
|
|
||||||
type snappyL4 struct {
|
|
||||||
snappyL3
|
|
||||||
}
|
|
||||||
|
|
||||||
// Encode uses a similar algorithm to level 3,
|
|
||||||
// but will check up to two candidates if first isn't long enough.
|
|
||||||
func (e *snappyL4) Encode(dst *tokens, src []byte) {
|
|
||||||
const (
|
|
||||||
inputMargin = 16 - 1
|
|
||||||
minNonLiteralBlockSize = 1 + 1 + inputMargin
|
|
||||||
matchLenGood = 12
|
|
||||||
)
|
|
||||||
|
|
||||||
// Ensure that e.cur doesn't wrap, mainly an issue on 32 bits.
|
|
||||||
if e.cur > 1<<30 {
|
|
||||||
for i := range e.table {
|
|
||||||
e.table[i] = tableEntryPrev{}
|
|
||||||
}
|
|
||||||
e.cur = maxStoreBlockSize
|
|
||||||
}
|
|
||||||
|
|
||||||
// This check isn't in the Snappy implementation, but there, the caller
|
|
||||||
// instead of the callee handles this case.
|
|
||||||
if len(src) < minNonLiteralBlockSize {
|
|
||||||
// We do not fill the token table.
|
|
||||||
// This will be picked up by caller.
|
|
||||||
dst.n = uint16(len(src))
|
|
||||||
e.cur += maxStoreBlockSize
|
|
||||||
e.prev = e.prev[:0]
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
|
||||||
// lets us use a fast path for emitLiteral in the main loop, while we are
|
|
||||||
// looking for copies.
|
|
||||||
sLimit := int32(len(src) - inputMargin)
|
|
||||||
|
|
||||||
// nextEmit is where in src the next emitLiteral should start from.
|
|
||||||
nextEmit := int32(0)
|
|
||||||
s := int32(0)
|
|
||||||
cv := load3232(src, s)
|
|
||||||
nextHash := hash(cv)
|
|
||||||
|
|
||||||
for {
|
|
||||||
// Copied from the C++ snappy implementation:
|
|
||||||
//
|
|
||||||
// Heuristic match skipping: If 32 bytes are scanned with no matches
|
|
||||||
// found, start looking only at every other byte. If 32 more bytes are
|
|
||||||
// scanned (or skipped), look at every third byte, etc.. When a match
|
|
||||||
// is found, immediately go back to looking at every byte. This is a
|
|
||||||
// small loss (~5% performance, ~0.1% density) for compressible data
|
|
||||||
// due to more bookkeeping, but for non-compressible data (such as
|
|
||||||
// JPEG) it's a huge win since the compressor quickly "realizes" the
|
|
||||||
// data is incompressible and doesn't bother looking for matches
|
|
||||||
// everywhere.
|
|
||||||
//
|
|
||||||
// The "skip" variable keeps track of how many bytes there are since
|
|
||||||
// the last match; dividing it by 32 (ie. right-shifting by five) gives
|
|
||||||
// the number of bytes to move ahead for each iteration.
|
|
||||||
skip := int32(32)
|
|
||||||
|
|
||||||
nextS := s
|
|
||||||
var candidate tableEntry
|
|
||||||
var candidateAlt tableEntry
|
|
||||||
for {
|
|
||||||
s = nextS
|
|
||||||
bytesBetweenHashLookups := skip >> 5
|
|
||||||
nextS = s + bytesBetweenHashLookups
|
|
||||||
skip += bytesBetweenHashLookups
|
|
||||||
if nextS > sLimit {
|
|
||||||
goto emitRemainder
|
|
||||||
}
|
|
||||||
candidates := e.table[nextHash&tableMask]
|
|
||||||
now := load3232(src, nextS)
|
|
||||||
e.table[nextHash&tableMask] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
|
|
||||||
nextHash = hash(now)
|
|
||||||
|
|
||||||
// Check both candidates
|
|
||||||
candidate = candidates.Cur
|
|
||||||
if cv == candidate.val {
|
|
||||||
offset := s - (candidate.offset - e.cur)
|
|
||||||
if offset < maxMatchOffset {
|
|
||||||
offset = s - (candidates.Prev.offset - e.cur)
|
|
||||||
if cv == candidates.Prev.val && offset < maxMatchOffset {
|
|
||||||
candidateAlt = candidates.Prev
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// We only check if value mismatches.
|
|
||||||
// Offset will always be invalid in other cases.
|
|
||||||
candidate = candidates.Prev
|
|
||||||
if cv == candidate.val {
|
|
||||||
offset := s - (candidate.offset - e.cur)
|
|
||||||
if offset < maxMatchOffset {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
cv = now
|
|
||||||
}
|
|
||||||
|
|
||||||
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
|
||||||
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
|
|
||||||
// them as literal bytes.
|
|
||||||
emitLiteral(dst, src[nextEmit:s])
|
|
||||||
|
|
||||||
// Call emitCopy, and then see if another emitCopy could be our next
|
|
||||||
// move. Repeat until we find no match for the input immediately after
|
|
||||||
// what was consumed by the last emitCopy call.
|
|
||||||
//
|
|
||||||
// If we exit this loop normally then we need to call emitLiteral next,
|
|
||||||
// though we don't yet know how big the literal will be. We handle that
|
|
||||||
// by proceeding to the next iteration of the main loop. We also can
|
|
||||||
// exit this loop via goto if we get close to exhausting the input.
|
|
||||||
for {
|
|
||||||
// Invariant: we have a 4-byte match at s, and no need to emit any
|
|
||||||
// literal bytes prior to s.
|
|
||||||
|
|
||||||
// Extend the 4-byte match as long as possible.
|
|
||||||
//
|
|
||||||
s += 4
|
|
||||||
t := candidate.offset - e.cur + 4
|
|
||||||
l := e.matchlen(s, t, src)
|
|
||||||
// Try alternative candidate if match length < matchLenGood.
|
|
||||||
if l < matchLenGood-4 && candidateAlt.offset != 0 {
|
|
||||||
t2 := candidateAlt.offset - e.cur + 4
|
|
||||||
l2 := e.matchlen(s, t2, src)
|
|
||||||
if l2 > l {
|
|
||||||
l = l2
|
|
||||||
t = t2
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
|
|
||||||
dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
|
|
||||||
dst.n++
|
|
||||||
s += l
|
|
||||||
nextEmit = s
|
|
||||||
if s >= sLimit {
|
|
||||||
goto emitRemainder
|
|
||||||
}
|
|
||||||
|
|
||||||
// We could immediately start working at s now, but to improve
|
|
||||||
// compression we first update the hash table at s-2, s-1 and at s. If
|
|
||||||
// another emitCopy is not our next move, also calculate nextHash
|
|
||||||
// at s+1. At least on GOARCH=amd64, these three hash calculations
|
|
||||||
// are faster as one load64 call (with some shifts) instead of
|
|
||||||
// three load32 calls.
|
|
||||||
x := load6432(src, s-2)
|
|
||||||
prevHash := hash(uint32(x))
|
|
||||||
|
|
||||||
e.table[prevHash&tableMask] = tableEntryPrev{
|
|
||||||
Prev: e.table[prevHash&tableMask].Cur,
|
|
||||||
Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)},
|
|
||||||
}
|
|
||||||
x >>= 8
|
|
||||||
prevHash = hash(uint32(x))
|
|
||||||
|
|
||||||
e.table[prevHash&tableMask] = tableEntryPrev{
|
|
||||||
Prev: e.table[prevHash&tableMask].Cur,
|
|
||||||
Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)},
|
|
||||||
}
|
|
||||||
x >>= 8
|
|
||||||
currHash := hash(uint32(x))
|
|
||||||
candidates := e.table[currHash&tableMask]
|
|
||||||
cv = uint32(x)
|
|
||||||
e.table[currHash&tableMask] = tableEntryPrev{
|
|
||||||
Prev: candidates.Cur,
|
|
||||||
Cur: tableEntry{offset: s + e.cur, val: cv},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check both candidates
|
|
||||||
candidate = candidates.Cur
|
|
||||||
candidateAlt = tableEntry{}
|
|
||||||
if cv == candidate.val {
|
|
||||||
offset := s - (candidate.offset - e.cur)
|
|
||||||
if offset < maxMatchOffset {
|
|
||||||
offset = s - (candidates.Prev.offset - e.cur)
|
|
||||||
if cv == candidates.Prev.val && offset < maxMatchOffset {
|
|
||||||
candidateAlt = candidates.Prev
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// We only check if value mismatches.
|
|
||||||
// Offset will always be invalid in other cases.
|
|
||||||
candidate = candidates.Prev
|
|
||||||
if cv == candidate.val {
|
|
||||||
offset := s - (candidate.offset - e.cur)
|
|
||||||
if offset < maxMatchOffset {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
cv = uint32(x >> 8)
|
|
||||||
nextHash = hash(cv)
|
|
||||||
s++
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
emitRemainder:
|
|
||||||
if int(nextEmit) < len(src) {
|
|
||||||
emitLiteral(dst, src[nextEmit:])
|
|
||||||
}
|
|
||||||
e.cur += int32(len(src))
|
|
||||||
e.prev = e.prev[:len(src)]
|
|
||||||
copy(e.prev, src)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *snappyGen) matchlen(s, t int32, src []byte) int32 {
|
|
||||||
s1 := int(s) + maxMatchLength - 4
|
|
||||||
if s1 > len(src) {
|
|
||||||
s1 = len(src)
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we are inside the current block
|
|
||||||
if t >= 0 {
|
|
||||||
b := src[t:]
|
|
||||||
a := src[s:s1]
|
|
||||||
b = b[:len(a)]
|
|
||||||
// Extend the match to be as long as possible.
|
|
||||||
for i := range a {
|
|
||||||
if a[i] != b[i] {
|
|
||||||
return int32(i)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return int32(len(a))
|
|
||||||
}
|
|
||||||
|
|
||||||
// We found a match in the previous block.
|
|
||||||
tp := int32(len(e.prev)) + t
|
|
||||||
if tp < 0 {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extend the match to be as long as possible.
|
|
||||||
a := src[s:s1]
|
|
||||||
b := e.prev[tp:]
|
|
||||||
if len(b) > len(a) {
|
|
||||||
b = b[:len(a)]
|
|
||||||
}
|
|
||||||
a = a[:len(b)]
|
|
||||||
for i := range b {
|
|
||||||
if a[i] != b[i] {
|
|
||||||
return int32(i)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
n := int32(len(b))
|
|
||||||
a = src[s+n : s1]
|
|
||||||
b = src[:len(a)]
|
|
||||||
for i := range a {
|
|
||||||
if a[i] != b[i] {
|
|
||||||
return int32(i) + n
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return int32(len(a)) + n
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reset the encoding table.
|
|
||||||
func (e *snappyGen) Reset() {
|
|
||||||
e.prev = e.prev[:0]
|
|
||||||
e.cur += maxMatchOffset + 1
|
|
||||||
}
|
|
|
@ -0,0 +1,252 @@
|
||||||
|
package flate
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"math"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// maxStatelessBlock is the largest input slice compressed as one block by
	// StatelessDeflate; positions inside a block are tracked as int16.
	maxStatelessBlock = math.MaxInt16

	// slTableBits is the number of bits in a stateless hash value.
	slTableBits = 13
	// slTableSize is the number of entries in the stateless hash table.
	slTableSize = 1 << slTableBits
	// slTableShift reduces a 32-bit product to slTableBits bits in hashSL.
	slTableShift = 32 - slTableBits
)
|
||||||
|
|
||||||
|
type statelessWriter struct {
|
||||||
|
dst io.Writer
|
||||||
|
closed bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *statelessWriter) Close() error {
|
||||||
|
if s.closed {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
s.closed = true
|
||||||
|
// Emit EOF block
|
||||||
|
return StatelessDeflate(s.dst, nil, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *statelessWriter) Write(p []byte) (n int, err error) {
|
||||||
|
err = StatelessDeflate(s.dst, p, false)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return len(p), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *statelessWriter) Reset(w io.Writer) {
|
||||||
|
s.dst = w
|
||||||
|
s.closed = false
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewStatelessWriter will do compression but without maintaining any state
|
||||||
|
// between Write calls.
|
||||||
|
// There will be no memory kept between Write calls,
|
||||||
|
// but compression and speed will be suboptimal.
|
||||||
|
// Because of this, the size of actual Write calls will affect output size.
|
||||||
|
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
|
||||||
|
return &statelessWriter{dst: dst}
|
||||||
|
}
|
||||||
|
|
||||||
|
// StatelessDeflate allows to compress directly to a Writer without retaining state.
|
||||||
|
// When returning everything will be flushed.
|
||||||
|
func StatelessDeflate(out io.Writer, in []byte, eof bool) error {
|
||||||
|
var dst tokens
|
||||||
|
bw := newHuffmanBitWriter(out)
|
||||||
|
if eof && len(in) == 0 {
|
||||||
|
// Just write an EOF block.
|
||||||
|
// Could be faster...
|
||||||
|
bw.writeStoredHeader(0, true)
|
||||||
|
bw.flush()
|
||||||
|
return bw.err
|
||||||
|
}
|
||||||
|
|
||||||
|
for len(in) > 0 {
|
||||||
|
todo := in
|
||||||
|
if len(todo) > maxStatelessBlock {
|
||||||
|
todo = todo[:maxStatelessBlock]
|
||||||
|
}
|
||||||
|
in = in[len(todo):]
|
||||||
|
// Compress
|
||||||
|
statelessEnc(&dst, todo)
|
||||||
|
isEof := eof && len(in) == 0
|
||||||
|
|
||||||
|
if dst.n == 0 {
|
||||||
|
bw.writeStoredHeader(len(todo), isEof)
|
||||||
|
if bw.err != nil {
|
||||||
|
return bw.err
|
||||||
|
}
|
||||||
|
bw.writeBytes(todo)
|
||||||
|
} else if int(dst.n) > len(todo)-len(todo)>>4 {
|
||||||
|
// If we removed less than 1/16th, huffman compress the block.
|
||||||
|
bw.writeBlockHuff(isEof, todo, false)
|
||||||
|
} else {
|
||||||
|
bw.writeBlockDynamic(&dst, isEof, todo, false)
|
||||||
|
}
|
||||||
|
if bw.err != nil {
|
||||||
|
return bw.err
|
||||||
|
}
|
||||||
|
dst.Reset()
|
||||||
|
}
|
||||||
|
if !eof {
|
||||||
|
// Align.
|
||||||
|
bw.writeStoredHeader(0, false)
|
||||||
|
}
|
||||||
|
bw.flush()
|
||||||
|
return bw.err
|
||||||
|
}
|
||||||
|
|
||||||
|
func hashSL(u uint32) uint32 {
|
||||||
|
return (u * 0x1e35a7bd) >> slTableShift
|
||||||
|
}
|
||||||
|
|
||||||
|
// load3216 returns the 4 bytes of b starting at index i as a little-endian
// uint32.
func load3216(b []byte, i int16) uint32 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:4]
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
|
||||||
|
|
||||||
|
// load6416 returns the 8 bytes of b starting at index i as a little-endian
// uint64.
func load6416(b []byte, i int16) uint64 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:8]
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
|
||||||
|
|
||||||
|
func statelessEnc(dst *tokens, src []byte) {
|
||||||
|
const (
|
||||||
|
inputMargin = 12 - 1
|
||||||
|
minNonLiteralBlockSize = 1 + 1 + inputMargin
|
||||||
|
)
|
||||||
|
|
||||||
|
type tableEntry struct {
|
||||||
|
offset int16
|
||||||
|
}
|
||||||
|
|
||||||
|
var table [slTableSize]tableEntry
|
||||||
|
|
||||||
|
// This check isn't in the Snappy implementation, but there, the caller
|
||||||
|
// instead of the callee handles this case.
|
||||||
|
if len(src) < minNonLiteralBlockSize {
|
||||||
|
// We do not fill the token table.
|
||||||
|
// This will be picked up by caller.
|
||||||
|
dst.n = uint16(len(src))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
s := int16(1)
|
||||||
|
nextEmit := int16(0)
|
||||||
|
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
||||||
|
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||||
|
// looking for copies.
|
||||||
|
sLimit := int16(len(src) - inputMargin)
|
||||||
|
|
||||||
|
// nextEmit is where in src the next emitLiteral should start from.
|
||||||
|
cv := load3216(src, s)
|
||||||
|
|
||||||
|
for {
|
||||||
|
const skipLog = 5
|
||||||
|
const doEvery = 2
|
||||||
|
|
||||||
|
nextS := s
|
||||||
|
var candidate tableEntry
|
||||||
|
for {
|
||||||
|
nextHash := hashSL(cv)
|
||||||
|
candidate = table[nextHash]
|
||||||
|
nextS = s + doEvery + (s-nextEmit)>>skipLog
|
||||||
|
if nextS > sLimit || nextS <= 0 {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
|
||||||
|
now := load6416(src, nextS)
|
||||||
|
table[nextHash] = tableEntry{offset: s}
|
||||||
|
nextHash = hashSL(uint32(now))
|
||||||
|
|
||||||
|
if cv == load3216(src, candidate.offset) {
|
||||||
|
table[nextHash] = tableEntry{offset: nextS}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do one right away...
|
||||||
|
cv = uint32(now)
|
||||||
|
s = nextS
|
||||||
|
nextS++
|
||||||
|
candidate = table[nextHash]
|
||||||
|
now >>= 8
|
||||||
|
table[nextHash] = tableEntry{offset: s}
|
||||||
|
|
||||||
|
if cv == load3216(src, candidate.offset) {
|
||||||
|
table[nextHash] = tableEntry{offset: nextS}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
cv = uint32(now)
|
||||||
|
s = nextS
|
||||||
|
}
|
||||||
|
|
||||||
|
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
||||||
|
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
|
||||||
|
// them as literal bytes.
|
||||||
|
for {
|
||||||
|
// Invariant: we have a 4-byte match at s, and no need to emit any
|
||||||
|
// literal bytes prior to s.
|
||||||
|
|
||||||
|
// Extend the 4-byte match as long as possible.
|
||||||
|
t := candidate.offset
|
||||||
|
l := int16(matchLen(src[s+4:], src[t+4:]) + 4)
|
||||||
|
|
||||||
|
// Extend backwards
|
||||||
|
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
|
||||||
|
s--
|
||||||
|
t--
|
||||||
|
l++
|
||||||
|
}
|
||||||
|
if nextEmit < s {
|
||||||
|
emitLiteral(dst, src[nextEmit:s])
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save the match found
|
||||||
|
dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
|
||||||
|
s += l
|
||||||
|
nextEmit = s
|
||||||
|
if nextS >= s {
|
||||||
|
s = nextS + 1
|
||||||
|
}
|
||||||
|
if s >= sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
|
||||||
|
// We could immediately start working at s now, but to improve
|
||||||
|
// compression we first update the hash table at s-2 and at s. If
|
||||||
|
// another emitCopy is not our next move, also calculate nextHash
|
||||||
|
// at s+1. At least on GOARCH=amd64, these three hash calculations
|
||||||
|
// are faster as one load64 call (with some shifts) instead of
|
||||||
|
// three load32 calls.
|
||||||
|
x := load6416(src, s-2)
|
||||||
|
o := s - 2
|
||||||
|
prevHash := hashSL(uint32(x))
|
||||||
|
table[prevHash] = tableEntry{offset: o}
|
||||||
|
x >>= 16
|
||||||
|
currHash := hashSL(uint32(x))
|
||||||
|
candidate = table[currHash]
|
||||||
|
table[currHash] = tableEntry{offset: o + 2}
|
||||||
|
|
||||||
|
if uint32(x) != load3216(src, candidate.offset) {
|
||||||
|
cv = uint32(x >> 8)
|
||||||
|
s++
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
emitRemainder:
|
||||||
|
if int(nextEmit) < len(src) {
|
||||||
|
// If nothing was added, don't encode literals.
|
||||||
|
if dst.n == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
emitLiteral(dst, src[nextEmit:])
|
||||||
|
}
|
||||||
|
}
|
|
@ -4,7 +4,13 @@
|
||||||
|
|
||||||
package flate
|
package flate
|
||||||
|
|
||||||
import "fmt"
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"math"
|
||||||
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
|
// 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
|
||||||
|
@ -19,7 +25,7 @@ const (
|
||||||
|
|
||||||
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
|
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
|
||||||
// is lengthCodes[length - MIN_MATCH_LENGTH]
|
// is lengthCodes[length - MIN_MATCH_LENGTH]
|
||||||
var lengthCodes = [...]uint32{
|
var lengthCodes = [256]uint8{
|
||||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
|
||||||
9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
|
9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
|
||||||
13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
|
13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
|
||||||
|
@ -48,7 +54,37 @@ var lengthCodes = [...]uint32{
|
||||||
27, 27, 27, 27, 27, 28,
|
27, 27, 27, 27, 27, 28,
|
||||||
}
|
}
|
||||||
|
|
||||||
var offsetCodes = [...]uint32{
|
// lengthCodes1 is length codes, but starting at 1.
|
||||||
|
var lengthCodes1 = [256]uint8{
|
||||||
|
1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
|
||||||
|
10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
|
||||||
|
14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
|
||||||
|
16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
|
||||||
|
18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
|
||||||
|
19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
|
||||||
|
20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
|
||||||
|
21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||||
|
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
|
||||||
|
22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
|
||||||
|
23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
|
||||||
|
23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||||
|
24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
|
||||||
|
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||||
|
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||||
|
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||||
|
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
||||||
|
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
||||||
|
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
||||||
|
26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
|
||||||
|
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
||||||
|
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
||||||
|
27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
|
||||||
|
28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||||
|
28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||||
|
28, 28, 28, 28, 28, 29,
|
||||||
|
}
|
||||||
|
|
||||||
|
var offsetCodes = [256]uint32{
|
||||||
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
|
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
|
||||||
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
|
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||||
|
@ -67,49 +103,265 @@ var offsetCodes = [...]uint32{
|
||||||
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// offsetCodes14 are offsetCodes, but with 14 added.
|
||||||
|
var offsetCodes14 = [256]uint32{
|
||||||
|
14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
|
||||||
|
22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
|
||||||
|
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||||
|
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||||
|
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
||||||
|
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
||||||
|
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
||||||
|
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
||||||
|
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||||
|
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||||
|
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||||
|
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||||
|
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
|
||||||
|
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
|
||||||
|
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
|
||||||
|
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
|
||||||
|
}
|
||||||
|
|
||||||
type token uint32
|
type token uint32
|
||||||
|
|
||||||
type tokens struct {
|
type tokens struct {
|
||||||
tokens [maxStoreBlockSize + 1]token
|
nLits int
|
||||||
n uint16 // Must be able to contain maxStoreBlockSize
|
extraHist [32]uint16 // codes 256->maxnumlit
|
||||||
|
offHist [32]uint16 // offset codes
|
||||||
|
litHist [256]uint16 // codes 0->255
|
||||||
|
n uint16 // Must be able to contain maxStoreBlockSize
|
||||||
|
tokens [maxStoreBlockSize + 1]token
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert a literal into a literal token.
|
func (t *tokens) Reset() {
|
||||||
func literalToken(literal uint32) token { return token(literalType + literal) }
|
if t.n == 0 {
|
||||||
|
return
|
||||||
// Convert a < xlength, xoffset > pair into a match token.
|
|
||||||
func matchToken(xlength uint32, xoffset uint32) token {
|
|
||||||
return token(matchType + xlength<<lengthShift + xoffset)
|
|
||||||
}
|
|
||||||
|
|
||||||
func matchTokend(xlength uint32, xoffset uint32) token {
|
|
||||||
if xlength > maxMatchLength || xoffset > maxMatchOffset {
|
|
||||||
panic(fmt.Sprintf("Invalid match: len: %d, offset: %d\n", xlength, xoffset))
|
|
||||||
return token(matchType)
|
|
||||||
}
|
}
|
||||||
return token(matchType + xlength<<lengthShift + xoffset)
|
t.n = 0
|
||||||
|
t.nLits = 0
|
||||||
|
for i := range t.litHist[:] {
|
||||||
|
t.litHist[i] = 0
|
||||||
|
}
|
||||||
|
for i := range t.extraHist[:] {
|
||||||
|
t.extraHist[i] = 0
|
||||||
|
}
|
||||||
|
for i := range t.offHist[:] {
|
||||||
|
t.offHist[i] = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *tokens) Fill() {
|
||||||
|
if t.n == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for i, v := range t.litHist[:] {
|
||||||
|
if v == 0 {
|
||||||
|
t.litHist[i] = 1
|
||||||
|
t.nLits++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i, v := range t.extraHist[:literalCount-256] {
|
||||||
|
if v == 0 {
|
||||||
|
t.nLits++
|
||||||
|
t.extraHist[i] = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i, v := range t.offHist[:offsetCodeCount] {
|
||||||
|
if v == 0 {
|
||||||
|
t.offHist[i] = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func indexTokens(in []token) tokens {
|
||||||
|
var t tokens
|
||||||
|
t.indexTokens(in)
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *tokens) indexTokens(in []token) {
|
||||||
|
t.Reset()
|
||||||
|
for _, tok := range in {
|
||||||
|
if tok < matchType {
|
||||||
|
t.tokens[t.n] = tok
|
||||||
|
t.litHist[tok]++
|
||||||
|
t.n++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
t.AddMatch(uint32(tok.length()), tok.offset())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitLiteral writes a literal chunk and returns the number of bytes written.
|
||||||
|
func emitLiteral(dst *tokens, lit []byte) {
|
||||||
|
ol := int(dst.n)
|
||||||
|
for i, v := range lit {
|
||||||
|
dst.tokens[(i+ol)&maxStoreBlockSize] = token(v)
|
||||||
|
dst.litHist[v]++
|
||||||
|
}
|
||||||
|
dst.n += uint16(len(lit))
|
||||||
|
dst.nLits += len(lit)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *tokens) AddLiteral(lit byte) {
|
||||||
|
t.tokens[t.n] = token(lit)
|
||||||
|
t.litHist[lit]++
|
||||||
|
t.n++
|
||||||
|
t.nLits++
|
||||||
|
}
|
||||||
|
|
||||||
|
// EstimatedBits will return an minimum size estimated by an *optimal*
|
||||||
|
// compression of the block.
|
||||||
|
// The size of the block
|
||||||
|
func (t *tokens) EstimatedBits() int {
|
||||||
|
shannon := float64(0)
|
||||||
|
bits := int(0)
|
||||||
|
nMatches := 0
|
||||||
|
if t.nLits > 0 {
|
||||||
|
invTotal := 1.0 / float64(t.nLits)
|
||||||
|
for _, v := range t.litHist[:] {
|
||||||
|
if v > 0 {
|
||||||
|
n := float64(v)
|
||||||
|
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Just add 15 for EOB
|
||||||
|
shannon += 15
|
||||||
|
for _, v := range t.extraHist[1 : literalCount-256] {
|
||||||
|
if v > 0 {
|
||||||
|
n := float64(v)
|
||||||
|
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
|
||||||
|
bits += int(lengthExtraBits[v&31]) * int(v)
|
||||||
|
nMatches += int(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if nMatches > 0 {
|
||||||
|
invTotal := 1.0 / float64(nMatches)
|
||||||
|
for _, v := range t.offHist[:offsetCodeCount] {
|
||||||
|
if v > 0 {
|
||||||
|
n := float64(v)
|
||||||
|
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
|
||||||
|
bits += int(offsetExtraBits[v&31]) * int(n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return int(shannon) + bits
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddMatch adds a match to the tokens.
|
||||||
|
// This function is very sensitive to inlining and right on the border.
|
||||||
|
func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
|
||||||
|
if debugDecode {
|
||||||
|
if xlength >= maxMatchLength+baseMatchLength {
|
||||||
|
panic(fmt.Errorf("invalid length: %v", xlength))
|
||||||
|
}
|
||||||
|
if xoffset >= maxMatchOffset+baseMatchOffset {
|
||||||
|
panic(fmt.Errorf("invalid offset: %v", xoffset))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.nLits++
|
||||||
|
lengthCode := lengthCodes1[uint8(xlength)] & 31
|
||||||
|
t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
|
||||||
|
t.extraHist[lengthCode]++
|
||||||
|
t.offHist[offsetCode(xoffset)&31]++
|
||||||
|
t.n++
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddMatchLong adds a match to the tokens, potentially longer than max match length.
|
||||||
|
// Length should NOT have the base subtracted, only offset should.
|
||||||
|
func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
|
||||||
|
if debugDecode {
|
||||||
|
if xoffset >= maxMatchOffset+baseMatchOffset {
|
||||||
|
panic(fmt.Errorf("invalid offset: %v", xoffset))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
oc := offsetCode(xoffset) & 31
|
||||||
|
for xlength > 0 {
|
||||||
|
xl := xlength
|
||||||
|
if xl > 258 {
|
||||||
|
// We need to have at least baseMatchLength left over for next loop.
|
||||||
|
xl = 258 - baseMatchLength
|
||||||
|
}
|
||||||
|
xlength -= xl
|
||||||
|
xl -= 3
|
||||||
|
t.nLits++
|
||||||
|
lengthCode := lengthCodes1[uint8(xl)] & 31
|
||||||
|
t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
|
||||||
|
t.extraHist[lengthCode]++
|
||||||
|
t.offHist[oc]++
|
||||||
|
t.n++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *tokens) AddEOB() {
|
||||||
|
t.tokens[t.n] = token(endBlockMarker)
|
||||||
|
t.extraHist[0]++
|
||||||
|
t.n++
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *tokens) Slice() []token {
|
||||||
|
return t.tokens[:t.n]
|
||||||
|
}
|
||||||
|
|
||||||
|
// VarInt returns the tokens as varint encoded bytes.
|
||||||
|
func (t *tokens) VarInt() []byte {
|
||||||
|
var b = make([]byte, binary.MaxVarintLen32*int(t.n))
|
||||||
|
var off int
|
||||||
|
for _, v := range t.tokens[:t.n] {
|
||||||
|
off += binary.PutUvarint(b[off:], uint64(v))
|
||||||
|
}
|
||||||
|
return b[:off]
|
||||||
|
}
|
||||||
|
|
||||||
|
// FromVarInt restores t to the varint encoded tokens provided.
|
||||||
|
// Any data in t is removed.
|
||||||
|
func (t *tokens) FromVarInt(b []byte) error {
|
||||||
|
var buf = bytes.NewReader(b)
|
||||||
|
var toks []token
|
||||||
|
for {
|
||||||
|
r, err := binary.ReadUvarint(buf)
|
||||||
|
if err == io.EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
toks = append(toks, token(r))
|
||||||
|
}
|
||||||
|
t.indexTokens(toks)
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the type of a token
|
// Returns the type of a token
|
||||||
func (t token) typ() uint32 { return uint32(t) & typeMask }
|
func (t token) typ() uint32 { return uint32(t) & typeMask }
|
||||||
|
|
||||||
// Returns the literal of a literal token
|
// Returns the literal of a literal token
|
||||||
func (t token) literal() uint32 { return uint32(t - literalType) }
|
func (t token) literal() uint8 { return uint8(t) }
|
||||||
|
|
||||||
// Returns the extra offset of a match token
|
// Returns the extra offset of a match token
|
||||||
func (t token) offset() uint32 { return uint32(t) & offsetMask }
|
func (t token) offset() uint32 { return uint32(t) & offsetMask }
|
||||||
|
|
||||||
func (t token) length() uint32 { return uint32((t - matchType) >> lengthShift) }
|
func (t token) length() uint8 { return uint8(t >> lengthShift) }
|
||||||
|
|
||||||
func lengthCode(len uint32) uint32 { return lengthCodes[len] }
|
// The code is never more than 8 bits, but is returned as uint32 for convenience.
|
||||||
|
func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) }
|
||||||
|
|
||||||
// Returns the offset code corresponding to a specific offset
|
// Returns the offset code corresponding to a specific offset
|
||||||
func offsetCode(off uint32) uint32 {
|
func offsetCode(off uint32) uint32 {
|
||||||
if off < uint32(len(offsetCodes)) {
|
if false {
|
||||||
return offsetCodes[off]
|
if off < uint32(len(offsetCodes)) {
|
||||||
} else if off>>7 < uint32(len(offsetCodes)) {
|
return offsetCodes[off&255]
|
||||||
return offsetCodes[off>>7] + 14
|
} else if off>>7 < uint32(len(offsetCodes)) {
|
||||||
} else {
|
return offsetCodes[(off>>7)&255] + 14
|
||||||
return offsetCodes[off>>14] + 28
|
} else {
|
||||||
|
return offsetCodes[(off>>14)&255] + 28
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if off < uint32(len(offsetCodes)) {
|
||||||
|
return offsetCodes[uint8(off)]
|
||||||
|
}
|
||||||
|
return offsetCodes14[uint8(off>>7)]
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,11 +10,11 @@ import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"errors"
|
"errors"
|
||||||
|
"hash/crc32"
|
||||||
"io"
|
"io"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/klauspost/compress/flate"
|
"github.com/klauspost/compress/flate"
|
||||||
"github.com/klauspost/crc32"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|
|
@ -7,10 +7,10 @@ package gzip
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"hash/crc32"
|
||||||
"io"
|
"io"
|
||||||
|
|
||||||
"github.com/klauspost/compress/flate"
|
"github.com/klauspost/compress/flate"
|
||||||
"github.com/klauspost/crc32"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// These constants are copied from the flate package, so that code that imports
|
// These constants are copied from the flate package, so that code that imports
|
||||||
|
@ -22,6 +22,13 @@ const (
|
||||||
DefaultCompression = flate.DefaultCompression
|
DefaultCompression = flate.DefaultCompression
|
||||||
ConstantCompression = flate.ConstantCompression
|
ConstantCompression = flate.ConstantCompression
|
||||||
HuffmanOnly = flate.HuffmanOnly
|
HuffmanOnly = flate.HuffmanOnly
|
||||||
|
|
||||||
|
// StatelessCompression will do compression but without maintaining any state
|
||||||
|
// between Write calls.
|
||||||
|
// There will be no memory kept between Write calls,
|
||||||
|
// but compression and speed will be suboptimal.
|
||||||
|
// Because of this, the size of actual Write calls will affect output size.
|
||||||
|
StatelessCompression = -3
|
||||||
)
|
)
|
||||||
|
|
||||||
// A Writer is an io.WriteCloser.
|
// A Writer is an io.WriteCloser.
|
||||||
|
@ -59,7 +66,7 @@ func NewWriter(w io.Writer) *Writer {
|
||||||
// integer value between BestSpeed and BestCompression inclusive. The error
|
// integer value between BestSpeed and BestCompression inclusive. The error
|
||||||
// returned will be nil if the level is valid.
|
// returned will be nil if the level is valid.
|
||||||
func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
|
func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
|
||||||
if level < HuffmanOnly || level > BestCompression {
|
if level < StatelessCompression || level > BestCompression {
|
||||||
return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
|
return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
|
||||||
}
|
}
|
||||||
z := new(Writer)
|
z := new(Writer)
|
||||||
|
@ -69,9 +76,12 @@ func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
|
||||||
|
|
||||||
func (z *Writer) init(w io.Writer, level int) {
|
func (z *Writer) init(w io.Writer, level int) {
|
||||||
compressor := z.compressor
|
compressor := z.compressor
|
||||||
if compressor != nil {
|
if level != StatelessCompression {
|
||||||
compressor.Reset(w)
|
if compressor != nil {
|
||||||
|
compressor.Reset(w)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
*z = Writer{
|
*z = Writer{
|
||||||
Header: Header{
|
Header: Header{
|
||||||
OS: 255, // unknown
|
OS: 255, // unknown
|
||||||
|
@ -189,12 +199,16 @@ func (z *Writer) Write(p []byte) (int, error) {
|
||||||
return n, z.err
|
return n, z.err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if z.compressor == nil {
|
|
||||||
|
if z.compressor == nil && z.level != StatelessCompression {
|
||||||
z.compressor, _ = flate.NewWriter(z.w, z.level)
|
z.compressor, _ = flate.NewWriter(z.w, z.level)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
z.size += uint32(len(p))
|
z.size += uint32(len(p))
|
||||||
z.digest = crc32.Update(z.digest, crc32.IEEETable, p)
|
z.digest = crc32.Update(z.digest, crc32.IEEETable, p)
|
||||||
|
if z.level == StatelessCompression {
|
||||||
|
return len(p), flate.StatelessDeflate(z.w, p, false)
|
||||||
|
}
|
||||||
n, z.err = z.compressor.Write(p)
|
n, z.err = z.compressor.Write(p)
|
||||||
return n, z.err
|
return n, z.err
|
||||||
}
|
}
|
||||||
|
@ -211,7 +225,7 @@ func (z *Writer) Flush() error {
|
||||||
if z.err != nil {
|
if z.err != nil {
|
||||||
return z.err
|
return z.err
|
||||||
}
|
}
|
||||||
if z.closed {
|
if z.closed || z.level == StatelessCompression {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if !z.wroteHeader {
|
if !z.wroteHeader {
|
||||||
|
@ -240,7 +254,11 @@ func (z *Writer) Close() error {
|
||||||
return z.err
|
return z.err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
z.err = z.compressor.Close()
|
if z.level == StatelessCompression {
|
||||||
|
z.err = flate.StatelessDeflate(z.w, nil, true)
|
||||||
|
} else {
|
||||||
|
z.err = z.compressor.Close()
|
||||||
|
}
|
||||||
if z.err != nil {
|
if z.err != nil {
|
||||||
return z.err
|
return z.err
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,24 +0,0 @@
|
||||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
|
||||||
*.o
|
|
||||||
*.a
|
|
||||||
*.so
|
|
||||||
|
|
||||||
# Folders
|
|
||||||
_obj
|
|
||||||
_test
|
|
||||||
|
|
||||||
# Architecture specific extensions/prefixes
|
|
||||||
*.[568vq]
|
|
||||||
[568vq].out
|
|
||||||
|
|
||||||
*.cgo1.go
|
|
||||||
*.cgo2.c
|
|
||||||
_cgo_defun.c
|
|
||||||
_cgo_gotypes.go
|
|
||||||
_cgo_export.*
|
|
||||||
|
|
||||||
_testmain.go
|
|
||||||
|
|
||||||
*.exe
|
|
||||||
*.test
|
|
||||||
*.prof
|
|
|
@ -1,8 +0,0 @@
|
||||||
language: go
|
|
||||||
|
|
||||||
go:
|
|
||||||
- 1.3
|
|
||||||
- 1.4
|
|
||||||
- 1.5
|
|
||||||
- 1.6
|
|
||||||
- tip
|
|
|
@ -1,22 +0,0 @@
|
||||||
The MIT License (MIT)
|
|
||||||
|
|
||||||
Copyright (c) 2015 Klaus Post
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
|
||||||
in the Software without restriction, including without limitation the rights
|
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
|
||||||
furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
SOFTWARE.
|
|
||||||
|
|
|
@ -1,145 +0,0 @@
|
||||||
# cpuid
|
|
||||||
Package cpuid provides information about the CPU running the current program.
|
|
||||||
|
|
||||||
CPU features are detected on startup, and kept for fast access through the life of the application.
|
|
||||||
Currently x86 / x64 (AMD64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
|
|
||||||
|
|
||||||
You can access the CPU information by accessing the shared CPU variable of the cpuid library.
|
|
||||||
|
|
||||||
Package home: https://github.com/klauspost/cpuid
|
|
||||||
|
|
||||||
[![GoDoc][1]][2] [![Build Status][3]][4]
|
|
||||||
|
|
||||||
[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg
|
|
||||||
[2]: https://godoc.org/github.com/klauspost/cpuid
|
|
||||||
[3]: https://travis-ci.org/klauspost/cpuid.svg
|
|
||||||
[4]: https://travis-ci.org/klauspost/cpuid
|
|
||||||
|
|
||||||
# features
|
|
||||||
## CPU Instructions
|
|
||||||
* **CMOV** (i686 CMOV)
|
|
||||||
* **NX** (NX (No-Execute) bit)
|
|
||||||
* **AMD3DNOW** (AMD 3DNOW)
|
|
||||||
* **AMD3DNOWEXT** (AMD 3DNowExt)
|
|
||||||
* **MMX** (standard MMX)
|
|
||||||
* **MMXEXT** (SSE integer functions or AMD MMX ext)
|
|
||||||
* **SSE** (SSE functions)
|
|
||||||
* **SSE2** (P4 SSE functions)
|
|
||||||
* **SSE3** (Prescott SSE3 functions)
|
|
||||||
* **SSSE3** (Conroe SSSE3 functions)
|
|
||||||
* **SSE4** (Penryn SSE4.1 functions)
|
|
||||||
* **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
|
|
||||||
* **SSE42** (Nehalem SSE4.2 functions)
|
|
||||||
* **AVX** (AVX functions)
|
|
||||||
* **AVX2** (AVX2 functions)
|
|
||||||
* **FMA3** (Intel FMA 3)
|
|
||||||
* **FMA4** (Bulldozer FMA4 functions)
|
|
||||||
* **XOP** (Bulldozer XOP functions)
|
|
||||||
* **F16C** (Half-precision floating-point conversion)
|
|
||||||
* **BMI1** (Bit Manipulation Instruction Set 1)
|
|
||||||
* **BMI2** (Bit Manipulation Instruction Set 2)
|
|
||||||
* **TBM** (AMD Trailing Bit Manipulation)
|
|
||||||
* **LZCNT** (LZCNT instruction)
|
|
||||||
* **POPCNT** (POPCNT instruction)
|
|
||||||
* **AESNI** (Advanced Encryption Standard New Instructions)
|
|
||||||
* **CLMUL** (Carry-less Multiplication)
|
|
||||||
* **HTT** (Hyperthreading (enabled))
|
|
||||||
* **HLE** (Hardware Lock Elision)
|
|
||||||
* **RTM** (Restricted Transactional Memory)
|
|
||||||
* **RDRAND** (RDRAND instruction is available)
|
|
||||||
* **RDSEED** (RDSEED instruction is available)
|
|
||||||
* **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
|
|
||||||
* **SHA** (Intel SHA Extensions)
|
|
||||||
* **AVX512F** (AVX-512 Foundation)
|
|
||||||
* **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
|
|
||||||
* **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
|
|
||||||
* **AVX512PF** (AVX-512 Prefetch Instructions)
|
|
||||||
* **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
|
|
||||||
* **AVX512CD** (AVX-512 Conflict Detection Instructions)
|
|
||||||
* **AVX512BW** (AVX-512 Byte and Word Instructions)
|
|
||||||
* **AVX512VL** (AVX-512 Vector Length Extensions)
|
|
||||||
* **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
|
|
||||||
* **MPX** (Intel MPX (Memory Protection Extensions))
|
|
||||||
* **ERMS** (Enhanced REP MOVSB/STOSB)
|
|
||||||
* **RDTSCP** (RDTSCP Instruction)
|
|
||||||
* **CX16** (CMPXCHG16B Instruction)
|
|
||||||
* **SGX** (Software Guard Extensions, with activation details)
|
|
||||||
|
|
||||||
## Performance
|
|
||||||
* **RDTSCP()** Returns current cycle count. Can be used for benchmarking.
|
|
||||||
* **SSE2SLOW** (SSE2 is supported, but usually not faster)
|
|
||||||
* **SSE3SLOW** (SSE3 is supported, but usually not faster)
|
|
||||||
* **ATOM** (Atom processor, some SSSE3 instructions are slower)
|
|
||||||
* **Cache line** (Probable size of a cache line).
|
|
||||||
* **L1, L2, L3 Cache size** on newer Intel/AMD CPUs.
|
|
||||||
|
|
||||||
## Cpu Vendor/VM
|
|
||||||
* **Intel**
|
|
||||||
* **AMD**
|
|
||||||
* **VIA**
|
|
||||||
* **Transmeta**
|
|
||||||
* **NSC**
|
|
||||||
* **KVM** (Kernel-based Virtual Machine)
|
|
||||||
* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
|
|
||||||
* **VMware**
|
|
||||||
* **XenHVM**
|
|
||||||
|
|
||||||
# installing
|
|
||||||
|
|
||||||
```go get github.com/klauspost/cpuid```
|
|
||||||
|
|
||||||
# example
|
|
||||||
|
|
||||||
```Go
|
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"github.com/klauspost/cpuid"
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
// Print basic CPU information:
|
|
||||||
fmt.Println("Name:", cpuid.CPU.BrandName)
|
|
||||||
fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
|
|
||||||
fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore)
|
|
||||||
fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores)
|
|
||||||
fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
|
|
||||||
fmt.Println("Features:", cpuid.CPU.Features)
|
|
||||||
fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
|
|
||||||
fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
|
|
||||||
fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes")
|
|
||||||
fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
|
|
||||||
fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
|
|
||||||
|
|
||||||
// Test if we have a specific feature:
|
|
||||||
if cpuid.CPU.SSE() {
|
|
||||||
fmt.Println("We have Streaming SIMD Extensions")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Sample output:
|
|
||||||
```
|
|
||||||
>go run main.go
|
|
||||||
Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz
|
|
||||||
PhysicalCores: 2
|
|
||||||
ThreadsPerCore: 2
|
|
||||||
LogicalCores: 4
|
|
||||||
Family 6 Model: 42
|
|
||||||
Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL
|
|
||||||
Cacheline bytes: 64
|
|
||||||
We have Streaming SIMD Extensions
|
|
||||||
```
|
|
||||||
|
|
||||||
# private package
|
|
||||||
|
|
||||||
In the "private" folder you can find an autogenerated version of the library you can include in your own packages.
|
|
||||||
|
|
||||||
For this purpose all exports are removed, and functions and constants are lowercased.
|
|
||||||
|
|
||||||
This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages.
|
|
||||||
|
|
||||||
# license
|
|
||||||
|
|
||||||
This code is published under an MIT license. See LICENSE file for more information.
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,42 +0,0 @@
|
||||||
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
|
|
||||||
|
|
||||||
// +build 386,!gccgo
|
|
||||||
|
|
||||||
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
|
|
||||||
TEXT ·asmCpuid(SB), 7, $0
|
|
||||||
XORL CX, CX
|
|
||||||
MOVL op+0(FP), AX
|
|
||||||
CPUID
|
|
||||||
MOVL AX, eax+4(FP)
|
|
||||||
MOVL BX, ebx+8(FP)
|
|
||||||
MOVL CX, ecx+12(FP)
|
|
||||||
MOVL DX, edx+16(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
|
|
||||||
TEXT ·asmCpuidex(SB), 7, $0
|
|
||||||
MOVL op+0(FP), AX
|
|
||||||
MOVL op2+4(FP), CX
|
|
||||||
CPUID
|
|
||||||
MOVL AX, eax+8(FP)
|
|
||||||
MOVL BX, ebx+12(FP)
|
|
||||||
MOVL CX, ecx+16(FP)
|
|
||||||
MOVL DX, edx+20(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
// func xgetbv(index uint32) (eax, edx uint32)
|
|
||||||
TEXT ·asmXgetbv(SB), 7, $0
|
|
||||||
MOVL index+0(FP), CX
|
|
||||||
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
|
|
||||||
MOVL AX, eax+4(FP)
|
|
||||||
MOVL DX, edx+8(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
|
|
||||||
TEXT ·asmRdtscpAsm(SB), 7, $0
|
|
||||||
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
|
|
||||||
MOVL AX, eax+0(FP)
|
|
||||||
MOVL BX, ebx+4(FP)
|
|
||||||
MOVL CX, ecx+8(FP)
|
|
||||||
MOVL DX, edx+12(FP)
|
|
||||||
RET
|
|
|
@ -1,42 +0,0 @@
|
||||||
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
|
|
||||||
|
|
||||||
//+build amd64,!gccgo
|
|
||||||
|
|
||||||
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
|
|
||||||
TEXT ·asmCpuid(SB), 7, $0
|
|
||||||
XORQ CX, CX
|
|
||||||
MOVL op+0(FP), AX
|
|
||||||
CPUID
|
|
||||||
MOVL AX, eax+8(FP)
|
|
||||||
MOVL BX, ebx+12(FP)
|
|
||||||
MOVL CX, ecx+16(FP)
|
|
||||||
MOVL DX, edx+20(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
|
|
||||||
TEXT ·asmCpuidex(SB), 7, $0
|
|
||||||
MOVL op+0(FP), AX
|
|
||||||
MOVL op2+4(FP), CX
|
|
||||||
CPUID
|
|
||||||
MOVL AX, eax+8(FP)
|
|
||||||
MOVL BX, ebx+12(FP)
|
|
||||||
MOVL CX, ecx+16(FP)
|
|
||||||
MOVL DX, edx+20(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
// func asmXgetbv(index uint32) (eax, edx uint32)
|
|
||||||
TEXT ·asmXgetbv(SB), 7, $0
|
|
||||||
MOVL index+0(FP), CX
|
|
||||||
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
|
|
||||||
MOVL AX, eax+8(FP)
|
|
||||||
MOVL DX, edx+12(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
|
|
||||||
TEXT ·asmRdtscpAsm(SB), 7, $0
|
|
||||||
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
|
|
||||||
MOVL AX, eax+0(FP)
|
|
||||||
MOVL BX, ebx+4(FP)
|
|
||||||
MOVL CX, ecx+8(FP)
|
|
||||||
MOVL DX, edx+12(FP)
|
|
||||||
RET
|
|
|
@ -1,17 +0,0 @@
|
||||||
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
|
|
||||||
|
|
||||||
// +build 386,!gccgo amd64,!gccgo
|
|
||||||
|
|
||||||
package cpuid
|
|
||||||
|
|
||||||
func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
|
|
||||||
func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
|
|
||||||
func asmXgetbv(index uint32) (eax, edx uint32)
|
|
||||||
func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
|
|
||||||
|
|
||||||
func initCPU() {
|
|
||||||
cpuid = asmCpuid
|
|
||||||
cpuidex = asmCpuidex
|
|
||||||
xgetbv = asmXgetbv
|
|
||||||
rdtscpAsm = asmRdtscpAsm
|
|
||||||
}
|
|
|
@ -1,23 +0,0 @@
|
||||||
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
|
|
||||||
|
|
||||||
// +build !amd64,!386 gccgo
|
|
||||||
|
|
||||||
package cpuid
|
|
||||||
|
|
||||||
func initCPU() {
|
|
||||||
cpuid = func(op uint32) (eax, ebx, ecx, edx uint32) {
|
|
||||||
return 0, 0, 0, 0
|
|
||||||
}
|
|
||||||
|
|
||||||
cpuidex = func(op, op2 uint32) (eax, ebx, ecx, edx uint32) {
|
|
||||||
return 0, 0, 0, 0
|
|
||||||
}
|
|
||||||
|
|
||||||
xgetbv = func(index uint32) (eax, edx uint32) {
|
|
||||||
return 0, 0
|
|
||||||
}
|
|
||||||
|
|
||||||
rdtscpAsm = func() (eax, ebx, ecx, edx uint32) {
|
|
||||||
return 0, 0, 0, 0
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,3 +0,0 @@
|
||||||
package cpuid
|
|
||||||
|
|
||||||
//go:generate go run private-gen.go
|
|
|
@ -1,24 +0,0 @@
|
||||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
|
||||||
*.o
|
|
||||||
*.a
|
|
||||||
*.so
|
|
||||||
|
|
||||||
# Folders
|
|
||||||
_obj
|
|
||||||
_test
|
|
||||||
|
|
||||||
# Architecture specific extensions/prefixes
|
|
||||||
*.[568vq]
|
|
||||||
[568vq].out
|
|
||||||
|
|
||||||
*.cgo1.go
|
|
||||||
*.cgo2.c
|
|
||||||
_cgo_defun.c
|
|
||||||
_cgo_gotypes.go
|
|
||||||
_cgo_export.*
|
|
||||||
|
|
||||||
_testmain.go
|
|
||||||
|
|
||||||
*.exe
|
|
||||||
*.test
|
|
||||||
*.prof
|
|
|
@ -1,13 +0,0 @@
|
||||||
language: go
|
|
||||||
|
|
||||||
go:
|
|
||||||
- 1.3
|
|
||||||
- 1.4
|
|
||||||
- 1.5
|
|
||||||
- 1.6
|
|
||||||
- 1.7
|
|
||||||
- tip
|
|
||||||
|
|
||||||
script:
|
|
||||||
- go test -v .
|
|
||||||
- go test -v -race .
|
|
|
@ -1,28 +0,0 @@
|
||||||
Copyright (c) 2012 The Go Authors. All rights reserved.
|
|
||||||
Copyright (c) 2015 Klaus Post
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above
|
|
||||||
copyright notice, this list of conditions and the following disclaimer
|
|
||||||
in the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Google Inc. nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
@ -1,87 +0,0 @@
|
||||||
# crc32
|
|
||||||
CRC32 hash with x64 optimizations
|
|
||||||
|
|
||||||
This package is a drop-in replacement for the standard library `hash/crc32` package, that features SSE 4.2 optimizations on x64 platforms, for a 10x speedup.
|
|
||||||
|
|
||||||
[![Build Status](https://travis-ci.org/klauspost/crc32.svg?branch=master)](https://travis-ci.org/klauspost/crc32)
|
|
||||||
|
|
||||||
# usage
|
|
||||||
|
|
||||||
Install using `go get github.com/klauspost/crc32`. This library is based on Go 1.5 code and requires Go 1.3 or newer.
|
|
||||||
|
|
||||||
Replace `import "hash/crc32"` with `import "github.com/klauspost/crc32"` and you are good to go.
|
|
||||||
|
|
||||||
# changes
|
|
||||||
* Oct 20, 2016: Changes have been merged to upstream Go. Package updated to match.
|
|
||||||
* Dec 4, 2015: Uses the "slice-by-8" trick more extensively, which gives a 1.5 to 2.5x speedup if assembler is unavailable.
|
|
||||||
|
|
||||||
|
|
||||||
# performance
|
|
||||||
|
|
||||||
For *Go 1.7* performance is equivalent to the standard library. So if you use this package for Go 1.7 you can switch back.
|
|
||||||
|
|
||||||
|
|
||||||
For IEEE tables (the most common), there is approximately a factor 10 speedup with "CLMUL" (Carryless multiplication) instruction:
|
|
||||||
```
|
|
||||||
benchmark old ns/op new ns/op delta
|
|
||||||
BenchmarkCrc32KB 99955 10258 -89.74%
|
|
||||||
|
|
||||||
benchmark old MB/s new MB/s speedup
|
|
||||||
BenchmarkCrc32KB 327.83 3194.20 9.74x
|
|
||||||
```
|
|
||||||
|
|
||||||
For other tables and "CLMUL" capable machines the performance is the same as the standard library.
|
|
||||||
|
|
||||||
Here are some detailed benchmarks, comparing to go 1.5 standard library with and without assembler enabled.
|
|
||||||
|
|
||||||
```
|
|
||||||
Std: Standard Go 1.5 library
|
|
||||||
Crc: Indicates IEEE type CRC.
|
|
||||||
40B: Size of each slice encoded.
|
|
||||||
NoAsm: Assembler was disabled (ie. not an AMD64 or SSE 4.2+ capable machine).
|
|
||||||
Castagnoli: Castagnoli CRC type.
|
|
||||||
|
|
||||||
BenchmarkStdCrc40B-4 10000000 158 ns/op 252.88 MB/s
|
|
||||||
BenchmarkCrc40BNoAsm-4 20000000 105 ns/op 377.38 MB/s (slice8)
|
|
||||||
BenchmarkCrc40B-4 20000000 105 ns/op 378.77 MB/s (slice8)
|
|
||||||
|
|
||||||
BenchmarkStdCrc1KB-4 500000 3604 ns/op 284.10 MB/s
|
|
||||||
BenchmarkCrc1KBNoAsm-4 1000000 1463 ns/op 699.79 MB/s (slice8)
|
|
||||||
BenchmarkCrc1KB-4 3000000 396 ns/op 2583.69 MB/s (asm)
|
|
||||||
|
|
||||||
BenchmarkStdCrc8KB-4 200000 11417 ns/op 717.48 MB/s (slice8)
|
|
||||||
BenchmarkCrc8KBNoAsm-4 200000 11317 ns/op 723.85 MB/s (slice8)
|
|
||||||
BenchmarkCrc8KB-4 500000 2919 ns/op 2805.73 MB/s (asm)
|
|
||||||
|
|
||||||
BenchmarkStdCrc32KB-4 30000 45749 ns/op 716.24 MB/s (slice8)
|
|
||||||
BenchmarkCrc32KBNoAsm-4 30000 45109 ns/op 726.42 MB/s (slice8)
|
|
||||||
BenchmarkCrc32KB-4 100000 11497 ns/op 2850.09 MB/s (asm)
|
|
||||||
|
|
||||||
BenchmarkStdNoAsmCastagnol40B-4 10000000 161 ns/op 246.94 MB/s
|
|
||||||
BenchmarkStdCastagnoli40B-4 50000000 28.4 ns/op 1410.69 MB/s (asm)
|
|
||||||
BenchmarkCastagnoli40BNoAsm-4 20000000 100 ns/op 398.01 MB/s (slice8)
|
|
||||||
BenchmarkCastagnoli40B-4 50000000 28.2 ns/op 1419.54 MB/s (asm)
|
|
||||||
|
|
||||||
BenchmarkStdNoAsmCastagnoli1KB-4 500000 3622 ns/op 282.67 MB/s
|
|
||||||
BenchmarkStdCastagnoli1KB-4 10000000 144 ns/op 7099.78 MB/s (asm)
|
|
||||||
BenchmarkCastagnoli1KBNoAsm-4 1000000 1475 ns/op 694.14 MB/s (slice8)
|
|
||||||
BenchmarkCastagnoli1KB-4 10000000 146 ns/op 6993.35 MB/s (asm)
|
|
||||||
|
|
||||||
BenchmarkStdNoAsmCastagnoli8KB-4 50000 28781 ns/op 284.63 MB/s
|
|
||||||
BenchmarkStdCastagnoli8KB-4 1000000 1029 ns/op 7957.89 MB/s (asm)
|
|
||||||
BenchmarkCastagnoli8KBNoAsm-4 200000 11410 ns/op 717.94 MB/s (slice8)
|
|
||||||
BenchmarkCastagnoli8KB-4 1000000 1000 ns/op 8188.71 MB/s (asm)
|
|
||||||
|
|
||||||
BenchmarkStdNoAsmCastagnoli32KB-4 10000 115426 ns/op 283.89 MB/s
|
|
||||||
BenchmarkStdCastagnoli32KB-4 300000 4065 ns/op 8059.13 MB/s (asm)
|
|
||||||
BenchmarkCastagnoli32KBNoAsm-4 30000 45171 ns/op 725.41 MB/s (slice8)
|
|
||||||
BenchmarkCastagnoli32KB-4 500000 4077 ns/op 8035.89 MB/s (asm)
|
|
||||||
```
|
|
||||||
|
|
||||||
The IEEE assembler optimizations have been submitted and will be part of the Go 1.6 standard library.
|
|
||||||
|
|
||||||
However, the improved use of slice-by-8 has not been submitted yet; it will probably be submitted for Go 1.7.
|
|
||||||
|
|
||||||
# license
|
|
||||||
|
|
||||||
Standard Go license. Changes are Copyright (c) 2015 Klaus Post under same conditions.
|
|
|
@ -1,207 +0,0 @@
|
||||||
// Copyright 2009 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// Package crc32 implements the 32-bit cyclic redundancy check, or CRC-32,
|
|
||||||
// checksum. See http://en.wikipedia.org/wiki/Cyclic_redundancy_check for
|
|
||||||
// information.
|
|
||||||
//
|
|
||||||
// Polynomials are represented in LSB-first form also known as reversed representation.
|
|
||||||
//
|
|
||||||
// See http://en.wikipedia.org/wiki/Mathematics_of_cyclic_redundancy_checks#Reversed_representations_and_reciprocal_polynomials
|
|
||||||
// for information.
|
|
||||||
package crc32
|
|
||||||
|
|
||||||
import (
|
|
||||||
"hash"
|
|
||||||
"sync"
|
|
||||||
)
|
|
||||||
|
|
||||||
// The size of a CRC-32 checksum in bytes.
|
|
||||||
const Size = 4
|
|
||||||
|
|
||||||
// Predefined polynomials.
|
|
||||||
const (
|
|
||||||
// IEEE is by far and away the most common CRC-32 polynomial.
|
|
||||||
// Used by ethernet (IEEE 802.3), v.42, fddi, gzip, zip, png, ...
|
|
||||||
IEEE = 0xedb88320
|
|
||||||
|
|
||||||
// Castagnoli's polynomial, used in iSCSI.
|
|
||||||
// Has better error detection characteristics than IEEE.
|
|
||||||
// http://dx.doi.org/10.1109/26.231911
|
|
||||||
Castagnoli = 0x82f63b78
|
|
||||||
|
|
||||||
// Koopman's polynomial.
|
|
||||||
// Also has better error detection characteristics than IEEE.
|
|
||||||
// http://dx.doi.org/10.1109/DSN.2002.1028931
|
|
||||||
Koopman = 0xeb31d82e
|
|
||||||
)
|
|
||||||
|
|
||||||
// Table is a 256-word table representing the polynomial for efficient processing.
|
|
||||||
type Table [256]uint32
|
|
||||||
|
|
||||||
// This file makes use of functions implemented in architecture-specific files.
|
|
||||||
// The interface that they implement is as follows:
|
|
||||||
//
|
|
||||||
// // archAvailableIEEE reports whether an architecture-specific CRC32-IEEE
|
|
||||||
// // algorithm is available.
|
|
||||||
// archAvailableIEEE() bool
|
|
||||||
//
|
|
||||||
// // archInitIEEE initializes the architecture-specific CRC32-IEEE algorithm.
|
|
||||||
// // It can only be called if archAvailableIEEE() returns true.
|
|
||||||
// archInitIEEE()
|
|
||||||
//
|
|
||||||
// // archUpdateIEEE updates the given CRC32-IEEE. It can only be called if
|
|
||||||
// // archInitIEEE() was previously called.
|
|
||||||
// archUpdateIEEE(crc uint32, p []byte) uint32
|
|
||||||
//
|
|
||||||
// // archAvailableCastagnoli reports whether an architecture-specific
|
|
||||||
// // CRC32-C algorithm is available.
|
|
||||||
// archAvailableCastagnoli() bool
|
|
||||||
//
|
|
||||||
// // archInitCastagnoli initializes the architecture-specific CRC32-C
|
|
||||||
// // algorithm. It can only be called if archAvailableCastagnoli() returns
|
|
||||||
// // true.
|
|
||||||
// archInitCastagnoli()
|
|
||||||
//
|
|
||||||
// // archUpdateCastagnoli updates the given CRC32-C. It can only be called
|
|
||||||
// // if archInitCastagnoli() was previously called.
|
|
||||||
// archUpdateCastagnoli(crc uint32, p []byte) uint32
|
|
||||||
|
|
||||||
// castagnoliTable points to a lazily initialized Table for the Castagnoli
|
|
||||||
// polynomial. MakeTable will always return this value when asked to make a
|
|
||||||
// Castagnoli table so we can compare against it to find when the caller is
|
|
||||||
// using this polynomial.
|
|
||||||
var castagnoliTable *Table
|
|
||||||
var castagnoliTable8 *slicing8Table
|
|
||||||
var castagnoliArchImpl bool
|
|
||||||
var updateCastagnoli func(crc uint32, p []byte) uint32
|
|
||||||
var castagnoliOnce sync.Once
|
|
||||||
|
|
||||||
func castagnoliInit() {
|
|
||||||
castagnoliTable = simpleMakeTable(Castagnoli)
|
|
||||||
castagnoliArchImpl = archAvailableCastagnoli()
|
|
||||||
|
|
||||||
if castagnoliArchImpl {
|
|
||||||
archInitCastagnoli()
|
|
||||||
updateCastagnoli = archUpdateCastagnoli
|
|
||||||
} else {
|
|
||||||
// Initialize the slicing-by-8 table.
|
|
||||||
castagnoliTable8 = slicingMakeTable(Castagnoli)
|
|
||||||
updateCastagnoli = func(crc uint32, p []byte) uint32 {
|
|
||||||
return slicingUpdate(crc, castagnoliTable8, p)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// IEEETable is the table for the IEEE polynomial.
|
|
||||||
var IEEETable = simpleMakeTable(IEEE)
|
|
||||||
|
|
||||||
// ieeeTable8 is the slicing8Table for IEEE
|
|
||||||
var ieeeTable8 *slicing8Table
|
|
||||||
var ieeeArchImpl bool
|
|
||||||
var updateIEEE func(crc uint32, p []byte) uint32
|
|
||||||
var ieeeOnce sync.Once
|
|
||||||
|
|
||||||
func ieeeInit() {
|
|
||||||
ieeeArchImpl = archAvailableIEEE()
|
|
||||||
|
|
||||||
if ieeeArchImpl {
|
|
||||||
archInitIEEE()
|
|
||||||
updateIEEE = archUpdateIEEE
|
|
||||||
} else {
|
|
||||||
// Initialize the slicing-by-8 table.
|
|
||||||
ieeeTable8 = slicingMakeTable(IEEE)
|
|
||||||
updateIEEE = func(crc uint32, p []byte) uint32 {
|
|
||||||
return slicingUpdate(crc, ieeeTable8, p)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// MakeTable returns a Table constructed from the specified polynomial.
|
|
||||||
// The contents of this Table must not be modified.
|
|
||||||
func MakeTable(poly uint32) *Table {
|
|
||||||
switch poly {
|
|
||||||
case IEEE:
|
|
||||||
ieeeOnce.Do(ieeeInit)
|
|
||||||
return IEEETable
|
|
||||||
case Castagnoli:
|
|
||||||
castagnoliOnce.Do(castagnoliInit)
|
|
||||||
return castagnoliTable
|
|
||||||
}
|
|
||||||
return simpleMakeTable(poly)
|
|
||||||
}
|
|
||||||
|
|
||||||
// digest represents the partial evaluation of a checksum. New returns a
// *digest as its hash.Hash32.
|
|
||||||
type digest struct {
|
|
||||||
// crc is the running checksum; Write updates it and Reset sets it to 0.
crc uint32
|
|
||||||
// tab selects the polynomial; Write compares it against castagnoliTable
// and IEEETable to pick the update routine.
tab *Table
|
|
||||||
}
|
|
||||||
|
|
||||||
// New creates a new hash.Hash32 computing the CRC-32 checksum
|
|
||||||
// using the polynomial represented by the Table.
|
|
||||||
// Its Sum method will lay the value out in big-endian byte order.
|
|
||||||
func New(tab *Table) hash.Hash32 {
|
|
||||||
if tab == IEEETable {
|
|
||||||
ieeeOnce.Do(ieeeInit)
|
|
||||||
}
|
|
||||||
return &digest{0, tab}
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewIEEE creates a new hash.Hash32 computing the CRC-32 checksum
|
|
||||||
// using the IEEE polynomial.
|
|
||||||
// Its Sum method will lay the value out in big-endian byte order.
|
|
||||||
func NewIEEE() hash.Hash32 { return New(IEEETable) }
|
|
||||||
|
|
||||||
// Size returns the checksum length in bytes (the package constant Size).
func (d *digest) Size() int {
	return Size
}
|
|
||||||
|
|
||||||
// BlockSize returns 1.
func (d *digest) BlockSize() int {
	return 1
}
|
|
||||||
|
|
||||||
// Reset sets the running checksum back to 0; the table is left unchanged.
func (d *digest) Reset() {
	d.crc = 0
}
|
|
||||||
|
|
||||||
// Update returns the result of adding the bytes in p to the crc.
|
|
||||||
func Update(crc uint32, tab *Table, p []byte) uint32 {
|
|
||||||
switch tab {
|
|
||||||
case castagnoliTable:
|
|
||||||
return updateCastagnoli(crc, p)
|
|
||||||
case IEEETable:
|
|
||||||
// Unfortunately, because IEEETable is exported, IEEE may be used without a
|
|
||||||
// call to MakeTable. We have to make sure it gets initialized in that case.
|
|
||||||
ieeeOnce.Do(ieeeInit)
|
|
||||||
return updateIEEE(crc, p)
|
|
||||||
default:
|
|
||||||
return simpleUpdate(crc, tab, p)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d *digest) Write(p []byte) (n int, err error) {
|
|
||||||
switch d.tab {
|
|
||||||
case castagnoliTable:
|
|
||||||
d.crc = updateCastagnoli(d.crc, p)
|
|
||||||
case IEEETable:
|
|
||||||
// We only create digest objects through New() which takes care of
|
|
||||||
// initialization in this case.
|
|
||||||
d.crc = updateIEEE(d.crc, p)
|
|
||||||
default:
|
|
||||||
d.crc = simpleUpdate(d.crc, d.tab, p)
|
|
||||||
}
|
|
||||||
return len(p), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sum32 returns the current checksum value.
func (d *digest) Sum32() uint32 {
	return d.crc
}
|
|
||||||
|
|
||||||
func (d *digest) Sum(in []byte) []byte {
|
|
||||||
s := d.Sum32()
|
|
||||||
return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Checksum returns the CRC-32 checksum of data
|
|
||||||
// using the polynomial represented by the Table.
|
|
||||||
func Checksum(data []byte, tab *Table) uint32 { return Update(0, tab, data) }
|
|
||||||
|
|
||||||
// ChecksumIEEE returns the CRC-32 checksum of data
|
|
||||||
// using the IEEE polynomial.
|
|
||||||
func ChecksumIEEE(data []byte) uint32 {
|
|
||||||
ieeeOnce.Do(ieeeInit)
|
|
||||||
return updateIEEE(0, data)
|
|
||||||
}
|
|
|
@ -1,230 +0,0 @@
|
||||||
// Copyright 2011 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// +build !appengine,!gccgo
|
|
||||||
|
|
||||||
// AMD64-specific hardware-assisted CRC32 algorithms. See crc32.go for a
|
|
||||||
// description of the interface that each architecture-specific file
|
|
||||||
// implements.
|
|
||||||
|
|
||||||
package crc32
|
|
||||||
|
|
||||||
import "unsafe"
|
|
||||||
|
|
||||||
// This file contains the code to call the SSE 4.2 version of the Castagnoli
|
|
||||||
// and IEEE CRC.
|
|
||||||
|
|
||||||
// haveSSE41/haveSSE42/haveCLMUL are defined in crc32_amd64.s and use
|
|
||||||
// CPUID to test for SSE 4.1, 4.2 and CLMUL support.
|
|
||||||
func haveSSE41() bool
|
|
||||||
func haveSSE42() bool
|
|
||||||
func haveCLMUL() bool
|
|
||||||
|
|
||||||
// castagnoliSSE42 is defined in crc32_amd64.s and uses the SSE4.2 CRC32
|
|
||||||
// instruction.
|
|
||||||
//go:noescape
|
|
||||||
func castagnoliSSE42(crc uint32, p []byte) uint32
|
|
||||||
|
|
||||||
// castagnoliSSE42Triple is defined in crc32_amd64.s and uses the SSE4.2 CRC32
|
|
||||||
// instruction.
|
|
||||||
//go:noescape
|
|
||||||
func castagnoliSSE42Triple(
|
|
||||||
crcA, crcB, crcC uint32,
|
|
||||||
a, b, c []byte,
|
|
||||||
rounds uint32,
|
|
||||||
) (retA uint32, retB uint32, retC uint32)
|
|
||||||
|
|
||||||
// ieeeCLMUL is defined in crc32_amd64.s and uses the PCLMULQDQ
|
|
||||||
// instruction as well as SSE 4.1.
|
|
||||||
//go:noescape
|
|
||||||
func ieeeCLMUL(crc uint32, p []byte) uint32
|
|
||||||
|
|
||||||
var sse42 = haveSSE42()
|
|
||||||
var useFastIEEE = haveCLMUL() && haveSSE41()
|
|
||||||
|
|
||||||
const castagnoliK1 = 168
|
|
||||||
const castagnoliK2 = 1344
|
|
||||||
|
|
||||||
type sse42Table [4]Table
|
|
||||||
|
|
||||||
var castagnoliSSE42TableK1 *sse42Table
|
|
||||||
var castagnoliSSE42TableK2 *sse42Table
|
|
||||||
|
|
||||||
func archAvailableCastagnoli() bool {
|
|
||||||
return sse42
|
|
||||||
}
|
|
||||||
|
|
||||||
func archInitCastagnoli() {
|
|
||||||
if !sse42 {
|
|
||||||
panic("arch-specific Castagnoli not available")
|
|
||||||
}
|
|
||||||
castagnoliSSE42TableK1 = new(sse42Table)
|
|
||||||
castagnoliSSE42TableK2 = new(sse42Table)
|
|
||||||
// See description in updateCastagnoli.
|
|
||||||
// t[0][i] = CRC(i000, O)
|
|
||||||
// t[1][i] = CRC(0i00, O)
|
|
||||||
// t[2][i] = CRC(00i0, O)
|
|
||||||
// t[3][i] = CRC(000i, O)
|
|
||||||
// where O is a sequence of K zeros.
|
|
||||||
var tmp [castagnoliK2]byte
|
|
||||||
for b := 0; b < 4; b++ {
|
|
||||||
for i := 0; i < 256; i++ {
|
|
||||||
val := uint32(i) << uint32(b*8)
|
|
||||||
castagnoliSSE42TableK1[b][i] = castagnoliSSE42(val, tmp[:castagnoliK1])
|
|
||||||
castagnoliSSE42TableK2[b][i] = castagnoliSSE42(val, tmp[:])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// castagnoliShift computes the CRC32-C of K1 or K2 zeroes (depending on the
|
|
||||||
// table given) with the given initial crc value. This corresponds to
|
|
||||||
// CRC(crc, O) in the description in updateCastagnoli.
|
|
||||||
func castagnoliShift(table *sse42Table, crc uint32) uint32 {
|
|
||||||
return table[3][crc>>24] ^
|
|
||||||
table[2][(crc>>16)&0xFF] ^
|
|
||||||
table[1][(crc>>8)&0xFF] ^
|
|
||||||
table[0][crc&0xFF]
|
|
||||||
}
|
|
||||||
|
|
||||||
func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
|
|
||||||
if !sse42 {
|
|
||||||
panic("not available")
|
|
||||||
}
|
|
||||||
|
|
||||||
// This method is inspired from the algorithm in Intel's white paper:
|
|
||||||
// "Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction"
|
|
||||||
// The same strategy of splitting the buffer in three is used but the
|
|
||||||
// combining calculation is different; the complete derivation is explained
|
|
||||||
// below.
|
|
||||||
//
|
|
||||||
// -- The basic idea --
|
|
||||||
//
|
|
||||||
// The CRC32 instruction (available in SSE4.2) can process 8 bytes at a
|
|
||||||
// time. In recent Intel architectures the instruction takes 3 cycles;
|
|
||||||
// however the processor can pipeline up to three instructions if they
|
|
||||||
// don't depend on each other.
|
|
||||||
//
|
|
||||||
// Roughly this means that we can process three buffers in about the same
|
|
||||||
// time we can process one buffer.
|
|
||||||
//
|
|
||||||
// The idea is then to split the buffer in three, CRC the three pieces
|
|
||||||
// separately and then combine the results.
|
|
||||||
//
|
|
||||||
// Combining the results requires precomputed tables, so we must choose a
|
|
||||||
// fixed buffer length to optimize. The longer the length, the faster; but
|
|
||||||
// only buffers longer than this length will use the optimization. We choose
|
|
||||||
// two cutoffs and compute tables for both:
|
|
||||||
// - one around 512: 168*3=504
|
|
||||||
// - one around 4KB: 1344*3=4032
|
|
||||||
//
|
|
||||||
// -- The nitty gritty --
|
|
||||||
//
|
|
||||||
// Let CRC(I, X) be the non-inverted CRC32-C of the sequence X (with
|
|
||||||
// initial non-inverted CRC I). This function has the following properties:
|
|
||||||
// (a) CRC(I, AB) = CRC(CRC(I, A), B)
|
|
||||||
// (b) CRC(I, A xor B) = CRC(I, A) xor CRC(0, B)
|
|
||||||
//
|
|
||||||
// Say we want to compute CRC(I, ABC) where A, B, C are three sequences of
|
|
||||||
// K bytes each, where K is a fixed constant. Let O be the sequence of K zero
|
|
||||||
// bytes.
|
|
||||||
//
|
|
||||||
// CRC(I, ABC) = CRC(I, ABO xor C)
|
|
||||||
// = CRC(I, ABO) xor CRC(0, C)
|
|
||||||
// = CRC(CRC(I, AB), O) xor CRC(0, C)
|
|
||||||
// = CRC(CRC(I, AO xor B), O) xor CRC(0, C)
|
|
||||||
// = CRC(CRC(I, AO) xor CRC(0, B), O) xor CRC(0, C)
|
|
||||||
// = CRC(CRC(CRC(I, A), O) xor CRC(0, B), O) xor CRC(0, C)
|
|
||||||
//
|
|
||||||
// The castagnoliSSE42Triple function can compute CRC(I, A), CRC(0, B),
|
|
||||||
// and CRC(0, C) efficiently. We just need to find a way to quickly compute
|
|
||||||
// CRC(uvwx, O) given a 4-byte initial value uvwx. We can precompute these
|
|
||||||
// values; since we can't have a 32-bit table, we break it up into four
|
|
||||||
// 8-bit tables:
|
|
||||||
//
|
|
||||||
// CRC(uvwx, O) = CRC(u000, O) xor
|
|
||||||
// CRC(0v00, O) xor
|
|
||||||
// CRC(00w0, O) xor
|
|
||||||
// CRC(000x, O)
|
|
||||||
//
|
|
||||||
// We can compute tables corresponding to the four terms for all 8-bit
|
|
||||||
// values.
|
|
||||||
|
|
||||||
crc = ^crc
|
|
||||||
|
|
||||||
// If a buffer is long enough to use the optimization, process the first few
|
|
||||||
// bytes to align the buffer to an 8 byte boundary (if necessary).
|
|
||||||
if len(p) >= castagnoliK1*3 {
|
|
||||||
delta := int(uintptr(unsafe.Pointer(&p[0])) & 7)
|
|
||||||
if delta != 0 {
|
|
||||||
delta = 8 - delta
|
|
||||||
crc = castagnoliSSE42(crc, p[:delta])
|
|
||||||
p = p[delta:]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process 3*K2 at a time.
|
|
||||||
for len(p) >= castagnoliK2*3 {
|
|
||||||
// Compute CRC(I, A), CRC(0, B), and CRC(0, C).
|
|
||||||
crcA, crcB, crcC := castagnoliSSE42Triple(
|
|
||||||
crc, 0, 0,
|
|
||||||
p, p[castagnoliK2:], p[castagnoliK2*2:],
|
|
||||||
castagnoliK2/24)
|
|
||||||
|
|
||||||
// CRC(I, AB) = CRC(CRC(I, A), O) xor CRC(0, B)
|
|
||||||
crcAB := castagnoliShift(castagnoliSSE42TableK2, crcA) ^ crcB
|
|
||||||
// CRC(I, ABC) = CRC(CRC(I, AB), O) xor CRC(0, C)
|
|
||||||
crc = castagnoliShift(castagnoliSSE42TableK2, crcAB) ^ crcC
|
|
||||||
p = p[castagnoliK2*3:]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process 3*K1 at a time.
|
|
||||||
for len(p) >= castagnoliK1*3 {
|
|
||||||
// Compute CRC(I, A), CRC(0, B), and CRC(0, C).
|
|
||||||
crcA, crcB, crcC := castagnoliSSE42Triple(
|
|
||||||
crc, 0, 0,
|
|
||||||
p, p[castagnoliK1:], p[castagnoliK1*2:],
|
|
||||||
castagnoliK1/24)
|
|
||||||
|
|
||||||
// CRC(I, AB) = CRC(CRC(I, A), O) xor CRC(0, B)
|
|
||||||
crcAB := castagnoliShift(castagnoliSSE42TableK1, crcA) ^ crcB
|
|
||||||
// CRC(I, ABC) = CRC(CRC(I, AB), O) xor CRC(0, C)
|
|
||||||
crc = castagnoliShift(castagnoliSSE42TableK1, crcAB) ^ crcC
|
|
||||||
p = p[castagnoliK1*3:]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use the simple implementation for what's left.
|
|
||||||
crc = castagnoliSSE42(crc, p)
|
|
||||||
return ^crc
|
|
||||||
}
|
|
||||||
|
|
||||||
func archAvailableIEEE() bool {
|
|
||||||
return useFastIEEE
|
|
||||||
}
|
|
||||||
|
|
||||||
var archIeeeTable8 *slicing8Table
|
|
||||||
|
|
||||||
func archInitIEEE() {
|
|
||||||
if !useFastIEEE {
|
|
||||||
panic("not available")
|
|
||||||
}
|
|
||||||
// We still use slicing-by-8 for small buffers.
|
|
||||||
archIeeeTable8 = slicingMakeTable(IEEE)
|
|
||||||
}
|
|
||||||
|
|
||||||
func archUpdateIEEE(crc uint32, p []byte) uint32 {
|
|
||||||
if !useFastIEEE {
|
|
||||||
panic("not available")
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(p) >= 64 {
|
|
||||||
left := len(p) & 15
|
|
||||||
do := len(p) - left
|
|
||||||
crc = ^ieeeCLMUL(^crc, p[:do])
|
|
||||||
p = p[do:]
|
|
||||||
}
|
|
||||||
if len(p) == 0 {
|
|
||||||
return crc
|
|
||||||
}
|
|
||||||
return slicingUpdate(crc, archIeeeTable8, p)
|
|
||||||
}
|
|
|
@ -1,319 +0,0 @@
|
||||||
// Copyright 2011 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#define NOSPLIT 4
|
|
||||||
#define RODATA 8
|
|
||||||
|
|
||||||
// castagnoliSSE42 updates the (non-inverted) crc with the given buffer,
// using the CRC32 instruction. Inputs of 8 bytes or more are first brought
// to an 8-byte boundary, then consumed 8 bytes at a time; the remaining
// 0-7 bytes are handled as a 4-, 2- and 1-byte step.
//
// func castagnoliSSE42(crc uint32, p []byte) uint32
TEXT ·castagnoliSSE42(SB), NOSPLIT, $0
	MOVL crc+0(FP), AX     // AX = running CRC
	MOVQ p+8(FP), SI       // SI = read pointer
	MOVQ p_len+16(FP), CX  // CX = bytes remaining

	// Fewer than 8 bytes: alignment is not worth it, go straight to the tail.
	CMPQ CX, $8
	JL tail

	// BX = SI mod 8; zero means the pointer is already aligned.
	MOVQ SI, BX
	ANDQ $7, BX
	JZ qword_loop

	// BX = 8 - BX: the number of bytes to consume to reach alignment.
	// Its three low bits select a 1-, 2- and 4-byte step below.
	SUBQ $1, BX
	XORQ $7, BX

	BTQ $0, BX
	JNC head_word

	CRC32B (SI), AX
	DECQ CX
	INCQ SI

head_word:
	BTQ $1, BX
	JNC head_long

	// CRC32W (SI), AX
	BYTE $0x66; BYTE $0xf2; BYTE $0x0f; BYTE $0x38; BYTE $0xf1; BYTE $0x06

	SUBQ $2, CX
	ADDQ $2, SI

head_long:
	BTQ $2, BX
	JNC qword_loop

	// CRC32L (SI), AX
	BYTE $0xf2; BYTE $0x0f; BYTE $0x38; BYTE $0xf1; BYTE $0x06

	SUBQ $4, CX
	ADDQ $4, SI

qword_loop:
	// The pointer is 8-byte aligned here; consume 8 bytes per iteration
	// while at least 8 remain.
	CMPQ CX, $8
	JL tail

	CRC32Q (SI), AX
	ADDQ $8, SI
	SUBQ $8, CX
	JMP qword_loop

tail:
	// Up to 7 bytes may be left; the low bits of CX pick a 4-byte,
	// then a 2-byte, then a 1-byte step.
	BTQ $2, CX
	JNC tail_word

	// CRC32L (SI), AX
	BYTE $0xf2; BYTE $0x0f; BYTE $0x38; BYTE $0xf1; BYTE $0x06
	ADDQ $4, SI

tail_word:
	BTQ $1, CX
	JNC tail_byte

	// CRC32W (SI), AX
	BYTE $0x66; BYTE $0xf2; BYTE $0x0f; BYTE $0x38; BYTE $0xf1; BYTE $0x06
	ADDQ $2, SI

tail_byte:
	BTQ $0, CX
	JNC finished

	CRC32B (SI), AX

finished:
	MOVL AX, ret+32(FP)
	RET
|
|
||||||
|
|
||||||
// castagnoliSSE42Triple updates three (non-inverted) crcs with (24*rounds)
// bytes from each buffer. The three CRC32Q chains use separate registers and
// are interleaved. Only the data pointers of a, b and c are read; their
// lengths are not checked. The round counter is decremented before it is
// tested, so rounds must be non-zero.
//
// func castagnoliSSE42Triple(
//     crc1, crc2, crc3 uint32,
//     a, b, c []byte,
//     rounds uint32,
// ) (retA uint32, retB uint32, retC uint32)
TEXT ·castagnoliSSE42Triple(SB), NOSPLIT, $0
	// Running CRCs for the three streams.
	MOVL crcA+0(FP), AX
	MOVL crcB+4(FP), CX
	MOVL crcC+8(FP), DX

	// Read pointers for the three streams.
	MOVQ a+16(FP), R8
	MOVQ b+40(FP), R9
	MOVQ c+64(FP), R10

	// Remaining rounds; each round consumes 24 bytes per stream.
	MOVL rounds+88(FP), R11

round:
	CRC32Q (R8), AX
	CRC32Q (R9), CX
	CRC32Q (R10), DX

	CRC32Q 8(R8), AX
	CRC32Q 8(R9), CX
	CRC32Q 8(R10), DX

	CRC32Q 16(R8), AX
	CRC32Q 16(R9), CX
	CRC32Q 16(R10), DX

	ADDQ $24, R8
	ADDQ $24, R9
	ADDQ $24, R10

	DECQ R11
	JNZ round

	MOVL AX, retA+96(FP)
	MOVL CX, retB+100(FP)
	MOVL DX, retC+104(FP)
	RET
|
|
||||||
|
|
||||||
// haveSSE42 reports whether the CPU supports SSE 4.2, by returning bit 20
// of ECX from CPUID leaf 1.
//
// func haveSSE42() bool
TEXT ·haveSSE42(SB), NOSPLIT, $0
	MOVL $1, AX   // CPUID leaf 1
	CPUID
	SHRQ $20, CX  // move bit 20 down to bit 0
	ANDQ $1, CX
	MOVB CX, ret+0(FP)
	RET
|
|
||||||
|
|
||||||
// haveCLMUL reports whether the CPU supports the PCLMULQDQ instruction, by
// returning bit 1 of ECX from CPUID leaf 1.
//
// func haveCLMUL() bool
TEXT ·haveCLMUL(SB), NOSPLIT, $0
	MOVL $1, AX  // CPUID leaf 1
	CPUID
	SHRQ $1, CX  // move bit 1 down to bit 0
	ANDQ $1, CX
	MOVB CX, ret+0(FP)
	RET
|
|
||||||
|
|
||||||
// haveSSE41 reports whether the CPU supports SSE 4.1, by returning bit 19
// of ECX from CPUID leaf 1.
//
// func haveSSE41() bool
TEXT ·haveSSE41(SB), NOSPLIT, $0
	MOVL $1, AX   // CPUID leaf 1
	CPUID
	SHRQ $19, CX  // move bit 19 down to bit 0
	ANDQ $1, CX
	MOVB CX, ret+0(FP)
	RET
|
|
||||||
|
|
||||||
// CRC32 polynomial data
|
|
||||||
//
|
|
||||||
// These constants are lifted from the
|
|
||||||
// Linux kernel, since they avoid the costly
|
|
||||||
// PSHUFB 16 byte reversal proposed in the
|
|
||||||
// original Intel paper.
|
|
||||||
DATA r2r1kp<>+0(SB)/8, $0x154442bd4
|
|
||||||
DATA r2r1kp<>+8(SB)/8, $0x1c6e41596
|
|
||||||
DATA r4r3kp<>+0(SB)/8, $0x1751997d0
|
|
||||||
DATA r4r3kp<>+8(SB)/8, $0x0ccaa009e
|
|
||||||
DATA rupolykp<>+0(SB)/8, $0x1db710641
|
|
||||||
DATA rupolykp<>+8(SB)/8, $0x1f7011641
|
|
||||||
DATA r5kp<>+0(SB)/8, $0x163cd6124
|
|
||||||
|
|
||||||
GLOBL r2r1kp<>(SB), RODATA, $16
|
|
||||||
GLOBL r4r3kp<>(SB), RODATA, $16
|
|
||||||
GLOBL rupolykp<>(SB), RODATA, $16
|
|
||||||
GLOBL r5kp<>(SB), RODATA, $8
|
|
||||||
|
|
||||||
// ieeeCLMUL updates the (non-inverted) CRC-32 IEEE value crc with the bytes
// in p using PCLMULQDQ folding. The last instruction before the result is
// stored is PEXTRD, which is SSE 4.1.
//
// Based on http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
// len(p) must be at least 64, and must be a multiple of 16.
//
// Register use: SI = read pointer, CX = bytes remaining, X1-X4 = the four
// 16-byte accumulators, X0 = current folding constants.
//
// func ieeeCLMUL(crc uint32, p []byte) uint32
TEXT ·ieeeCLMUL(SB), NOSPLIT, $0
	MOVL crc+0(FP), X0     // Initial CRC value
	MOVQ p+8(FP), SI       // data pointer
	MOVQ p_len+16(FP), CX  // len(p)

	// Load the first 64 bytes and mix the initial CRC into the first block.
	MOVOU (SI), X1
	MOVOU 16(SI), X2
	MOVOU 32(SI), X3
	MOVOU 48(SI), X4
	PXOR X0, X1
	ADDQ $64, SI  // buf+=64
	SUBQ $64, CX  // len-=64
	CMPQ CX, $64  // Less than 64 bytes left
	JB remain64

	MOVOA r2r1kp<>+0(SB), X0

loopback64:
	// Fold each accumulator over the next 64 bytes: multiply the low and
	// high halves by the constants in X0, then XOR in the new data.
	MOVOA X1, X5
	MOVOA X2, X6
	MOVOA X3, X7
	MOVOA X4, X8

	PCLMULQDQ $0, X0, X1
	PCLMULQDQ $0, X0, X2
	PCLMULQDQ $0, X0, X3
	PCLMULQDQ $0, X0, X4

	// Load next early
	MOVOU (SI), X11
	MOVOU 16(SI), X12
	MOVOU 32(SI), X13
	MOVOU 48(SI), X14

	PCLMULQDQ $0x11, X0, X5
	PCLMULQDQ $0x11, X0, X6
	PCLMULQDQ $0x11, X0, X7
	PCLMULQDQ $0x11, X0, X8

	PXOR X5, X1
	PXOR X6, X2
	PXOR X7, X3
	PXOR X8, X4

	PXOR X11, X1
	PXOR X12, X2
	PXOR X13, X3
	PXOR X14, X4

	ADDQ $64, SI  // buf+=64
	SUBQ $64, CX  // len-=64
	CMPQ CX, $64  // Less than 64 bytes left?
	JGE loopback64

	// Fold result into a single register (X1)
remain64:
	MOVOA r4r3kp<>+0(SB), X0

	MOVOA X1, X5
	PCLMULQDQ $0, X0, X1
	PCLMULQDQ $0x11, X0, X5
	PXOR X5, X1
	PXOR X2, X1

	MOVOA X1, X5
	PCLMULQDQ $0, X0, X1
	PCLMULQDQ $0x11, X0, X5
	PXOR X5, X1
	PXOR X3, X1

	MOVOA X1, X5
	PCLMULQDQ $0, X0, X1
	PCLMULQDQ $0x11, X0, X5
	PXOR X5, X1
	PXOR X4, X1

	// If there is less than 16 bytes left we are done
	CMPQ CX, $16
	JB finish

	// Encode 16 bytes
remain16:
	MOVOU (SI), X10
	MOVOA X1, X5
	PCLMULQDQ $0, X0, X1
	PCLMULQDQ $0x11, X0, X5
	PXOR X5, X1
	PXOR X10, X1
	SUBQ $16, CX
	ADDQ $16, SI
	CMPQ CX, $16
	JGE remain16

finish:
	// Fold final result into 32 bits and return it
	PCMPEQB X3, X3
	PCLMULQDQ $1, X1, X0
	PSRLDQ $8, X1
	PXOR X0, X1

	MOVOA X1, X2
	MOVQ r5kp<>+0(SB), X0

	// Creates 32 bit mask. Note that we don't care about upper half.
	PSRLQ $32, X3

	PSRLDQ $4, X2
	PAND X3, X1
	PCLMULQDQ $0, X0, X1
	PXOR X2, X1

	MOVOA rupolykp<>+0(SB), X0

	MOVOA X1, X2
	PAND X3, X1
	PCLMULQDQ $0x10, X0, X1
	PAND X3, X1
	PCLMULQDQ $0, X0, X1
	PXOR X2, X1

	// PEXTRD $1, X1, AX (SSE 4.1)
	BYTE $0x66; BYTE $0x0f; BYTE $0x3a
	BYTE $0x16; BYTE $0xc8; BYTE $0x01
	MOVL AX, ret+32(FP)

	RET
|
|
|
@ -1,43 +0,0 @@
|
||||||
// Copyright 2011 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// +build !appengine,!gccgo
|
|
||||||
|
|
||||||
package crc32
|
|
||||||
|
|
||||||
// This file contains the code to call the SSE 4.2 version of the Castagnoli
|
|
||||||
// CRC.
|
|
||||||
|
|
||||||
// haveSSE42 reports whether the CPU supports SSE 4.2. It has no Go body: the
// implementation is in crc32_amd64p32.s and uses the CPUID instruction.
func haveSSE42() bool
|
|
||||||
|
|
||||||
// castagnoliSSE42 updates crc with the bytes of p and returns the new value.
// It has no Go body: the implementation is in crc32_amd64p32.s and uses the
// SSE4.2 CRC32 instruction.
//go:noescape
func castagnoliSSE42(crc uint32, p []byte) uint32
|
|
||||||
|
|
||||||
// sse42 holds the result of haveSSE42, evaluated once when the package's
// variables are initialized.
var sse42 = haveSSE42()
|
|
||||||
|
|
||||||
func archAvailableCastagnoli() bool {
|
|
||||||
return sse42
|
|
||||||
}
|
|
||||||
|
|
||||||
func archInitCastagnoli() {
|
|
||||||
if !sse42 {
|
|
||||||
panic("not available")
|
|
||||||
}
|
|
||||||
// No initialization necessary.
|
|
||||||
}
|
|
||||||
|
|
||||||
func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
|
|
||||||
if !sse42 {
|
|
||||||
panic("not available")
|
|
||||||
}
|
|
||||||
return castagnoliSSE42(crc, p)
|
|
||||||
}
|
|
||||||
|
|
||||||
// archAvailableIEEE reports whether an architecture-specific IEEE
// implementation exists. This file provides none, so it is always false.
func archAvailableIEEE() bool {
	return false
}
|
|
||||||
// archInitIEEE always panics with "not available": this file provides no
// architecture-specific IEEE implementation to initialize.
func archInitIEEE() {
	panic("not available")
}
|
|
||||||
// archUpdateIEEE always panics with "not available": this file provides no
// architecture-specific IEEE implementation. Both arguments are ignored.
func archUpdateIEEE(crc uint32, p []byte) uint32 {
	panic("not available")
}
|
|
|
@ -1,67 +0,0 @@
|
||||||
// Copyright 2011 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#define NOSPLIT 4
|
|
||||||
#define RODATA 8
|
|
||||||
|
|
||||||
// func castagnoliSSE42(crc uint32, p []byte) uint32
//
// Updates crc with the bytes of p using the CRC32B/CRC32Q instructions.
// The CRC is bit-inverted on entry and inverted again before it is stored.
// Register use: AX = running CRC, SI = read pointer, CX = bytes remaining,
// BX = scratch for the alignment test.
TEXT ·castagnoliSSE42(SB), NOSPLIT, $0
	MOVL crc+0(FP), AX // CRC value
	MOVL p+4(FP), SI // data pointer
	MOVL p_len+8(FP), CX // len(p)

	NOTL AX

	// If there's less than 8 bytes to process, we do it byte-by-byte.
	CMPQ CX, $8
	JL cleanup

	// Process individual bytes until the input is 8-byte aligned.
	// This loop is only entered with CX >= 8 and consumes at most 7 bytes,
	// so CX cannot go negative here.
startup:
	MOVQ SI, BX
	ANDQ $7, BX
	JZ aligned

	CRC32B (SI), AX
	DECQ CX
	INCQ SI
	JMP startup

aligned:
	// The input is now 8-byte aligned and we can process 8-byte chunks.
	CMPQ CX, $8
	JL cleanup

	CRC32Q (SI), AX
	ADDQ $8, SI
	SUBQ $8, CX
	JMP aligned

cleanup:
	// We may have some bytes left over that we process one at a time.
	CMPQ CX, $0
	JE done

	CRC32B (SI), AX
	INCQ SI
	DECQ CX
	JMP cleanup

done:
	NOTL AX
	MOVL AX, ret+16(FP)
	RET
|
|
||||||
|
|
||||||
// func haveSSE42() bool
//
// Executes CPUID with AX = 1 and returns bit 20 of CX as the result.
TEXT ·haveSSE42(SB), NOSPLIT, $0
	XORQ AX, AX
	INCL AX // AX = 1 selects the CPUID leaf that is queried
	CPUID
	SHRQ $20, CX // bring bit 20 of CX down to bit 0
	ANDQ $1, CX // discard every other bit
	MOVB CX, ret+0(FP)
	RET
|
|
||||||
|
|
|
@ -1,89 +0,0 @@
|
||||||
// Copyright 2011 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// This file contains CRC32 algorithms that are not specific to any architecture
|
|
||||||
// and don't use hardware acceleration.
|
|
||||||
//
|
|
||||||
// The simple (and slow) CRC32 implementation only uses a 256*4 bytes table.
|
|
||||||
//
|
|
||||||
// The slicing-by-8 algorithm is a faster implementation that uses a bigger
|
|
||||||
// table (8*256*4 bytes).
|
|
||||||
|
|
||||||
package crc32
|
|
||||||
|
|
||||||
// simpleMakeTable allocates and constructs a Table for the specified
|
|
||||||
// polynomial. The table is suitable for use with the simple algorithm
|
|
||||||
// (simpleUpdate).
|
|
||||||
func simpleMakeTable(poly uint32) *Table {
|
|
||||||
t := new(Table)
|
|
||||||
simplePopulateTable(poly, t)
|
|
||||||
return t
|
|
||||||
}
|
|
||||||
|
|
||||||
// simplePopulateTable constructs a Table for the specified polynomial, suitable
|
|
||||||
// for use with simpleUpdate.
|
|
||||||
func simplePopulateTable(poly uint32, t *Table) {
|
|
||||||
for i := 0; i < 256; i++ {
|
|
||||||
crc := uint32(i)
|
|
||||||
for j := 0; j < 8; j++ {
|
|
||||||
if crc&1 == 1 {
|
|
||||||
crc = (crc >> 1) ^ poly
|
|
||||||
} else {
|
|
||||||
crc >>= 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
t[i] = crc
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// simpleUpdate uses the simple algorithm to update the CRC, given a table that
|
|
||||||
// was previously computed using simpleMakeTable.
|
|
||||||
func simpleUpdate(crc uint32, tab *Table, p []byte) uint32 {
|
|
||||||
crc = ^crc
|
|
||||||
for _, v := range p {
|
|
||||||
crc = tab[byte(crc)^v] ^ (crc >> 8)
|
|
||||||
}
|
|
||||||
return ^crc
|
|
||||||
}
|
|
||||||
|
|
||||||
// slicing8Cutoff is the minimum payload length, in bytes, at which
// slicingUpdate uses the slicing-by-8 loop; shorter payloads go straight to
// simpleUpdate.
const slicing8Cutoff = 16
|
|
||||||
|
|
||||||
// slicing8Table is an array of 8 Tables, used by the slicing-by-8 algorithm.
// Element 0 is the plain table that simpleUpdate uses.
type slicing8Table [8]Table
|
|
||||||
|
|
||||||
// slicingMakeTable constructs a slicing8Table for the specified polynomial. The
|
|
||||||
// table is suitable for use with the slicing-by-8 algorithm (slicingUpdate).
|
|
||||||
func slicingMakeTable(poly uint32) *slicing8Table {
|
|
||||||
t := new(slicing8Table)
|
|
||||||
simplePopulateTable(poly, &t[0])
|
|
||||||
for i := 0; i < 256; i++ {
|
|
||||||
crc := t[0][i]
|
|
||||||
for j := 1; j < 8; j++ {
|
|
||||||
crc = t[0][crc&0xFF] ^ (crc >> 8)
|
|
||||||
t[j][i] = crc
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return t
|
|
||||||
}
|
|
||||||
|
|
||||||
// slicingUpdate uses the slicing-by-8 algorithm to update the CRC, given a
|
|
||||||
// table that was previously computed using slicingMakeTable.
|
|
||||||
func slicingUpdate(crc uint32, tab *slicing8Table, p []byte) uint32 {
|
|
||||||
if len(p) >= slicing8Cutoff {
|
|
||||||
crc = ^crc
|
|
||||||
for len(p) > 8 {
|
|
||||||
crc ^= uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
|
|
||||||
crc = tab[0][p[7]] ^ tab[1][p[6]] ^ tab[2][p[5]] ^ tab[3][p[4]] ^
|
|
||||||
tab[4][crc>>24] ^ tab[5][(crc>>16)&0xFF] ^
|
|
||||||
tab[6][(crc>>8)&0xFF] ^ tab[7][crc&0xFF]
|
|
||||||
p = p[8:]
|
|
||||||
}
|
|
||||||
crc = ^crc
|
|
||||||
}
|
|
||||||
if len(p) == 0 {
|
|
||||||
return crc
|
|
||||||
}
|
|
||||||
return simpleUpdate(crc, &tab[0], p)
|
|
||||||
}
|
|
|
@ -1,15 +0,0 @@
|
||||||
// Copyright 2011 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// +build !amd64,!amd64p32,!s390x
|
|
||||||
|
|
||||||
package crc32
|
|
||||||
|
|
||||||
// archAvailableIEEE reports whether an architecture-specific IEEE
// implementation exists; for the architectures this file is built for there
// is none.
func archAvailableIEEE() bool {
	return false
}
|
|
||||||
// archInitIEEE always panics with "not available" because there is no
// architecture-specific IEEE implementation to initialize.
func archInitIEEE() {
	panic("not available")
}
|
|
||||||
// archUpdateIEEE always panics with "not available" because there is no
// architecture-specific IEEE implementation. Both arguments are ignored.
func archUpdateIEEE(crc uint32, p []byte) uint32 {
	panic("not available")
}
|
|
||||||
|
|
||||||
// archAvailableCastagnoli reports whether an architecture-specific Castagnoli
// implementation exists; for the architectures this file is built for there
// is none.
func archAvailableCastagnoli() bool {
	return false
}
|
|
||||||
// archInitCastagnoli always panics with "not available" because there is no
// architecture-specific Castagnoli implementation to initialize.
func archInitCastagnoli() {
	panic("not available")
}
|
|
||||||
// archUpdateCastagnoli always panics with "not available" because there is no
// architecture-specific Castagnoli implementation. Both arguments are ignored.
func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
	panic("not available")
}
|
|
|
@ -1,91 +0,0 @@
|
||||||
// Copyright 2016 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// +build s390x
|
|
||||||
|
|
||||||
package crc32
|
|
||||||
|
|
||||||
const (
	vxMinLen    = 64 // minimum input length, in bytes, passed to the vectorized routines
	vxAlignMask = 15 // align to 16 bytes
)
|
|
||||||
|
|
||||||
// hasVectorFacility reports whether the machine has the z/Architecture
// vector facility installed and enabled. It has no Go body; the
// implementation is in assembly.
func hasVectorFacility() bool
|
|
||||||
|
|
||||||
// hasVX holds the result of hasVectorFacility, evaluated once when the
// package's variables are initialized. Every arch* function in this file
// checks it.
var hasVX = hasVectorFacility()
|
|
||||||
|
|
||||||
// vectorizedCastagnoli implements CRC32 (Castagnoli polynomial) using vector
// instructions and returns crc updated with p.
// It is defined in crc32_s390x.s. The assembly deliberately crashes when
// len(p) is less than 64 and only consumes whole 16-byte chunks, so callers
// pass a length that is a multiple of 16.
//go:noescape
func vectorizedCastagnoli(crc uint32, p []byte) uint32
|
|
||||||
|
|
||||||
// vectorizedIEEE implements CRC32 (IEEE polynomial) using vector instructions
// and returns crc updated with p.
// It is defined in crc32_s390x.s. The assembly deliberately crashes when
// len(p) is less than 64 and only consumes whole 16-byte chunks, so callers
// pass a length that is a multiple of 16.
//go:noescape
func vectorizedIEEE(crc uint32, p []byte) uint32
|
|
||||||
|
|
||||||
func archAvailableCastagnoli() bool {
|
|
||||||
return hasVX
|
|
||||||
}
|
|
||||||
|
|
||||||
// archCastagnoliTable8 is the slicing-by-8 table for the Castagnoli
// polynomial. It is nil until archInitCastagnoli has run.
var archCastagnoliTable8 *slicing8Table
|
|
||||||
|
|
||||||
func archInitCastagnoli() {
|
|
||||||
if !hasVX {
|
|
||||||
panic("not available")
|
|
||||||
}
|
|
||||||
// We still use slicing-by-8 for small buffers.
|
|
||||||
archCastagnoliTable8 = slicingMakeTable(Castagnoli)
|
|
||||||
}
|
|
||||||
|
|
||||||
// archUpdateCastagnoli calculates the checksum of p using
|
|
||||||
// vectorizedCastagnoli.
|
|
||||||
func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
|
|
||||||
if !hasVX {
|
|
||||||
panic("not available")
|
|
||||||
}
|
|
||||||
// Use vectorized function if data length is above threshold.
|
|
||||||
if len(p) >= vxMinLen {
|
|
||||||
aligned := len(p) & ^vxAlignMask
|
|
||||||
crc = vectorizedCastagnoli(crc, p[:aligned])
|
|
||||||
p = p[aligned:]
|
|
||||||
}
|
|
||||||
if len(p) == 0 {
|
|
||||||
return crc
|
|
||||||
}
|
|
||||||
return slicingUpdate(crc, archCastagnoliTable8, p)
|
|
||||||
}
|
|
||||||
|
|
||||||
func archAvailableIEEE() bool {
|
|
||||||
return hasVX
|
|
||||||
}
|
|
||||||
|
|
||||||
// archIeeeTable8 is the slicing-by-8 table for the IEEE polynomial. It is nil
// until archInitIEEE has run.
var archIeeeTable8 *slicing8Table
|
|
||||||
|
|
||||||
func archInitIEEE() {
|
|
||||||
if !hasVX {
|
|
||||||
panic("not available")
|
|
||||||
}
|
|
||||||
// We still use slicing-by-8 for small buffers.
|
|
||||||
archIeeeTable8 = slicingMakeTable(IEEE)
|
|
||||||
}
|
|
||||||
|
|
||||||
// archUpdateIEEE calculates the checksum of p using vectorizedIEEE.
|
|
||||||
func archUpdateIEEE(crc uint32, p []byte) uint32 {
|
|
||||||
if !hasVX {
|
|
||||||
panic("not available")
|
|
||||||
}
|
|
||||||
// Use vectorized function if data length is above threshold.
|
|
||||||
if len(p) >= vxMinLen {
|
|
||||||
aligned := len(p) & ^vxAlignMask
|
|
||||||
crc = vectorizedIEEE(crc, p[:aligned])
|
|
||||||
p = p[aligned:]
|
|
||||||
}
|
|
||||||
if len(p) == 0 {
|
|
||||||
return crc
|
|
||||||
}
|
|
||||||
return slicingUpdate(crc, archIeeeTable8, p)
|
|
||||||
}
|
|
|
@ -1,249 +0,0 @@
|
||||||
// Copyright 2016 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// +build s390x
|
|
||||||
|
|
||||||
#include "textflag.h"
|
|
||||||
|
|
||||||
// Vector register range containing CRC-32 constants
|
|
||||||
|
|
||||||
#define CONST_PERM_LE2BE V9
|
|
||||||
#define CONST_R2R1 V10
|
|
||||||
#define CONST_R4R3 V11
|
|
||||||
#define CONST_R5 V12
|
|
||||||
#define CONST_RU_POLY V13
|
|
||||||
#define CONST_CRC_POLY V14
|
|
||||||
|
|
||||||
// The CRC-32 constant block contains reduction constants to fold and
|
|
||||||
// process particular chunks of the input data stream in parallel.
|
|
||||||
//
|
|
||||||
// Note that the constant definitions below are extended in order to compute
|
|
||||||
// intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction.
|
|
||||||
// The rightmost doubleword can be 0 to prevent contribution to the result or
|
|
||||||
// can be multiplied by 1 to perform an XOR without the need for a separate
|
|
||||||
// VECTOR EXCLUSIVE OR instruction.
|
|
||||||
//
|
|
||||||
// The polynomials used are bit-reflected:
|
|
||||||
//
|
|
||||||
// IEEE: P'(x) = 0x0edb88320
|
|
||||||
// Castagnoli: P'(x) = 0x082f63b78
|
|
||||||
|
|
||||||
// IEEE polynomial constants
//
// vectorizedBody loads this block with one VLM into CONST_PERM_LE2BE through
// CONST_CRC_POLY, i.e. six consecutive 16-byte vectors in the order:
// byte-reversal mask, R2:R1, R4:R3, 0:R5, 0:u', 0:P'(x) << 1.
// NOTE(review): only the first 96 bytes are initialized although the symbol
// is declared with size 144 — confirm whether the extra space is intended.
DATA ·crcleconskp+0(SB)/8, $0x0F0E0D0C0B0A0908 // LE-to-BE mask
DATA ·crcleconskp+8(SB)/8, $0x0706050403020100
DATA ·crcleconskp+16(SB)/8, $0x00000001c6e41596 // R2
DATA ·crcleconskp+24(SB)/8, $0x0000000154442bd4 // R1
DATA ·crcleconskp+32(SB)/8, $0x00000000ccaa009e // R4
DATA ·crcleconskp+40(SB)/8, $0x00000001751997d0 // R3
DATA ·crcleconskp+48(SB)/8, $0x0000000000000000
DATA ·crcleconskp+56(SB)/8, $0x0000000163cd6124 // R5
DATA ·crcleconskp+64(SB)/8, $0x0000000000000000
DATA ·crcleconskp+72(SB)/8, $0x00000001F7011641 // u'
DATA ·crcleconskp+80(SB)/8, $0x0000000000000000
DATA ·crcleconskp+88(SB)/8, $0x00000001DB710641 // P'(x) << 1

GLOBL ·crcleconskp(SB), RODATA, $144
|
|
||||||
|
|
||||||
// Castagnoli polynomial constants
//
// Same layout as the IEEE block: vectorizedBody loads it with one VLM into
// CONST_PERM_LE2BE through CONST_CRC_POLY, i.e. six consecutive 16-byte
// vectors in the order: byte-reversal mask, R2:R1, R4:R3, 0:R5, 0:u',
// 0:P'(x) << 1.
// NOTE(review): only the first 96 bytes are initialized although the symbol
// is declared with size 144 — confirm whether the extra space is intended.
DATA ·crccleconskp+0(SB)/8, $0x0F0E0D0C0B0A0908 // LE-to-BE mask
DATA ·crccleconskp+8(SB)/8, $0x0706050403020100
DATA ·crccleconskp+16(SB)/8, $0x000000009e4addf8 // R2
DATA ·crccleconskp+24(SB)/8, $0x00000000740eef02 // R1
DATA ·crccleconskp+32(SB)/8, $0x000000014cd00bd6 // R4
DATA ·crccleconskp+40(SB)/8, $0x00000000f20c0dfe // R3
DATA ·crccleconskp+48(SB)/8, $0x0000000000000000
DATA ·crccleconskp+56(SB)/8, $0x00000000dd45aab8 // R5
DATA ·crccleconskp+64(SB)/8, $0x0000000000000000
DATA ·crccleconskp+72(SB)/8, $0x00000000dea713f1 // u'
DATA ·crccleconskp+80(SB)/8, $0x0000000000000000
DATA ·crccleconskp+88(SB)/8, $0x0000000105ec76f0 // P'(x) << 1

GLOBL ·crccleconskp(SB), RODATA, $144
|
|
||||||
|
|
||||||
// func hasVectorFacility() bool
//
// Stores the facility list into a zeroed 24-byte stack buffer with STFLE,
// tests one bit of it, and then executes a vector instruction pair to check
// that vector instructions actually work. Returns 1 only if both checks pass.
TEXT ·hasVectorFacility(SB), NOSPLIT, $24-1
	MOVD $x-24(SP), R1
	XC $24, 0(R1), 0(R1) // clear the storage
	MOVD $2, R0 // R0 is the number of double words stored -1
	WORD $0xB2B01000 // STFLE 0(R1)
	XOR R0, R0 // reset the value of R0
	MOVBZ z-8(SP), R1 // byte at offset 16 of the buffer (start of the third doubleword)
	AND $0x40, R1 // isolate the single facility bit being tested
	BEQ novector

vectorinstalled:
	// check if the vector instruction has been enabled
	VLEIB $0, $0xF, V16 // write 0xF into byte element 0 of V16 ...
	VLGVB $0, V16, R1 // ... and read it back into R1
	CMPBNE R1, $0xF, novector
	MOVB $1, ret+0(FP) // have vx
	RET

novector:
	MOVB $0, ret+0(FP) // no vx
	RET
|
|
||||||
|
|
||||||
// The CRC-32 function(s) use these calling conventions:
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
//
|
|
||||||
// R2: Initial CRC value, typically ~0; and final CRC (return) value.
|
|
||||||
// R3: Input buffer pointer, performance might be improved if the
|
|
||||||
// buffer is on a doubleword boundary.
|
|
||||||
// R4: Length of the buffer, must be 64 bytes or greater.
|
|
||||||
//
|
|
||||||
// Register usage:
|
|
||||||
//
|
|
||||||
// R5: CRC-32 constant pool base pointer.
|
|
||||||
// V0: Initial CRC value and intermediate constants and results.
|
|
||||||
// V1..V4: Data for CRC computation.
|
|
||||||
// V5..V8: Next data chunks that are fetched from the input buffer.
|
|
||||||
//
|
|
||||||
// V9..V14: CRC-32 constants.
|
|
||||||
|
|
||||||
// func vectorizedIEEE(crc uint32, p []byte) uint32
//
// Loads the arguments into the registers vectorizedBody expects
// (R2 = crc, R3 = data pointer, R4 = len(p)), points R5 at the IEEE constant
// block and branches to vectorizedBody, which stores the result and returns.
TEXT ·vectorizedIEEE(SB), NOSPLIT, $0
	MOVWZ crc+0(FP), R2 // R2 stores the CRC value
	MOVD p+8(FP), R3 // data pointer
	MOVD p_len+16(FP), R4 // len(p)

	MOVD $·crcleconskp(SB), R5
	BR vectorizedBody<>(SB)
|
|
||||||
|
|
||||||
// func vectorizedCastagnoli(crc uint32, p []byte) uint32
//
// Loads the arguments into the registers vectorizedBody expects
// (R2 = crc, R3 = data pointer, R4 = len(p)), points R5 at the Castagnoli
// constant block and branches to vectorizedBody, which stores the result and
// returns.
TEXT ·vectorizedCastagnoli(SB), NOSPLIT, $0
	MOVWZ crc+0(FP), R2 // R2 stores the CRC value
	MOVD p+8(FP), R3 // data pointer
	MOVD p_len+16(FP), R4 // len(p)

	// R5: crc-32 constant pool base pointer, constant is used to reduce crc
	MOVD $·crccleconskp(SB), R5
	BR vectorizedBody<>(SB)
|
|
||||||
|
|
||||||
// vectorizedBody is the shared tail of vectorizedIEEE and
// vectorizedCastagnoli. On entry R2 holds the CRC, R3 the data pointer,
// R4 the length and R5 the address of the constant block. It folds the input
// 64 bytes at a time, then 16 bytes at a time, reduces the remainder to
// 32 bits and stores the inverted result in ret. Input bytes beyond the last
// whole 16-byte chunk are not read.
TEXT vectorizedBody<>(SB), NOSPLIT, $0
	XOR $0xffffffff, R2 // NOTW R2
	VLM 0(R5), CONST_PERM_LE2BE, CONST_CRC_POLY

	// Load the initial CRC value into the rightmost word of V0
	VZERO V0
	VLVGF $3, R2, V0

	// Crash if the input size is less than 64-bytes.
	CMP R4, $64
	BLT crash

	// Load a 64-byte data chunk and XOR with CRC
	VLM 0(R3), V1, V4 // 64-bytes into V1..V4

	// Reflect the data if the CRC operation is in the bit-reflected domain
	VPERM V1, V1, CONST_PERM_LE2BE, V1
	VPERM V2, V2, CONST_PERM_LE2BE, V2
	VPERM V3, V3, CONST_PERM_LE2BE, V3
	VPERM V4, V4, CONST_PERM_LE2BE, V4

	VX V0, V1, V1 // V1 ^= CRC
	ADD $64, R3 // BUF = BUF + 64
	ADD $(-64), R4

	// Check remaining buffer size and jump to proper folding method
	CMP R4, $64
	BLT less_than_64bytes

fold_64bytes_loop:
	// Load the next 64-byte data chunk into V5 to V8
	VLM 0(R3), V5, V8
	VPERM V5, V5, CONST_PERM_LE2BE, V5
	VPERM V6, V6, CONST_PERM_LE2BE, V6
	VPERM V7, V7, CONST_PERM_LE2BE, V7
	VPERM V8, V8, CONST_PERM_LE2BE, V8

	// Perform a GF(2) multiplication of the doublewords in V1 with
	// the reduction constants in CONST_R2R1. The intermediate result is
	// then folded (accumulated) with the next data chunk in V5 and
	// stored in V1. Repeat this step for the register contents
	// in V2, V3, and V4 respectively.

	VGFMAG CONST_R2R1, V1, V5, V1
	VGFMAG CONST_R2R1, V2, V6, V2
	VGFMAG CONST_R2R1, V3, V7, V3
	VGFMAG CONST_R2R1, V4, V8, V4

	// Adjust buffer pointer and length for next loop
	ADD $64, R3 // BUF = BUF + 64
	ADD $(-64), R4 // LEN = LEN - 64

	CMP R4, $64
	BGE fold_64bytes_loop

less_than_64bytes:
	// Fold V1 to V4 into a single 128-bit value in V1
	VGFMAG CONST_R4R3, V1, V2, V1
	VGFMAG CONST_R4R3, V1, V3, V1
	VGFMAG CONST_R4R3, V1, V4, V1

	// Check whether to continue with 64-bit folding
	CMP R4, $16
	BLT final_fold

fold_16bytes_loop:
	VL 0(R3), V2 // Load next data chunk
	VPERM V2, V2, CONST_PERM_LE2BE, V2

	VGFMAG CONST_R4R3, V1, V2, V1 // Fold next data chunk

	// Adjust buffer pointer and size for folding next data chunk
	ADD $16, R3
	ADD $-16, R4

	// Process remaining data chunks
	CMP R4, $16
	BGE fold_16bytes_loop

final_fold:
	// From here on V9 is reused as a shift-count register, so
	// CONST_PERM_LE2BE is no longer valid.
	VLEIB $7, $0x40, V9
	VSRLB V9, CONST_R4R3, V0
	VLEIG $0, $1, V0

	VGFMG V0, V1, V1

	VLEIB $7, $0x20, V9 // Shift by words
	VSRLB V9, V1, V2 // Store remaining bits in V2
	VUPLLF V1, V1 // Split rightmost doubleword
	VGFMAG CONST_R5, V1, V2, V1 // V1 = (V1 * R5) XOR V2

	// The input values to the Barrett reduction are the degree-63 polynomial
	// in V1 (R(x)), degree-32 generator polynomial, and the reduction
	// constant u. The Barrett reduction result is the CRC value of R(x) mod
	// P(x).
	//
	// The Barrett reduction algorithm is defined as:
	//
	// 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
	// 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
	// 3. C(x) = R(x) XOR T2(x) mod x^32
	//
	// Note: To compensate the division by x^32, use the vector unpack
	// instruction to move the leftmost word into the leftmost doubleword
	// of the vector register. The rightmost doubleword is multiplied
	// with zero to not contribute to the intermediate results.

	// T1(x) = floor( R(x) / x^32 ) GF2MUL u
	VUPLLF V1, V2
	VGFMG CONST_RU_POLY, V2, V2

	// Compute the GF(2) product of the CRC polynomial in CONST_CRC_POLY with
	// T1(x) in V2 and XOR the intermediate result, T2(x), with the value in V1.
	// The final result is in the rightmost word of V2.

	VUPLLF V2, V2
	VGFMAG CONST_CRC_POLY, V2, V1, V2

done:
	VLGVF $2, V2, R2 // extract word element 2 of V2 into R2
	XOR $0xffffffff, R2 // NOTW R2
	MOVWZ R2, ret + 32(FP)
	RET

crash:
	// Deliberate fault: store through the address held in R0
	// (presumably 0 — confirm against the s390x register conventions).
	MOVD $0, (R0) // input size is less than 64-bytes
|
|
|
@ -12,6 +12,8 @@ gitea.com/macaron/captcha
|
||||||
gitea.com/macaron/cors
|
gitea.com/macaron/cors
|
||||||
# gitea.com/macaron/csrf v0.0.0-20190822024205-3dc5a4474439
|
# gitea.com/macaron/csrf v0.0.0-20190822024205-3dc5a4474439
|
||||||
gitea.com/macaron/csrf
|
gitea.com/macaron/csrf
|
||||||
|
# gitea.com/macaron/gzip v0.0.0-20191118033930-0c4c5566a0e5
|
||||||
|
gitea.com/macaron/gzip
|
||||||
# gitea.com/macaron/i18n v0.0.0-20190822004228-474e714e2223
|
# gitea.com/macaron/i18n v0.0.0-20190822004228-474e714e2223
|
||||||
gitea.com/macaron/i18n
|
gitea.com/macaron/i18n
|
||||||
# gitea.com/macaron/inject v0.0.0-20190805023432-d4c86e31027a
|
# gitea.com/macaron/inject v0.0.0-20190805023432-d4c86e31027a
|
||||||
|
@ -259,13 +261,9 @@ github.com/keybase/go-crypto/openpgp/errors
|
||||||
github.com/keybase/go-crypto/openpgp/packet
|
github.com/keybase/go-crypto/openpgp/packet
|
||||||
github.com/keybase/go-crypto/openpgp/s2k
|
github.com/keybase/go-crypto/openpgp/s2k
|
||||||
github.com/keybase/go-crypto/rsa
|
github.com/keybase/go-crypto/rsa
|
||||||
# github.com/klauspost/compress v0.0.0-20161025140425-8df558b6cb6f
|
# github.com/klauspost/compress v1.9.2
|
||||||
github.com/klauspost/compress/flate
|
github.com/klauspost/compress/flate
|
||||||
github.com/klauspost/compress/gzip
|
github.com/klauspost/compress/gzip
|
||||||
# github.com/klauspost/cpuid v0.0.0-20160302075316-09cded8978dc
|
|
||||||
github.com/klauspost/cpuid
|
|
||||||
# github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6
|
|
||||||
github.com/klauspost/crc32
|
|
||||||
# github.com/kr/pretty v0.1.0
|
# github.com/kr/pretty v0.1.0
|
||||||
github.com/kr/pretty
|
github.com/kr/pretty
|
||||||
# github.com/kr/text v0.1.0
|
# github.com/kr/text v0.1.0
|
||||||
|
|
Loading…
Reference in New Issue