cmd/kgz: add extended metadata support to kgz compression

This commit is contained in:
2025-11-18 18:34:39 -08:00
parent c4c9abe310
commit 17e999754b
2 changed files with 182 additions and 12 deletions

View File

@@ -3,18 +3,31 @@ kgz
kgz is like gzip, but supports compressing and decompressing to a different
directory than the source file is in.
Usage: kgz [-l] source [target]
Usage: kgz [-l] [-k] [-m] [-x] [--uid N] [--gid N] source [target]
If target is a directory, the basename of the sourcefile will be used
If target is a directory, the basename of the source file will be used
as the target filename. Compression and decompression is selected
based on whether the source filename ends in ".gz".
Flags:
-l level Compression level (0-9). Only meaninful when
compressing a file.
-l level Compression level (0-9). Only meaningful when compressing.
-u Do not restrict the size during decompression. As
a safeguard against gzip bombs, the maximum size
allowed is 32 * the compressed file size.
-k Keep the source file (do not remove it after successful
compression or decompression).
-m On decompression, set the file mtime from the gzip header.
-x On compression, include uid/gid/mode/ctime in the gzip Extra
field so that decompression can restore them. The Extra payload
is an ASN.1 DER-encoded struct.
--uid N When used with -x, set UID in Extra to N (override source).
--gid N When used with -x, set GID in Extra to N (override source).
Metadata notes:
- mtime is stored in the standard gzip header and restored with -m.
- uid/gid/mode/ctime are stored in a kgz-specific Extra subfield as an ASN.1
DER-encoded struct. Restoring
uid/gid may fail without sufficient privileges; such errors are ignored.

View File

@@ -3,23 +3,122 @@ package main
import (
"compress/flate"
"compress/gzip"
"encoding/asn1"
"encoding/binary"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"strings"
goutilslib "git.wntrmute.dev/kyle/goutils/lib"
"golang.org/x/sys/unix"
)
const gzipExt = ".gz"
func compress(path, target string, level int) error {
// kgzExtraID is the two-byte subfield identifier used in the gzip Extra field
// for kgz-specific metadata.
var kgzExtraID = [2]byte{'K', 'G'}
// buildKGExtra constructs the gzip Extra subfield payload for kgz metadata.
//
// The payload is an ASN.1 DER-encoded struct with the following fields:
//
// Version INTEGER (currently 1)
// UID INTEGER
// GID INTEGER
// Mode INTEGER (permission bits)
// CTimeSec INTEGER (seconds)
// CTimeNSec INTEGER (nanoseconds)
//
// The ASN.1 blob is wrapped in a gzip Extra subfield with ID 'K','G'.
func buildKGExtra(uid, gid, mode uint32, ctimeS int64, ctimeNs int32) []byte {
// Define the ASN.1 structure to encode
type KGZExtra struct {
Version int
UID int
GID int
Mode int
CTimeSec int64
CTimeNSec int32
}
payload, err := asn1.Marshal(KGZExtra{
Version: 1,
UID: int(uid),
GID: int(gid),
Mode: int(mode),
CTimeSec: ctimeS,
CTimeNSec: ctimeNs,
})
if err != nil {
// On marshal failure, return empty to avoid breaking compression
return nil
}
// Wrap in gzip subfield: [ID1 ID2 LEN(lo) LEN(hi) PAYLOAD]
extra := make([]byte, 4+len(payload))
extra[0] = kgzExtraID[0]
extra[1] = kgzExtraID[1]
binary.LittleEndian.PutUint16(extra[2:], uint16(len(payload)))
copy(extra[4:], payload)
return extra
}
// parseKGExtra scans a gzip Extra blob and returns kgz metadata if present.
func parseKGExtra(extra []byte) (uid, gid, mode uint32, ctimeS int64, ctimeNs int32, ok bool) {
i := 0
for i+4 <= len(extra) {
id1 := extra[i]
id2 := extra[i+1]
l := int(binary.LittleEndian.Uint16(extra[i+2 : i+4]))
i += 4
if i+l > len(extra) {
break
}
if id1 == kgzExtraID[0] && id2 == kgzExtraID[1] {
// ASN.1 decode payload
payload := extra[i : i+l]
var s struct {
Version int
UID int
GID int
Mode int
CTimeSec int64
CTimeNSec int32
}
if _, err := asn1.Unmarshal(payload, &s); err != nil {
return 0, 0, 0, 0, 0, false
}
if s.Version != 1 {
return 0, 0, 0, 0, 0, false
}
return uint32(s.UID), uint32(s.GID), uint32(s.Mode), s.CTimeSec, s.CTimeNSec, true
}
i += l
}
return 0, 0, 0, 0, 0, false
}
func compress(path, target string, level int, includeExtra bool, setUID, setGID int) error {
sourceFile, err := os.Open(path)
if err != nil {
return fmt.Errorf("opening file for read: %w", err)
}
defer sourceFile.Close()
// Gather file metadata
var st unix.Stat_t
if err := unix.Stat(path, &st); err != nil {
return fmt.Errorf("stat source: %w", err)
}
fi, err := sourceFile.Stat()
if err != nil {
return fmt.Errorf("stat source file: %w", err)
}
destFile, err := os.Create(target)
if err != nil {
return fmt.Errorf("opening file for write: %w", err)
@@ -30,6 +129,27 @@ func compress(path, target string, level int) error {
if err != nil {
return fmt.Errorf("invalid compression level: %w", err)
}
// Set header metadata
gzipCompressor.ModTime = fi.ModTime()
if includeExtra {
uid := uint32(st.Uid)
gid := uint32(st.Gid)
if setUID >= 0 {
uid = uint32(setUID)
}
if setGID >= 0 {
gid = uint32(setGID)
}
mode := uint32(st.Mode & 0o7777)
// Use portable helper to gather ctime
var cts int64
var ctns int32
if ft, err := goutilslib.LoadFileTime(path); err == nil {
cts = ft.Changed.Unix()
ctns = int32(ft.Changed.Nanosecond())
}
gzipCompressor.Extra = buildKGExtra(uid, gid, mode, cts, ctns)
}
defer gzipCompressor.Close()
_, err = io.Copy(gzipCompressor, sourceFile)
@@ -40,7 +160,7 @@ func compress(path, target string, level int) error {
return nil
}
func uncompress(path, target string, unrestrict bool) error {
func uncompress(path, target string, unrestrict bool, preserveMtime bool) error {
sourceFile, err := os.Open(path)
if err != nil {
return fmt.Errorf("opening file for read: %w", err)
@@ -79,19 +199,40 @@ func uncompress(path, target string, unrestrict bool) error {
if err != nil {
return fmt.Errorf("uncompressing file: %w", err)
}
// Apply metadata from Extra (uid/gid/mode) if present
if gzipUncompressor.Header.Extra != nil {
if uid, gid, mode, _, _, ok := parseKGExtra(gzipUncompressor.Header.Extra); ok {
// Chmod
_ = os.Chmod(target, os.FileMode(mode))
// Chown (may fail without privileges)
_ = os.Chown(target, int(uid), int(gid))
}
}
// Preserve mtime if requested
if preserveMtime {
mt := gzipUncompressor.Header.ModTime
if !mt.IsZero() {
// Set both atime and mtime to mt for simplicity
_ = os.Chtimes(target, mt, mt)
}
}
return nil
}
func usage(w io.Writer) {
fmt.Fprintf(w, `Usage: %s [-l] source [target]
fmt.Fprintf(w, `Usage: %s [-l] [-k] [-m] [-x] [--uid N] [--gid N] source [target]
kgz is like gzip, but supports compressing and decompressing to a different
directory than the source file is in.
Flags:
-l level Compression level (0-9). Only meaninful when
compressing a file.
-l level Compression level (0-9). Only meaningful when compressing.
-u Do not restrict the size during decompression (gzip bomb guard is 32x).
-k Keep the source file (do not remove it after successful (de)compression).
-m On decompression, set the file mtime from the gzip header.
-x On compression, include uid/gid/mode/ctime in the gzip Extra field.
--uid N When used with -x, set UID in Extra to N (overrides source owner).
--gid N When used with -x, set GID in Extra to N (overrides source group).
`, os.Args[0])
}
@@ -150,9 +291,19 @@ func main() {
var target = "."
var err error
var unrestrict bool
var keep bool
var preserveMtime bool
var includeExtra bool
var setUID int
var setGID int
flag.IntVar(&level, "l", flate.DefaultCompression, "compression level")
flag.BoolVar(&unrestrict, "u", false, "do not restrict decompression")
flag.BoolVar(&keep, "k", false, "keep the source file (do not remove it)")
flag.BoolVar(&preserveMtime, "m", false, "on decompression, set mtime from gzip header")
flag.BoolVar(&includeExtra, "x", false, "on compression, include uid/gid/mode/ctime in gzip Extra")
flag.IntVar(&setUID, "uid", -1, "when used with -x, set UID in Extra to this value")
flag.IntVar(&setGID, "gid", -1, "when used with -x, set GID in Extra to this value")
flag.Parse()
if flag.NArg() < 1 || flag.NArg() > 2 {
@@ -172,12 +323,15 @@ func main() {
os.Exit(1)
}
err = uncompress(path, target, unrestrict)
err = uncompress(path, target, unrestrict, preserveMtime)
if err != nil {
os.Remove(target)
fmt.Fprintf(os.Stderr, "%s\n", err)
os.Exit(1)
}
if !keep {
_ = os.Remove(path)
}
return
}
@@ -187,10 +341,13 @@ func main() {
os.Exit(1)
}
err = compress(path, target, level)
err = compress(path, target, level, includeExtra, setUID, setGID)
if err != nil {
os.Remove(target)
fmt.Fprintf(os.Stderr, "%s\n", err)
os.Exit(1)
}
if !keep {
_ = os.Remove(path)
}
}