cmd/kgz: add extended metadata support to kgz compression

This commit is contained in:
2025-11-18 18:34:39 -08:00
parent c4c9abe310
commit 17e999754b
2 changed files with 182 additions and 12 deletions

View File

@@ -3,18 +3,31 @@ kgz
kgz is like gzip, but supports compressing and decompressing to a different kgz is like gzip, but supports compressing and decompressing to a different
directory than the source file is in. directory than the source file is in.
Usage: kgz [-l] source [target] Usage: kgz [-l] [-k] [-m] [-x] [--uid N] [--gid N] source [target]
If target is a directory, the basename of the sourcefile will be used If target is a directory, the basename of the source file will be used
as the target filename. Compression and decompression is selected as the target filename. Compression and decompression is selected
based on whether the source filename ends in ".gz". based on whether the source filename ends in ".gz".
Flags: Flags:
-l level Compression level (0-9). Only meaninful when -l level Compression level (0-9). Only meaningful when compressing.
compressing a file.
-u Do not restrict the size during decompression. As -u Do not restrict the size during decompression. As
a safeguard against gzip bombs, the maximum size a safeguard against gzip bombs, the maximum size
allowed is 32 * the compressed file size. allowed is 32 * the compressed file size.
-k Keep the source file (do not remove it after successful
compression or decompression).
-m On decompression, set the file mtime from the gzip header.
-x On compression, include uid/gid/mode/ctime in the gzip Extra
field so that decompression can restore them. The Extra payload
is an ASN.1 DER-encoded struct.
--uid N When used with -x, set UID in Extra to N (override source).
--gid N When used with -x, set GID in Extra to N (override source).
Metadata notes:
- mtime is stored in the standard gzip header and restored with -m.
- uid/gid/mode/ctime are stored in a kgz-specific Extra subfield as an ASN.1
DER-encoded struct. Restoring
uid/gid may fail without sufficient privileges; such errors are ignored.

View File

@@ -3,23 +3,122 @@ package main
import ( import (
"compress/flate" "compress/flate"
"compress/gzip" "compress/gzip"
"encoding/asn1"
"encoding/binary"
"flag" "flag"
"fmt" "fmt"
"io" "io"
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
goutilslib "git.wntrmute.dev/kyle/goutils/lib"
"golang.org/x/sys/unix"
) )
const gzipExt = ".gz" const gzipExt = ".gz"
func compress(path, target string, level int) error { // kgzExtraID is the two-byte subfield identifier used in the gzip Extra field
// for kgz-specific metadata.
var kgzExtraID = [2]byte{'K', 'G'}
// buildKGExtra constructs the gzip Extra subfield payload for kgz metadata.
//
// The payload is an ASN.1 DER-encoded struct with the following fields:
//
// Version INTEGER (currently 1)
// UID INTEGER
// GID INTEGER
// Mode INTEGER (permission bits)
// CTimeSec INTEGER (seconds)
// CTimeNSec INTEGER (nanoseconds)
//
// The ASN.1 blob is wrapped in a gzip Extra subfield with ID 'K','G'.
func buildKGExtra(uid, gid, mode uint32, ctimeS int64, ctimeNs int32) []byte {
// Define the ASN.1 structure to encode
type KGZExtra struct {
Version int
UID int
GID int
Mode int
CTimeSec int64
CTimeNSec int32
}
payload, err := asn1.Marshal(KGZExtra{
Version: 1,
UID: int(uid),
GID: int(gid),
Mode: int(mode),
CTimeSec: ctimeS,
CTimeNSec: ctimeNs,
})
if err != nil {
// On marshal failure, return empty to avoid breaking compression
return nil
}
// Wrap in gzip subfield: [ID1 ID2 LEN(lo) LEN(hi) PAYLOAD]
extra := make([]byte, 4+len(payload))
extra[0] = kgzExtraID[0]
extra[1] = kgzExtraID[1]
binary.LittleEndian.PutUint16(extra[2:], uint16(len(payload)))
copy(extra[4:], payload)
return extra
}
// parseKGExtra scans a gzip Extra blob and returns kgz metadata if present.
func parseKGExtra(extra []byte) (uid, gid, mode uint32, ctimeS int64, ctimeNs int32, ok bool) {
i := 0
for i+4 <= len(extra) {
id1 := extra[i]
id2 := extra[i+1]
l := int(binary.LittleEndian.Uint16(extra[i+2 : i+4]))
i += 4
if i+l > len(extra) {
break
}
if id1 == kgzExtraID[0] && id2 == kgzExtraID[1] {
// ASN.1 decode payload
payload := extra[i : i+l]
var s struct {
Version int
UID int
GID int
Mode int
CTimeSec int64
CTimeNSec int32
}
if _, err := asn1.Unmarshal(payload, &s); err != nil {
return 0, 0, 0, 0, 0, false
}
if s.Version != 1 {
return 0, 0, 0, 0, 0, false
}
return uint32(s.UID), uint32(s.GID), uint32(s.Mode), s.CTimeSec, s.CTimeNSec, true
}
i += l
}
return 0, 0, 0, 0, 0, false
}
func compress(path, target string, level int, includeExtra bool, setUID, setGID int) error {
sourceFile, err := os.Open(path) sourceFile, err := os.Open(path)
if err != nil { if err != nil {
return fmt.Errorf("opening file for read: %w", err) return fmt.Errorf("opening file for read: %w", err)
} }
defer sourceFile.Close() defer sourceFile.Close()
// Gather file metadata
var st unix.Stat_t
if err := unix.Stat(path, &st); err != nil {
return fmt.Errorf("stat source: %w", err)
}
fi, err := sourceFile.Stat()
if err != nil {
return fmt.Errorf("stat source file: %w", err)
}
destFile, err := os.Create(target) destFile, err := os.Create(target)
if err != nil { if err != nil {
return fmt.Errorf("opening file for write: %w", err) return fmt.Errorf("opening file for write: %w", err)
@@ -30,6 +129,27 @@ func compress(path, target string, level int) error {
if err != nil { if err != nil {
return fmt.Errorf("invalid compression level: %w", err) return fmt.Errorf("invalid compression level: %w", err)
} }
// Set header metadata
gzipCompressor.ModTime = fi.ModTime()
if includeExtra {
uid := uint32(st.Uid)
gid := uint32(st.Gid)
if setUID >= 0 {
uid = uint32(setUID)
}
if setGID >= 0 {
gid = uint32(setGID)
}
mode := uint32(st.Mode & 0o7777)
// Use portable helper to gather ctime
var cts int64
var ctns int32
if ft, err := goutilslib.LoadFileTime(path); err == nil {
cts = ft.Changed.Unix()
ctns = int32(ft.Changed.Nanosecond())
}
gzipCompressor.Extra = buildKGExtra(uid, gid, mode, cts, ctns)
}
defer gzipCompressor.Close() defer gzipCompressor.Close()
_, err = io.Copy(gzipCompressor, sourceFile) _, err = io.Copy(gzipCompressor, sourceFile)
@@ -40,7 +160,7 @@ func compress(path, target string, level int) error {
return nil return nil
} }
func uncompress(path, target string, unrestrict bool) error { func uncompress(path, target string, unrestrict bool, preserveMtime bool) error {
sourceFile, err := os.Open(path) sourceFile, err := os.Open(path)
if err != nil { if err != nil {
return fmt.Errorf("opening file for read: %w", err) return fmt.Errorf("opening file for read: %w", err)
@@ -79,19 +199,40 @@ func uncompress(path, target string, unrestrict bool) error {
if err != nil { if err != nil {
return fmt.Errorf("uncompressing file: %w", err) return fmt.Errorf("uncompressing file: %w", err)
} }
// Apply metadata from Extra (uid/gid/mode) if present
if gzipUncompressor.Header.Extra != nil {
if uid, gid, mode, _, _, ok := parseKGExtra(gzipUncompressor.Header.Extra); ok {
// Chmod
_ = os.Chmod(target, os.FileMode(mode))
// Chown (may fail without privileges)
_ = os.Chown(target, int(uid), int(gid))
}
}
// Preserve mtime if requested
if preserveMtime {
mt := gzipUncompressor.Header.ModTime
if !mt.IsZero() {
// Set both atime and mtime to mt for simplicity
_ = os.Chtimes(target, mt, mt)
}
}
return nil return nil
} }
func usage(w io.Writer) { func usage(w io.Writer) {
fmt.Fprintf(w, `Usage: %s [-l] source [target] fmt.Fprintf(w, `Usage: %s [-l] [-k] [-m] [-x] [--uid N] [--gid N] source [target]
kgz is like gzip, but supports compressing and decompressing to a different kgz is like gzip, but supports compressing and decompressing to a different
directory than the source file is in. directory than the source file is in.
Flags: Flags:
-l level Compression level (0-9). Only meaninful when -l level Compression level (0-9). Only meaningful when compressing.
compressing a file. -u Do not restrict the size during decompression (gzip bomb guard is 32x).
-k Keep the source file (do not remove it after successful (de)compression).
-m On decompression, set the file mtime from the gzip header.
-x On compression, include uid/gid/mode/ctime in the gzip Extra field.
--uid N When used with -x, set UID in Extra to N (overrides source owner).
--gid N When used with -x, set GID in Extra to N (overrides source group).
`, os.Args[0]) `, os.Args[0])
} }
@@ -150,9 +291,19 @@ func main() {
var target = "." var target = "."
var err error var err error
var unrestrict bool var unrestrict bool
var keep bool
var preserveMtime bool
var includeExtra bool
var setUID int
var setGID int
flag.IntVar(&level, "l", flate.DefaultCompression, "compression level") flag.IntVar(&level, "l", flate.DefaultCompression, "compression level")
flag.BoolVar(&unrestrict, "u", false, "do not restrict decompression") flag.BoolVar(&unrestrict, "u", false, "do not restrict decompression")
flag.BoolVar(&keep, "k", false, "keep the source file (do not remove it)")
flag.BoolVar(&preserveMtime, "m", false, "on decompression, set mtime from gzip header")
flag.BoolVar(&includeExtra, "x", false, "on compression, include uid/gid/mode/ctime in gzip Extra")
flag.IntVar(&setUID, "uid", -1, "when used with -x, set UID in Extra to this value")
flag.IntVar(&setGID, "gid", -1, "when used with -x, set GID in Extra to this value")
flag.Parse() flag.Parse()
if flag.NArg() < 1 || flag.NArg() > 2 { if flag.NArg() < 1 || flag.NArg() > 2 {
@@ -172,12 +323,15 @@ func main() {
os.Exit(1) os.Exit(1)
} }
err = uncompress(path, target, unrestrict) err = uncompress(path, target, unrestrict, preserveMtime)
if err != nil { if err != nil {
os.Remove(target) os.Remove(target)
fmt.Fprintf(os.Stderr, "%s\n", err) fmt.Fprintf(os.Stderr, "%s\n", err)
os.Exit(1) os.Exit(1)
} }
if !keep {
_ = os.Remove(path)
}
return return
} }
@@ -187,10 +341,13 @@ func main() {
os.Exit(1) os.Exit(1)
} }
err = compress(path, target, level) err = compress(path, target, level, includeExtra, setUID, setGID)
if err != nil { if err != nil {
os.Remove(target) os.Remove(target)
fmt.Fprintf(os.Stderr, "%s\n", err) fmt.Fprintf(os.Stderr, "%s\n", err)
os.Exit(1) os.Exit(1)
} }
if !keep {
_ = os.Remove(path)
}
} }