From b3b840b14d480a8e5df7ef191b03b3006bc90a14 Mon Sep 17 00:00:00 2001 From: Nicholas Sherlock Date: Sat, 17 Mar 2018 12:22:49 +1300 Subject: [PATCH] Support custom filename encoding option (#72) --- extract.js | 8 +++++--- headers.js | 16 ++++++++-------- test/extract.js | 37 +++++++++++++++++++++++++++++++++++++ test/fixtures/index.js | 1 + test/fixtures/latin1.tar | Bin 0 -> 10240 bytes 5 files changed, 51 insertions(+), 11 deletions(-) create mode 100644 test/fixtures/latin1.tar diff --git a/extract.js b/extract.js index dee0a23..0d37cf7 100644 --- a/extract.js +++ b/extract.js @@ -43,6 +43,8 @@ var Extract = function (opts) { if (!(this instanceof Extract)) return new Extract(opts) Writable.call(this, opts) + opts = opts || {} + this._offset = 0 this._buffer = bl() this._missing = 0 @@ -102,14 +104,14 @@ var Extract = function (opts) { var ongnulongpath = function () { var size = self._header.size - this._gnuLongPath = headers.decodeLongPath(b.slice(0, size)) + this._gnuLongPath = headers.decodeLongPath(b.slice(0, size), opts.filenameEncoding) b.consume(size) onstreamend() } var ongnulonglinkpath = function () { var size = self._header.size - this._gnuLongLinkPath = headers.decodeLongPath(b.slice(0, size)) + this._gnuLongLinkPath = headers.decodeLongPath(b.slice(0, size), opts.filenameEncoding) b.consume(size) onstreamend() } @@ -118,7 +120,7 @@ var Extract = function (opts) { var offset = self._offset var header try { - header = self._header = headers.decode(b.slice(0, 512)) + header = self._header = headers.decode(b.slice(0, 512), opts.filenameEncoding) } catch (err) { self.emit('error', err) } diff --git a/headers.js b/headers.js index 31a8364..0548213 100644 --- a/headers.js +++ b/headers.js @@ -149,8 +149,8 @@ var decodeOct = function (val, offset, length) { } } -var decodeStr = function (val, offset, length) { - return val.slice(offset, indexOf(val, 0, offset, offset + length)).toString() +var decodeStr = function (val, offset, length, encoding) { + return val.slice(offset, indexOf(val, 0, offset, offset + length)).toString(encoding) } var addLength = function (str) { @@ -161,8 +161,8 @@ var addLength = function (str) { return (len + digits) + str } -exports.decodeLongPath = function (buf) { - return decodeStr(buf, 0, buf.length) +exports.decodeLongPath = function (buf, encoding) { + return decodeStr(buf, 0, buf.length, encoding) } exports.encodePax = function (opts) { // TODO: encode more stuff in pax @@ -240,23 +240,23 @@ exports.encode = function (opts) { return buf } -exports.decode = function (buf) { +exports.decode = function (buf, filenameEncoding) { var typeflag = buf[156] === 0 ? 0 : buf[156] - ZERO_OFFSET - var name = decodeStr(buf, 0, 100) + var name = decodeStr(buf, 0, 100, filenameEncoding) var mode = decodeOct(buf, 100, 8) var uid = decodeOct(buf, 108, 8) var gid = decodeOct(buf, 116, 8) var size = decodeOct(buf, 124, 12) var mtime = decodeOct(buf, 136, 12) var type = toType(typeflag) - var linkname = buf[157] === 0 ? null : decodeStr(buf, 157, 100) + var linkname = buf[157] === 0 ? null : decodeStr(buf, 157, 100, filenameEncoding) var uname = decodeStr(buf, 265, 32) var gname = decodeStr(buf, 297, 32) var devmajor = decodeOct(buf, 329, 8) var devminor = decodeOct(buf, 337, 8) - if (buf[345]) name = decodeStr(buf, 345, 155) + '/' + name + if (buf[345]) name = decodeStr(buf, 345, 155, filenameEncoding) + '/' + name // to support old tar versions that use trailing / to indicate dirs if (typeflag === 0 && name && name[name.length - 1] === '/') typeflag = 5 diff --git a/test/extract.js b/test/extract.js index b3a4f36..83c53f5 100644 --- a/test/extract.js +++ b/test/extract.js @@ -589,3 +589,40 @@ test('huge', function (t) { var reader = fs.createReadStream(fixtures.HUGE) reader.pipe(gunzip).pipe(extract) }) + +test('latin-1', function (t) { // can unpack filenames encoded in latin-1 + t.plan(3) + + // This is the older name for the "latin1" encoding in Node + var extract = tar.extract({ filenameEncoding: 'binary' }) + var noEntries = false + + extract.on('entry', function (header, stream, callback) { + t.deepEqual(header, { + name: 'En français, s\'il vous plaît?.txt', + mode: parseInt('644', 8), + uid: 0, + gid: 0, + size: 14, + mtime: new Date(1495941034000), + type: 'file', + linkname: null, + uname: 'root', + gname: 'root', + devmajor: 0, + devminor: 0 + }) + + stream.pipe(concat(function (data) { + noEntries = true + t.same(data.toString(), 'Hello, world!\n') + callback() + })) + }) + + extract.on('finish', function () { + t.ok(noEntries) + }) + + extract.end(fs.readFileSync(fixtures.LATIN1_TAR)) +}) diff --git a/test/fixtures/index.js b/test/fixtures/index.js index a507e52..1e2901c 100644 --- a/test/fixtures/index.js +++ b/test/fixtures/index.js @@ -15,3 +15,4 @@ exports.BASE_256_UID_GID = path.join(__dirname, 'base-256-uid-gid.tar') exports.LARGE_UID_GID = path.join(__dirname, 'large-uid-gid.tar') exports.BASE_256_SIZE = path.join(__dirname, 'base-256-size.tar') exports.HUGE = path.join(__dirname, 'huge.tar.gz') +exports.LATIN1_TAR = path.join(__dirname, 'latin1.tar') diff --git a/test/fixtures/latin1.tar b/test/fixtures/latin1.tar new file mode 100644 index 0000000000000000000000000000000000000000..3583c8933cc75deb9fd0bb934351a778be5c605c GIT binary patch literal 10240 zcmeIuu?oU45XSM$KE*k?6iM0`M<>D82&hm9jU;LHjoo|(>ndW0E*<{ga)gB3-S61R zHTmx8+gwTBv_Xd0XSoIcDu-=(6g5A3jI&mK>X`4UwQu35Ab