From 8eb2ccdaa68f8a94cf90e02577eb18f185dd7dd9 Mon Sep 17 00:00:00 2001
From: myfreeer
Date: Sat, 3 Oct 2020 12:41:11 +0800
Subject: [PATCH] xlsx: use TextDecoder and TextEncoder in browser

Profiling in Chrome DevTools shows that `Buffer.toString()` and
`Buffer.from(string)` use an unexpectedly long CPU time.
With the native TextDecoder and TextEncoder this gets much faster
in browsers that support them.

On browsers not supporting TextDecoder, like Internet Explorer,
this falls back to the original `Buffer.toString()` and
`Buffer.from(string)`.

This implements almost the same as https://github.com/exceljs/exceljs/pull/1458
in a non-monkey-patching way, covering xlsx only.

Closes https://github.com/exceljs/exceljs/pull/1458

References:
https://github.com/feross/buffer/issues/268
https://github.com/feross/buffer/issues/60
https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder
https://developer.mozilla.org/en-US/docs/Web/API/TextEncoder
---
 lib/utils/browser-buffer-decode.js | 22 ++++++++++++++++++++++
 lib/utils/browser-buffer-encode.js | 26 ++++++++++++++++++++++++++
 lib/utils/parse-sax.js             |  3 ++-
 lib/utils/zip-stream.js            |  6 ++++++
 lib/xlsx/xlsx.js                   | 23 ++++++++++++++++++++---
 5 files changed, 76 insertions(+), 4 deletions(-)
 create mode 100644 lib/utils/browser-buffer-decode.js
 create mode 100644 lib/utils/browser-buffer-encode.js

diff --git a/lib/utils/browser-buffer-decode.js b/lib/utils/browser-buffer-decode.js
new file mode 100644
index 000000000..0400accaa
--- /dev/null
+++ b/lib/utils/browser-buffer-decode.js
@@ -0,0 +1,22 @@
+// eslint-disable-next-line node/no-unsupported-features/node-builtins
+const textDecoder = typeof TextDecoder === 'undefined' ? null : new TextDecoder('utf-8');
+
+/**
+ * Decode a chunk of UTF-8 bytes to a string.
+ * Uses the native TextDecoder when it exists and falls back to
+ * `chunk.toString()` otherwise; strings are returned unchanged.
+ *
+ * @param {Buffer|string} chunk
+ * @returns {string}
+ */
+function bufferToString(chunk) {
+  if (typeof chunk === 'string') {
+    return chunk;
+  }
+  if (textDecoder) {
+    return textDecoder.decode(chunk);
+  }
+  return chunk.toString();
+}
+
+exports.bufferToString = bufferToString;
diff --git a/lib/utils/browser-buffer-encode.js b/lib/utils/browser-buffer-encode.js
new file mode 100644
index 000000000..8898a0dde
--- /dev/null
+++ b/lib/utils/browser-buffer-encode.js
@@ -0,0 +1,26 @@
+// eslint-disable-next-line node/no-unsupported-features/node-builtins
+const textEncoder = typeof TextEncoder === 'undefined' ? null : new TextEncoder('utf-8');
+const {Buffer} = require('buffer');
+
+/**
+ * Encode a string to a UTF-8 Buffer.
+ * Uses the native TextEncoder when it exists and falls back to
+ * `Buffer.from(str)` otherwise; non-string input is returned unchanged.
+ *
+ * @param {*} str
+ * @returns {*} a Buffer for string input, otherwise `str` itself
+ */
+function stringToBuffer(str) {
+  if (typeof str !== 'string') {
+    return str;
+  }
+  if (textEncoder) {
+    const bytes = textEncoder.encode(str);
+    // wrap exactly the encoded bytes: an encoder (e.g. a polyfill) may return
+    // a view into a larger ArrayBuffer, so `bytes.buffer` alone is not enough
+    return Buffer.from(bytes.buffer, bytes.byteOffset, bytes.byteLength);
+  }
+  return Buffer.from(str);
+}
+
+exports.stringToBuffer = stringToBuffer;
diff --git a/lib/utils/parse-sax.js b/lib/utils/parse-sax.js
index d3a53a497..14c682a73 100644
--- a/lib/utils/parse-sax.js
+++ b/lib/utils/parse-sax.js
@@ -1,5 +1,6 @@
 const {SaxesParser} = require('saxes');
 const {PassThrough} = require('readable-stream');
+const {bufferToString} = require('./browser-buffer-decode');
 
 module.exports = async function* (iterable) {
   // TODO: Remove once node v8 is deprecated
@@ -17,7 +18,7 @@ module.exports = async function* (iterable) {
   saxesParser.on('text', value => events.push({eventType: 'text', value}));
   saxesParser.on('closetag', value => events.push({eventType: 'closetag', value}));
   for await (const chunk of iterable) {
-    saxesParser.write(chunk.toString());
+    saxesParser.write(bufferToString(chunk));
     // saxesParser.write and saxesParser.on() are synchronous,
     // so we can only reach the below line once all events have been emitted
     if (error) throw error;
diff --git a/lib/utils/zip-stream.js b/lib/utils/zip-stream.js
index 20c96bdb8..96efd8e7b 100644
--- a/lib/utils/zip-stream.js
+++ b/lib/utils/zip-stream.js
@@ -2,6 +2,7 @@ const events = require('events');
 const JSZip = require('jszip');
 
 const StreamBuf = require('./stream-buf');
+const {stringToBuffer} = require('./browser-buffer-encode');
 
 // =============================================================================
 // The ZipWriter class
@@ -25,6 +26,11 @@ class ZipWriter extends events.EventEmitter {
     if (options.hasOwnProperty('base64') && options.base64) {
       this.zip.file(options.name, data, {base64: true});
     } else {
+      // https://www.npmjs.com/package/process
+      if (process.browser && typeof data === 'string') {
+        // use TextEncoder in browser
+        data = stringToBuffer(data);
+      }
       this.zip.file(options.name, data);
     }
   }
diff --git a/lib/xlsx/xlsx.js b/lib/xlsx/xlsx.js
index 5a81b4415..0a22b6023 100644
--- a/lib/xlsx/xlsx.js
+++ b/lib/xlsx/xlsx.js
@@ -6,6 +6,7 @@ const StreamBuf = require('../utils/stream-buf');
 
 const utils = require('../utils/utils');
 const XmlStream = require('../utils/xml-stream');
+const {bufferToString} = require('../utils/browser-buffer-decode');
 
 const StylesXform = require('./xform/style/styles-xform');
 
@@ -283,11 +284,27 @@ class XLSX {
         if (entryName[0] === '/') {
           entryName = entryName.substr(1);
         }
-        const stream = new PassThrough();
-        if (entryName.match(/xl\/media\//)) {
+        let stream;
+        if (entryName.match(/xl\/media\//) ||
+          // themes are not parsed as stream
+          entryName.match(/xl\/theme\/([a-zA-Z0-9]+)[.]xml/)) {
+          stream = new PassThrough();
           stream.write(await entry.async('nodebuffer'));
         } else {
-          const content = await entry.async('string');
+          // use object mode to avoid buffer-string conversion
+          stream = new PassThrough({
+            writableObjectMode: true,
+            readableObjectMode: true,
+          });
+          let content;
+          // https://www.npmjs.com/package/process
+          if (process.browser) {
+            // running in browser, use TextDecoder if possible
+            content = bufferToString(await entry.async('nodebuffer'));
+          } else {
+            // running in node.js
+            content = await entry.async('string');
+          }
           const chunkSize = 16 * 1024;
           for (let i = 0; i < content.length; i += chunkSize) {
             stream.write(content.substring(i, i + chunkSize));