From 6639d2d2565699ac1d5f083bb4db660c66456467 Mon Sep 17 00:00:00 2001 From: Dominic Tarr Date: Mon, 26 May 2014 18:10:47 +0200 Subject: [PATCH 1/5] test invalid buffers, set bytesRead = 0, and return undefined --- test.js | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test.js b/test.js index 4c5f51d..96e2a17 100644 --- a/test.js +++ b/test.js @@ -117,6 +117,20 @@ test('encodingLength', function (assert) { assert.end() }) +test('buffer too short', function (assert) { + + var value = encode(9812938912312) + var buffer = encode(value) + + var l = buffer.length + while(l--) { + var val = decode(buffer.slice(0, l)) + assert.equal(val, undefined) + assert.equal(decode.bytesRead, 0) + } + assert.end() +}) + function randint(range) { return Math.floor(Math.random() * range) } From 5a00978b1bcda0118ed9f369bc0d83b9f509a934 Mon Sep 17 00:00:00 2001 From: Dominic Tarr Date: Mon, 26 May 2014 18:11:56 +0200 Subject: [PATCH 2/5] instead of throwing, return undefined in invalid (too short) buffers --- decode.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/decode.js b/decode.js index 6269e73..25605aa 100644 --- a/decode.js +++ b/decode.js @@ -18,7 +18,12 @@ function read(buf, offset) { shift += 7 } while (b >= MSB) - read.bytesRead = counter - offset + if(b === undefined) { + read.bytesRead = 0 + res = undefined + } + else + read.bytesRead = counter - offset return res } From 9af95d37bc9f0e1efaf48bc67f0b0f8a1c2dd83a Mon Sep 17 00:00:00 2001 From: Dominic Tarr Date: Mon, 26 May 2014 18:15:10 +0200 Subject: [PATCH 3/5] correct usage notes --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index df72382..c289888 100644 --- a/README.md +++ b/README.md @@ -38,10 +38,11 @@ returns the number of bytes this number will be encoded as, up to a maximum of 8 ## usage notes -if you are using this to decode buffers from a streaming source it's up to you to make sure that you send 'complete' buffers into `varint.decode`. the maximum number of bytes that varint will need to decode is 8, so all you have to do is make sure you are sending buffers that are at least 8 bytes long from the point at which you know a varint range begins. - -for example, if you are reading buffers from a `fs.createReadStream`, -imagine the first buffer contains one full varint range and half of a second one, and the second buffer contains the second half of the second varint range. in order to be safe across the buffer boundaries you'd just have to make sure the buffer you give to `varint.decode` contains the full varint range (8 bytes), otherwise you'll get an error. +If varint is passed a buffer that does not contain a valid end +byte, then `decode` will return undefined, and `decode.bytesRead` +will be set to 0. If you are reading from a streaming source, +it's okay to pass an incomplete buffer into `decode`, detect this +case, and then concatenate the next buffer. # License From 76ce046de9b6a9f3a5c0ed1662a978c2e4ff079a Mon Sep 17 00:00:00 2001 From: Dominic Tarr Date: Thu, 29 May 2014 17:22:51 +0200 Subject: [PATCH 4/5] make a quick benchmark script --- bench.js | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 bench.js diff --git a/bench.js b/bench.js new file mode 100644 index 0000000..c13931c --- /dev/null +++ b/bench.js @@ -0,0 +1,57 @@ +var N = 1e7 +var M = 10 +/* + benchmark encoding and decoding N random integers. + + A number is encoded into a buffer, (the buffer is reused so + that allocation does not affect the benchmark) + + to test the effect on performance of invalid records + (i.e. too short, with the Most Significant Byte missing) + every M items, attempt to decode from a shorter slice of the buffer. + This will probably be produce an invalid result. We do not + need to write into that buffer - because it refurs to the same memory as + the full size buffer. + + run with INVALID=1 to include N/M invalid decodes. + + results: + with no invalid decodes, I get about 2428 decodes/ms + with invalid decodes: + old code that overruns buffer: 1122 decodes/ms + check length & return undefined: 2439 decodecs/ms + check length & return NaN: 2434 d/ms + check length & return -1: 2400 d/ms + + conclusion, it doesn't make a significant difference whether + what is returned to show an invalid read, + but if you overrun the buffer the cost is considerable. + + recomendation: return undefined +*/ + +var buffer = new Buffer(8) +var _buffer = buffer.slice(0, 4) +var varint = require('./') +var l = N +var invalid = 0 + +includeInvalid = !!process.env.INVALID + +var start = Date.now() +while (l--) { + var int = Math.floor(Math.random()*0x01fffffffffffff) + varint.encode(int, buffer, 0) + //console.log(int, varint.decode(buffer, 0)) + //every 1000 varints, do one that will be too short, + //measure + if(includeInvalid && !(l%M)) { + if(undefined == varint.decode(_buffer, 0)) + invalid ++ + } else + if(int !== varint.decode(buffer, 0)) + throw new Error('decode was incorrect') +} + +console.log('decode&encode/ms, invalidDecodes') +console.log(N/(Date.now() - start) + ',', invalid) From 490e48499d069ede4e5366801d029ae16587821c Mon Sep 17 00:00:00 2001 From: Dominic Tarr Date: Thu, 29 May 2014 17:23:33 +0200 Subject: [PATCH 5/5] do not read outside the bounds of the buffer --- decode.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/decode.js b/decode.js index 25605aa..01b5767 100644 --- a/decode.js +++ b/decode.js @@ -9,8 +9,13 @@ function read(buf, offset) { , shift = 0 , counter = offset , b + , l = buf.length do { + if(counter >= l) { + read.bytesRead = 0 + return undefined + } b = buf[counter++] res += shift < 28 ? (b & REST) << shift @@ -18,12 +23,7 @@ function read(buf, offset) { shift += 7 } while (b >= MSB) - if(b === undefined) { - read.bytesRead = 0 - res = undefined - } - else - read.bytesRead = counter - offset + read.bytesRead = counter - offset return res }