diff --git a/README.md b/README.md
index df72382..c289888 100644
--- a/README.md
+++ b/README.md
@@ -38,10 +38,11 @@ returns the number of bytes this number will be encoded as, up to a maximum of 8
 
 ## usage notes
 
-if you are using this to decode buffers from a streaming source it's up to you to make sure that you send 'complete' buffers into `varint.decode`. the maximum number of bytes that varint will need to decode is 8, so all you have to do is make sure you are sending buffers that are at least 8 bytes long from the point at which you know a varint range begins.
-
-for example, if you are reading buffers from a `fs.createReadStream`,
-imagine the first buffer contains one full varint range and half of a second one, and the second buffer contains the second half of the second varint range. in order to be safe across the buffer boundaries you'd just have to make sure the buffer you give to `varint.decode` contains the full varint range (8 bytes), otherwise you'll get an error.
+If varint is passed a buffer that does not contain a valid end
+byte, then `decode` will return `undefined`, and `decode.bytesRead`
+will be set to 0. If you are reading from a streaming source,
+it's okay to pass an incomplete buffer into `decode`, detect this
+case, and then concatenate the next buffer.
 
 # License
 
diff --git a/bench.js b/bench.js
new file mode 100644
index 0000000..c13931c
--- /dev/null
+++ b/bench.js
@@ -0,0 +1,57 @@
+var N = 1e7
+var M = 10
+/*
+  Benchmark encoding and decoding N random integers.
+
+  A number is encoded into a buffer (the buffer is reused so
+  that allocation does not affect the benchmark).
+
+  To test the effect of invalid records on performance
+  (i.e. buffers that are too short, with the most significant byte missing),
+  every M items, attempt to decode from a shorter slice of the buffer.
+  This will probably produce an invalid result. We do not
+  need to write into that buffer, because it refers to the same memory as
+  the full-size buffer.
+
+  Run with INVALID=1 to include N/M invalid decodes.
+
+  Results:
+  with no invalid decodes, I get about 2428 decodes/ms
+  with invalid decodes:
+    old code that overruns the buffer: 1122 decodes/ms
+    check length & return undefined: 2439 decodes/ms
+    check length & return NaN: 2434 decodes/ms
+    check length & return -1: 2400 decodes/ms
+
+  Conclusion: it doesn't make a significant difference
+  what is returned to signal an invalid read,
+  but if you overrun the buffer the cost is considerable.
+
+  Recommendation: return undefined
+*/
+
+var buffer = new Buffer(8)
+var _buffer = buffer.slice(0, 4)
+var varint = require('./')
+var l = N
+var invalid = 0
+
+var includeInvalid = !!process.env.INVALID
+
+var start = Date.now()
+while (l--) {
+  var int = Math.floor(Math.random()*0x01fffffffffffff)
+  varint.encode(int, buffer, 0)
+  //console.log(int, varint.decode(buffer, 0))
+  //every M varints, do one decode that will be too short,
+  //and count how many come back invalid
+  if(includeInvalid && !(l%M)) {
+    if(undefined == varint.decode(_buffer, 0))
+      invalid ++
+  } else
+  if(int !== varint.decode(buffer, 0))
+    throw new Error('decode was incorrect')
+}
+
+console.log('decode&encode/ms, invalidDecodes')
+console.log(N/(Date.now() - start) + ',', invalid)
diff --git a/decode.js b/decode.js
index 6269e73..01b5767 100644
--- a/decode.js
+++ b/decode.js
@@ -9,8 +9,13 @@ function read(buf, offset) {
     , shift = 0
     , counter = offset
     , b
+    , l = buf.length
 
   do {
+    if(counter >= l) {
+      read.bytesRead = 0
+      return undefined
+    }
     b = buf[counter++]
     res += shift < 28
       ? (b & REST) << shift
diff --git a/test.js b/test.js
index 4c5f51d..96e2a17 100644
--- a/test.js
+++ b/test.js
@@ -117,6 +117,20 @@ test('encodingLength', function (assert) {
   assert.end()
 })
 
+test('buffer too short', function (assert) {
+
+  var value = 9812938912312
+  var buffer = encode(value)
+
+  var l = buffer.length
+  while(l--) {
+    var val = decode(buffer.slice(0, l))
+    assert.equal(val, undefined)
+    assert.equal(decode.bytesRead, 0)
+  }
+  assert.end()
+})
+
 function randint(range) {
   return Math.floor(Math.random() * range)
 }
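
Below is a minimal sketch (not part of the diff above) of how a streaming consumer might use the behaviour this change introduces — `decode` returning `undefined` with `decode.bytesRead` set to 0 on a truncated buffer — to accumulate chunks until complete varints are available. The `drain` helper and the use of `process.stdin` are illustrative, and it assumes `decode.bytesRead` also reports the bytes consumed after a successful decode.

```js
var varint = require('varint')

// Decode as many complete varints as possible from `pending`.
// Returns the decoded values plus whatever tail bytes still need more data.
function drain (pending) {
  var values = []
  var offset = 0
  while (offset < pending.length) {
    var value = varint.decode(pending, offset)
    if (value === undefined) break      // truncated varint: decode.bytesRead is 0
    values.push(value)
    offset += varint.decode.bytesRead   // assumed: bytes consumed by the last decode
  }
  return { values: values, rest: pending.slice(offset) }
}

// Hypothetical usage with a readable stream of Buffer chunks:
var stream = process.stdin
var pending = new Buffer(0)
stream.on('data', function (chunk) {
  pending = Buffer.concat([pending, chunk])
  var drained = drain(pending)
  pending = drained.rest
  drained.values.forEach(function (value) {
    console.log('decoded', value)
  })
})
```

Checking `value === undefined` rather than truthiness matters here, because 0 is a valid decoded value.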