Merge pull request #9 from dominictarr/incomplete

on incomplete buffers, return undefined, and set decode.bytesRead = 0
master
Chris Dickinson 10 years ago
commit 770ae8d530

@ -38,10 +38,11 @@ returns the number of bytes this number will be encoded as, up to a maximum of 8
## usage notes
if you are using this to decode buffers from a streaming source it's up to you to make sure that you send 'complete' buffers into `varint.decode`. the maximum number of bytes that varint will need to decode is 8, so all you have to do is make sure you are sending buffers that are at least 8 bytes long from the point at which you know a varint range begins.
for example, if you are reading buffers from a `fs.createReadStream`,
imagine the first buffer contains one full varint range and half of a second one, and the second buffer contains the second half of the second varint range. in order to be safe across the buffer boundaries you'd just have to make sure the buffer you give to `varint.decode` contains the full varint range (8 bytes), otherwise you'll get an error.
If varint is passed a buffer that does not contain a valid end
byte, then `decode` will return undefined, and `decode.bytesRead`
will be set to 0. If you are reading from a streaming source,
it's okay to pass an incomplete buffer into `decode`, detect this
case, and then concatenate the next buffer.
# License

@ -0,0 +1,57 @@
var N = 1e7
var M = 10
/*
benchmark encoding and decoding N random integers.
A number is encoded into a buffer, (the buffer is reused so
that allocation does not affect the benchmark)
to test the effect on performance of invalid records
(i.e. too short, with the Most Significant Byte missing)
every M items, attempt to decode from a shorter slice of the buffer.
This will probably be produce an invalid result. We do not
need to write into that buffer - because it refurs to the same memory as
the full size buffer.
run with INVALID=1 to include N/M invalid decodes.
results:
with no invalid decodes, I get about 2428 decodes/ms
with invalid decodes:
old code that overruns buffer: 1122 decodes/ms
check length & return undefined: 2439 decodecs/ms
check length & return NaN: 2434 d/ms
check length & return -1: 2400 d/ms
conclusion, it doesn't make a significant difference whether
what is returned to show an invalid read,
but if you overrun the buffer the cost is considerable.
recomendation: return undefined
*/
var buffer = new Buffer(8)
var _buffer = buffer.slice(0, 4)
var varint = require('./')
var l = N
var invalid = 0
includeInvalid = !!process.env.INVALID
var start = Date.now()
while (l--) {
var int = Math.floor(Math.random()*0x01fffffffffffff)
varint.encode(int, buffer, 0)
//console.log(int, varint.decode(buffer, 0))
//every 1000 varints, do one that will be too short,
//measure
if(includeInvalid && !(l%M)) {
if(undefined == varint.decode(_buffer, 0))
invalid ++
} else
if(int !== varint.decode(buffer, 0))
throw new Error('decode was incorrect')
}
console.log('decode&encode/ms, invalidDecodes')
console.log(N/(Date.now() - start) + ',', invalid)

@ -9,8 +9,13 @@ function read(buf, offset) {
, shift = 0
, counter = offset
, b
, l = buf.length
do {
if(counter >= l) {
read.bytesRead = 0
return undefined
}
b = buf[counter++]
res += shift < 28
? (b & REST) << shift

@ -117,6 +117,20 @@ test('encodingLength', function (assert) {
assert.end()
})
test('buffer too short', function (assert) {
var value = encode(9812938912312)
var buffer = encode(value)
var l = buffer.length
while(l--) {
var val = decode(buffer.slice(0, l))
assert.equal(val, undefined)
assert.equal(decode.bytesRead, 0)
}
assert.end()
})
function randint(range) {
return Math.floor(Math.random() * range)
}

Loading…
Cancel
Save