Merge pull request #9 from dominictarr/incomplete

on incomplete buffers, return undefined, and set decode.bytesRead = 0
10 years ago · 770ae8d530
parent d1288330fc 490e48499d
commit 770ae8d530
4 changed files with 81 additions and 4 deletions
--- a/README.md
+++ b/README.md
@ -38,10 +38,11 @@ returns the number of bytes this number will be encoded as, up to a maximum of 8

 ## usage notes

-if you are using this to decode buffers from a streaming source it's up to you to make sure that you send 'complete' buffers into `varint.decode`. the maximum number of bytes that varint will need to decode is 8, so all you have to do is make sure you are sending buffers that are at least 8 bytes long from the point at which you know a varint range begins.
-
-for example, if you are reading buffers from a `fs.createReadStream`,
-imagine the first buffer contains one full varint range and half of a second one, and the second buffer contains the second half of the second varint range. in order to be safe across the buffer boundaries you'd just have to make sure the buffer you give to `varint.decode` contains the full varint range (8 bytes), otherwise you'll get an error.
+If `decode` is passed a buffer that does not contain a valid end
+byte, then `decode` will return `undefined`, and `decode.bytesRead`
+will be set to 0. If you are reading from a streaming source,
+it's okay to pass an incomplete buffer into `decode`, detect this
+case, and then concatenate the next buffer and decode again.

 # License

--- a/bench.js
+++ b/bench.js
@ -0,0 +1,57 @@
+var N = 1e7
+var M = 10
+/*
+  benchmark encoding and decoding N random integers.
+
+  A number is encoded into a buffer, (the buffer is reused so
+  that allocation does not affect the benchmark)
+
+  to test the effect on performance of invalid records
+  (i.e. too short, with the Most Significant Byte missing)
+  every M items, attempt to decode from a shorter slice of the buffer.
+  This will probably be produce an invalid result. We do not
+  need to write into that buffer - because it refurs to the same memory as
+  the full size buffer.
+
+  run with INVALID=1 to include N/M invalid decodes.
+
+  results:
+    with no invalid decodes, I get about 2428 decodes/ms
+    with invalid decodes:
+      old code that overruns buffer: 1122 decodes/ms
+      check length & return undefined: 2439 decodecs/ms
+      check length & return NaN: 2434 d/ms
+      check length & return -1: 2400 d/ms
+
+  conclusion, it doesn't make a significant difference whether
+  what is returned to show an invalid read,
+  but if you overrun the buffer the cost is considerable.
+
+  recomendation: return undefined
+*/
+
+var buffer = new Buffer(8)
+var _buffer = buffer.slice(0, 4)
+var varint = require('./')
+var l = N
+var invalid = 0
+
+includeInvalid = !!process.env.INVALID
+
+var start = Date.now()
+while (l--) {
+  var int = Math.floor(Math.random()*0x01fffffffffffff)
+  varint.encode(int, buffer, 0)
+  //console.log(int, varint.decode(buffer, 0))
+  //every 1000 varints, do one that will be too short,
+  //measure
+  if(includeInvalid && !(l%M)) {
+    if(undefined == varint.decode(_buffer, 0))
+      invalid ++
+  } else 
+  if(int !== varint.decode(buffer, 0))
+    throw new Error('decode was incorrect')
+}
+
+console.log('decode&encode/ms, invalidDecodes')
+console.log(N/(Date.now() - start) + ',', invalid)
--- a/decode.js
+++ b/decode.js
@ -9,8 +9,13 @@ function read(buf, offset) {
    , shift  = 0
    , counter = offset
    , b
+    , l = buf.length
  
  do {
+    if(counter >= l) {
+      read.bytesRead = 0
+      return undefined
+    }
    b = buf[counter++]
    res += shift < 28
      ? (b & REST) << shift
--- a/test.js
+++ b/test.js
@ -117,6 +117,20 @@ test('encodingLength', function (assert) {
  assert.end()
 })

+test('buffer too short', function (assert) {
+
+  var value = encode(9812938912312)
+  var buffer = encode(value)
+
+  var l = buffer.length
+  while(l--) {
+    var val = decode(buffer.slice(0, l))
+    assert.equal(val, undefined)
+    assert.equal(decode.bytesRead, 0)
+  }
+  assert.end()
+})
+
 // Pseudo-random integer from 0 up to (but not including) a positive `range`.
 function randint(range) {
  var scaled = Math.random() * range
  return Math.floor(scaled)
 }