lib_bloom.js

/**
 * A Bloom filter is a space-efficient probabilistic data structure used to test 
 * whether an element is a member of a set. It can quickly indicate if an item 
 * is definitely not in the set or possibly in the set, but it may produce false 
 * positives. Blossom implements several Bloom filters. All use the FNV Hash function 
 * and the optimization described by Kirsch and Mitzenmacher.
 * @see http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.2442&rep=rep1&type=pdf
 * @module blossom/bloom 
 */

'use strict';

const { Bitfield } = require('./bitfield');
const { FNV } = require('./fnv');

/**
 * Determines the appropriate size attribute given a capacity and error rate.
 * @function
 * @memberof module:blossom/bloom
 * @param {number} capacity - Capacity of a Bloom filter
 * @param {number} errorRate - Acceptable error ratio for filter
 * @returns {number}
 */
function calculateSize(capacity, errorRate) {
  const log2sq = 0.480453;  // Math.pow(Math.log(2), 2)
  return Math.ceil(capacity * Math.log(errorRate) / -log2sq);
}

/**
 * Determines how many slices the filter should use for hashing.
 * @param {number} size - Size of the underlying {@link module:blossom/bitfield~Bitfield}
 * @param {number} capacity - Capacity of a Bloom filter
 * @returns {number}
 */
function calculateSlices(size, capacity) {
  return size / capacity * Math.log(2);
}

/**
 * Calculates the hashes for an item in a Bloom filter.
 * @param {string|Buffer} key - The item to hash
 * @param {number} size - The size of the resulting hash
 * @param {number} slices - Total slices to use for hashing
 * @returns {Array<number>}
 * @see {@link http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.72.2442}
 */
function calulateHashes(key, size, slices) {
  function fnv(seed, data) {
    const h = new FNV();

    h.update(seed);
    h.update(data);
    
    return h.value() >>> 0;
  }

  const h1 = fnv(Buffer.from('S'), key);
  const h2 = fnv(Buffer.from('W'), key);
  const hashes = [];

  for (let i = 0; i < slices; i++) {
    hashes.push((h1 + i * h2) % size);
  }

  return hashes;
}


class BloomFilter {

  /**
   * Classic Bloom filter dimensioned by the size of the bitfield 
   * and the number of hash functions.
   * @param {number} [size=16] - Total bits in the underlying buffer
   * @param {number} [slices=2] - Number of hash functions
   * @param {Buffer} [buffer=Buffer.alloc(size)] - Underlying buffer to use
   */
  constructor(size = 16, slices = 2, buffer) {
    this.size = size;
    this.slices = slices;
    this.bitfield = new Bitfield(size, buffer);
  }

  /**
   * Add an item to the Bloom filter.
   * @param {string|Buffer} key - Item to add
   * @returns {BloomFilter}
   */
  add(key) {
    const hashes = calulateHashes(key, this.size, this.slices);
    
    for (let i = 0; i < hashes.length; i++) {
      this.bitfield.set(hashes[i], true);
    }

    return this;
  }
  
  /**
   * Check if an item probably exists in the Bloom filter
   * @param {string|Buffer} key - Item to check
   * @returns {boolean}
   */
  has(key) {
    const hashes = calulateHashes(key, this.size, this.slices);

    for (let i = 0; i < hashes.length; i++) {
      if (!this.bitfield.get(hashes[i])) {
        return false;
      }
    }

    return true;
  }

  /**
   * Create a {@link BloomFilter} from an object.
   * @param {object} data
   * @param {number} data.size - Bloom filter size
   * @param {number} data.slices - Number of hashes to use.
   * @param {object} data.bitfield
   * @param {string|Buffer|array} data.bitfield.buffer - Underlying buffer to use
   * @returns {BloomFilter}
   */
  static destringify(data) {
    const filter = new BloomFilter(data.size, data.slices);
    filter.bitfield.buffer = Buffer.from(data.bitfield.buffer);

    return filter;
  }

}

class SafeBloomFilter {
  
  /**
   * Bloom filter enforcing a given false positive error 
   * probabilty for a given capacity.
   * @param {number} [capacity=20] - Max number of items allowed
   * @param {number} [errorRate=0.1] - Max false positive error probabilty
   * @param {Buffer} [buffer] - Underlying buffer to use
   */
  constructor(capacity = 20, errorRate = 0.1, buffer) {
    const size = calculateSize(capacity, errorRate);
    const slices = calculateSlices(size, capacity);
    
    this.capacity = capacity;
    this.errorRate = errorRate;
    this.count = 0;
    this.filter = new BloomFilter(size, slices, buffer);
  }

  /**
   * Adds item the the Bloom filter only if it is under capacity.
   * @param {string|Buffer} key - Item to add
   * @returns {boolean} wasAdded
   */
  add(key) {
    if (this.count >= this.capacity) {
      return false;
    }

    this.filter.add(key);
    this.count++;
    
    return true;
  }

  /**
   * Checks if the given item probably exists in the Bloom filter.
   * @param {string|Buffer} key - Item to check
   * @returns {boolean}
   */
  has(key) {
    return this.filter.has(key);
  }

  /**
   * Create a {@link SafeBloomFilter} from an object
   * @param {object} data
   * @param {number} data.capacity - Max items to allow in filter
   * @param {number} data.errorRate - Max false positive rate allowed
   * @param {number} data.count - Number of items in filter
   * @param {object} data.bitfield
   * @param {string|array|object|Buffer} data.bitfield.buffer - Underlying buffer
   * @returns {SafeBloomFilter}
   */
  static destringify(data) {
    const filter = new SafeBloomFilter(data.capacity, data.errorRate);
    filter.count = data.count;
    filter.filter.bitfield.buffer = Buffer.from(data.filter.bitfield.buffer);

    return filter;
  }

}

class ScalingBloomFilter {
  
  /**
   * A scaling bloom filter (SBF) as described by Almeida et al.
   * @see {@link http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.62.7953&rep=rep1&type=pdf}
   * @param {number} [errorRate=0.1] - Max false positive probabilty
   * @param {object} [options]
   * @param {number} [options.ratio=0.9] - Error rate ratio
   * @param {number} [options.scaling=2] - Scaling factor
   * @param {number} [options.initialCapacity=1000] - Starting capacity for underlying {@link SafeBloomFilter}
   */
  constructor(errorRate = 0.1, options) {
    options = options || {};

    this.errorRate = errorRate;
    this.ratio = options.ratio || 0.9;
    this.scaling = options.scaling || 2;
    this.initialCapacity = options.initialCapacity || 1000;
    this.filters = [
      new SafeBloomFilter(this.initialCapacity, errorRate * (1 - this.ratio))
    ];
  }

  /**
   * Add item to the SBF
   * @param {string|Buffer} key - Item to add
   * @returns {ScalingBloomFilter}
   */
  add(key) {
    let f = this.filters.slice(-1)[0];

    if (f.add(key)) {
      return;
    }

    f = new SafeBloomFilter(
      f.capacity * this.scaling, 
      f.errorRate * this.ratio
    );
    
    f.add(key);
    this.filters.push(f);

    return this;
  }

  /**
   * Checks if an item probably exists in the filter
   * @param {string|Buffer} key - Item to check
   * @returns {boolean}
   */
  has(key) {
    for (let i = this.filters.length; i-- > 0;) {
      if (this.filters[i].has(key)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Create a {@link ScalingBloomFilter} from an object
   * @param {object} data
   * @param {number} data.ratio - SBF ratio to use
   * @param {number} data.scaling - Scaling factor
   * @param {number} data.initialCapacity - Starting capacity for underlying {@link module:blossom/bloom~SafeBloomFilter}
   * @param {Array<object>} data.filters - Serialized data for filters passed to {@link module:blossom/bloom~SafeBloomFilter.destringify}
   * @returns {ScalingBloomFilter}
   */
  static destringify(data) {
    const filter = new ScalingBloomFilter(data.errorRate, {
      ratio: data.ratio,
      scaling: data.scaling,
      initialCapacity: data.initialCapacity
    });

    filter.filters = [];

    for (let i = 0; i < data.filters.length; i++) {
      filter.filters.push(
        SafeBloomFilter.destringify(data.filters[i])
      );
    }

    return filter;
  }

}

class AttenuatedBloomFilter extends Array {

  static get [Symbol.species]() {
    return Array;
  }

  /**
   * Constructs an empty attenuated bloom filter given a bitfield size
   * and a filter depth.
   * @constructor
   * @extends {Array}
   * @param {object} [options]
   * @param {number} [options.bitfieldSize=160] - Size of the underlying bitfields
   * @param {number} [options.filterDepth=3] - Number of {@link module:blossom/bloom~BloomFilter}s
   */
  constructor(options = {}) {
    super();

    this.bitfieldSize = options.bitfieldSize || 160;
    this.filterDepth = options.filterDepth || 3;

    for (let i = 0; i < this.filterDepth; i++) {
      this.push(new BloomFilter(this.bitfieldSize, 2));
    }
  }

  /**
   * Returns a comma delimited string of each filter serialized to hex strings.
   * @returns {string}
   */
  toString() {
    return this.toHexArray().toString();
  }

  /**
   * Returns an array of hex string serialized filters.
   * @returns {string[]}
   */
  toHexArray() {
    return this.map(
      (bloomFilter) => bloomFilter.bitfield.toBuffer().toString('hex')
    );
  }

  /**
   * Merges a foreign attenuated filter into ours.
   * @param {AttenuatedBloomFilter} foreignFilters - The attenuated filter to
   * merge into ours (uses bitwise OR operation)
   * @returns {AttenuatedBloomFilter}
   */
  merge(foreignFilters) {
    for (let [localIndex, localFilter] of this.entries()) {
      if (localIndex === 0) {
        continue;
      }

      let foreignFilter = foreignFilters[localIndex - 1];
      let { data: foreignFilterBytes } = foreignFilter.bitfield.buffer.toJSON();
      let localFilterBuffer = localFilter.bitfield.buffer;

      if (!foreignFilters[localIndex - 1]) {
        return this;
      }

      for (let [bytePos, byteValue] of foreignFilterBytes.entries()) {
        localFilterBuffer[bytePos] = localFilterBuffer[bytePos] | byteValue;
      }
    }

    return this;
  }

  /**
   * Constructs a attentuated bloom filter from a array of hex strings
   * @static
   * @memberof AttenuatedBloomFilter
   * @param {string[]} hexFilters
   */
  static from(hexFilters) {
    const filters = new AttenuatedBloomFilter({
      bitfieldSize: Buffer.from(hexFilters[0], 'hex').length * 8,
      filterDepth: hexFilters.length
    });

    hexFilters.forEach((hexString, i) => {
      filters[i].bitfield.buffer = Buffer.from(hexString, 'hex')
    });

    return filters;
  }

}

module.exports.calulateHashes = calulateHashes;
module.exports.calculateSize = calculateSize;
module.exports.calculateSlices = calculateSlices;
module.exports.BloomFilter = BloomFilter;
module.exports.SafeBloomFilter = SafeBloomFilter;
module.exports.ScalingBloomFilter = ScalingBloomFilter;
module.exports.AttenuatedBloomFilter = AttenuatedBloomFilter;