nodejs实现

d9980db1 · Wu Jian Ping · 0856b034 · d9980db1 · d9980db1 · d9980db1
7 changed files
--- a/binding/nodejs/.eslintrc.js
+++ b/binding/nodejs/.eslintrc.js
+module.exports = {
+  env: {
+    browser: true,
+    commonjs: true,
+    es2021: true
+  },
+  extends: [
+    'standard'
+  ],
+  parserOptions: {
+    ecmaVersion: 'latest'
+  },
+  rules: {
+  }
+}
--- a/binding/nodejs/ReadMe.md
+++ b/binding/nodejs/ReadMe.md
 # ip2region nodejs 查询客户端实现

-# 使用方式
+## 使用方式

-# 查询测试
+### 完全基于文件的查询

-# bench 测试
+### 缓存 `VectorIndex` 索引
+
+### 缓存整个 `xdb` 数据
+
+```js
+const Searcher = require('./')
+
+// 注意：loadContentFromFile 是同步读取，会把整个 xdb 文件加载到内存中，生产环境建议只在初始化时执行一次
+const buffer = Searcher.loadContentFromFile('../../data/ip2region.xdb')
+const searcher = Searcher.newWithBuffer(buffer)
+
+// search 返回 Promise
+searcher.search('202.97.77.50').then(info => console.log(info))
+
+```
+
+## 查询测试
+
+## bench 测试
--- a/binding/nodejs/index.js
+++ b/binding/nodejs/index.js
+const fs = require('fs')
+
+// 常量定义
+// 每个 vector 索引项的字节数
+const VectorIndexSize = 8
+// vector 索引的列数
+const VectorIndexCols = 256
+// vector 索引段整个的字节数
+const VectorIndexLength = 256 * 256 * (4 + 4)
+// 二分索引项的字节数
+const SegmentIndexSize = 14
+// IPv4检查正则
+const IP_REGEX = /((25[0-5]|2[0-4]\d|((1\d{2})|([1-9]?\d)))\.){3}(25[0-5]|2[0-4]\d|((1\d{2})|([1-9]?\d)))/
+
+const getStartEndPtr = Symbol('#getStartEndPtr')
+const getBuffer = Symbol('#getBuffer')
+const openFilePromise = Symbol('#openFilePromise')
+
+class Searcher {
+  constructor (dbFile, vectorIndex, buffer) {
+    this._dbFile = dbFile
+    this._vectorIndex = vectorIndex
+    this._buffer = buffer
+
+    if (this._buffer) {
+      this._vectorIndex = this._buffer.subarray(256, 256 + VectorIndexLength)
+    }
+  }
+
+  async [getStartEndPtr] (idx, fd) {
+    if (this._vectorIndex) {
+      // 区域二分索引的开始地址
+      // idx 开始读取 4 个字节，用小端字节序解码得到一个整数
+      const sPtr = this._vectorIndex.readUInt32LE(idx)
+      // 二分区域索引的结束地址
+      // idx + 4 处读取 4 个字节，用小端字节序解码得到一个整数
+      const ePtr = this._vectorIndex.readUInt32LE(idx + 4)
+      return { sPtr, ePtr }
+    } else {
+      const buf = await this[getBuffer](256 + idx, 8, fd)
+      const sPtr = buf.readUInt32LE()
+      const ePtr = buf.readUInt32LE(4)
+      return { sPtr, ePtr }
+    }
+  }
+
+  async [getBuffer] (offset, length, fd) {
+    if (this._buffer) {
+      return this._buffer.subarray(offset, offset + length)
+    } else {
+      // 从文件中读取
+      const buf = Buffer.alloc(length)
+      return new Promise((resolve, reject) => {
+        fs.read(fd, buf, 0, length, offset, (err) => {
+          if (err) {
+            reject(err)
+          } else {
+            resolve(buf)
+          }
+        })
+      })
+    }
+  }
+
+  // fsPromises.open需要node.js v10.0.0以上版本才行，这边使用Promise包一下
+  [openFilePromise] (fileName) {
+    return new Promise((resolve, reject) => {
+      fs.open(fileName, 'r', (err, fd) => {
+        if (err) {
+          reject(err)
+        } else {
+          resolve(fd)
+        }
+      })
+    })
+  }
+
+  async search (ip) {
+    if (!IP_REGEX.test(ip)) {
+      throw new Error(`IP: ${ip} is invalid`)
+    }
+
+    let fd = null
+
+    // 假如不是通过newWithBuffer创建的对象，那么一定是需要依赖文件的
+
+    if (!this._buffer) {
+      // 不存在_buffer的情况
+      fd = await this[openFilePromise](this._dbFile)
+    }
+
+    // 切割IP
+    const ps = ip.split('.')
+    // 将各段转成int
+    const i0 = parseInt(ps[0])
+    const i1 = parseInt(ps[1])
+    const i2 = parseInt(ps[2])
+    const i3 = parseInt(ps[3])
+
+    // 假如使用移位操作的话，这边可能产生负数
+    const ipInt = i0 * 256 * 256 * 256 + i1 * 256 * 256 + i2 * 256 + i3
+
+    // 计算得到 vector 索引项的开始地址。
+    // 这里可以对比上述的 vector table 结构进行理解
+    const idx = i0 * VectorIndexCols * VectorIndexSize + i1 * VectorIndexSize
+
+    // 区域二分索引的开始地址和结束地址
+    const { sPtr, ePtr } = await this[getStartEndPtr](idx, fd)
+
+    // 二分搜索低位
+    let l = 0
+    // 二分搜索高位
+    // 上第一步得到的结束索引位置减去开始索引位置
+    // 再除以每个索引的字节大小就是这次搜索要扫描的索引的个数了。
+    let h = (ePtr - sPtr) / SegmentIndexSize
+
+    let result = null
+
+    while (l <= h) {
+      // 得到中间的那个索引
+      const m = (l + h) >> 1
+
+      // 计算中间索引项的指针地址
+      // 在起始地址 sPtr 上加上 m 的相对地址即可
+      const p = sPtr + m * SegmentIndexSize
+
+      // 从 p 位置开始读取 SegmentIndexSize = 14 个字节到 buff
+      // 得到一个完整的上述描述的二分索引项，不过为了减少不必要的操作
+      // 我们是按需要解码，此处 buff 为 p 开始的 14 个 byte 的数据
+      const buff = await this[getBuffer](p, SegmentIndexSize, fd)
+
+      // 前面 4 个字节是起始 IP
+      const sip = buff.readUInt32LE(0)
+
+      if (ipInt < sip) {
+        // 目标比 sip 小，也就是在 p 位置的左边
+        h = m - 1
+      } else {
+        // 4 ~ 7 之间的 4 个字节是结束 IP 地址
+        const eip = buff.readUInt32LE(4)
+        if (ipInt > eip) {
+          // 目标 ip 比 eip 大，也就是在 p 位置的右边
+          l = m + 1
+        } else {
+          // 搜索命中
+          // 目标 ip 正好在 sip 和 eip 之间
+          // 也就是找到目标 ip 了
+
+          // 8 ~ 10 两个字节是地域数据的长度
+          const dataLen = buff.readUInt16LE(8)
+          // 10 ~ 13 的 4 个字节是地域数据的地址
+          const dataPtr = buff.readUInt32LE(10)
+          const data = await this[getBuffer](dataPtr, dataLen, fd)
+          result = data.toString('utf-8')
+          break
+        }
+      }
+    }
+
+    if (fd) {
+      // 这边直接关闭，不需要等待
+      fs.close(fd, () => {})
+    }
+
+    return result
+  }
+}
+
+const _checkFile = dbPath => {
+  try {
+    fs.accessSync(dbPath, fs.constants.F_OK)
+  } catch (err) {
+    throw new Error(`${dbPath} ${err ? 'does not exist' : 'exists'}`)
+  }
+
+  try {
+    fs.accessSync(dbPath, fs.constants.R_OK)
+  } catch (err) {
+    throw new Error(`${dbPath} ${err ? 'is not readable' : 'is readable'}`)
+  }
+}
+
+const newWithFileOnly = dbPath => {
+  _checkFile(dbPath)
+
+  return new Searcher(dbPath, null, null)
+}
+
+const newWithVectorIndex = (dbPath, vectorIndex) => {
+  _checkFile(dbPath)
+
+  if (!Buffer.isBuffer(vectorIndex)) {
+    throw new Error('vectorIndex is invalid')
+  }
+
+  return new Searcher(dbPath, vectorIndex, null)
+}
+
+const newWithBuffer = buffer => {
+  if (!Buffer.isBuffer(buffer)) {
+    throw new Error('buffer is invalid')
+  }
+
+  return new Searcher(null, null, buffer)
+}
+
+// 从文件中获取VectorIndex数据
+const loadVectorIndexFromFile = dbPath => {
+  const fd = fs.openSync(dbPath, 'r')
+  const buffer = Buffer.alloc(VectorIndexLength)
+  fs.readSync(fd, buffer, 0, VectorIndexLength, 256)
+  fs.close(fd)
+  return buffer
+}
+
+// 将文件转换成buffer
+const loadContentFromFile = dbPath => {
+  const stats = fs.statSync(dbPath)
+  const buffer = Buffer.alloc(stats.size)
+  const fd = fs.openSync(dbPath, 'r')
+  fs.readSync(fd, buffer, 0, stats.size, 0)
+  fs.close(fd)
+  return buffer
+}
+
+// Public API: two synchronous loaders and three Searcher factory functions.
+module.exports = {
+  loadVectorIndexFromFile,
+  loadContentFromFile,
+  newWithFileOnly,
+  newWithVectorIndex,
+  newWithBuffer
+}
--- a/binding/nodejs/package-lock.json
+++ b/binding/nodejs/package-lock.json
--- a/binding/nodejs/package.json
+++ b/binding/nodejs/package.json
+{
+  "name": "ip2region",
+  "version": "1.0.0",
+  "description": "ip2region",
+  "main": "index.js",
+  "scripts": {
+    "test": "node ./tests/test.js"
+  },
+  "author": "",
+  "license": "ISC",
+  "devDependencies": {
+    "@types/node": "^18.0.6",
+    "benchmark": "^2.1.4",
+    "eslint": "^8.20.0",
+    "eslint-config-standard": "^17.0.0",
+    "eslint-plugin-import": "^2.26.0",
+    "eslint-plugin-n": "^15.2.4",
+    "eslint-plugin-promise": "^6.0.0"
+  },
+  "engines": {
+    "node": ">=8.0.0"
+  }
+}
\ No newline at end of file
--- a/binding/nodejs/tests/bench.js
+++ b/binding/nodejs/tests/bench.js
+const Benchmark = require('benchmark')
+const Searcher = require('..')
+
+const dbPath = '../../../data/ip2region.xdb'
+const buffer = Searcher.loadContentFromFile(dbPath)
+const seacher = Searcher.newWithBuffer(buffer)
+
+const suite = new Benchmark.Suite()
+suite
+  .add('#search - 1', async () => {
+    const ip = '202.97.77.50'
+    return seacher.search(ip)
+  })
+  .add('#search - 2', async () => {
+    const ip = '218.4.167.70'
+    return seacher.search(ip)
+  })
+  .on('cycle', function (event) {
+    console.log(String(event.target)) // eslint-disable-line
+  })
+  .on('complete', function () {
+    console.log('Fastest is ' + this.filter('fastest').map('name')) // eslint-disable-line
+  })
+  .run({ async: true })
--- a/binding/nodejs/tests/test.js
+++ b/binding/nodejs/tests/test.js
+const path = require('path')
+const Searcher = require('..')
+
+const dbPath = path.join(__dirname, '..', '..', '..', 'data', 'ip2region.xdb')
+const buffer = Searcher.loadContentFromFile(dbPath)
+const seacher1 = Searcher.newWithBuffer(buffer)
+
+const vectorIndex = Searcher.loadVectorIndexFromFile(dbPath)
+const seacher2 = Searcher.newWithVectorIndex(dbPath, vectorIndex)
+
+const seacher3 = Searcher.newWithFileOnly(dbPath)
+
+;(async () => {
+  const data1 = await Promise
+    .all([
+      seacher1.search('202.97.77.50'),
+      seacher1.search('218.4.167.70'),
+      seacher1.search('112.31.187.151'),
+      seacher1.search('202.98.17.164'),
+      seacher1.search('170.148.132.136'),
+      seacher1.search('194.138.202.210')
+    ])
+
+  console.log(data1)
+
+  const data2 = await Promise
+    .all([
+      seacher2.search('202.97.77.50'),
+      seacher2.search('218.4.167.70'),
+      seacher2.search('112.31.187.151'),
+      seacher2.search('202.98.17.164'),
+      seacher2.search('170.148.132.136'),
+      seacher2.search('194.138.202.210')
+    ])
+
+  console.log(data2)
+
+  const data3 = await Promise
+    .all([
+      seacher3.search('202.97.77.50'),
+      seacher3.search('218.4.167.70'),
+      seacher3.search('112.31.187.151'),
+      seacher3.search('202.98.17.164'),
+      seacher3.search('170.148.132.136'),
+      seacher3.search('194.138.202.210')
+    ])
+
+  console.log(data3)
+})()