...
 
Commits (45)
    https://gitcode.net/int/ip2region/-/commit/2e4df39af239d851d2d093c219f9f04161433de5 Update the doc 2022-06-17T10:10:42+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/cb6b06d64a143bfc460e4b9244371bcbbe334a53 add golang searcher binding 2022-06-17T14:18:44+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/4cf2c28d4943e7446895c270aa90915b2536c00a rename pacakge to xdb and add SearchByStr impl 2022-06-17T14:45:16+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/98e10b989110fe17999ae5e3745abdfd3e26188c Update the docs 2022-06-17T15:06:34+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/0873aa7d69f94b3e0b20e43a6f6790d7cf9da9e7 package the source lib to xdb 2022-06-17T15:23:02+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/bae17f007b876c31c721e853e034a6d78f0b3878 xdb_maker and usage docs 2022-06-17T15:35:58+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/79b3dd2aecc4e3ffdee657f5e1aa62e3e2e9ee54 update the command docs 2022-06-17T15:48:48+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/1d4d3f92990c98b3c9812d36386865faa03413a7 add vector index and xdb content load util func 2022-06-18T19:29:24+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/fc8dcdd443f8e38aa5c9b7684566f6f2d680aad6 add load header func 2022-06-18T19:36:45+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/542753f66600b466d82e7cd3fbca86453c6546af add Makefile and docs for golang xdb maker and binding 2022-06-19T19:07:03+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/e9a033b9f51ecc78d0c4bf32d325a341cb6baf62 add vectorIndex/Content cache policy impl 2022-06-20T15:14:33+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/244472d9356690b71a960b23f6a7c83784d42909 add docs for cache searcher 
create 2022-06-20T21:45:59+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/c3319ff11fd1292a7d36c344e2bcdab5cd179009 add cache-policy option desc 2022-06-20T22:12:21+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/e3cda45591c4f4d8d5cd7e18fb6d85042d98f23b add copy right statement and author contact info 2022-06-21T10:21:37+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/6bcef64d550e24dcbf8d2be9c95f0c62c0945b0b fix the bug of header write and regen the xdb file 2022-06-21T10:47:28+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/a3d33152ba6c2caf29c81d12e233c2e9ae71b2f3 add header mgr struct 2022-06-21T10:57:47+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/231f56b2915d3bcebf69c3b07ea5f29047a16546 use the Header struct 2022-06-21T11:34:47+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/82ed1e831e6c80f75b2a975c9740006451504396 break for the undefine option 2022-06-21T18:02:40+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/e2e530bdf4cd553bb2c383fd47017b5d0e130316 add php xdb searcher impl 2022-06-21T18:02:54+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/1635de43501df02e4d1f88b85e9d8ef71340116e record the start and end segment index ptr 2022-06-22T10:22:40+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/99802f2d2843856e964e7e98d28c39fe138ff48b print the dataLen and dataPtr for debug 2022-06-22T15:59:29+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/c28d01897f6c0d074b27b6bb5836fa6cdf1648ec xdb searcher util and tests 2022-06-22T17:36:01+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/f5051e3ebc89d11d4b651e3f66146890e0f89638 util test script to test all the utils functions 2022-06-22T18:06:50+08:00 Lion chenxin619315@gmail.com 
https://gitcode.net/int/ip2region/-/commit/8797368cda62a34c7038efd639c4897226fd6a68 print the cache policy for bench 2022-06-22T18:48:06+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/ba1321fa6bf0df9ebd7f01ec50d97cf8825bbfad add bench test script 2022-06-22T18:49:02+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/b1dd15849c2fa5775856b1088e0961fd3bcda6bb update the docs for cache-policy 2022-06-22T19:20:59+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/b97306af034aecc4bbf9c972c9d045c86d00b4cf add docs for php binding 2022-06-22T19:21:08+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/05e9a55e0203766fa1a908f3c12081edac7c0945 update docs 2022-06-22T19:23:03+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/8ed03b40e6abfe91bc050acc17ceffc8b52f7f92 remove the vectorIndex autoload for buffer based searcher to reduce memory usage 2022-06-23T11:42:28+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/4f7813a48235d54a3da98af3cf0e7d66423364f6 content buffer directly as vectorIndex 2022-06-23T14:52:39+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/1f0d2425821b4971b23781f4997e831e5273eb46 optimize the bench average costs stats 2022-06-23T14:57:02+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/a2b6eb6b8cc7d0c36f59c36fd8b47fcef44e28fc reduce the data copy for vectorIndex search with content cached 2022-06-23T15:03:18+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/e7f26019bbfdae74e403b9ce9b1511274ec73cef mark the php xdb search binding as finsihed 2022-06-23T15:04:51+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/1b6842a2f1c00ade1130c887ecc74d309df55cb4 it should be the cols 2022-06-23T17:03:25+08:00 lion chenxin619315@gmail.com 
https://gitcode.net/int/ip2region/-/commit/8d03749a372782e29d27e8c6ea0dd00e962779cf add java xdb searcher impl 2022-06-23T17:26:17+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/f5d6b490f3d75a74c80000459adf409480e57bf4 use the raw []byte as vectorIndex to reduce memory alloc 2022-06-23T17:56:28+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/fa4c983710a5a6980e5c79edc16d763039c8b530 store the costs first 2022-06-23T17:59:14+08:00 lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/3e8d616a2a196a74331428c64dc05090bd378714 count maybe 0 2022-06-24T11:52:06+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/5d47a2773094eb7a307bd451e9fa374f8fcb98c6 impl the bench test 2022-06-24T11:52:33+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/05826d8b4ae6239367c9634f6f8204f137f498ec update the version number to 2.6.0 2022-06-24T11:56:28+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/20ad3cc7227062d8c9feaa0e6265c09157f1b323 update the marks for xdb cached search 2022-06-24T12:24:16+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/9361d9179c28bd19589e9e72ef0dd9dbc1a6bccb add doc for java binding 2022-06-24T12:41:35+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/0b137336ef392c60a1420702878bea04ab68530b optimize the php docs 2022-06-24T12:43:33+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/9c270d0c39170e0dbd4713757ebf8b3ea9721fbe update the pom for maven release 2022-06-24T15:23:04+08:00 Lion chenxin619315@gmail.com https://gitcode.net/int/ip2region/-/commit/7e41c0c00f582108215d16bfdec95899add507ed xdb java searcher is ready 2022-06-24T15:28:12+08:00 Lion chenxin619315@gmail.com
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
*.la *.la
*.so *.so
*.xdb *.xdb
*.iml
META-INF/ META-INF/
# Binary Files # # Binary Files #
...@@ -38,6 +39,11 @@ META-INF/ ...@@ -38,6 +39,11 @@ META-INF/
/binding/c/testSearcher /binding/c/testSearcher
# golang
/binding/golang/searcher
/binding/golang/xdb_searcher
/binding/golang/golang
# rust # rust
Cargo.lock Cargo.lock
target target
...@@ -45,10 +51,10 @@ target ...@@ -45,10 +51,10 @@ target
# VS ignore cases # VS ignore cases
/**/*.sln /**/*.sln
/v1.0//binding/c#/**/.vs/ /v1.0/binding/c#/**/.vs/
/v1.0//binding/c#/**/packages /v1.0/binding/c#/**/packages
/v1.0//binding/c#/**/bin /v1.0/binding/c#/**/bin
/v1.0//binding/c#/**/obj /v1.0/binding/c#/**/obj
/binding/c#/**/.vs/ /binding/c#/**/.vs/
/binding/c#/**/packages /binding/c#/**/packages
/binding/c#/**/bin /binding/c#/**/bin
...@@ -63,3 +69,5 @@ target ...@@ -63,3 +69,5 @@ target
# maker # maker
## golang ## golang
/maker/golang/dbmaker /maker/golang/dbmaker
/maker/golang/xdb_maker
/maker/golang/golang
# Ip2region 是什么 # Ip2region 是什么
ip2region v2.0 - 是一个离线IP地址定位库和IP定位数据管理框架,10微秒级别的查询效率,提供了众多主流编程语言的xdb数据生成和查询客户端实现。v1.0 旧版本: [v1.0版本入口](v1.0) ip2region v2.0 - 是一个离线IP地址定位库和IP定位数据管理框架,10微秒级别的查询效率,提供了众多主流编程语言的 `xdb` 数据生成和查询客户端实现。v1.0 旧版本: [v1.0版本入口](v1.0)
...@@ -32,21 +32,21 @@ v2.0 格式的 `xdb` 支持亿级别的 IP 数据段行数,region 信息也可 ...@@ -32,21 +32,21 @@ v2.0 格式的 `xdb` 支持亿级别的 IP 数据段行数,region 信息也可
使用文档和测试程序请参考各个查询客户端下的 ReadMe 介绍,全部查询 binding 实现情况如下: 使用文档和测试程序请参考各个查询客户端下的 ReadMe 介绍,全部查询 binding 实现情况如下:
| 编程语言 | 描述 | 开发状态 | 作者 | | 编程语言 | 描述 | 开发状态 | 贡献者 |
| :--- | :--- | :--- | :--- | | :--- | :--- |:-------| :--- |
| [golang](binding/golang) | golang 查询客户端实现 | 已完成 | [Lion](https://github.com/lionsoul2014) | | [golang](binding/golang) | golang 查询客户端实现 | 已完成 | [Lion](https://github.com/lionsoul2014) |
| [php](binding/php) | php 查询客户端实现 | 开发中... | [Lion](https://github.com/lionsoul2014) | | [php](binding/php) | php 查询客户端实现 | 已完成 | [Lion](https://github.com/lionsoul2014) |
| [java](binding/java) | java 查询客户端实现 | 开发中... | [Lion](https://github.com/lionsoul2014) | | [java](binding/java) | java 查询客户端实现 | 已完成 | [Lion](https://github.com/lionsoul2014) |
| [lua](binding/lua) | lua 查询客户端实现 | 开发中... | [Lion](https://github.com/lionsoul2014) | | [lua](binding/lua) | lua 查询客户端实现 | 开发中... | [Lion](https://github.com/lionsoul2014) |
| [c](binding/c) | ANSI c 查询客户端实现 | 开发中... | [Lion](https://github.com/lionsoul2014) | | [c](binding/c) | ANSI c 查询客户端实现 | 开发中... | [Lion](https://github.com/lionsoul2014) |
| [lua_c](binding/lua_c) | lua 查询客户端实现 | 开发中... | [Lion](https://github.com/lionsoul2014) | | [lua_c](binding/lua_c) | lua 查询客户端实现 | 开发中... | [Lion](https://github.com/lionsoul2014) |
| [rust](binding/rust) | rust 查询客户端实现 | 开发中... | [Lion](https://github.com/lionsoul2014) | | [rust](binding/rust) | rust 查询客户端实现 | 开发中... | [Lion](https://github.com/lionsoul2014) |
| [python](binding/python) | python 查询客户端实现 | 待开始 | 待确定 | | [python](binding/python) | python 查询客户端实现 | 待开始 | 待确定 |
| [nodejs](binding/nodejs) | nodejs 查询客户端实现 | 待开始 | 待确定 | | [nodejs](binding/nodejs) | nodejs 查询客户端实现 | 待开始 | 待确定 |
| [php5_ext](binding/php5_ext) | php5 c 扩展查询客户端实现 | 待开始 | 待确定 | | [php5_ext](binding/php5_ext) | php5 c 扩展查询客户端实现 | 待开始 | 待确定 |
| [php7_ext](binding/php7_ext) | php7 c 扩展查询客户端实现 | 待开始 | 待确定 | | [php7_ext](binding/php7_ext) | php7 c 扩展查询客户端实现 | 待开始 | 待确定 |
| [nginx](binding/nginx) | nginx的扩展查询客户端实现 | 待开始 | 待确定 | | [nginx](binding/nginx) | nginx的扩展查询客户端实现 | 待开始 | 待确定 |
| [csharp](binding/csharp) | c# 查询客户端实现 | 待开始 | 待确定 | | [csharp](binding/csharp) | c# 查询客户端实现 | 待开始 | 待确定 |
...@@ -54,7 +54,7 @@ v2.0 格式的 `xdb` 支持亿级别的 IP 数据段行数,region 信息也可 ...@@ -54,7 +54,7 @@ v2.0 格式的 `xdb` 支持亿级别的 IP 数据段行数,region 信息也可
使用文档和测试程序请参考不同生成程序下的 ReadMe 介绍,全部生成 maker 实现情况如下: 使用文档和测试程序请参考不同生成程序下的 ReadMe 介绍,全部生成 maker 实现情况如下:
| 编程语言 | 描述 | 开发状态 | 者 | | 编程语言 | 描述 | 开发状态 | 贡献者 |
| :--- | :--- | :--- | :--- | | :--- | :--- | :--- | :--- |
| [golang](maker/golang) | golang xdb 生成程序实现 | 已完成 | [Lion](https://github.com/lionsoul2014) | | [golang](maker/golang) | golang xdb 生成程序实现 | 已完成 | [Lion](https://github.com/lionsoul2014) |
| [java](maker/java) | java xdb 生成程序实现 | 开发中... | [Lion](https://github.com/lionsoul2014) | | [java](maker/java) | java xdb 生成程序实现 | 开发中... | [Lion](https://github.com/lionsoul2014) |
......
# ip2region golang binding makefile
#
# Targets:
#   all / build  compile the xdb_searcher test program
#   test         run every go test below this directory
#   clean        remove every generated xdb_searcher binary
all: build
# none of these targets produce a file with the same name, so all of them
# are declared phony to keep them runnable even if such a file exists.
.PHONY: all build test clean
build:
	go build -o xdb_searcher
test:
	go test -v ./...
clean:
	find ./ -name xdb_searcher | xargs rm -f
\ No newline at end of file
# ip2region golang 查询客户端实现 # ip2region xdb golang 查询客户端实现
# 使用方式 # 使用方式
### package 获取
```bash
go get github.com/lionsoul2014/ip2region/binding/golang
```
### 完全基于文件的查询
```golang
import (
"fmt"
"github.com/lionsoul2014/ip2region/binding/golang/xdb"
"time"
)
func main() {
var dbPath = "ip2region.xdb file path"
searcher, err := xdb.NewWithFileOnly(dbPath)
if err != nil {
fmt.Printf("failed to create searcher: %s\n", err.Error())
return
}
defer searcher.Close()
// do the search
var ip = "1.2.3.4"
var tStart = time.Now()
region, err := searcher.SearchByStr(ip)
if err != nil {
fmt.Printf("failed to SearchIP(%s): %s\n", ip, err)
return
}
fmt.Printf("{region: %s, took: %s}\n", region, time.Since(tStart))
// 备注:并发使用,每个 goroutine 需要创建一个独立的 searcher 对象。
}
```
### 缓存 `VectorIndex` 索引
可以预先加载 vectorIndex 缓存,然后做成全局变量,每次创建 searcher 的时候使用全局的 vectorIndex,可以减少一次固定的 IO 操作从而加速查询,减少系统 io 压力。
```golang
// 1、从 dbPath 加载 VectorIndex 缓存,把下述 vIndex 变量全局到内存里面。
vIndex, err := xdb.LoadVectorIndexFromFile(dbPath)
if err != nil {
fmt.Printf("failed to load vector index from `%s`: %s\n", dbPath, err)
return
}
// 2、用全局的 vIndex 创建带 VectorIndex 缓存的查询对象。
searcher, err := xdb.NewWithVectorIndex(dbPath, vIndex)
if err != nil {
fmt.Printf("failed to create searcher with vector index: %s\n", err)
return
}
// 备注:并发使用,全部 goroutine 共享全局的只读 vIndex 缓存,每个 goroutine 创建一个独立的 searcher 对象
```
### 缓存整个 xdb 数据
可以预先加载整个 ip2region.xdb 到内存,完全基于内存查询,类似于之前的 memory search 查询。
```golang
// 1、从 dbPath 加载整个 xdb 到内存
cBuff, err := xdb.LoadContentFromFile(dbPath)
if err != nil {
fmt.Printf("failed to load content from `%s`: %s\n", dbPath, err)
return
}
// 2、用全局的 cBuff 创建完全基于内存的查询对象。
searcher, err := xdb.NewWithBuffer(cBuff)
if err != nil {
fmt.Printf("failed to create searcher with content: %s\n", err)
return
}
// 备注:并发使用,用整个 xdb 缓存创建的 searcher 对象可以安全用于并发。
```
# 编译测试程序
通过如下方式编译得到 xdb_searcher 可执行程序:
```bash
# 切换到 golang binding 根目录
make
```
# 查询测试 # 查询测试
通过 `xdb_searcher search` 命令来测试 ip2region.xdb 的查询:
```
➜ golang git:(v2.0_xdb) ./xdb_searcher search
./xdb_searcher search [command options]
options:
--db string ip2region binary xdb file path
--cache-policy string cache policy: file/vectorIndex/content
```
例如:使用默认的 data/ip2region.xdb 进行查询测试
```bash
➜ golang git:(v2.0_xdb) ✗ ./xdb_searcher search --db=../../data/ip2region.xdb
ip2region xdb searcher test program, type `quit` to exit
ip2region>> 1.2.3.4
{region:美国|0|华盛顿|0|谷歌, took:101.57µs}
```
输入 ip 地址进行查询即可,输入 quit 退出测试程序。可以设置 `cache-policy` 为 file/vectorIndex/content 来测试不同的查询缓存机制。
# bench 测试 # bench 测试
通过 `xdb_searcher bench` 命令来进行自动 bench 测试,一方面确保程序和xdb文件都没有错误,一方面通过大量的查询得到平均查询性能:
```bash
➜ golang git:(v2.0_xdb) ./xdb_searcher bench
./xdb_searcher bench [command options]
options:
--db string ip2region binary xdb file path
--src string source ip text file path
--cache-policy string cache policy: file/vectorIndex/content
```
例如:通过 data/ip2region.xdb 和 data/ip.merge.txt 进行 bench 测试:
```bash
➜ golang git:(v2.0_xdb) ✗ ./xdb_searcher bench --db=../../data/ip2region.xdb --src=../../data/ip.merge.txt
Bench finished, {total: 3417955, took: 28.211578339s, cost: 8253 ns/op}
```
可以设置 `cache-policy` 参数来分别测试 file/vectorIndex/content 不同缓存机制的实现。
*请注意 bench 使用的 src 文件需要是生成对应的 xdb 文件的相同的源文件*。bench 程序会逐行读取 `src` 指定的源IP文件,然后每个 IP 段选取 5 个固定位置的 IP 进行测试,以确保查询的 region 信息和原始的 region 信息是相同。测试途中没有调试信息的输出,有错误会打印错误信息并且终止运行,所以看到 `Bench finished` 就表示 bench 成功了,cost 是表示每次查询操作的平均时间(ns)。
module github.com/lionsoul2014/ip2region/binding/golang
go 1.17
require github.com/mitchellh/go-homedir v1.1.0
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
// ---
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/16
package main
import (
"bufio"
"fmt"
"github.com/lionsoul2014/ip2region/binding/golang/xdb"
"github.com/mitchellh/go-homedir"
"log"
"os"
"strings"
"time"
)
// printHelp writes the top-level usage text to stdout: the program name
// (taken from os.Args[0]) followed by the supported sub-commands.
func printHelp() {
	prog := os.Args[0]
	fmt.Print("ip2region xdb searcher\n")
	fmt.Printf("%s [command] [command options]\n", prog)
	fmt.Print("Command: \n")
	fmt.Print(" search search input test\n")
	fmt.Print(" bench search bench test\n")
}
// testSearch implements the interactive `search` sub-command.
//
// It parses the `--db=` and `--cache-policy=` options from os.Args[2:],
// creates a searcher with the requested cache policy (default "vectorIndex")
// and then reads ip addresses from stdin line by line, printing the region,
// the io count and the elapsed time of every lookup until `quit` is entered
// or stdin can no longer be read.
func testSearch() {
	var dbFile, cachePolicy = "", "vectorIndex"
	for i := 2; i < len(os.Args); i++ {
		r := os.Args[i]
		// too short to be a `--k=v` pair, or not an option at all: ignore it
		if len(r) < 5 || !strings.HasPrefix(r, "--") {
			continue
		}

		var sIdx = strings.Index(r, "=")
		if sIdx < 0 {
			fmt.Printf("missing = for args pair '%s'\n", r)
			return
		}

		switch r[2:sIdx] {
		case "db":
			dbFile = r[sIdx+1:]
		case "cache-policy":
			cachePolicy = r[sIdx+1:]
		default:
			fmt.Printf("undefined option `%s`\n", r)
			return
		}
	}

	if dbFile == "" {
		fmt.Printf("%s search [command options]\n", os.Args[0])
		fmt.Printf("options:\n")
		fmt.Printf(" --db string ip2region binary xdb file path\n")
		fmt.Printf(" --cache-policy string cache policy: file/vectorIndex/content\n")
		return
	}

	dbPath, err := homedir.Expand(dbFile)
	if err != nil {
		// the trailing newline keeps the shell prompt off the message line
		fmt.Printf("invalid xdb file path `%s`: %s\n", dbFile, err)
		return
	}

	// create the searcher with the cache policy setting
	searcher, err := createSearcher(dbPath, cachePolicy)
	if err != nil {
		fmt.Printf("failed to create searcher: %s\n", err.Error())
		return
	}
	defer func() {
		searcher.Close()
		fmt.Printf("searcher test program exited, thanks for trying\n")
	}()

	fmt.Printf(`ip2region xdb searcher test program, cachePolicy: %s
type 'quit' to exit
`, cachePolicy)
	reader := bufio.NewReader(os.Stdin)
	for {
		fmt.Print("ip2region>> ")
		str, err := reader.ReadString('\n')
		if err != nil {
			// Return instead of log.Fatalf: Fatalf exits through os.Exit,
			// which would skip the deferred searcher.Close() above. This
			// is also the path taken when stdin reaches EOF.
			log.Printf("failed to read string: %s", err)
			return
		}

		line := strings.TrimSpace(str)
		if len(line) == 0 {
			continue
		}
		if line == "quit" {
			break
		}

		tStart := time.Now()
		region, err := searcher.SearchByStr(line)
		if err != nil {
			fmt.Printf("\x1b[0;31m{err: %s, ioCount: %d}\x1b[0m\n", err.Error(), searcher.GetIOCount())
		} else {
			fmt.Printf("\x1b[0;32m{region: %s, ioCount: %d, took: %s}\x1b[0m\n", region, searcher.GetIOCount(), time.Since(tStart))
		}
	}
}
// testBench implements the `bench` sub-command.
//
// It parses the `--db=`, `--src=` and `--cache-policy=` options from
// os.Args[2:], creates a searcher and walks the source text file line by
// line. Every non-blank line must look like `startIP|endIP|region`; five ips
// of each segment (start, end, the middle and the two quarter points) are
// searched and the returned region is compared with the one in the source
// line. The first mismatch or error is printed and stops the run; otherwise
// a summary with the average cost per search is printed.
func testBench() {
	var dbFile, srcFile, cachePolicy = "", "", "vectorIndex"
	for i := 2; i < len(os.Args); i++ {
		r := os.Args[i]
		// too short to be a `--k=v` pair, or not an option at all: ignore it
		if len(r) < 5 || !strings.HasPrefix(r, "--") {
			continue
		}

		var sIdx = strings.Index(r, "=")
		if sIdx < 0 {
			fmt.Printf("missing = for args pair '%s'\n", r)
			return
		}

		switch r[2:sIdx] {
		case "db":
			dbFile = r[sIdx+1:]
		case "src":
			srcFile = r[sIdx+1:]
		case "cache-policy":
			cachePolicy = r[sIdx+1:]
		default:
			fmt.Printf("undefined option `%s`\n", r)
			return
		}
	}

	if dbFile == "" || srcFile == "" {
		fmt.Printf("%s bench [command options]\n", os.Args[0])
		fmt.Printf("options:\n")
		fmt.Printf(" --db string ip2region binary xdb file path\n")
		fmt.Printf(" --src string source ip text file path\n")
		fmt.Printf(" --cache-policy string cache policy: file/vectorIndex/content\n")
		return
	}

	dbPath, err := homedir.Expand(dbFile)
	if err != nil {
		fmt.Printf("invalid xdb file path `%s`: %s\n", dbFile, err)
		return
	}

	searcher, err := createSearcher(dbPath, cachePolicy)
	if err != nil {
		fmt.Printf("failed to create searcher: %s\n", err.Error())
		return
	}
	defer searcher.Close()

	handle, err := os.OpenFile(srcFile, os.O_RDONLY, 0600)
	if err != nil {
		fmt.Printf("failed to open source text file: %s\n", err)
		return
	}
	// the file is only read, so a close error carries no information
	defer handle.Close()

	var count, tStart, costs = int64(0), time.Now(), int64(0)
	var scanner = bufio.NewScanner(handle)
	scanner.Split(bufio.ScanLines)
	for scanner.Scan() {
		var l = strings.TrimSpace(scanner.Text())
		if l == "" {
			// tolerate blank lines, e.g. a trailing newline at the end of the file
			continue
		}

		var ps = strings.SplitN(l, "|", 3)
		if len(ps) != 3 {
			fmt.Printf("invalid ip segment line `%s`\n", l)
			return
		}

		sip, err := xdb.CheckIP(ps[0])
		if err != nil {
			fmt.Printf("check start ip `%s`: %s\n", ps[0], err)
			return
		}

		eip, err := xdb.CheckIP(ps[1])
		if err != nil {
			fmt.Printf("check end ip `%s`: %s\n", ps[1], err)
			return
		}

		if sip > eip {
			fmt.Printf("start ip(%s) should not be greater than end ip(%s)\n", ps[0], ps[1])
			return
		}

		mip := xdb.MidIP(sip, eip)
		for _, ip := range []uint32{sip, xdb.MidIP(sip, mip), mip, xdb.MidIP(mip, eip), eip} {
			sTime := time.Now()
			region, err := searcher.Search(ip)
			if err != nil {
				fmt.Printf("failed to search ip '%s': %s\n", xdb.Long2IP(ip), err)
				return
			}
			// only the search itself is accounted, not the verification below
			costs += time.Since(sTime).Nanoseconds()

			// check the region info
			if region != ps[2] {
				fmt.Printf("failed Search(%s) with (%s != %s)\n", xdb.Long2IP(ip), region, ps[2])
				return
			}

			count++
		}
	}

	// Scan also stops on a read error or an over-long line; do not report
	// a truncated run as a finished bench.
	if err := scanner.Err(); err != nil {
		fmt.Printf("failed to read source text file: %s\n", err)
		return
	}

	// count stays 0 for an empty source file: avoid the division by zero
	var avgCost int64
	if count > 0 {
		avgCost = costs / count / 1000
	}

	cost := time.Since(tStart)
	fmt.Printf("Bench finished, {cachePolicy: %s, total: %d, took: %s, cost: %d μs/op}\n",
		cachePolicy, count, cost, avgCost)
}
// createSearcher builds an xdb searcher for dbPath according to cachePolicy:
//
//	"nil" / "file"  every read goes to the xdb file
//	"vectorIndex"   the vector index is loaded up front and handed to the searcher
//	"content"       the whole xdb file is loaded and searched from the buffer
//
// Any other policy value yields an error.
func createSearcher(dbPath string, cachePolicy string) (*xdb.Searcher, error) {
	if cachePolicy == "nil" || cachePolicy == "file" {
		return xdb.NewWithFileOnly(dbPath)
	}

	if cachePolicy == "vectorIndex" {
		index, loadErr := xdb.LoadVectorIndexFromFile(dbPath)
		if loadErr != nil {
			return nil, fmt.Errorf("failed to load vector index from `%s`: %w", dbPath, loadErr)
		}
		return xdb.NewWithVectorIndex(dbPath, index)
	}

	if cachePolicy == "content" {
		content, loadErr := xdb.LoadContentFromFile(dbPath)
		if loadErr != nil {
			return nil, fmt.Errorf("failed to load content from '%s': %w", dbPath, loadErr)
		}
		return xdb.NewWithBuffer(content)
	}

	return nil, fmt.Errorf("invalid cache policy `%s`, options: file/vectorIndex/content", cachePolicy)
}
// main dispatches to the sub-command named by the first argument
// (matched case-insensitively) and prints the usage text when the command
// is missing or unknown.
func main() {
	if len(os.Args) < 2 {
		printHelp()
		return
	}

	// set the log flag
	log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)

	commands := map[string]func(){
		"search": testSearch,
		"bench":  testBench,
	}
	if run, ok := commands[strings.ToLower(os.Args[1])]; ok {
		run()
		return
	}
	printHelp()
}
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/16
package xdb
import (
"encoding/binary"
"fmt"
)
// Header holds the values decoded from the first 16 bytes of an xdb header
// buffer. NewHeader reads them as little-endian integers: Version at offset
// 0, IndexPolicy at 2, CreatedAt at 4, StartIndexPtr at 8 and EndIndexPtr
// at 12.
type Header struct {
// data []byte
// Version is the uint16 stored at offset 0 of the header.
Version uint16
// IndexPolicy is the index type stored at offset 2 of the header.
IndexPolicy IndexPolicy
// CreatedAt is the uint32 stored at offset 4; the package test formats it
// with time.Unix, i.e. it is treated as a unix timestamp in seconds.
CreatedAt uint32
// StartIndexPtr and EndIndexPtr are the uint32 values stored at offsets 8
// and 12 — presumably the file offsets of the first and last segment index
// block; confirm against the xdb maker.
StartIndexPtr uint32
EndIndexPtr uint32
}
// NewHeader decodes a Header from the leading 16 bytes of input, reading
// every field as a little-endian integer. It returns an error when input
// holds fewer than 16 bytes.
func NewHeader(input []byte) (*Header, error) {
	if len(input) < 16 {
		return nil, fmt.Errorf("invalid input buffer")
	}

	le := binary.LittleEndian
	header := new(Header)
	header.Version = le.Uint16(input)
	header.IndexPolicy = IndexPolicy(le.Uint16(input[2:]))
	header.CreatedAt = le.Uint32(input[4:])
	header.StartIndexPtr = le.Uint32(input[8:])
	header.EndIndexPtr = le.Uint32(input[12:])
	return header, nil
}
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
// ---
// ip2region database v2.0 searcher.
// @Note this is a Not thread safe implementation.
//
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/16
package xdb
import (
"encoding/binary"
"fmt"
"os"
)
// Layout constants of the xdb data as used by the searcher and the loaders.
const (
// HeaderInfoLength is the size in bytes of the header block at the start of
// the xdb data; the vector index is read from this offset on.
HeaderInfoLength = 256
// VectorIndexRows and VectorIndexCols are the dimensions of the vector
// index. Search selects the entry with the first (row) and the second
// (column) byte of the ip.
VectorIndexRows = 256
VectorIndexCols = 256
// VectorIndexSize is the size in bytes of one vector index entry: two
// little-endian uint32 pointers delimiting a range of segment index blocks.
VectorIndexSize = 8
// SegmentIndexBlockSize is the size in bytes of one segment index block:
// start ip (4), end ip (4), region data length (2), region data pointer (4).
SegmentIndexBlockSize = 14
)
// IndexPolicy identifies the index type recorded in the xdb header.
type IndexPolicy int

// Known index policies; the numeric values are the ones stored in the header.
const (
	VectorIndexPolicy IndexPolicy = iota + 1
	BTreeIndexPolicy
)

// String returns the display name of the policy, or "unknown" for any value
// that is not one of the declared policies.
func (i IndexPolicy) String() string {
	if i == VectorIndexPolicy {
		return "VectorIndex"
	}
	if i == BTreeIndexPolicy {
		return "BtreeIndex"
	}
	return "unknown"
}
// Searcher looks up the region string of an ip in xdb data, reading either
// from an opened xdb file or from a buffer holding the whole file content.
type Searcher struct {
// handle is the opened xdb file; it stays nil when the searcher was created
// from a content buffer.
handle *os.File
// header info
// NOTE(review): none of the constructors visible in this file assigns it.
header *Header
// ioCount is reset at the start of every Search and incremented once per
// file read, so it reports the io operations of the last search.
ioCount int
// vectorIndex is the optional preloaded vector index. When set, Search
// takes the segment pointers from it instead of reading them from the
// file, which saves one io operation per search.
vectorIndex []byte
// contentBuff is the optional buffer holding the whole xdb file. When set,
// every read is served from it and the file handle is not used.
contentBuff []byte
}
// baseNew is the shared constructor behind the exported New* functions.
//
// A non-nil cBuff wins: the searcher then works purely on that buffer and
// neither dbFile nor vIndex is used. Otherwise dbFile is opened read-only
// and vIndex (which may be nil) is attached as the cached vector index.
func baseNew(dbFile string, vIndex []byte, cBuff []byte) (*Searcher, error) {
	// content buff first
	if cBuff != nil {
		searcher := &Searcher{contentBuff: cBuff}
		return searcher, nil
	}

	// open the xdb binary file
	file, openErr := os.OpenFile(dbFile, os.O_RDONLY, 0600)
	if openErr != nil {
		return nil, openErr
	}

	searcher := &Searcher{handle: file, vectorIndex: vIndex}
	return searcher, nil
}
// NewWithFileOnly creates a searcher that reads everything, including the
// vector index entry, from the xdb file at dbFile on every search.
func NewWithFileOnly(dbFile string) (*Searcher, error) {
	searcher, err := baseNew(dbFile, nil, nil)
	return searcher, err
}
// NewWithVectorIndex creates a searcher on the xdb file at dbFile that takes
// the vector index entries from the preloaded vIndex buffer instead of
// reading them from the file.
func NewWithVectorIndex(dbFile string, vIndex []byte) (*Searcher, error) {
	searcher, err := baseNew(dbFile, vIndex, nil)
	return searcher, err
}
// NewWithBuffer creates a searcher that works entirely on cBuff, a buffer
// holding the content of a whole xdb file; no file is opened.
func NewWithBuffer(cBuff []byte) (*Searcher, error) {
	searcher, err := baseNew("", nil, cBuff)
	return searcher, err
}
// Close closes the underlying xdb file, if the searcher has one. Searchers
// created from a content buffer have no file and Close does nothing.
func (s *Searcher) Close() {
	if s.handle == nil {
		return
	}

	// Close has no error return, so a failure to close is dropped here.
	_ = s.handle.Close()
}
// GetIOCount reports the number of file reads performed by the most recent
// Search call; the counter is reset at the start of every search.
func (s *Searcher) GetIOCount() int {
	ioCount := s.ioCount
	return ioCount
}
// SearchByStr parses str as a dotted ip address and returns the region
// found for it. An address rejected by CheckIP is reported as an error
// without touching the xdb data.
func (s *Searcher) SearchByStr(str string) (string, error) {
	ip, parseErr := CheckIP(str)
	if parseErr == nil {
		return s.Search(ip)
	}
	return "", parseErr
}
// Search returns the region string stored for the given ip (as a uint32).
//
// The first two bytes of the ip select a vector index entry holding the
// start and end pointer of a run of segment index blocks; that run is binary
// searched for the block whose [start ip, end ip] range contains ip, and the
// region data the block points to is returned. An empty string with a nil
// error is returned when no block matches or the matched region is empty.
func (s *Searcher) Search(ip uint32) (string, error) {
	// reset the global ioCount
	s.ioCount = 0

	// locate the segment index block based on the vector index
	var row = (ip >> 24) & 0xFF
	var col = (ip >> 16) & 0xFF
	var idx = row*VectorIndexCols*VectorIndexSize + col*VectorIndexSize

	var sPtr, ePtr uint32
	switch {
	case s.vectorIndex != nil:
		// cached vector index: offsets are relative to the index itself
		sPtr = binary.LittleEndian.Uint32(s.vectorIndex[idx:])
		ePtr = binary.LittleEndian.Uint32(s.vectorIndex[idx+4:])
	case s.contentBuff != nil:
		// whole content cached: the index follows the header block
		sPtr = binary.LittleEndian.Uint32(s.contentBuff[HeaderInfoLength+idx:])
		ePtr = binary.LittleEndian.Uint32(s.contentBuff[HeaderInfoLength+idx+4:])
	default:
		// read the vector index block
		var entry = make([]byte, 8)
		if err := s.read(int64(HeaderInfoLength+idx), entry); err != nil {
			return "", fmt.Errorf("read vector index block at %d: %w", HeaderInfoLength+idx, err)
		}
		sPtr = binary.LittleEndian.Uint32(entry)
		ePtr = binary.LittleEndian.Uint32(entry[4:])
	}

	// binary search the segment index to get the region
	var dataLen, dataPtr = 0, uint32(0)
	var segment = make([]byte, SegmentIndexBlockSize)
	var low, high = 0, int((ePtr - sPtr) / SegmentIndexBlockSize)
	for low <= high {
		mid := (low + high) >> 1
		pos := sPtr + uint32(mid*SegmentIndexBlockSize)
		if err := s.read(int64(pos), segment); err != nil {
			return "", fmt.Errorf("read segment index at %d: %w", pos, err)
		}

		// decode the block field by field so that the later fields are only
		// decoded when they are needed
		if sip := binary.LittleEndian.Uint32(segment); ip < sip {
			high = mid - 1
			continue
		}
		if eip := binary.LittleEndian.Uint32(segment[4:]); ip > eip {
			low = mid + 1
			continue
		}

		dataLen = int(binary.LittleEndian.Uint16(segment[8:]))
		dataPtr = binary.LittleEndian.Uint32(segment[10:])
		break
	}

	if dataLen == 0 {
		return "", nil
	}

	// load and return the region data
	var region = make([]byte, dataLen)
	if err := s.read(int64(dataPtr), region); err != nil {
		return "", fmt.Errorf("read region at %d: %w", dataPtr, err)
	}
	return string(region), nil
}
// read fills buff with len(buff) bytes of xdb data starting at offset.
//
// When a content buffer is set the bytes are copied from it; otherwise the
// file handle is positioned with Seek and read, and ioCount is incremented.
// An error is returned when the offset lies outside the content buffer,
// when seeking or reading fails, or when fewer than len(buff) bytes are
// available.
func (s *Searcher) read(offset int64, buff []byte) error {
	if s.contentBuff != nil {
		// Guard the slice expression below: an offset past the end of the
		// buffer (truncated or corrupt xdb data) would panic otherwise.
		if offset < 0 || offset > int64(len(s.contentBuff)) {
			return fmt.Errorf("incomplete read: offset %d is out of the content buffer of %d bytes",
				offset, len(s.contentBuff))
		}

		if cLen := copy(buff, s.contentBuff[offset:]); cLen != len(buff) {
			return fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
		}
		return nil
	}

	if _, err := s.handle.Seek(offset, 0); err != nil {
		return fmt.Errorf("seek to %d: %w", offset, err)
	}

	s.ioCount++
	rLen, err := s.handle.Read(buff)
	if err != nil {
		return fmt.Errorf("handle read: %w", err)
	}

	if rLen != len(buff) {
		return fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
	}
	return nil
}
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
// ---
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/16
package xdb
import (
"encoding/binary"
"fmt"
"os"
"strconv"
"strings"
)
// CheckIP validates a dotted-decimal IPv4 address and converts it to its
// uint32 value, with the first part as the most significant byte.
//
// The address must consist of exactly four parts separated by ".", each a
// plain decimal integer between 0 and 255 without a sign prefix. Any other
// input yields an error naming the offending part by its zero-based index.
func CheckIP(ip string) (uint32, error) {
	var ps = strings.Split(ip, ".")
	if len(ps) != 4 {
		return 0, fmt.Errorf("invalid ip address `%s`", ip)
	}

	var buff = make([]byte, 4)
	for i, s := range ps {
		d, err := strconv.Atoi(s)
		if err != nil {
			return 0, fmt.Errorf("the %dth part `%s` is not an integer", i, s)
		}

		if d < 0 || d > 255 {
			return 0, fmt.Errorf("the %dth part `%s` should be an integer between 0 and 255", i, s)
		}

		// Atoi accepts a leading sign, so "+1" and "-0" get this far;
		// neither is valid in an ip address. s is not empty here because
		// Atoi rejects the empty string.
		if s[0] == '+' || s[0] == '-' {
			return 0, fmt.Errorf("the %dth part `%s` should not have a sign prefix", i, s)
		}

		buff[i] = byte(d)
	}

	// convert the ip to integer
	return binary.BigEndian.Uint32(buff), nil
}
// Long2IP formats a uint32 ip as a dotted-decimal string, most significant
// byte first.
func Long2IP(ip uint32) string {
	var parts = make([]string, 0, 4)
	for _, shift := range []uint{24, 16, 8, 0} {
		parts = append(parts, fmt.Sprintf("%d", (ip>>shift)&0xFF))
	}
	return strings.Join(parts, ".")
}
// MidIP returns the ip halfway between sip and eip, rounded down. The sum is
// computed in 64 bits so that it cannot overflow.
func MidIP(sip uint32, eip uint32) uint32 {
	sum := uint64(sip) + uint64(eip)
	return uint32(sum / 2)
}
// LoadHeader reads the header block from the start of the file behind
// handle and decodes it with NewHeader. The handle is rewound to offset 0
// first; it is not closed.
func LoadHeader(handle *os.File) (*Header, error) {
	if _, err := handle.Seek(0, 0); err != nil {
		return nil, fmt.Errorf("seek to the header: %w", err)
	}

	var block = make([]byte, HeaderInfoLength)
	n, err := handle.Read(block)
	if err != nil {
		return nil, err
	}
	if n != len(block) {
		return nil, fmt.Errorf("incomplete read: readed bytes should be %d", len(block))
	}

	return NewHeader(block)
}
// LoadHeaderFromFile opens the xdb file at dbFile, decodes its header with
// LoadHeader and closes the file again before returning.
func LoadHeaderFromFile(dbFile string) (*Header, error) {
	handle, err := os.OpenFile(dbFile, os.O_RDONLY, 0600)
	if err != nil {
		return nil, fmt.Errorf("open xdb file `%s`: %w", dbFile, err)
	}
	// The handle is only needed for this call; without the Close every call
	// would leak a file descriptor. The file is read-only, so a close error
	// carries no information.
	defer handle.Close()

	return LoadHeader(handle)
}
// LoadHeaderFromBuff decodes the header from cBuff, a buffer holding the
// content of an xdb file. It returns an error when cBuff is shorter than the
// header block (HeaderInfoLength bytes).
func LoadHeaderFromBuff(cBuff []byte) (*Header, error) {
	// slicing a shorter buffer would panic, so reject it explicitly
	if len(cBuff) < HeaderInfoLength {
		return nil, fmt.Errorf("invalid content buffer: %d bytes is less than the header length %d",
			len(cBuff), HeaderInfoLength)
	}

	return NewHeader(cBuff[:HeaderInfoLength])
}
// LoadVectorIndex reads the complete vector index, which starts right after
// the header block, from the file behind handle and returns it as a raw byte
// buffer. The handle is not closed.
func LoadVectorIndex(handle *os.File) ([]byte, error) {
	// load all the vector index block
	if _, err := handle.Seek(HeaderInfoLength, 0); err != nil {
		return nil, fmt.Errorf("seek to vector index: %w", err)
	}

	const indexLen = VectorIndexRows * VectorIndexCols * VectorIndexSize
	var index = make([]byte, indexLen)
	n, err := handle.Read(index)
	if err != nil {
		return nil, err
	}
	if n != indexLen {
		return nil, fmt.Errorf("incomplete read: readed bytes should be %d", indexLen)
	}

	return index, nil
}
// LoadVectorIndexFromFile opens the xdb file at dbFile, loads its vector
// index with LoadVectorIndex and closes the file again before returning.
func LoadVectorIndexFromFile(dbFile string) ([]byte, error) {
	handle, err := os.OpenFile(dbFile, os.O_RDONLY, 0600)
	if err != nil {
		return nil, fmt.Errorf("open xdb file `%s`: %w", dbFile, err)
	}
	// The handle is only needed for this call; without the Close every call
	// would leak a file descriptor. The file is read-only, so a close error
	// carries no information.
	defer handle.Close()

	return LoadVectorIndex(handle)
}
// LoadContent reads the whole file behind handle into memory and returns
// the bytes. The size is taken from Stat and the handle is rewound to
// offset 0 before reading; the handle is not closed.
func LoadContent(handle *os.File) ([]byte, error) {
	// get file size
	info, statErr := handle.Stat()
	if statErr != nil {
		return nil, fmt.Errorf("stat: %w", statErr)
	}

	// seek to the head of the file
	if _, err := handle.Seek(0, 0); err != nil {
		return nil, fmt.Errorf("seek to get xdb file length: %w", err)
	}

	var content = make([]byte, info.Size())
	n, err := handle.Read(content)
	if err != nil {
		return nil, err
	}
	if n != len(content) {
		return nil, fmt.Errorf("incomplete read: readed bytes should be %d", len(content))
	}

	return content, nil
}
// LoadContentFromFile opens the xdb file at dbFile, loads its whole content
// with LoadContent and closes the file again before returning.
func LoadContentFromFile(dbFile string) ([]byte, error) {
	handle, err := os.OpenFile(dbFile, os.O_RDONLY, 0600)
	if err != nil {
		return nil, fmt.Errorf("open xdb file `%s`: %w", dbFile, err)
	}
	// The returned buffer is independent of the handle; without the Close
	// every call would leak a file descriptor. The file is read-only, so a
	// close error carries no information.
	defer handle.Close()

	return LoadContent(handle)
}
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
// ---
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/16
package xdb
import (
"fmt"
"testing"
"time"
)
// TestLoadVectorIndex loads the vector index of the bundled xdb file and
// fails when loading returns an error.
func TestLoadVectorIndex(t *testing.T) {
	vIndex, err := LoadVectorIndexFromFile("../../../data/ip2region.xdb")
	if err != nil {
		// report through t: printing and returning would let the test pass
		t.Fatalf("failed to load vector index: %s", err)
	}

	fmt.Printf("vIndex length: %d\n", len(vIndex))
}
// TestLoadContent loads the whole bundled xdb file into memory and fails when
// the load returns an error.
func TestLoadContent(t *testing.T) {
	buff, err := LoadContentFromFile("../../../data/ip2region.xdb")
	if err != nil {
		// report the failure to the test runner instead of printing and
		// returning, which would let the test pass
		t.Fatalf("failed to load xdb content: %s", err)
	}

	fmt.Printf("buff length: %d\n", len(buff))
}
// TestLoadHeader loads the header of the bundled xdb file, fails when the
// load returns an error and prints the decoded fields otherwise.
func TestLoadHeader(t *testing.T) {
	header, err := LoadHeaderFromFile("../../../data/ip2region.xdb")
	if err != nil {
		// report the failure to the test runner instead of printing and
		// returning, which would let the test pass
		t.Fatalf("failed to load xdb header info: %s", err)
	}

	fmt.Printf("Version : %d\n", header.Version)
	fmt.Printf("IndexPolicy : %s\n", header.IndexPolicy.String())
	// CreatedAt is printed both raw and as an RFC 3339 timestamp
	fmt.Printf("CreatedAt : %d(%s)\n", header.CreatedAt, time.Unix(int64(header.CreatedAt), 0).Format(time.RFC3339))
	fmt.Printf("StartIndexPtr : %d\n", header.StartIndexPtr)
	fmt.Printf("EndIndexPtr : %d\n", header.EndIndexPtr)
}
# ip2region xdb java 查询客户端实现
# 使用方式
### maven 仓库:
```xml
<dependency>
<groupId>org.lionsoul</groupId>
<artifactId>ip2region</artifactId>
<version>2.6.1</version>
</dependency>
```
### 完全基于文件的查询
```java
import org.lionsoul.ip2region.xdb.Searcher;
import java.io.*;
import java.util.concurrent.TimeUnit;
public class SearcherTest {
public static void main(String[] args) {
// 1、创建 searcher 对象
String dbPath = "ip2region.xdb file path";
Searcher searcher = null;
try {
searcher = Searcher.newWithFileOnly(dbPath);
} catch (IOException e) {
System.out.printf("failed to create searcher with `%s`: %s\n", dbPath, e);
return;
}
// 2、查询
String ip = "1.2.3.4";
try {
long sTime = System.nanoTime();
String region = searcher.searchByStr(ip);
long cost = TimeUnit.NANOSECONDS.toMicros((long) (System.nanoTime() - sTime));
System.out.printf("{region: %s, ioCount: %d, took: %d μs}\n", region, searcher.getIOCount(), cost);
} catch (Exception e) {
System.out.printf("failed to search(%s): %s\n", ip, e);
return;
}
// 3、备注:并发使用,每个线程需要创建一个独立的 searcher 对象单独使用。
}
}
```
### 缓存 `VectorIndex` 索引
我们可以提前从 xdb 文件中加载出来 VectorIndex 数据,然后全局缓存,每次创建 Searcher 对象的时候使用全局的 VectorIndex 缓存可以减少一次固定的 IO 操作,从而加速查询,减少 IO 压力。
```java
import org.lionsoul.ip2region.xdb.Searcher;
import java.io.*;
import java.util.concurrent.TimeUnit;
public class SearcherTest {
public static void main(String[] args) {
String dbPath = "ip2region.xdb file path";
// 1、从 dbPath 中预先加载 VectorIndex 缓存,并且把这个得到的数据作为全局变量,后续反复使用。
byte[] vIndex;
try {
vIndex = Searcher.loadVectorIndexFromFile(dbPath);
} catch (Exception e) {
System.out.printf("failed to load vector index from `%s`: %s\n", dbPath, e);
return;
}
// 2、使用全局的 vIndex 创建带 VectorIndex 缓存的查询对象。
Searcher searcher;
try {
searcher = Searcher.newWithVectorIndex(dbPath, vIndex);
} catch (Exception e) {
System.out.printf("failed to create vectorIndex cached searcher with `%s`: %s\n", dbPath, e);
return;
}
// 3、查询
String ip = "1.2.3.4";
try {
long sTime = System.nanoTime();
String region = searcher.searchByStr(ip);
long cost = TimeUnit.NANOSECONDS.toMicros((long) (System.nanoTime() - sTime));
System.out.printf("{region: %s, ioCount: %d, took: %d μs}\n", region, searcher.getIOCount(), cost);
} catch (Exception e) {
System.out.printf("failed to search(%s): %s\n", ip, e);
return;
}
// 备注:每个线程需要单独创建一个独立的 Searcher 对象,但是都共享全局的只读 vIndex 缓存。
}
}
```
### 缓存整个 xdb 数据
我们也可以预先加载整个 ip2region.xdb 的数据到内存,然后基于这个数据创建查询对象来实现完全基于内存的查询,类似之前的 memory search。
```java
import org.lionsoul.ip2region.xdb.Searcher;
import java.io.*;
import java.util.concurrent.TimeUnit;
public class SearcherTest {
public static void main(String[] args) {
String dbPath = "ip2region.xdb file path";
// 1、从 dbPath 加载整个 xdb 到内存。
byte[] cBuff;
try {
cBuff = Searcher.loadContentFromFile(dbPath);
} catch (Exception e) {
System.out.printf("failed to load content from `%s`: %s\n", dbPath, e);
return;
}
// 2、使用上述的 cBuff 创建一个完全基于内存的查询对象。
Searcher searcher;
try {
searcher = Searcher.newWithBuffer(cBuff);
} catch (Exception e) {
System.out.printf("failed to create content cached searcher: %s\n", e);
return;
}
// 3、查询
String ip = "1.2.3.4";
try {
long sTime = System.nanoTime();
String region = searcher.searchByStr(ip);
long cost = TimeUnit.NANOSECONDS.toMicros((long) (System.nanoTime() - sTime));
System.out.printf("{region: %s, ioCount: %d, took: %d μs}\n", region, searcher.getIOCount(), cost);
} catch (Exception e) {
System.out.printf("failed to search(%s): %s\n", ip, e);
return;
}
// 备注:并发使用,用整个 xdb 数据缓存创建的查询对象可以安全的用于并发,也就是你可以把这个 searcher 对象做成全局对象去跨线程访问。
}
}
```
# 编译测试程序
通过 maven 来编译测试程序。
```bash
// cd 到 java binding 的根目录
cd binding/java/
mvn compile package
```
然后会在当前目录的 target 目录下得到一个 ip2region-{version}.jar 的打包文件。
# 查询测试
可以通过 `java -jar ip2region-{version}.jar search` 命令来测试查询:
```bash
➜ java git:(v2.0_xdb) ✗ java -jar target/ip2region-2.6.0.jar search
java -jar ip2region-{version}.jar search [command options]
options:
--db string ip2region binary xdb file path
--cache-policy string cache policy: file/vectorIndex/content
```
例如:使用默认的 data/ip2region.xdb 文件进行查询测试:
```bash
➜ java git:(v2.0_xdb) ✗ java -jar target/ip2region-2.6.0.jar search --db=../../data/ip2region.xdb
ip2region xdb searcher test program, cachePolicy: vectorIndex
type 'quit' to exit
ip2region>> 1.2.3.4
{region: 美国|0|华盛顿|0|谷歌, ioCount: 7, took: 82 μs}
ip2region>>
```
输入 ip 即可进行查询测试,也可以设置 `cache-policy` 来分别测试 file/vectorIndex/content 三种不同缓存方式的查询效果。
# bench 测试
可以通过 `java -jar ip2region-{version}.jar bench` 命令来进行 bench 测试,一方面确保 `xdb` 文件没有错误,一方面可以评估查询性能:
```bash
➜ java git:(v2.0_xdb) ✗ java -jar target/ip2region-2.6.0.jar bench
java -jar ip2region-{version}.jar bench [command options]
options:
--db string ip2region binary xdb file path
--src string source ip text file path
--cache-policy string cache policy: file/vectorIndex/content
```
例如:通过默认的 data/ip2region.xdb 和 data/ip.merge.txt 文件进行 bench 测试:
```bash
➜ java git:(v2.0_xdb) ✗ java -jar target/ip2region-2.6.0.jar bench --db=../../data/ip2region.xdb --src=../../data/ip.merge.txt
Bench finished, {cachePolicy: vectorIndex, total: 3417955, took: 8s, cost: 2 μs/op}
```
可以通过设置 `cache-policy` 参数来分别测试 file/vectorIndex/content 三种不同缓存方式的性能。
@Note: 注意 bench 使用的 src 文件要是生成对应 xdb 文件相同的源文件。
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.lionsoul</groupId>
<artifactId>ip2region</artifactId>
<version>2.6.1</version>
<packaging>jar</packaging>
<name>ip2region</name>
<url>https://github.com/lionsoul2014/ip2region</url>
<description>Open source offline internet address db manager framework and locator</description>
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>repo</distribution>
</license>
</licenses>
<scm>
<url>git@github.com:lionsoul2014/ip2region.git</url>
<connection>scm:git:git@github.com:lionsoul2014/ip2region.git</connection>
<developerConnection>scm:git:git@github.com:lionsoul2014/ip2region.git</developerConnection>
</scm>
<developers>
<developer>
<id>lionsoul</id>
<name>chenxin</name>
<email>chenxin619315@gmail.com</email>
</developer>
</developers>
<issueManagement>
<url>https://github.com/lionsoul2014/ip2region/issues</url>
<system>Github issues</system>
</issueManagement>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.1</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.1.2</version>
<executions>
<execution>
<id>attach-sources</id>
<phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.9</version>
<executions>
<execution>
<id>attach-javadocs</id>
<phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<additionalparam>${javadoc.opts}</additionalparam>
</configuration>
</execution>
</executions>
<configuration>
<failOnError>false</failOnError>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>1.4</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>org.lionsoul.ip2region.SearchTest</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>java8-doclint-disabled</id>
<activation>
<jdk>[1.8,)</jdk>
</activation>
<properties>
<javadoc.opts>-Xdoclint:none</javadoc.opts>
</properties>
</profile>
<profile>
<id>release</id>
<build>
<plugins>
<!-- Source -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.2.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Javadoc -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.9.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<additionalparam>${javadoc.opts}</additionalparam>
</configuration>
</execution>
</executions>
</plugin>
<!-- GPG -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>1.5</version>
<executions>
<execution>
<phase>verify</phase>
<goals>
<goal>sign</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<distributionManagement>
<snapshotRepository>
<id>oss</id>
<url>https://oss.sonatype.org/content/repositories/snapshots/</url>
</snapshotRepository>
<repository>
<id>oss</id>
<url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
</repository>
</distributionManagement>
</profile>
</profiles>
</project>
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/23
package org.lionsoul.ip2region;
import org.lionsoul.ip2region.xdb.Searcher;
import java.io.*;
import java.util.concurrent.TimeUnit;
/**
 * Command line test program for the xdb {@link Searcher}.
 *
 * <p>Two commands are supported: {@code search}, an interactive prompt that
 * looks up each ip typed on stdin, and {@code bench}, which replays a source
 * ip segment file against the xdb file and verifies every returned region.
 */
public class SearchTest {

    /** Prints the top level usage; {@code args} is not inspected. */
    public static void printHelp(String[] args) {
        System.out.print("ip2region xdb searcher\n");
        System.out.print("java -jar ip2region-{version}.jar [command] [command options]\n");
        System.out.print("Command: \n");
        System.out.print(" search search input test\n");
        System.out.print(" bench search bench test\n");
    }

    /**
     * Creates a searcher for the given cache policy.
     *
     * @param dbPath      path of the xdb file
     * @param cachePolicy one of {@code file}, {@code vectorIndex}, {@code content}
     * @return the searcher matching the policy
     * @throws IOException when the policy is unknown or the xdb file cannot be read
     */
    public static Searcher createSearcher(String dbPath, String cachePolicy) throws IOException {
        if ("file".equals(cachePolicy)) {
            return Searcher.newWithFileOnly(dbPath);
        } else if ("vectorIndex".equals(cachePolicy)) {
            byte[] vIndex = Searcher.loadVectorIndexFromFile(dbPath);
            return Searcher.newWithVectorIndex(dbPath, vIndex);
        } else if ("content".equals(cachePolicy)) {
            byte[] cBuff = Searcher.loadContentFromFile(dbPath);
            return Searcher.newWithBuffer(cBuff);
        } else {
            throw new IOException("invalid cache policy `" + cachePolicy + "`, options: file/vectorIndex/content");
        }
    }

    /**
     * Runs the interactive search prompt.
     *
     * <p>Recognised options are {@code --db=} and {@code --cache-policy=};
     * arguments shorter than 5 characters or not starting with {@code --}
     * (such as the command name itself) are ignored. The loop ends on the
     * input {@code quit} or when stdin reaches end of file.
     *
     * @throws IOException when the searcher cannot be created or stdin fails
     */
    public static void searchTest(String[] args) throws IOException {
        String dbPath = "", cachePolicy = "vectorIndex";
        for (final String r : args) {
            if (r.length() < 5) {
                continue;
            }

            if (r.indexOf("--") != 0) {
                continue;
            }

            int sIdx = r.indexOf('=');
            if (sIdx < 0) {
                System.out.printf("missing = for args pair `%s`\n", r);
                return;
            }

            String key = r.substring(2, sIdx);
            String val = r.substring(sIdx + 1);
            if ("db".equals(key)) {
                dbPath = val;
            } else if ("cache-policy".equals(key)) {
                cachePolicy = val;
            } else {
                System.out.printf("undefined option `%s`\n", r);
                return;
            }
        }

        if (dbPath.length() < 1) {
            System.out.print("java -jar ip2region-{version}.jar search [command options]\n");
            System.out.print("options:\n");
            System.out.print(" --db string ip2region binary xdb file path\n");
            System.out.print(" --cache-policy string cache policy: file/vectorIndex/content\n");
            return;
        }

        Searcher searcher = createSearcher(dbPath, cachePolicy);
        try {
            final BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
            try {
                System.out.printf("ip2region xdb searcher test program, cachePolicy: %s\ntype 'quit' to exit\n", cachePolicy);
                while (true) {
                    System.out.print("ip2region>> ");
                    String input = reader.readLine();
                    if (input == null) {
                        // stdin was closed (Ctrl-D / piped input exhausted)
                        break;
                    }

                    String line = input.trim();
                    if (line.length() < 2) {
                        continue;
                    }

                    if (line.equalsIgnoreCase("quit")) {
                        break;
                    }

                    try {
                        // keep the timestamp as a long: a double cannot hold
                        // every nanoTime value exactly
                        long sTime = System.nanoTime();
                        String region = searcher.searchByStr(line);
                        long cost = TimeUnit.NANOSECONDS.toMicros(System.nanoTime() - sTime);
                        System.out.printf("{region: %s, ioCount: %d, took: %d μs}\n", region, searcher.getIOCount(), cost);
                    } catch (Exception e) {
                        System.out.printf("{err: %s, ioCount: %d}\n", e, searcher.getIOCount());
                    }
                }
            } finally {
                reader.close();
            }
        } finally {
            searcher.close();
        }

        System.out.println("searcher test program exited, thanks for trying");
    }

    /**
     * Runs the bench test.
     *
     * <p>Recognised options are {@code --db=}, {@code --src=} and
     * {@code --cache-policy=}. Every line of the source file is expected to be
     * {@code startIp|endIp|region}; five ips spread over each segment are
     * searched and the result is compared with the region of that line. The
     * run stops at the first malformed line or mismatch.
     *
     * @throws IOException when the searcher cannot be created or a read fails
     */
    public static void benchTest(String[] args) throws IOException {
        String dbPath = "", srcPath = "", cachePolicy = "vectorIndex";
        for (final String r : args) {
            if (r.length() < 5) {
                continue;
            }

            if (r.indexOf("--") != 0) {
                continue;
            }

            int sIdx = r.indexOf('=');
            if (sIdx < 0) {
                System.out.printf("missing = for args pair `%s`\n", r);
                return;
            }

            String key = r.substring(2, sIdx);
            String val = r.substring(sIdx + 1);
            if ("db".equals(key)) {
                dbPath = val;
            } else if ("src".equals(key)) {
                srcPath = val;
            } else if ("cache-policy".equals(key)) {
                cachePolicy = val;
            } else {
                System.out.printf("undefined option `%s`\n", r);
                return;
            }
        }

        if (dbPath.length() < 1 || srcPath.length() < 1) {
            System.out.print("java -jar ip2region-{version}.jar bench [command options]\n");
            System.out.print("options:\n");
            System.out.print(" --db string ip2region binary xdb file path\n");
            System.out.print(" --src string source ip text file path\n");
            System.out.print(" --cache-policy string cache policy: file/vectorIndex/content\n");
            return;
        }

        Searcher searcher = createSearcher(dbPath, cachePolicy);
        long count = 0, costs = 0, tStart = System.nanoTime();
        // the try/finally pairs make sure the reader and the searcher are
        // released on every early return below
        try {
            final BufferedReader reader = new BufferedReader(new FileReader(srcPath));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    String l = line.trim();
                    String[] ps = l.split("\\|", 3);
                    if (ps.length != 3) {
                        System.out.printf("invalid ip segment `%s`\n", l);
                        return;
                    }

                    long sip;
                    try {
                        sip = Searcher.checkIpAddr(ps[0]);
                    } catch (Exception e) {
                        System.out.printf("check start ip `%s`: %s\n", ps[0], e);
                        return;
                    }

                    long eip;
                    try {
                        eip = Searcher.checkIpAddr(ps[1]);
                    } catch (Exception e) {
                        System.out.printf("check end ip `%s`: %s\n", ps[1], e);
                        return;
                    }

                    if (sip > eip) {
                        System.out.printf("start ip(%s) should not be greater than end ip(%s)\n", ps[0], ps[1]);
                        return;
                    }

                    // probe the start, the end and three points in between
                    long mip = (sip + eip) >> 1;
                    for (final long ip : new long[]{sip, (sip + mip) >> 1, mip, (mip + eip) >> 1, eip}) {
                        long sTime = System.nanoTime();
                        String region = searcher.search(ip);
                        costs += System.nanoTime() - sTime;

                        // check the region info
                        if (!ps[2].equals(region)) {
                            System.out.printf("failed search(%s) with (%s != %s)\n", Searcher.long2ip(ip), region, ps[2]);
                            return;
                        }

                        count++;
                    }
                }
            } finally {
                reader.close();
            }
        } finally {
            searcher.close();
        }

        long took = System.nanoTime() - tStart;
        System.out.printf("Bench finished, {cachePolicy: %s, total: %d, took: %ds, cost: %d μs/op}\n",
                cachePolicy, count, TimeUnit.NANOSECONDS.toSeconds(took),
                count == 0 ? 0 : TimeUnit.NANOSECONDS.toMicros(costs/count));
    }

    /** Dispatches to {@code search} or {@code bench}; anything else prints the help. */
    public static void main(String[] args) {
        if (args.length < 1) {
            printHelp(args);
            return;
        }

        if ("search".equals(args[0])) {
            try {
                searchTest(args);
            } catch (IOException e) {
                System.out.printf("failed running search test: %s\n", e);
            }
        } else if ("bench".equals(args[0])) {
            try {
                benchTest(args);
            } catch (IOException e) {
                System.out.printf("failed running bench test: %s\n", e);
            }
        } else {
            printHelp(args);
        }
    }

}
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/23
package org.lionsoul.ip2region;
import org.lionsoul.ip2region.xdb.Searcher;
import java.io.IOException;
/**
 * Small manual check of the ip string / long conversion helpers of
 * {@link Searcher}.
 */
public class UtilTest {

    /**
     * Converts "1.2.3.4" to its numeric form and back, printing which step
     * failed or a "passed" line with all three values.
     */
    public static void testIP2Long() {
        final String ip = "1.2.3.4";

        final long ipAddr;
        try {
            ipAddr = Searcher.checkIpAddr(ip);
        } catch (Exception e) {
            System.out.printf("failed to check ip: %s\n", e);
            return;
        }

        // 1.2.3.4 => 0x01020304
        final boolean numericOk = (ipAddr == 16909060L);
        if (!numericOk) {
            System.out.print("failed ip2long\n");
            return;
        }

        final String ip2 = Searcher.long2ip(ipAddr);
        if (ip.equals(ip2)) {
            System.out.printf("passed: ip=%s, ipAddr=%d, ip2=%s\n", ip, ipAddr, ip2);
            return;
        }

        System.out.print("failed long2ip\n");
    }

    /** Runs the conversion check. */
    public static void main(String[] args) {
        System.out.print("testing IP2Long ... \n");
        testIP2Long();
    }

}
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/23
package org.lionsoul.ip2region.xdb;
import java.awt.image.SampleModel;
/**
 * Decoded xdb header fields.
 *
 * <p>The fields are read from the first 16 bytes of the buffer with
 * {@link Searcher#getInt2} and {@link Searcher#getInt}: version at offset 0,
 * index policy at 2, creation time at 4, start index pointer at 8 and end
 * index pointer at 12.
 */
public class Header {
    public final int version;
    public final int indexPolicy;
    public final int createdAt;
    public final int startIndexPtr;
    public final int endIndexPtr;

    /**
     * @param buff header bytes; at least 16 bytes are read
     */
    public Header(byte[] buff) {
        assert buff.length >= 16;
        version = Searcher.getInt2(buff, 0);
        indexPolicy = Searcher.getInt2(buff, 2);
        createdAt = Searcher.getInt(buff, 4);
        startIndexPtr = Searcher.getInt(buff, 8);
        endIndexPtr = Searcher.getInt(buff, 12);
    }

    /** Renders every field as {@code Name: value}, comma separated, inside braces. */
    @Override public String toString() {
        return "{" +
            "Version: " + version + ',' +
            "IndexPolicy: " + indexPolicy + ',' +
            "CreatedAt: " + createdAt + ',' +
            "StartIndexPtr: " + startIndexPtr + ',' +
            "EndIndexPtr: " + endIndexPtr +
            '}';
    }
}
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
package org.lionsoul.ip2region.xdb;
// xdb searcher (Not thread safe implementation)
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/23
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Arrays;
/**
 * xdb searcher.
 *
 * <p>A searcher works in one of three modes, chosen by the factory method:
 * file only, file plus a pre-loaded vector index, or a fully in-memory
 * content buffer. File backed instances seek and read on one shared handle
 * and update {@code ioCount} on every search, so they must not be used from
 * several threads at once.
 */
public class Searcher {
    // constant defined copied from the xdb maker
    public static final int HeaderInfoLength = 256;
    public static final int VectorIndexRows = 256;
    public static final int VectorIndexCols = 256;
    public static final int VectorIndexSize = 8;
    public static final int SegmentIndexSize = 14;

    // random access file handle for file based search
    private final RandomAccessFile handle;

    // number of file reads done by the last search
    private int ioCount = 0;

    // vector index.
    // use the byte[] instead of VectorIndex entry array to keep
    // the minimal memory allocation.
    private final byte[] vectorIndex;

    // xdb content buffer, used for in-memory search
    private final byte[] contentBuff;

    // --- static method to create searchers

    /** Creates a searcher that reads the vector index and segments from the file. */
    public static Searcher newWithFileOnly(String dbPath) throws IOException {
        return new Searcher(dbPath, null, null);
    }

    /** Creates a file backed searcher that uses the given pre-loaded vector index. */
    public static Searcher newWithVectorIndex(String dbPath, byte[] vectorIndex) throws IOException {
        return new Searcher(dbPath, vectorIndex, null);
    }

    /** Creates a searcher that works only on the given in-memory xdb content. */
    public static Searcher newWithBuffer(byte[] cBuff) throws IOException {
        return new Searcher(null, null, cBuff);
    }

    // --- End of creator

    /**
     * @param dbFile      xdb file path, opened read-only unless {@code cBuff} is given
     * @param vectorIndex optional pre-loaded vector index (ignored when {@code cBuff} is given)
     * @param cBuff       optional whole xdb content; when non-null no file is opened
     * @throws IOException when the xdb file cannot be opened
     */
    public Searcher(String dbFile, byte[] vectorIndex, byte[] cBuff) throws IOException {
        if (cBuff != null) {
            this.handle = null;
            this.vectorIndex = null;
            this.contentBuff = cBuff;
        } else {
            this.handle = new RandomAccessFile(dbFile, "r");
            this.vectorIndex = vectorIndex;
            this.contentBuff = null;
        }
    }

    /** Closes the file handle, if this searcher opened one. */
    public void close() throws IOException {
        if (this.handle != null) {
            this.handle.close();
        }
    }

    /** @return the number of file reads performed by the most recent search */
    public int getIOCount() {
        return ioCount;
    }

    /**
     * Searches the region of a dotted ip string.
     *
     * @throws Exception when the ip is invalid or a read fails
     */
    public String searchByStr(String ip) throws Exception {
        long ipAddr = checkIpAddr(ip);
        return search(ipAddr);
    }

    /**
     * Searches the region of a numeric ip address.
     *
     * @param ip the ip as an unsigned 32-bit value held in a long
     * @return the region string, or {@code null} when no segment contains the ip
     * @throws IOException when a read from the xdb file fails
     */
    public String search(long ip) throws IOException {
        // reset the global counter
        this.ioCount = 0;

        // locate the segment index block based on the vector index:
        // the two leading ip bytes select the row and the column
        int sPtr = 0, ePtr = 0;
        int il0 = (int) ((ip >> 24) & 0xFF);
        int il1 = (int) ((ip >> 16) & 0xFF);
        int idx = il0 * VectorIndexCols * VectorIndexSize + il1 * VectorIndexSize;

        if (vectorIndex != null) {
            sPtr = getInt(vectorIndex, idx);
            ePtr = getInt(vectorIndex, idx + 4);
        } else if (contentBuff != null) {
            sPtr = getInt(contentBuff, HeaderInfoLength + idx);
            ePtr = getInt(contentBuff, HeaderInfoLength + idx + 4);
        } else {
            final byte[] buff = new byte[8];
            read(HeaderInfoLength + idx, buff);
            sPtr = getInt(buff, 0);
            ePtr = getInt(buff, 4);
        }

        // binary search the segment index block to get the region info
        final byte[] buff = new byte[SegmentIndexSize];
        int dataLen = -1, dataPtr = -1;
        int l = 0, h = (ePtr - sPtr) / SegmentIndexSize;
        while (l <= h) {
            int m = (l + h) >> 1;
            int p = sPtr + m * SegmentIndexSize;

            // read the segment index
            read(p, buff);
            long sip = getIntLong(buff, 0);
            if (ip < sip) {
                h = m - 1;
            } else {
                long eip = getIntLong(buff, 4);
                if (ip > eip) {
                    l = m + 1;
                } else {
                    dataLen = getInt2(buff, 8);
                    dataPtr = getInt(buff, 10);
                    break;
                }
            }
        }

        // empty match interception
        if (dataPtr < 0) {
            return null;
        }

        // load and return the region data
        final byte[] regionBuff = new byte[dataLen];
        read(dataPtr, regionBuff);
        // NOTE(review): decodes with the platform default charset; the region
        // data is presumably UTF-8 - confirm before pinning an explicit charset.
        return new String(regionBuff);
    }

    /**
     * Fills {@code buffer} with the bytes starting at {@code offset}, taken
     * from the content buffer when present and from the file otherwise.
     *
     * @throws IOException when the file ends before the buffer is full
     */
    protected void read(int offset, byte[] buffer) throws IOException {
        // check the in-memory buffer first
        if (contentBuff != null) {
            // @TODO: reduce data copying, directly decode the data ?
            System.arraycopy(contentBuff, offset, buffer, 0, buffer.length);
            return;
        }

        // read from the file handle
        assert handle != null;
        handle.seek(offset);

        this.ioCount++;
        fill(handle, buffer);
    }

    /**
     * Reads exactly {@code buffer.length} bytes from the current position of
     * {@code handle}; a single {@code read} call may legally return fewer.
     */
    private static void fill(RandomAccessFile handle, byte[] buffer) throws IOException {
        try {
            handle.readFully(buffer);
        } catch (java.io.EOFException e) {
            throw new IOException("incomplete read: read bytes should be " + buffer.length, e);
        }
    }

    // --- static cache util function

    /**
     * Loads the header from the start of the file.
     *
     * @throws IOException when fewer than {@code HeaderInfoLength} bytes are available
     */
    public static Header loadHeader(RandomAccessFile handle) throws IOException {
        handle.seek(0);
        final byte[] buff = new byte[HeaderInfoLength];
        fill(handle, buff);
        return new Header(buff);
    }

    /** Opens {@code dbPath}, loads the header and closes the file again. */
    public static Header loadHeaderFromFile(String dbPath) throws IOException {
        final RandomAccessFile handle = new RandomAccessFile(dbPath, "r");
        try {
            return loadHeader(handle);
        } finally {
            handle.close();
        }
    }

    /**
     * Loads the vector index that follows the header: rows * cols entries of
     * {@code VectorIndexSize} bytes each.
     *
     * @throws IOException when the file ends before the whole index is read
     */
    public static byte[] loadVectorIndex(RandomAccessFile handle) throws IOException {
        handle.seek(HeaderInfoLength);
        // each vector index entry is VectorIndexSize bytes (two 4-byte
        // pointers, see search()), not SegmentIndexSize
        int len = VectorIndexRows * VectorIndexCols * VectorIndexSize;
        final byte[] buff = new byte[len];
        fill(handle, buff);
        return buff;
    }

    /** Opens {@code dbPath}, loads the vector index and closes the file again. */
    public static byte[] loadVectorIndexFromFile(String dbPath) throws IOException {
        final RandomAccessFile handle = new RandomAccessFile(dbPath, "r");
        try {
            return loadVectorIndex(handle);
        } finally {
            handle.close();
        }
    }

    /**
     * Loads the whole xdb file into memory.
     *
     * @throws IOException when the file does not fit a byte array or the read is incomplete
     */
    public static byte[] loadContent(RandomAccessFile handle) throws IOException {
        final long size = handle.length();
        if (size > Integer.MAX_VALUE) {
            // the cast below would silently wrap around
            throw new IOException("xdb file too large to buffer: " + size + " bytes");
        }

        handle.seek(0);
        final byte[] buff = new byte[(int) size];
        fill(handle, buff);
        return buff;
    }

    /** Opens {@code dbPath}, loads its whole content and closes the file again. */
    public static byte[] loadContentFromFile(String dbPath) throws IOException {
        final RandomAccessFile handle = new RandomAccessFile(dbPath, "r");
        try {
            return loadContent(handle);
        } finally {
            handle.close();
        }
    }

    // --- End cache load util function

    // --- static util method

    /* get an unsigned 4-bytes little-endian int as a long from the specified offset */
    public static long getIntLong(byte[] b, int offset) {
        return (
            ((b[offset++] & 0x000000FFL)) |
            ((b[offset++] << 8) & 0x0000FF00L) |
            ((b[offset++] << 16) & 0x00FF0000L) |
            ((b[offset ] << 24) & 0xFF000000L)
        );
    }

    /* get a 4-bytes little-endian int from the specified offset */
    public static int getInt(byte[] b, int offset) {
        return (
            ((b[offset++] & 0x000000FF)) |
            ((b[offset++] << 8) & 0x0000FF00) |
            ((b[offset++] << 16) & 0x00FF0000) |
            ((b[offset ] << 24) & 0xFF000000)
        );
    }

    /* get an unsigned 2-bytes little-endian int from the specified offset */
    public static int getInt2(byte[] b, int offset) {
        return (
            (b[offset++] & 0x000000FF) |
            // the second byte is the high byte and has to be shifted into place
            ((b[offset ] << 8) & 0x0000FF00)
        );
    }

    /* long int to ip string */
    public static String long2ip( long ip )
    {
        return String.valueOf((ip >> 24) & 0xFF) + '.' +
            ((ip >> 16) & 0xFF) + '.' + ((ip >> 8) & 0xFF) + '.' + ((ip) & 0xFF);
    }

    public static final byte[] shiftIndex = {24, 16, 8, 0};

    /**
     * Checks a dotted ip string and converts it to its numeric value.
     *
     * @throws Exception when the string does not have four parts or a part
     *                   is not an integer within 0..255
     */
    public static long checkIpAddr(String ip) throws Exception {
        String[] ps = ip.split("\\.");
        if (ps.length != 4) {
            throw new Exception("invalid ip address `" + ip + "`");
        }

        long ipAddr = 0;
        for (int i = 0; i < ps.length; i++) {
            int val = Integer.parseInt(ps[i]);
            if (val < 0) {
                throw new Exception("ip part `"+ps[i]+"` should not be negative");
            }

            if (val > 255) {
                throw new Exception("ip part `"+ps[i]+"` should be less then 256");
            }

            ipAddr |= ((long) val << shiftIndex[i]);
        }

        return ipAddr & 0xFFFFFFFFL;
    }

}
\ No newline at end of file
# ip2region xdb php 查询客户端实现
# 使用方式
### 完全基于文件的查询
```php
$dbFile = "ip2region.xdb file path";
try {
$searcher = XdbSearcher::newWithFileOnly($dbFile);
} catch (Exception $e) {
printf("failed to create searcher with '%s': %s\n", $dbFile, $e);
return;
}
$ip = '1.2.3.4';
$sTime = XdbSearcher::now();
$region = $searcher->search($ip);
if ($region === null) {
// something is wrong
printf("failed search(%s)\n", $ip);
return;
}
printf("{region: %s, took: %.5f ms}\n", $region, XdbSearcher::now() - $sTime);
// 备注:并发使用,每个线程或者协程需要创建一个独立的 searcher 对象。
```
### 缓存 `VectorIndex` 索引
如果你的 php 母环境支持,可以预先加载 vectorIndex 缓存,然后做成全局变量,每次创建 Searcher 的时候使用全局的 vectorIndex,可以减少一次固定的 IO 操作从而加速查询,减少 io 压力。
```php
// 1、从 dbPath 加载 VectorIndex 缓存,把下述的 vIndex 变量缓存到内存里面。
$vIndex = XdbSearcher::loadVectorFromFile($dbPath);
if ($vIndex === null) {
printf("failed to load vector index from '%s'\n", $dbPath);
return;
}
// 2、使用全局的 vIndex 创建带 VectorIndex 缓存的查询对象。
try {
$searcher = XdbSearcher::newWithVectorIndex($dbPath, $vIndex);
} catch (Exception $e) {
printf("failed to create vectorIndex cached searcher with '%s': %s\n", $dbPath, $e);
return;
}
// 3、查询
$sTime = XdbSearcher::now();
$region = $searcher->search('1.2.3.4');
if ($region === null) {
printf("failed search(1.2.3.4)\n");
return;
}
printf("{region: %s, took: %.5f ms}\n", $region, XdbSearcher::now() - $sTime);
// 备注:并发使用,每个线程或者协程需要创建一个独立的 searcher 对象,但是都共享统一的只读 vectorIndex。
```
### 缓存整个 xdb 数据
如果你的 PHP 母环境支持,可以预先加载整个 `xdb` 的数据到内存,这样可以实现完全基于内存的查询,类似之前的 memory search 查询。
```php
// 1、从 dbPath 加载整个 xdb 到内存。
$cBuff = XdbSearcher::loadContentFromFile($dbPath);
if ($cBuff === null) {
printf("failed to load content buffer from '%s'\n", $dbPath);
return;
}
// 2、使用全局的 cBuff 创建带完全基于内存的查询对象。
try {
$searcher = XdbSearcher::newWithBuffer($cBuff);
} catch (Exception $e) {
printf("failed to create buffer cached searcher: %s\n", $e);
return;
}
// 3、查询
$sTime = XdbSearcher::now();
$region = $searcher->search('1.2.3.4');
if ($region === null) {
printf("failed search(1.2.3.4)\n");
return;
}
printf("{region: %s, took: %.5f ms}\n", $region, XdbSearcher::now() - $sTime);
// 备注:并发使用,用整个 xdb 缓存创建的 searcher 对象可以安全用于并发。
```
# 查询测试
通过 `search_test.php` 脚本来进行查询测试:
```bash
➜ php git:(v2.0_xdb) ✗ php ./search_test.php
php ./search_test.php [command options]
options:
--db string ip2region binary xdb file path
--cache-policy string cache policy: file/vectorIndex/content
```
例如:使用默认的 data/ip2region.xdb 进行查询测试:
```bash
➜ php git:(v2.0_xdb) ✗ php ./search_test.php --db=../../data/ip2region.xdb --cache-policy=vectorIndex
ip2region xdb searcher test program, cachePolicy: vectorIndex
type 'quit' to exit
ip2region>> 1.2.3.4
{region: 美国|0|华盛顿|0|谷歌, ioCount: 7, took: 0.04492 ms}
ip2region>>
```
输入 ip 即可进行查询测试。也可以设置 `cache-policy` 来分别测试 file/vectorIndex/content 三种不同缓存类型的查询。
# bench 测试
通过 `bench_test.php` 脚本来进行自动 bench 测试,一方面确保 `xdb` 文件没有错误,一方面通过大量的查询测试平均查询性能:
```bash
➜ php git:(v2.0_xdb) ✗ php ./bench_test.php
php ./bench_test.php [command options]
options:
--db string ip2region binary xdb file path
--src string source ip text file path
--cache-policy string cache policy: file/vectorIndex/content
```
例如:通过默认的 data/ip2region.xdb 和 data/ip.merge.txt 来进行 bench 测试:
```bash
➜ php git:(v2.0_xdb) ✗ php ./bench_test.php --db=../../data/ip2region.xdb --src=../../data/ip.merge.txt --cache-policy=vectorIndex
Bench finished, {cachePolicy: vectorIndex, total: 3417955, took: 15s, cost: 0.005 ms/op}
```
可以通过设置 `cache-policy` 参数来分别测试 file/vectorIndex/content 三种不同的缓存类型的性能。
@Note:请注意 bench 使用的 src 文件需要是生成对应的 xdb 文件的相同的源文件。
<?php
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
//
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/21
class XdbSearcher
{
const HeaderInfoLength = 256;
const VectorIndexRows = 256;
const VectorIndexCols = 256;
const VectorIndexSize = 8;
const SegmentIndexSize = 14;
// xdb file handle
private $handle = null;
// header info
private $header = null;
private $ioCount = 0;
// vector index in binary string.
// string decode will be faster than the map based Array.
private $vectorIndex = null;
// xdb content buffer
private $contentBuff = null;
// ---
// static function to create searcher
/**
 * Create a searcher that works on the xdb file only: neither a vector
 * index nor a content buffer is handed to the constructor.
 *
 * @param string $dbFile xdb file path
 * @return XdbSearcher
 * @throws Exception
 */
public static function newWithFileOnly($dbFile) {
    $vectorIndex = null;
    $contentBuff = null;
    return new self($dbFile, $vectorIndex, $contentBuff);
}
/**
 * Create a file based searcher that uses the given vector index.
 *
 * @param string $dbFile xdb file path
 * @param string $vIndex vector index bytes
 * @return XdbSearcher
 * @throws Exception
 */
public static function newWithVectorIndex($dbFile, $vIndex) {
    // the content buffer keeps the constructor default
    $searcher = new self($dbFile, $vIndex);
    return $searcher;
}
/**
 * Create a searcher that works on an in-memory xdb content buffer;
 * no file path and no vector index are passed on.
 *
 * @param string $cBuff whole xdb content
 * @return XdbSearcher
 * @throws Exception
 */
public static function newWithBuffer($cBuff) {
    $dbFile = null;
    $vectorIndex = null;
    return new self($dbFile, $vectorIndex, $cBuff);
}
// --- End of static creator
/**
 * Initialize the xdb searcher.
 *
 * When a content buffer is given it is used on its own and no file is
 * opened; otherwise the xdb file is opened for reading and the optional
 * vector index is kept for later searches.
 *
 * @param string|null $dbFile xdb file path
 * @param string|null $vectorIndex vector index bytes
 * @param string|null $cBuff whole xdb content
 * @throws Exception when the xdb file cannot be opened
 */
function __construct($dbFile, $vectorIndex=null, $cBuff=null) {
    // check the content buffer first
    if ($cBuff != null) {
        $this->vectorIndex = null;
        $this->contentBuff = $cBuff;
        return;
    }

    // open the xdb binary file ("b": no newline translation on any platform)
    $handle = fopen($dbFile, "rb");
    if ($handle === false) {
        // Exception takes (message, code): the path has to be formatted
        // into the message instead of being passed as the second argument
        throw new Exception(sprintf("failed to open xdb file '%s'", $dbFile));
    }

    $this->handle = $handle;
    $this->vectorIndex = $vectorIndex;
}
/**
 * Close the xdb file handle, if this searcher opened one.
 * Calling it again afterwards is a no-op.
 */
function close() {
    if ($this->handle != null) {
        fclose($this->handle);
        // forget the handle so a repeated close() does not hand an
        // already closed resource to fclose()
        $this->handle = null;
    }
}
/**
 * Get the io counter; search() resets it and read() increments it for
 * every file read.
 *
 * @return int
 */
function getIOCount() {
    $count = $this->ioCount;
    return $count;
}
/**
 * Find the region info for the specified ip address.
 *
 * @param string|int $ip dotted ip string or its numeric value
 * @return string|null the region data, or null when nothing matched or
 *                     the region data could not be read
 * @throws Exception for an invalid ip string or a failed index read
 */
function search($ip) {
    // check and convert the string ip to a 4-bytes long
    if (is_string($ip)) {
        $t = self::ip2long($ip);
        if ($t === null) {
            throw new Exception("invalid ip address `$ip`");
        }
        $ip = $t;
    }

    // reset the global counter
    $this->ioCount = 0;

    // locate the segment index block based on the vector index:
    // the two leading ip bytes select the row and the column
    $il0 = ($ip >> 24) & 0xFF;
    $il1 = ($ip >> 16) & 0xFF;
    $idx = $il0 * self::VectorIndexCols * self::VectorIndexSize + $il1 * self::VectorIndexSize;
    if ($this->vectorIndex != null) {
        $sPtr = self::getLong($this->vectorIndex, $idx);
        $ePtr = self::getLong($this->vectorIndex, $idx + 4);
    } else if ($this->contentBuff != null) {
        $sPtr = self::getLong($this->contentBuff, self::HeaderInfoLength + $idx);
        $ePtr = self::getLong($this->contentBuff, self::HeaderInfoLength + $idx + 4);
    } else {
        // read the vector index block
        $buff = $this->read(self::HeaderInfoLength + $idx, 8);
        if ($buff === null) {
            throw new Exception("failed to read vector index at {$idx}");
        }

        $sPtr = self::getLong($buff, 0);
        $ePtr = self::getLong($buff, 4);
    }

    // binary search the segment index to get the region info
    $dataLen = 0;
    $dataPtr = null;
    $l = 0;
    // keep the upper bound an integer: "/" yields a float whenever the
    // division is not exact, and shifting a fractional float is deprecated
    $h = (int) (($ePtr - $sPtr) / self::SegmentIndexSize);
    while ($l <= $h) {
        $m = ($l + $h) >> 1;
        $p = $sPtr + $m * self::SegmentIndexSize;

        // read the segment index
        $buff = $this->read($p, self::SegmentIndexSize);
        if ($buff == null) {
            throw new Exception("failed to read segment index at {$p}");
        }

        $sip = self::getLong($buff, 0);
        if ($ip < $sip) {
            $h = $m - 1;
        } else {
            $eip = self::getLong($buff, 4);
            if ($ip > $eip) {
                $l = $m + 1;
            } else {
                $dataLen = self::getShort($buff, 8);
                $dataPtr = self::getLong($buff, 10);
                break;
            }
        }
    }

    // match nothing interception.
    // @TODO: could this even be a case ?
    if ($dataPtr == null) {
        return null;
    }

    // load and return the region data
    $buff = $this->read($dataPtr, $dataLen);
    if ($buff == null) {
        return null;
    }

    return $buff;
}
/**
 * Read $len bytes starting at $offset, from the in-memory content buffer
 * when one is set, otherwise from the xdb file handle.
 *
 * @param int $offset byte offset to start reading from
 * @param int $len    number of bytes to read
 * @return string|null exactly $len bytes, or null on a seek failure,
 *                     a read failure or a short read
 */
private function read($offset, $len) {
    // check the in-memory buffer first
    if ($this->contentBuff != null) {
        $buff = substr($this->contentBuff, $offset, $len);
        // substr() gives false (PHP < 8) or a shorter string when the range
        // runs past the end of the buffer; report it like a short file read
        if ($buff === false || strlen($buff) != $len) {
            return null;
        }
        return $buff;
    }

    // read from the file
    $r = fseek($this->handle, $offset);
    if ($r == -1) {
        return null;
    }

    // count the read attempt, whether or not it succeeds
    $this->ioCount++;
    $buff = fread($this->handle, $len);
    if ($buff === false || strlen($buff) != $len) {
        return null;
    }

    return $buff;
}
// --- static util functions ----
/**
 * Convert a dotted ip string to its long representation.
 *
 * @param string $ip
 * @return int|string|null null when the built-in ip2long() rejects the
 *         string; on builds with 4-byte integers a negative result is
 *         returned as its unsigned decimal string
 */
public static function ip2long($ip)
{
    $long = ip2long($ip);
    if ($long === false) {
        return null;
    }

    // convert signed int to unsigned int if on 32 bit operating system
    $wrapped = ($long < 0) && (PHP_INT_SIZE == 4);
    return $wrapped ? sprintf("%u", $long) : $long;
}
/**
 * Decode a 4-byte little-endian integer from a byte buffer.
 *
 * @param string $b   byte buffer
 * @param int    $idx offset of the least significant byte
 * @return int|string on builds with 4-byte integers a negative result is
 *         returned as its unsigned decimal string
 */
public static function getLong($b, $idx)
{
    $b0 = ord($b[$idx]);
    $b1 = ord($b[$idx + 1]);
    $b2 = ord($b[$idx + 2]);
    $b3 = ord($b[$idx + 3]);
    $val = $b0 | ($b1 << 8) | ($b2 << 16) | ($b3 << 24);

    // convert signed int to unsigned int if on 32 bit operating system
    if (PHP_INT_SIZE == 4 && $val < 0) {
        return sprintf("%u", $val);
    }

    return $val;
}
/**
 * Decode a 2-byte little-endian integer from a byte buffer.
 *
 * @param string $b   byte buffer
 * @param int    $idx offset of the low byte
 * @return int
 */
public static function getShort($b, $idx)
{
    $low = ord($b[$idx]);
    $high = ord($b[$idx + 1]);
    return $low | ($high << 8);
}
/**
 * Load and decode the header info from a file handle.
 *
 * Reads HeaderInfoLength bytes from the start of the stream and decodes
 * the little-endian fields at offsets 0, 2, 4, 8 and 12.
 *
 * @param resource $handle
 * @return array|null the decoded fields, or null on a seek failure,
 *                    a read failure or a short read
 */
public static function loadHeader($handle) {
    $headerLen = self::HeaderInfoLength;
    if (fseek($handle, 0) == -1) {
        return null;
    }

    // a failed read and a short read are both reported as null
    $raw = fread($handle, $headerLen);
    if ($raw === false || strlen($raw) != $headerLen) {
        return null;
    }

    // decode the header fields in their on-disk order
    $header = array();
    $header['version'] = self::getShort($raw, 0);
    $header['indexPolicy'] = self::getShort($raw, 2);
    $header['createdAt'] = self::getLong($raw, 4);
    $header['startIndexPtr'] = self::getLong($raw, 8);
    $header['endIndexPtr'] = self::getLong($raw, 12);
    return $header;
}
/**
 * Load the header info from the specified xdb file path.
 *
 * @param string $dbFile
 * @return array|null the decoded header, or null when the file cannot be
 *                    opened or the header cannot be read
 */
public static function loadHeaderFromFile($dbFile) {
    $handle = fopen($dbFile, 'r');
    if ($handle === false) {
        return null;
    }

    // the handle is local to this call, so release it before returning
    $header = self::loadHeader($handle);
    fclose($handle);
    return $header;
}
/**
 * Load the vector index block from a file handle.
 *
 * The block starts right after the header (offset HeaderInfoLength).
 *
 * @param resource $handle
 * @return string|null the raw vector index bytes, or null on a seek
 *                     failure, a read failure or a short read
 */
public static function loadVectorIndex($handle) {
    if (fseek($handle, self::HeaderInfoLength) == -1) {
        return null;
    }

    // search() addresses this table with a stride of VectorIndexSize per
    // entry, so the block spans rows * cols * VectorIndexSize bytes; sizing
    // it with SegmentIndexSize would read past the end of the table
    $rLen = self::VectorIndexRows * self::VectorIndexCols * self::VectorIndexSize;
    $buff = fread($handle, $rLen);
    if ($buff === false || strlen($buff) != $rLen) {
        return null;
    }

    return $buff;
}
/**
 * Load the vector index block from the specified xdb file path.
 *
 * @param string $dbFile
 * @return string|null the raw vector index bytes, or null when the file
 *                     cannot be opened or the block cannot be read
 */
public static function loadVectorIndexFromFile($dbFile) {
    $handle = fopen($dbFile, 'r');
    if ($handle === false) {
        return null;
    }

    // the handle is local to this call, so release it before returning
    $vIndex = self::loadVectorIndex($handle);
    fclose($handle);
    return $vIndex;
}
/**
 * Load the whole xdb content from a file handle.
 *
 * @param resource $handle
 * @return string|null the full content, or null on a seek/tell/read
 *                     failure, a short read or an empty stream
 */
public static function loadContent($handle) {
    // seek to the end to learn the total size
    if (fseek($handle, 0, SEEK_END) == -1) {
        return null;
    }

    $size = ftell($handle);
    // an empty stream has nothing to load, and fread() rejects a zero length
    if ($size === false || $size < 1) {
        return null;
    }

    // seek to the head for reading
    if (fseek($handle, 0) == -1) {
        return null;
    }

    $buff = fread($handle, $size);
    // read length checking
    if ($buff === false || strlen($buff) != $size) {
        return null;
    }

    return $buff;
}
/**
 * Load the whole xdb content from a file path.
 *
 * @param string $dbFile
 * @return string|null the file content, or null when file_get_contents() fails
 */
public static function loadContentFromFile($dbFile) {
    $content = file_get_contents($dbFile, false);
    return ($content === false) ? null : $content;
}
/**
 * Get the current unix time in milliseconds, with a fractional part.
 *
 * @return float
 */
public static function now() {
    $seconds = microtime(true);
    return $seconds * 1000;
}
}
<?php
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
//
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/22
require dirname(__FILE__) . '/XdbSearcher.class.php';
// print the usage of the bench program; $argv[0] is shown as the script name
function printHelp($argv) {
    printf("php %s [command options]\n", $argv[0]);
    $usage = array(
        "options: ",
        " --db string ip2region binary xdb file path",
        " --src string source ip text file path",
        " --cache-policy string cache policy: file/vectorIndex/content",
    );
    foreach ($usage as $text) {
        echo $text, "\n";
    }
}
if ($argc < 2) {
    printHelp($argv);
    return;
}

$dbFile = "";
$srcFile = "";
$cachePolicy = 'vectorIndex';

// walk the options only and leave $argv untouched, so that printHelp()
// below still finds the script name in $argv[0]
foreach (array_slice($argv, 1) as $r) {
    if (strlen($r) < 5) {
        continue;
    }

    // strpos() returns false when '--' is absent and false == 0 under a
    // loose comparison, so only a strict check skips non-option arguments
    if (strpos($r, '--') !== 0) {
        continue;
    }

    // strpos() never returns a negative value; a missing '=' is false
    $sIdx = strpos($r, "=");
    if ($sIdx === false) {
        printf("missing = for args pair %s\n", $r);
        return;
    }

    // split `--key=value` into its key and value
    $key = substr($r, 2, $sIdx - 2);
    $val = substr($r, $sIdx + 1);
    if ($key == 'db') {
        $dbFile = $val;
    } else if ($key == 'src') {
        $srcFile = $val;
    } else if ($key == 'cache-policy') {
        $cachePolicy = $val;
    } else {
        printf("undefined option `%s`\n", $r);
        return;
    }
}

// both the xdb file and the source text file are required
if (strlen($dbFile) < 1 || strlen($srcFile) < 1) {
    printHelp($argv);
    return;
}
// printf("debug: dbFile: %s, cachePolicy: %s\n", $dbFile, $cachePolicy);
// create the xdb searcher by the cache-policy
switch ($cachePolicy) {
case 'file':
    // no cache at all: every lookup goes to the file
    try {
        $searcher = XdbSearcher::newWithFileOnly($dbFile);
    } catch (Exception $e) {
        printf("failed to create searcher with '%s': %s\n", $dbFile, $e);
        return;
    }
    break;
case 'vectorIndex':
    // cache the vector index only
    $vIndex = XdbSearcher::loadVectorIndexFromFile($dbFile);
    if ($vIndex == null) {
        printf("failed to load vector index from '%s'\n", $dbFile);
        return;
    }

    try {
        $searcher = XdbSearcher::newWithVectorIndex($dbFile, $vIndex);
    } catch (Exception $e) {
        printf("failed to create vector index cached searcher with '%s': %s\n", $dbFile, $e);
        return;
    }
    break;
case 'content':
    // cache the whole xdb file content
    $cBuff = XdbSearcher::loadContentFromFile($dbFile);
    if ($cBuff == null) {
        printf("failed to load xdb content from '%s'\n", $dbFile);
        return;
    }

    try {
        $searcher = XdbSearcher::newWithBuffer($cBuff);
    } catch (Exception $e) {
        // terminated with a newline like the sibling error messages
        printf("failed to create content cached searcher: %s\n", $e);
        return;
    }
    break;
default:
    printf("undefined cache-policy `%s`\n", $cachePolicy);
    return;
}
// do the bench test
$handle = fopen($srcFile, "r");
if ($handle === false) {
    printf("failed to open source text file `%s`\n", $srcFile);
    $searcher->close();
    return;
}

$count = 0;
$costs = 0;
// message of the first failure; the loops are left through `break` so that
// the source file and the searcher are always closed in one place below
$errMsg = null;
$sTime = XdbSearcher::now();
while (($raw = fgets($handle, 1024)) !== false) {
    $line = trim($raw);
    if (strlen($line) < 1) {
        continue;
    }

    // each line is expected as `start ip|end ip|region`
    $ps = explode('|', $line, 3);
    if (count($ps) != 3) {
        // the line is input data: pass it as an argument, never as the format
        $errMsg = sprintf("invalid ip segment line `%s`", $line);
        break;
    }

    $sip = XdbSearcher::ip2long($ps[0]);
    if ($sip === null) {
        $errMsg = sprintf("invalid start ip `%s`", $ps[0]);
        break;
    }

    $eip = XdbSearcher::ip2long($ps[1]);
    if ($eip === null) {
        $errMsg = sprintf("invalid end ip `%s`", $ps[1]);
        break;
    }

    if ($sip > $eip) {
        $errMsg = sprintf("start ip(%s) should not be greater than end ip(%s)", $ps[0], $ps[1]);
        break;
    }

    // probe both ends, the middle and the two quarter points of the segment
    $mip = ($sip + $eip) >> 1;
    foreach (array($sip, ($sip + $mip) >> 1, $mip, ($mip + $eip) >> 1, $eip) as $ip) {
        try {
            $cTime = XdbSearcher::now();
            $region = $searcher->search($ip);
            $costs += XdbSearcher::now() - $cTime;
        } catch (Exception $e) {
            $errMsg = sprintf("failed to search ip `%s`: %s", long2ip($ip), $e->getMessage());
            break 2;
        }

        if ($region == null) {
            $errMsg = sprintf("failed to search ip `%s`", long2ip($ip));
            break 2;
        }

        // check the region info; compared strictly so that numeric-looking
        // strings are not compared as numbers
        if ($region !== $ps[2]) {
            $errMsg = sprintf("failed search(%s) with (%s != %s)", long2ip($ip), $region, $ps[2]);
            break 2;
        }

        $count++;
    }
}

// close the source file and the searcher at last
fclose($handle);
$searcher->close();
if ($errMsg !== null) {
    printf("%s\n", $errMsg);
    return;
}

printf("Bench finished, {cachePolicy: %s, total: %d, took: %ds, cost: %.3f ms/op}\n",
    $cachePolicy, $count, (XdbSearcher::now() - $sTime)/1000, $count == 0 ? 0 : $costs/$count);
<?php
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
//
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/21
require dirname(__FILE__) . '/XdbSearcher.class.php';
// print the usage of the search test program; $argv[0] is shown as the script name
function printHelp($argv) {
    printf("php %s [command options]\n", $argv[0]);
    $usage = array(
        "options: ",
        " --db string ip2region binary xdb file path",
        " --cache-policy string cache policy: file/vectorIndex/content",
    );
    foreach ($usage as $text) {
        echo $text, "\n";
    }
}
if ($argc < 2) {
    printHelp($argv);
    return;
}

$dbFile = "";
$cachePolicy = 'vectorIndex';

// walk the options only and leave $argv untouched, so that printHelp()
// below still finds the script name in $argv[0]
foreach (array_slice($argv, 1) as $r) {
    if (strlen($r) < 5) {
        continue;
    }

    // strpos() returns false when '--' is absent and false == 0 under a
    // loose comparison, so only a strict check skips non-option arguments
    if (strpos($r, '--') !== 0) {
        continue;
    }

    // strpos() never returns a negative value; a missing '=' is false
    $sIdx = strpos($r, "=");
    if ($sIdx === false) {
        printf("missing = for args pair %s\n", $r);
        return;
    }

    // split `--key=value` into its key and value
    $key = substr($r, 2, $sIdx - 2);
    $val = substr($r, $sIdx + 1);
    if ($key == 'db') {
        $dbFile = $val;
    } else if ($key == 'cache-policy') {
        $cachePolicy = $val;
    } else {
        printf("undefined option `%s`\n", $r);
        return;
    }
}

// the xdb file is required
if (strlen($dbFile) < 1) {
    printHelp($argv);
    return;
}
// printf("debug: dbFile: %s, cachePolicy: %s\n", $dbFile, $cachePolicy);
// create the xdb searcher by the cache-policy
switch ($cachePolicy) {
case 'file':
    // no cache at all: every lookup goes to the file
    try {
        $searcher = XdbSearcher::newWithFileOnly($dbFile);
    } catch (Exception $e) {
        printf("failed to create searcher with '%s': %s\n", $dbFile, $e);
        return;
    }
    break;
case 'vectorIndex':
    // cache the vector index only
    $vIndex = XdbSearcher::loadVectorIndexFromFile($dbFile);
    if ($vIndex == null) {
        printf("failed to load vector index from '%s'\n", $dbFile);
        return;
    }

    try {
        $searcher = XdbSearcher::newWithVectorIndex($dbFile, $vIndex);
    } catch (Exception $e) {
        printf("failed to create vector index cached searcher with '%s': %s\n", $dbFile, $e);
        return;
    }
    break;
case 'content':
    // cache the whole xdb file content
    $cBuff = XdbSearcher::loadContentFromFile($dbFile);
    if ($cBuff == null) {
        printf("failed to load xdb content from '%s'\n", $dbFile);
        return;
    }

    try {
        $searcher = XdbSearcher::newWithBuffer($cBuff);
    } catch (Exception $e) {
        // terminated with a newline like the sibling error messages
        printf("failed to create content cached searcher: %s\n", $e);
        return;
    }
    break;
default:
    printf("undefined cache-policy `%s`\n", $cachePolicy);
    return;
}
// the cache policy is passed as an argument instead of being interpolated
// into the format string
printf("ip2region xdb searcher test program, cachePolicy: %s\ntype 'quit' to exit\n", $cachePolicy);
while (true) {
    echo "ip2region>> ";
    $raw = fgets(STDIN);
    // fgets() returns false at the end of the input (Ctrl-D, closed pipe);
    // without this check the loop would print the prompt forever
    if ($raw === false) {
        echo "\n";
        break;
    }

    $line = trim($raw);
    if (strlen($line) < 2) {
        continue;
    }

    if ($line == 'quit') {
        break;
    }

    if (XdbSearcher::ip2long($line) === null) {
        echo "Error: invalid ip address\n";
        continue;
    }

    $sTime = XdbSearcher::now();
    try {
        $region = $searcher->search($line);
    } catch (Exception $e) {
        printf("search call failed: %s\n", $e);
        continue;
    }

    printf("{region: %s, ioCount: %d, took: %.5f ms}\n",
        $region, $searcher->getIOCount(), XdbSearcher::now() - $sTime);
}

// close the searcher at last
$searcher->close();
printf("searcher test program exited, thanks for trying\n");
\ No newline at end of file
<?php
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
//
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/22
require dirname(__FILE__) . '/XdbSearcher.class.php';
// load the header of the xdb file and dump its decoded fields
function testLoadHeader() {
    $xdbPath = '../../data/ip2region.xdb';
    $info = XdbSearcher::loadHeaderFromFile($xdbPath);
    if ($info != null) {
        echo "header loaded: ";
        print_r($info);
    } else {
        echo "failed to load header from file\n";
    }
}
// load the vector index of the xdb file and print its length in bytes
function testLoadVectorIndex() {
    $xdbPath = '../../data/ip2region.xdb';
    $index = XdbSearcher::loadVectorIndexFromFile($xdbPath);
    if ($index != null) {
        printf("vector index loaded: length=%d\n", strlen($index));
    } else {
        echo "failed to load vector index from file\n";
    }
}
// load the whole content of the xdb file and print its length in bytes
function testLoadContent() {
    $xdbPath = '../../data/ip2region.xdb';
    $content = XdbSearcher::loadContentFromFile($xdbPath);
    if ($content != null) {
        printf("content loaded, length=%d\n", strlen($content));
    } else {
        echo "failed to load content from file\n";
    }
}
// run every load test in turn and report how long each one took
$cases = array(
    'loadHeader' => 'testLoadHeader',
    'loadVectorIndex' => 'testLoadVectorIndex',
    'loadContent' => 'testLoadContent',
);
foreach ($cases as $label => $fn) {
    printf("testing %s ... \n", $label);
    $now = XdbSearcher::now();
    $fn();
    printf("done, cost: %0.5f ms\n\n", XdbSearcher::now() - $now);
}
无法预览此类型文件
# ip2region golang maker makefile

all: build

# none of these targets produce a file of the same name, so declare them
# phony: a stray file called `test` or `clean` must not disable the rule
.PHONY: all build test clean

# compile the xdb_maker executable
build:
	go build -o xdb_maker

# run the tests of every package
test:
	go test -v ./...

# remove every xdb_maker file found under the current directory
clean:
	find ./ -name xdb_maker | xargs rm -f
# ip2region xdb golang 生成实现 # ip2region xdb golang 生成实现
# 程序编译 # 程序编译
通过如下方式编译得到 dbmaker 可执行程序:
通过如下方式编译得到 xdb_maker 可执行程序:
``` ```
# 切换到golang maker 根目录 # 切换到golang maker 根目录
cd ./ make
go build
``` ```
编译成功后会在当前目录生成一个 dbmaker 的可执行文件 编译成功后会在当前目录生成一个 xdb_maker 的可执行文件
# 数据生成 # 数据生成
通过 `dbmaker gen` 命令生成 ip2region.xdb 二进制文件: 通过 `xdb_maker gen` 命令生成 ip2region.xdb 二进制文件:
``` ```
➜ golang git:(v2.0_xdb) ✗ ./dbmaker gen ➜ golang git:(v2.0_xdb) ✗ ./xdb_maker gen
dbmaker gen [command options] ./xdb_maker gen [command options]
options: options:
--src string source ip text file path --src string source ip text file path
--dst string destination binary db file path --dst string destination binary xdb file path
``` ```
例如,使用默认的 data/ip.merge.txt 作为源数据,生成一个 ip2region.xdb 到当前目录: 例如,使用默认的 data/ip.merge.txt 作为源数据,生成一个 ip2region.xdb 到当前目录:
```bash ```bash
./dbmaker gen --src=../../data/ip.merge.txt --dst=./ip2region.xdb ➜ golang git:(v2.0_xdb) ✗ ./xdb_maker gen --src=../../data/ip.merge.txt --dst=./ip2region.xdb
# 会看到一堆输出,最终会看到类似如下输出表示运行结束 # 会看到一堆输出,最终会看到类似如下输出表示运行结束
... ...
2022/06/16 16:38:48 maker.go:317: write done, with 13804 data blocks and (683591, 720221) index blocks 2022/06/16 16:38:48 maker.go:317: write done, with 13804 data blocks and (683591, 720221) index blocks
...@@ -31,17 +31,18 @@ options: ...@@ -31,17 +31,18 @@ options:
# 数据查询 # 数据查询
通过 `dbmaker search` 命令来测试查询输入的 ip: 通过 `xdb_maker search` 命令来测试查询输入的 ip:
``` ```
dbmaker test [command options] ➜ golang git:(v2.0_xdb) ✗ ./xdb_maker search
./xdb_maker search [command options]
options: options:
--db string ip2region binary db file path --db string ip2region binary xdb file path
``` ```
例如,使用自带的 xdb 文件来运行查询测试: 例如,使用自带的 xdb 文件来运行查询测试:
```bash ```bash
./dbmaker search --db=../../data/ip2region.xdb ➜ golang git:(v2.0_xdb) ✗ ./xdb_maker search --db=../../data/ip2region.xdb
ip2region 2.0 test program, commands: ip2region xdb search test program, commands:
loadIndex : load the vector index for search speedup. loadIndex : load the vector index for search speedup.
clearIndex: clear the vector index. clearIndex: clear the vector index.
quit : exit the test program quit : exit the test program
...@@ -58,10 +59,10 @@ ip2region>> ...@@ -58,10 +59,10 @@ ip2region>>
# bench 测试 # bench 测试
如果你自主生成了 xdb 文件,请确保运行如下的 `dbmaker bench` 命令来确保制定的 xdb 文件的正确性: 如果你自主生成了 xdb 文件,请确保运行如下的 `xdb_maker bench` 命令来确保生成的 xdb 文件的正确性:
``` ```
➜ golang git:(v2.0_xdb) ✗ ./dbmaker bench ➜ golang git:(v2.0_xdb) ✗ ./xdb_maker bench
dbmaker bench [command options] ./xdb_maker bench [command options]
options: options:
--db string ip2region binary xdb file path --db string ip2region binary xdb file path
--src string source ip text file path --src string source ip text file path
...@@ -70,7 +71,7 @@ options: ...@@ -70,7 +71,7 @@ options:
例如:使用 data/ip.merge.txt 源文件来 bench 测试 data/ip2region.xdb 这个 xdb 文件: 例如:使用 data/ip.merge.txt 源文件来 bench 测试 data/ip2region.xdb 这个 xdb 文件:
```bash ```bash
./dbmaker bench --db=../../data/ip2region.xdb --src=../../data/ip.merge.txt ➜ golang git:(v2.0_xdb) ✗ ./xdb_maker bench --db=../../data/ip2region.xdb --src=../../data/ip.merge.txt
# 会看到一堆输出,看到类似如下的数据表示 bench 测试通过了,否则就会报错 # 会看到一堆输出,看到类似如下的数据表示 bench 测试通过了,否则就会报错
... ...
try to bench segment: `224.0.0.0|255.255.255.255|0|0|0|内网IP|内网IP` try to bench segment: `224.0.0.0|255.255.255.255|0|0|0|内网IP|内网IP`
...@@ -81,4 +82,5 @@ try to bench segment: `224.0.0.0|255.255.255.255|0|0|0|内网IP|内网IP` ...@@ -81,4 +82,5 @@ try to bench segment: `224.0.0.0|255.255.255.255|0|0|0|内网IP|内网IP`
|-try to bench ip '255.255.255.255' ... --[Ok] |-try to bench ip '255.255.255.255' ... --[Ok]
Bench finished, {count: 3417955, failed: 0, took: 52.200116397s} Bench finished, {count: 3417955, failed: 0, took: 52.200116397s}
``` ```
*请注意 bench 测试使用的 `src` 文件需要与生成对应 ip2region.xdb 的源文件相同*
如果运行过程中有错误会立马停止运行,也可以执行 --ignore-error=true 参数来忽略错误,在最后看 failed 的统计结果。 如果运行过程中有错误会立马停止运行,也可以执行 --ignore-error=true 参数来忽略错误,在最后看 failed 的统计结果。
module dbmaker module github.com/lionsoul2014/ip2region/maker/golang
go 1.17 go 1.17
\ No newline at end of file
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
...@@ -7,6 +7,7 @@ package main ...@@ -7,6 +7,7 @@ package main
import ( import (
"bufio" "bufio"
"fmt" "fmt"
"github.com/lionsoul2014/ip2region/maker/golang/xdb"
"log" "log"
"os" "os"
"strings" "strings"
...@@ -14,18 +15,18 @@ import ( ...@@ -14,18 +15,18 @@ import (
) )
func printHelp() { func printHelp() {
fmt.Printf("ip2region dbmaker 2.0\n") fmt.Printf("ip2region xdb maker\n")
fmt.Printf("dbmaker [command] [command options]\n") fmt.Printf("%s [command] [command options]\n", os.Args[0])
fmt.Printf("Command: \n") fmt.Printf("Command: \n")
fmt.Printf(" gen generate the binary db file\n") fmt.Printf(" gen generate the binary db file\n")
fmt.Printf(" search binary db search test\n") fmt.Printf(" search binary xdb search test\n")
fmt.Printf(" bench binary db bench test\n") fmt.Printf(" bench binary xdb bench test\n")
} }
func genDb() { func genDb() {
var err error var err error
var srcFile, dstFile = "", "" var srcFile, dstFile = "", ""
var indexPolicy = VectorIndexPolicy var indexPolicy = xdb.VectorIndexPolicy
for i := 2; i < len(os.Args); i++ { for i := 2; i < len(os.Args); i++ {
r := os.Args[i] r := os.Args[i]
if len(r) < 5 { if len(r) < 5 {
...@@ -36,54 +37,60 @@ func genDb() { ...@@ -36,54 +37,60 @@ func genDb() {
continue continue
} }
var eIdx = strings.Index(r, "=") var sIdx = strings.Index(r, "=")
if eIdx < 0 { if sIdx < 0 {
fmt.Printf("missing = for args pair '%s'\n", r) fmt.Printf("missing = for args pair '%s'\n", r)
return return
} }
switch r[2:eIdx] { switch r[2:sIdx] {
case "src": case "src":
srcFile = r[eIdx+1:] srcFile = r[sIdx+1:]
case "dst": case "dst":
dstFile = r[eIdx+1:] dstFile = r[sIdx+1:]
case "index": case "index":
indexPolicy, err = IndexPolicyFromString(r[eIdx+1:]) indexPolicy, err = xdb.IndexPolicyFromString(r[sIdx+1:])
if err != nil { if err != nil {
fmt.Printf("parse policy: %s", err.Error()) fmt.Printf("parse policy: %s", err.Error())
return return
} }
default:
fmt.Printf("undefine option `%s`\n", r)
return
} }
} }
if srcFile == "" || dstFile == "" { if srcFile == "" || dstFile == "" {
fmt.Printf("dbmaker gen [command options]\n") fmt.Printf("%s gen [command options]\n", os.Args[0])
fmt.Printf("options:\n") fmt.Printf("options:\n")
fmt.Printf(" --src string source ip text file path\n") fmt.Printf(" --src string source ip text file path\n")
fmt.Printf(" --dst string destination binary db file path\n") fmt.Printf(" --dst string destination binary xdb file path\n")
return return
} }
// make the binary file // make the binary file
tStart := time.Now() tStart := time.Now()
maker, err := NewMaker(indexPolicy, srcFile, dstFile) maker, err := xdb.NewMaker(indexPolicy, srcFile, dstFile)
if err != nil { if err != nil {
log.Fatalf("failed to create maker: %s", err) fmt.Printf("failed to create %s\n", err)
return
} }
err = maker.Init() err = maker.Init()
if err != nil { if err != nil {
log.Fatalf("failed Init: %s", err) fmt.Printf("failed Init: %s\n", err)
return
} }
err = maker.Start() err = maker.Start()
if err != nil { if err != nil {
log.Fatalf("failed Start: %s", err) fmt.Printf("failed Start: %s\n", err)
return
} }
err = maker.End() err = maker.End()
if err != nil { if err != nil {
log.Fatalf("failed End: %s", err) fmt.Printf("failed End: %s\n", err)
} }
log.Printf("Done, elapsed: %s\n", time.Since(tStart)) log.Printf("Done, elapsed: %s\n", time.Since(tStart))
...@@ -111,26 +118,30 @@ func testSearch() { ...@@ -111,26 +118,30 @@ func testSearch() {
switch r[2:eIdx] { switch r[2:eIdx] {
case "db": case "db":
dbFile = r[eIdx+1:] dbFile = r[eIdx+1:]
default:
fmt.Printf("undefined option '%s'\n", r)
return
} }
} }
if dbFile == "" { if dbFile == "" {
fmt.Printf("dbmaker test [command options]\n") fmt.Printf("%s search [command options]\n", os.Args[0])
fmt.Printf("options:\n") fmt.Printf("options:\n")
fmt.Printf(" --db string ip2region binary db file path\n") fmt.Printf(" --db string ip2region binary xdb file path\n")
return return
} }
searcher, err := NewSearcher(dbFile) searcher, err := xdb.NewSearcher(dbFile)
if err != nil { if err != nil {
log.Fatalf("failed to create searcher: %s", err.Error()) fmt.Printf("failed to create searcher with `%s`: %s\n", dbFile, err.Error())
return
} }
defer func() { defer func() {
searcher.Close() searcher.Close()
fmt.Printf("test program exited, thanks for trying\n") fmt.Printf("test program exited, thanks for trying\n")
}() }()
fmt.Println(`ip2region 2.0 test program, commands: fmt.Println(`ip2region xdb search test program, commands:
loadIndex : load the vector index for search speedup. loadIndex : load the vector index for search speedup.
clearIndex: clear the vector index. clearIndex: clear the vector index.
quit : exit the test program`) quit : exit the test program`)
...@@ -163,7 +174,7 @@ quit : exit the test program`) ...@@ -163,7 +174,7 @@ quit : exit the test program`)
break break
} }
ip, err := CheckIP(line) ip, err := xdb.CheckIP(line)
if err != nil { if err != nil {
fmt.Printf("invalid ip address `%s`\n", line) fmt.Printf("invalid ip address `%s`\n", line)
continue continue
...@@ -193,19 +204,19 @@ func testBench() { ...@@ -193,19 +204,19 @@ func testBench() {
continue continue
} }
var eIdx = strings.Index(r, "=") var sIdx = strings.Index(r, "=")
if eIdx < 0 { if sIdx < 0 {
fmt.Printf("missing = for args pair '%s'\n", r) fmt.Printf("missing = for args pair '%s'\n", r)
return return
} }
switch r[2:eIdx] { switch r[2:sIdx] {
case "db": case "db":
dbFile = r[eIdx+1:] dbFile = r[sIdx+1:]
case "src": case "src":
srcFile = r[eIdx+1:] srcFile = r[sIdx+1:]
case "ignore-error": case "ignore-error":
v := r[eIdx+1:] v := r[sIdx+1:]
if v == "true" || v == "1" { if v == "true" || v == "1" {
ignoreError = true ignoreError = true
} else if v == "false" || v == "0" { } else if v == "false" || v == "0" {
...@@ -214,11 +225,14 @@ func testBench() { ...@@ -214,11 +225,14 @@ func testBench() {
fmt.Printf("invalid value for ignore-error option, could be false/0 or true/1\n") fmt.Printf("invalid value for ignore-error option, could be false/0 or true/1\n")
return return
} }
default:
fmt.Printf("undefined option '%s'\n", r)
return
} }
} }
if dbFile == "" || srcFile == "" { if dbFile == "" || srcFile == "" {
fmt.Printf("dbmaker bench [command options]\n") fmt.Printf("%s bench [command options]\n", os.Args[0])
fmt.Printf("options:\n") fmt.Printf("options:\n")
fmt.Printf(" --db string ip2region binary xdb file path\n") fmt.Printf(" --db string ip2region binary xdb file path\n")
fmt.Printf(" --src string source ip text file path\n") fmt.Printf(" --src string source ip text file path\n")
...@@ -226,7 +240,11 @@ func testBench() { ...@@ -226,7 +240,11 @@ func testBench() {
return return
} }
searcher, err := NewSearcher(dbFile) searcher, err := xdb.NewSearcher(dbFile)
if err != nil {
fmt.Printf("failed to create searcher with `%s`: %s\n", dbFile, err)
return
}
defer func() { defer func() {
searcher.Close() searcher.Close()
}() }()
...@@ -248,13 +266,13 @@ func testBench() { ...@@ -248,13 +266,13 @@ func testBench() {
return return
} }
sip, err := CheckIP(ps[0]) sip, err := xdb.CheckIP(ps[0])
if err != nil { if err != nil {
fmt.Printf("check start ip `%s`: %s\n", ps[0], err) fmt.Printf("check start ip `%s`: %s\n", ps[0], err)
return return
} }
eip, err := CheckIP(ps[1]) eip, err := xdb.CheckIP(ps[1])
if err != nil { if err != nil {
fmt.Printf("check end ip `%s`: %s\n", ps[1], err) fmt.Printf("check end ip `%s`: %s\n", ps[1], err)
return return
...@@ -266,12 +284,12 @@ func testBench() { ...@@ -266,12 +284,12 @@ func testBench() {
} }
fmt.Printf("try to bench segment: `%s`\n", l) fmt.Printf("try to bench segment: `%s`\n", l)
mip := MidIP(sip, eip) mip := xdb.MidIP(sip, eip)
for _, ip := range []uint32{sip, MidIP(sip, mip), mip, MidIP(mip, eip), eip} { for _, ip := range []uint32{sip, xdb.MidIP(sip, mip), mip, xdb.MidIP(mip, eip), eip} {
fmt.Printf("|-try to bench ip '%s' ... ", Long2IP(ip)) fmt.Printf("|-try to bench ip '%s' ... ", xdb.Long2IP(ip))
region, _, err := searcher.Search(ip) region, _, err := searcher.Search(ip)
if err != nil { if err != nil {
fmt.Printf("failed to search ip '%s': %s\n", Long2IP(ip), err) fmt.Printf("failed to search ip '%s': %s\n", xdb.Long2IP(ip), err)
return return
} }
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a Apache2.0-style // Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package main package xdb
import ( import (
"encoding/binary" "encoding/binary"
......
...@@ -47,7 +47,7 @@ ...@@ -47,7 +47,7 @@
// +------------+-----------+---------------+------------+ // +------------+-----------+---------------+------------+
// start ip end ip data length data ptr // start ip end ip data length data ptr
package main package xdb
import ( import (
"bufio" "bufio"
...@@ -115,16 +115,16 @@ func (m *Maker) initDbHeader() error { ...@@ -115,16 +115,16 @@ func (m *Maker) initDbHeader() error {
binary.LittleEndian.PutUint16(header, uint16(VersionNo)) binary.LittleEndian.PutUint16(header, uint16(VersionNo))
// 2, index policy code // 2, index policy code
binary.LittleEndian.PutUint16(header, uint16(m.indexPolicy)) binary.LittleEndian.PutUint16(header[2:], uint16(m.indexPolicy))
// 3, generate unix timestamp // 3, generate unix timestamp
binary.LittleEndian.PutUint32(header[2:], uint32(time.Now().Unix())) binary.LittleEndian.PutUint32(header[4:], uint32(time.Now().Unix()))
// 4, index block start ptr // 4, index block start ptr
binary.LittleEndian.PutUint32(header[6:], uint32(0)) binary.LittleEndian.PutUint32(header[8:], uint32(0))
// 5, index block end ptr // 5, index block end ptr
binary.LittleEndian.PutUint32(header[10:], uint32(0)) binary.LittleEndian.PutUint32(header[12:], uint32(0))
_, err = m.dstHandle.Write(header) _, err = m.dstHandle.Write(header)
if err != nil { if err != nil {
...@@ -260,7 +260,7 @@ func (m *Maker) Start() error { ...@@ -260,7 +260,7 @@ func (m *Maker) Start() error {
// 2, write the index block and cache the super index block // 2, write the index block and cache the super index block
log.Printf("try to write the segment index block ... ") log.Printf("try to write the segment index block ... ")
var counter = 0 var counter, startIndexPtr, endIndexPtr = 0, int64(-1), int64(-1)
for _, seg := range m.segments { for _, seg := range m.segments {
dataPtr, has := m.regionPool[seg.Region] dataPtr, has := m.regionPool[seg.Region]
if !has { if !has {
...@@ -296,10 +296,18 @@ func (m *Maker) Start() error { ...@@ -296,10 +296,18 @@ func (m *Maker) Start() error {
log.Printf("|-segment index: %d, ptr: %d, segment: %s\n", counter, pos, s.String()) log.Printf("|-segment index: %d, ptr: %d, segment: %s\n", counter, pos, s.String())
m.setVectorIndex(s.StartIP, uint32(pos)) m.setVectorIndex(s.StartIP, uint32(pos))
counter++ counter++
// check and record the start index ptr
if startIndexPtr == -1 {
startIndexPtr = pos
}
endIndexPtr = pos
} }
} }
// synchronized the vector index block // synchronized the vector index block
log.Printf("try to write the vector index block ... ")
_, err = m.dstHandle.Seek(int64(HeaderInfoLength), 0) _, err = m.dstHandle.Seek(int64(HeaderInfoLength), 0)
if err != nil { if err != nil {
return fmt.Errorf("seek vector index first ptr: %w", err) return fmt.Errorf("seek vector index first ptr: %w", err)
...@@ -314,7 +322,23 @@ func (m *Maker) Start() error { ...@@ -314,7 +322,23 @@ func (m *Maker) Start() error {
} }
} }
log.Printf("write done, with %d data blocks and (%d, %d) index blocks", len(m.regionPool), len(m.segments), counter) // synchronized the segment index info
log.Printf("try to write the segment index ptr ... ")
var buff = make([]byte, 8)
binary.LittleEndian.PutUint32(buff, uint32(startIndexPtr))
binary.LittleEndian.PutUint32(buff[4:], uint32(endIndexPtr))
_, err = m.dstHandle.Seek(8, 0)
if err != nil {
return fmt.Errorf("seek segment index ptr: %w", err)
}
_, err = m.dstHandle.Write(buff)
if err != nil {
return fmt.Errorf("write segment index ptr: %w", err)
}
log.Printf("write done, dataBlocks: %d, indexBlocks: (%d, %d), indexPtr: (%d, %d)",
len(m.regionPool), len(m.segments), counter, startIndexPtr, endIndexPtr)
return nil return nil
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
// please use the searcher in binding/golang for production use. // please use the searcher in binding/golang for production use.
// And this is a Not thread safe implementation. // And this is a Not thread safe implementation.
package main package xdb
import ( import (
"encoding/binary" "encoding/binary"
...@@ -43,9 +43,11 @@ func NewSearcher(dbFile string) (*Searcher, error) { ...@@ -43,9 +43,11 @@ func NewSearcher(dbFile string) (*Searcher, error) {
} }
func (s *Searcher) Close() { func (s *Searcher) Close() {
err := s.handle.Close() if s.handle != nil {
if err != nil { err := s.handle.Close()
return if err != nil {
return
}
} }
} }
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a Apache2.0-style // Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package main package xdb
import ( import (
"fmt" "fmt"
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a Apache2.0-style // Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package main package xdb
import ( import (
"encoding/binary" "encoding/binary"
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a Apache2.0-style // Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package main package xdb
import ( import (
"fmt" "fmt"
......