提交 022847e0 编写于 作者: G gongzhengyang

perf(add more benchmark and change function calls):

上级 cc876c93
......@@ -8,10 +8,11 @@ fn main() {
env::var("XDB_FILEPATH").unwrap_or_else(|_| {
let matches = cmd::get_matches();
let xdb_filepath = matches
.get_one::<String>("xdb")
.expect("you must use --xdb in command or set XDB_FILEPATH environment");
env::set_var("XDB_FILEPATH", xdb_filepath);
xdb_filepath.to_owned()
.get_one::<String>("xdb");
if xdb_filepath.is_some() {
env::set_var("XDB_FILEPATH", xdb_filepath.unwrap());
}
"".to_owned()
});
search::global_searcher();
......
use criterion::{criterion_group, criterion_main, Criterion};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand;
use search::search_by_ip;
use search::{buffer_value, get_block_by_size, get_start_end_ptr, global_searcher, search_by_ip};
/// Benchmarks `search_by_ip`: every iteration looks up one random `u32` address and
/// unwraps the result, so a failed lookup aborts the benchmark.
// NOTE(review): this span shows both the pre-change (`ip_search_benchmark`) and the
// post-change (`ip_search_bench`) lines of the diff one after another.
fn ip_search_benchmark(c: &mut Criterion) {
fn ip_search_bench(c: &mut Criterion) {
c.bench_function("ip_search_bench", |b| {
b.iter(|| {
let ip = rand::random::<u32>();
search_by_ip(ip).unwrap();
search_by_ip(rand::random::<u32>()).unwrap();
})
});
}
criterion_group!(benches, ip_search_benchmark);
fn buffer_value_bench(c: &mut Criterion) {
c.bench_function("buffer_value", |b| {
b.iter(|| {
let offset = rand::random::<u16>();
let length = rand::random::<u8>();
buffer_value(offset as usize, length as usize);
});
});
}
fn get_block_by_size_bench(c: &mut Criterion) {
c.bench_function("get_block_by_size", |b| {
b.iter(||{
get_block_by_size(&global_searcher().buffer, rand::random::<u16>() as usize, 4);
})
});
}
fn get_start_end_ptr_bench(c: &mut Criterion) {
c.bench_function("get_start_end_ptr", |b| {
b.iter(|| {
get_start_end_ptr(rand::random::<u32>());
})
});
}
// Collects the four benchmark functions into a group named `benches` and passes that
// group to `criterion_main!`.
criterion_group!(benches, ip_search_bench, buffer_value_bench, get_block_by_size_bench, get_start_end_ptr_bench);
criterion_main!(benches);
mod ip_value;
use std::env;
use std::error::Error;
use std::fmt;
use std::fmt::Formatter;
use std::fs::File;
use std::io::Read;
use std::path::Path;
use once_cell::sync::OnceCell;
use ip_value::ToUIntIP;
const HEADER_INFO_LENGTH: u32 = 256;
// const VECTOR_INDEX_ROWS: u32 = 256;
const VECTOR_INDEX_COLS: u32 = 256;
const VECTOR_INDEX_SIZE: u32 = 8;
mod ip_value;
/// Number of bytes skipped at the start of the buffer before the vector index;
/// `get_start_end_ptr` adds it to every vector-index offset.
const HEADER_INFO_LENGTH: usize = 256;
/// Entries per vector-index row: the row is picked by the first IP octet and the column
/// by the second one.
const VECTOR_INDEX_COLS: usize = 256;
/// Bytes per vector-index entry: two consecutive 4-byte values (start and end pointer).
const VECTOR_INDEX_SIZE: usize = 8;
/// Bytes per segment-index entry as read by `search_by_ip`: 4-byte start IP, 4-byte end IP,
/// 2-byte data length and 4-byte data offset.
const SEGMENT_INDEX_SIZE: usize = 14;
/// In-memory copy of an xdb file: the whole file is read into `buffer` once and all
/// lookups index into it.
pub struct Searcher {
/// Raw bytes of the xdb file.
pub buffer: Vec<u8>,
}
impl Searcher {
    /// Loads the whole xdb file into memory.
    ///
    /// The path is taken from the `XDB_FILEPATH` environment variable. When the variable
    /// cannot be read, `../data/ip2region.xdb`, `../../data/ip2region.xdb` and
    /// `../../../data/ip2region.xdb` are probed in that order and the first one that
    /// exists is used.
    ///
    /// # Errors
    /// Returns an error when no xdb file can be located, or when opening or reading the
    /// chosen file fails. A missing file is reported through the `Result` instead of a
    /// panic so that callers decide how to react.
    pub fn new() -> Result<Self, Box<dyn Error>> {
        let xdb_filepath = match env::var("XDB_FILEPATH") {
            Ok(path) => path,
            // Fall back to the conventional data directory one to three levels up.
            Err(_) => (1..4)
                .map(|depth| "../".repeat(depth) + "data/ip2region.xdb")
                .find(|candidate| Path::new(candidate).exists())
                .ok_or("you must set XDB_FILEPATH or put file in ../data/ip2region.xdb")?,
        };
        println!("load xdb searcher file at {xdb_filepath}");
        let mut f = File::open(xdb_filepath)?;
        let mut buffer = Vec::new();
        f.read_to_end(&mut buffer)?;
        Ok(Self { buffer })
    }
}
/// Returns the process-wide [`Searcher`], constructing it on the first call.
///
/// The instance is stored in a function-local `once_cell::sync::OnceCell`, so concurrent
/// first calls still initialise it only once and later calls reuse the same value.
///
/// # Panics
/// Panics when the searcher cannot be constructed, because the construction result is
/// unwrapped inside the initialiser.
// NOTE(review): this span shows both the pre-change body (reads `XDB_FILEPATH` and passes
// it to `Searcher::new`) and the post-change call `Searcher::new()` of the diff.
pub fn global_searcher() -> &'static Searcher {
static SEARCHER: OnceCell<Searcher> = OnceCell::new();
SEARCHER.get_or_init(|| {
let xdp_filepath = env::var("XDB_FILEPATH").expect("you must set XDB_FILEPATH for search");
println!("init xdb searcher at {xdp_filepath}");
Searcher::new(xdp_filepath.as_str()).unwrap()
Searcher::new().unwrap()
})
}
......@@ -36,91 +60,89 @@ impl fmt::Display for Searcher {
}
}
/// Reads the vector-index entry for `ip` and returns its `(start, end)` pointer pair.
///
/// The entry is selected by the two most significant octets of `ip`; each pointer is
/// decoded from 4 bytes of the global searcher's buffer.
pub fn get_start_end_ptr(ip: u32) -> (usize, usize) {
    // Big-endian byte order puts the most significant octet first.
    let [first_octet, second_octet, ..] = ip.to_be_bytes();
    let slot = usize::from(first_octet) * VECTOR_INDEX_COLS + usize::from(second_octet);
    let entry_offset = HEADER_INFO_LENGTH + VECTOR_INDEX_SIZE * slot;
    let buffer = &global_searcher().buffer;
    (
        get_block_by_size(buffer, entry_offset, 4),
        get_block_by_size(buffer, entry_offset + 4, 4),
    )
}
/// Looks up the region text stored for `ip`.
///
/// `ip` may be any type implementing `ToUIntIP`; it is converted to a `u32` first. The
/// vector index then yields a start/end pointer pair, and the segment entries between
/// those pointers are binary-searched for the entry whose IP range contains the address.
///
/// Returns the matching entry's data decoded as UTF-8. Fails when the conversion fails,
/// when the data is not valid UTF-8, or with `"not matched"` when no entry contains `ip`.
///
/// See https://mp.weixin.qq.com/s/ndjzu0BgaeBmDOCw5aqHUg for details.
// NOTE(review): this span shows the pre-change lines (`search_by_ip_u32`, `get_u32`) and the
// post-change lines (`get_start_end_ptr`, `get_block_by_size`) of the diff interleaved.
pub fn search_by_ip<T>(ip: T) -> Result<String, Box<dyn Error>>
where
T: ToUIntIP,
{
let changed_value = ip.to_u32_ip()?;
search_by_ip_u32(changed_value)
}
pub fn search_by_ip_u32(ip: u32) -> Result<String, Box<dyn Error>> {
let il0 = (ip >> 24) & 0xFF;
let il1 = (ip >> 16) & 0xFF;
let idx = VECTOR_INDEX_SIZE * (il0 * VECTOR_INDEX_COLS + il1);
let start_point = (HEADER_INFO_LENGTH + idx) as usize;
let buffer = &global_searcher().buffer;
let start_ptr = get_u32(buffer, start_point);
let end_ptr = get_u32(buffer, start_point + 4);
let ip = ip.to_u32_ip()?;
let (start_ptr, end_ptr) = get_start_end_ptr(ip);
// `left` and `right` are indices of segment entries relative to `start_ptr`.
let mut left: usize = 0;
let mut right: usize = ((end_ptr - start_ptr) as usize) / SEGMENT_INDEX_SIZE;
let mut right: usize = (end_ptr - start_ptr) / SEGMENT_INDEX_SIZE;
// Binary search over the fixed-size segment entries.
while left <= right {
let mid = (left + right) >> 1;
let offset = (start_ptr as usize) + mid * SEGMENT_INDEX_SIZE;
let offset = &start_ptr + mid * SEGMENT_INDEX_SIZE;
let buffer_ip_value = buffer_value(offset, SEGMENT_INDEX_SIZE);
let start_ip = get_u32(buffer_ip_value, 0);
if ip < start_ip {
let start_ip = get_block_by_size(&buffer_ip_value, 0, 4);
if &ip < &(start_ip as u32) {
// NOTE(review): `mid` is a `usize`; when `mid == 0` this subtraction underflows
// (a panic in debug builds) — confirm the index layout rules that case out.
right = mid - 1;
} else if ip > get_u32(buffer_ip_value, 4) {
} else if &ip > &(get_block_by_size(&buffer_ip_value, 4, 4) as u32) {
left = mid + 1;
} else {
// Entry found: bytes 8..10 hold the data length, bytes 10..14 the data offset.
let length = (buffer_ip_value[8] as usize & 0x000000FF)
| (buffer_ip_value[9] as usize & 0x0000FF00);
let offset = get_u32(buffer_ip_value, 10);
let result = buffer_value(offset as usize, length)
.iter()
.map(|x| x.to_owned())
.collect::<Vec<u8>>();
return Ok(String::from_utf8(result)?);
let data_length = get_block_by_size(&buffer_ip_value, 8, 2);
let data_offset = get_block_by_size(&buffer_ip_value, 10, 4);
let result = String::from_utf8(
buffer_value(data_offset, data_length)
.to_vec());
return Ok(result?);
}
}
Err("not matched".into())
}
/// Borrows the `length` bytes of `bytes` that begin at `offset`.
///
/// # Panics
/// Panics when the window `offset..offset + length` does not lie inside `bytes`.
pub fn start_end_buffer_value(bytes: &[u8], offset: usize, length: usize) -> &[u8] {
    let end = offset + length;
    &bytes[offset..end]
}
/// Borrows `length` bytes starting at `offset` from the global searcher's buffer.
///
/// # Panics
/// Panics when the window `offset..offset + length` does not lie inside the buffer.
pub fn buffer_value(offset: usize, length: usize) -> &'static [u8] {
    let searcher = global_searcher();
    let end = offset + length;
    &searcher.buffer[offset..end]
}
// NOTE(review): this span shows the removed `Searcher::new(filepath)` and `get_u32`
// together with the added `get_block_by_size`, interleaved as in the diff view.
impl Searcher {
pub fn new(filepath: &str) -> Result<Self, Box<dyn Error>> {
let mut f = File::open(filepath)?;
let mut buffer = Vec::new();
f.read_to_end(&mut buffer)?;
Ok(Self { buffer })
/// Combines `length` consecutive elements of `bytes`, starting at `offset`, into one
/// `usize`. The element at `offset` becomes the least significant byte and each
/// following element is shifted 8 bits further left (little-endian order).
///
/// # Panics
/// Panics when `offset..offset + length` does not lie inside `bytes`.
// NOTE(review): shifting by `index * 8` presumably requires `length` to stay within the
// byte width of `usize`; the callers visible here pass 2 or 4.
#[inline]
pub fn get_block_by_size<T>(bytes: &[T], offset: usize, length: usize) -> usize
where
T: Clone,
usize: From<T>,
{
let mut result: usize = 0;
for (index, value) in bytes[offset..offset+length].iter().enumerate() {
result |= usize::from(value.clone()) << (index*8);
}
}
/// Reads the four bytes at `offset..offset + 4` as a little-endian `u32`.
fn get_u32(bytes: &[u8], offset: usize) -> u32 {
(bytes[offset] as u32) & 0x000000FF
| ((bytes[offset + 1] as u32) << 8) & 0x0000FF00
| ((bytes[offset + 2] as u32) << 16) & 0x00FF0000
| ((bytes[offset + 3] as u32) << 24) & 0xFF000000
result
}
#[cfg(test)]
mod tests {
use super::*;
use std::net::Ipv4Addr;
use std::str::FromStr;
use std::thread;
const TEST_IP_FILEPATH: &str = "../../../data/ip.test.txt";
use super::*;
///test all types find correct
#[test]
fn test_search_by_ip() {
fn test_multi_type_ip() {
search_by_ip("2.0.0.0").unwrap();
search_by_ip("32").unwrap();
search_by_ip(32).unwrap();
search_by_ip(4294408949).unwrap();
search_by_ip(Ipv4Addr::from_str("1.1.1.1").unwrap()).unwrap();
}
/// test find ip correct use the file ip.test.txt in ../../data
#[test]
fn test_random_choose_ip() {
let mut file = File::open(TEST_IP_FILEPATH).unwrap();
fn test_match_all_ip_correct() {
let mut file = File::open("../../../data/ip.test.txt").unwrap();
let mut contents = String::new();
file.read_to_string(&mut contents).unwrap();
for line in contents.split("\n") {
......@@ -138,7 +160,7 @@ mod tests {
}
#[test]
fn test_multi_thread() {
fn test_multi_thread_only_load_xdb_once() {
let handle = thread::spawn(|| {
let result = search_by_ip("2.2.2.2").unwrap();
println!("ip search in spawn: {result}");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册