提交 9a69e3b1 编写于 作者: Y Ybjjwwang

fix transfer

上级 ba84e617
......@@ -212,7 +212,7 @@ class slim_hash_map {
int copy_data_from(const slim_hash_map& rhs) {
destroy();
LOG(INFO) << "start copy data, rhs info, mHashSize: " << rhs.m_nHashSize;
if (rhs.m_nHashSize > 0) {
m_hashTable = new (std::nothrow) uint32_t[rhs.m_nHashSize];
if (!m_hashTable) {
......@@ -231,7 +231,7 @@ class slim_hash_map {
<< sizeof(hash_node_t) * BLOCK_SIZE;
return -1;
}
LOG(INFO) << "copy data, m_nBlockNum: " << m_nBlockNum << " , copy size:" << sizeof(hash_node_t) * BLOCK_SIZE;
memcpy(m_blockAddr[m_nBlockNum],
rhs.m_blockAddr[m_nBlockNum],
sizeof(hash_node_t) * BLOCK_SIZE);
......@@ -265,11 +265,13 @@ class slim_hash_map {
}
size_type index = key % m_nHashSize;
hash_node_t* node = get_node(m_hashTable[index]);
int node_cnt = 0;
while (node != NULL && node->data.first != key) {
LOG(INFO) << "node link get:" << node->data.first;
node_cnt++;
node = get_node(node->next);
}
LOG(INFO) << "key: " << key << " , found count: " << node_cnt;
if (node == NULL) {
return end();
}
......@@ -390,7 +392,6 @@ class slim_hash_map {
if (node != NULL) {
return node->data.second;
}
return add_node(index, key)->data.second;
}
void clear() {
......@@ -399,16 +400,16 @@ class slim_hash_map {
m_nFreeEntries = 0;
m_nSize = 0;
}
bool load(const char* file) {
bool load(const char* file, uint32_t block_id) {
// clear();
// bias = 0 means base mode, bias = K means patch mode, and base dict has size K
int size = sizeof(key_t) + sizeof(value_t);
FILE* fp = fopen(file, "rb");
char* buf = reinterpret_cast<char*>(malloc(size * 100000));
LOG(INFO) << "current block id: " << block_id;
if (fp == NULL || buf == NULL) {
return false;
}
size_t read_count;
bool err = false;
key_t key;
......@@ -423,6 +424,8 @@ class slim_hash_map {
for (int i = 0; i < static_cast<int>(read_count); ++i) {
key = *(reinterpret_cast<key_t*>(buf + i * size));
value = *(reinterpret_cast<value_t*>(buf + i * size + sizeof(key_t)));
value = ((uint64_t)block_id << 32) | value;
LOG(INFO) << "slim map key: " << key << " , value: " << value;
(*this)[key] = value;
}
}
......@@ -557,7 +560,6 @@ class slim_hash_map {
}
hash_node_t* add_node(uint32_t index, const key_type& key) {
++m_nSize;
if (m_nFreeEntries) {
uint32_t addr = m_nFreeEntries;
hash_node_t* node = get_node(addr);
......@@ -569,7 +571,7 @@ class slim_hash_map {
}
uint32_t block = ((m_nNextEntry & 0xFF800000) >> 23);
//LOG(INFO) << "key: " << key << " here. index: " << index << " , m_nNextEntry: "<< m_nNextEntry << " , block:" << block<< ", m_nBlockNum:" << m_nBlockNum;
if (block >= m_nBlockNum) {
try {
m_blockAddr[m_nBlockNum++] = new hash_node_t[BLOCK_SIZE];
......@@ -581,7 +583,6 @@ class slim_hash_map {
return NULL;
}
}
uint32_t addr = m_nNextEntry;
++m_nNextEntry;
hash_node_t* node = get_node(addr);
......
......@@ -51,13 +51,12 @@ int Dict::load(const std::string& dict_path,
bool in_mem,
const std::string& v_path) {
TIME_FLAG(load_start);
int ret = load_index(dict_path, v_path);
if (ret != E_OK) {
LOG(WARNING) << "load index failed";
return ret;
}
LOG(INFO) << "load index in mem mode: " << in_mem ;
if (in_mem) {
ret = load_data(dict_path, v_path);
if (ret != E_OK) {
......@@ -81,8 +80,11 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
std::string index_n_path(dict_path);
index_n_path.append(v_path);
index_n_path.append("/index.n");
uint32_t cur_block_id = 0;
if (_base_dict) cur_block_id = _base_dict->_block_set.size();
LOG(INFO) << "index file path: " << index_n_path;
//ERR HERE
std::unique_ptr<FILE, decltype(&fclose)> pf(fopen(index_n_path.c_str(), "rb"),
&fclose);
if (pf.get() == NULL) {
......@@ -150,12 +152,16 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
return E_DATA_ERROR;
}
} else {
if (_slim_table.copy_data_from(_base_dict->_slim_table) != 0) {
LOG(ERROR) << "copy data from old index failed in patch mode";
return E_DATA_ERROR;
}
file_idx = 0;
LOG(INFO)
<< "index check file len failed in patch mode, set file_idx to 0";
<< "index check fail, direct copy";
}
}
LOG(INFO) << "resize slim table, new count: " << count/2;
_slim_table.resize(count / 2);
char file[1024];
......@@ -167,6 +173,7 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
dict_path.c_str(),
v_path.c_str(),
file_idx);
LOG(INFO) << "load file str: " << file;
if (stat(file, &fstat) < 0) {
if (errno == ENOENT) {
LOG(WARNING) << "index." << file_idx << " not exist";
......@@ -181,8 +188,8 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
<< (uint64_t)fstat.st_size;
return E_DATA_ERROR;
}
LOG(INFO) << "loading from index." << file_idx;
if (!_slim_table.load(file) || _slim_table.size() > count) {
LOG(INFO) << "loading from index." << file_idx << " . table size: " << _slim_table.size();
if (!_slim_table.load(file, cur_block_id)) {
return E_DATA_ERROR;
}
......@@ -193,8 +200,15 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
}
int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
std::vector<uint32_t> block_size;
uint64_t total_data_size = 0;
if (_base_dict) {
_block_set = _base_dict->_block_set;
LOG(INFO)<< "load data base dict block set size: " << _block_set[0].size;
for (size_t i = 0; i < _block_set.size(); ++i) {
block_size.push_back(_block_set[i].size);
total_data_size += _block_set[i].size;
}
}
std::string data_n_path(dict_path);
......@@ -212,8 +226,6 @@ int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
return E_DATA_ERROR;
}
std::vector<uint32_t> block_size;
uint64_t total_data_size = 0;
for (uint32_t i = 0; i < count; ++i) {
uint32_t size = 0;
if (fread(reinterpret_cast<void*>(&size), sizeof(uint32_t), 1, pf) != 1) {
......@@ -222,6 +234,7 @@ int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
return E_DATA_ERROR;
}
block_size.push_back(size);
LOG(INFO) << "new block size: " << size;
total_data_size += size;
}
g_data_size << (total_data_size / 1024 / 1024);
......@@ -229,36 +242,35 @@ int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
pf = NULL;
uint32_t old_size = _block_set.size();
LOG(INFO) << "load data old size: " << old_size;
for (size_t i = 0; i < old_size; ++i) {
if (_block_set[i].size != block_size[i]) {
old_size = 0;
break;
}
}
_block_set.resize(count);
LOG(INFO) << "load data block set count: " << count << " , old size: " << old_size;
_block_set.resize(count + old_size);
for (size_t i = old_size; i < _block_set.size(); ++i) {
char data_path[1024];
LOG(INFO) << "load from data." << i;
snprintf(
data_path, 1024, "%s%s/data.%lu", dict_path.c_str(), v_path.c_str(), i);
//snprintf(
// data_path, 1024, "%s%s/data.%lu", dict_path.c_str(), v_path.c_str(), i);
snprintf(data_path, 1024, "%s%s/data.%lu", dict_path.c_str(), v_path.c_str(), i - old_size);
FILE* data_file = fopen(data_path, "rb");
if (data_file == NULL) {
LOG(WARNING) << "open data file [" << data_path << " failed";
LOG(WARNING) << "open data file [" << data_path << " ]failed";
_block_set[i].s_data.reset();
_block_set[i].size = 0;
continue;
}
_block_set[i].s_data.reset(
reinterpret_cast<char*>(malloc(block_size[i] * sizeof(char))));
_block_set[i].s_data.reset(reinterpret_cast<char*>(malloc(block_size[i] * sizeof(char))));
if (_block_set[i].s_data.get() == NULL) {
LOG(ERROR) << "malloc data failed";
fclose(data_file);
return E_OOM;
}
_block_set[i].size = block_size[i];
if (fread(reinterpret_cast<void*>(_block_set[i].s_data.get()),
sizeof(char),
_block_set[i].size,
......@@ -267,7 +279,10 @@ int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
fclose(data_file);
return E_DATA_ERROR;
}
LOG(INFO) << "load new data to BlockSet succ";
for (size_t ii = 0; ii < 20; ++ii) {
LOG(INFO) << "data ptr: " << (int)(_block_set[i].s_data.get()[ii]);
}
fclose(data_file);
}
......@@ -386,12 +401,11 @@ bool Dict::seek(uint64_t key, char* buff, uint64_t* buff_size) {
uint64_t flag = it->second;
uint32_t id = (uint32_t)(flag >> 32);
uint64_t addr = (uint32_t)(flag);
LOG(INFO) << "search key: " << id << " , addr: " << addr;
if (_block_set.size() > id) {
uint32_t block_size = _block_set[id].size;
char* block_data = NULL;
block_data = _block_set[id].s_data.get();
if (block_data && addr + sizeof(uint32_t) <= block_size) {
uint32_t len = *(reinterpret_cast<uint32_t*>(block_data + addr));
if (addr + len <= block_size && len >= sizeof(uint32_t)) {
......@@ -405,6 +419,7 @@ bool Dict::seek(uint64_t key, char* buff, uint64_t* buff_size) {
<< default_buffer_size;
return false;
}
LOG(INFO) << "seek key: " << key << " , addr: " << addr;
memcpy(buff,
(block_data + addr + sizeof(uint32_t)),
len - sizeof(uint32_t));
......
## 如何获得稀疏参数索引Cube所需的模型输入
#### 背景知识
### 背景知识
推荐系统需要大规模稀疏参数索引来帮助分布式部署,可在`python/example/criteo_ctr_with_cube`或是[PaddleRec](https://github.com/paddlepaddle/paddlerec)了解推荐模型。
......@@ -11,11 +11,11 @@
用户在调试Cube服务功能时,可以自定义KV对生成SequenceFile格式文件来进行调试。
用户在验证Cube的配送正确性时,可以转换SequenceFile格式文件至可读文字来进行比对验证。
#### 预备知识
### 预备知识
- 需要会编译Paddle Serving,参见[编译文档](./COMPILE.md)
#### 用法
### 用法
在编译结束后的安装文件,可以得到 seq_reader 和 kv_to_seqfile.py。
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册