#include #include "common.h" #include "ram.h" #include "compress.h" // #define TLB_UNITTEST #ifdef WITH_DRAMSIM3 #include "cosimulation.h" CoDRAMsim3 *dram = NULL; #endif static uint64_t *ram; static long img_size = 0; static pthread_mutex_t ram_mutex; void* get_img_start() { return &ram[0]; } long get_img_size() { return img_size; } void* get_ram_start() { return &ram[0]; } long get_ram_size() { return EMU_RAM_SIZE; } #ifdef TLB_UNITTEST void addpageSv39() { //three layers //addr range: 0x0000000080000000 - 0x0000000088000000 for 128MB from 2GB - 2GB128MB //the first layer: one entry for 1GB. (512GB in total by 512 entries). need the 2th entries //the second layer: one entry for 2MB. (1GB in total by 512 entries). need the 0th-63rd entries //the third layer: one entry for 4KB (2MB in total by 512 entries). need 64 with each one all #define TOPSIZE (128 * 1024 * 1024) #define PAGESIZE (4 * 1024) // 4KB = 2^12B #define ENTRYNUM (PAGESIZE / 8) //512 2^9 #define PTEVOLUME (PAGESIZE * ENTRYNUM) // 2MB #define PTENUM (TOPSIZE / PTEVOLUME) // 128MB / 2MB = 64 #define PDDENUM 1 #define PDENUM 1 #define PDDEADDR (0x88000000 - (PAGESIZE * (PTENUM + 2))) //0x88000000 - 0x1000*66 #define PDEADDR (0x88000000 - (PAGESIZE * (PTENUM + 1))) //0x88000000 - 0x1000*65 #define PTEADDR(i) (0x88000000 - (PAGESIZE * PTENUM) + (PAGESIZE * i)) //0x88000000 - 0x100*64 #define PTEMMIONUM 128 #define PDEMMIONUM 1 #define PTEDEVNUM 128 #define PDEDEVNUM 1 uint64_t pdde[ENTRYNUM]; uint64_t pde[ENTRYNUM]; uint64_t pte[PTENUM][ENTRYNUM]; // special addr for mmio 0x40000000 - 0x4fffffff uint64_t pdemmio[ENTRYNUM]; uint64_t ptemmio[PTEMMIONUM][ENTRYNUM]; // special addr for internal devices 0x30000000-0x3fffffff uint64_t pdedev[ENTRYNUM]; uint64_t ptedev[PTEDEVNUM][ENTRYNUM]; // dev: 0x30000000-0x3fffffff pdde[0] = (((PDDEADDR-PAGESIZE*(PDEMMIONUM+PTEMMIONUM+PDEDEVNUM)) & 0xfffff000) >> 2) | 0x1; for (int i = 0; i < PTEDEVNUM; i++) { pdedev[ENTRYNUM-PTEDEVNUM+i] = (((PDDEADDR-PAGESIZE*(PDEMMIONUM+PTEMMIONUM+PDEDEVNUM+PTEDEVNUM-i)) & 0xfffff000) >> 2) | 0x1; } for(int outidx = 0; outidx < PTEDEVNUM; outidx++) { for(int inidx = 0; inidx < ENTRYNUM; inidx++) { ptedev[outidx][inidx] = (((0x30000000 + outidx*PTEVOLUME + inidx*PAGESIZE) & 0xfffff000) >> 2) | 0xf; } } // mmio: 0x40000000 - 0x4fffffff pdde[1] = (((PDDEADDR-PAGESIZE*PDEMMIONUM) & 0xfffff000) >> 2) | 0x1; for(int i = 0; i < PTEMMIONUM; i++) { pdemmio[i] = (((PDDEADDR-PAGESIZE*(PTEMMIONUM+PDEMMIONUM-i)) & 0xfffff000) >> 2) | 0x1; } for(int outidx = 0; outidx < PTEMMIONUM; outidx++) { for(int inidx = 0; inidx < ENTRYNUM; inidx++) { ptemmio[outidx][inidx] = (((0x40000000 + outidx*PTEVOLUME + inidx*PAGESIZE) & 0xfffff000) >> 2) | 0xf; } } //0x800000000 - 0x87ffffff pdde[2] = ((PDEADDR & 0xfffff000) >> 2) | 0x1; //pdde[2] = ((0x80000000&0xc0000000) >> 2) | 0xf; for(int i = 0; i < PTENUM ;i++) { // pde[i] = ((PTEADDR(i)&0xfffff000)>>2) | 0x1; pde[i] = (((0x80000000+i*2*1024*1024)&0xffe00000)>>2) | 0xf; } for(int outidx = 0; outidx < PTENUM; outidx++ ) { for(int inidx = 0; inidx < ENTRYNUM; inidx++ ) { pte[outidx][inidx] = (((0x80000000 + outidx*PTEVOLUME + inidx*PAGESIZE) & 0xfffff000)>>2) | 0xf; } } printf("try to add identical tlb page to ram\n"); memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM+PTEMMIONUM+PDEDEVNUM+PTEDEVNUM)),ptedev,PAGESIZE*PTEDEVNUM); memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM+PTEMMIONUM+PDEDEVNUM)),pdedev,PAGESIZE*PDEDEVNUM); memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM+PTEMMIONUM)),ptemmio, PAGESIZE*PTEMMIONUM); memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM)), pdemmio, PAGESIZE*PDEMMIONUM); memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM)), pdde, PAGESIZE*PDDENUM); memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDENUM)), pde, PAGESIZE*PDENUM); memcpy((char *)ram+(TOPSIZE-PAGESIZE*PTENUM), pte, PAGESIZE*PTENUM); } #endif void init_ram(const char *img) { assert(img != NULL); printf("The image is %s\n", img); // initialize memory using Linux mmap printf("Using simulated %luMB RAM\n", EMU_RAM_SIZE / (1024 * 1024)); ram = (uint64_t *)mmap(NULL, EMU_RAM_SIZE, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); if (ram == (uint64_t *)MAP_FAILED) { printf("Cound not mmap 0x%lx bytes\n", EMU_RAM_SIZE); assert(0); } #ifdef TLB_UNITTEST //new add addpageSv39(); //new end #endif int ret; if (isGzFile(img)) { printf("Gzip file detected and loading image from extracted gz file\n"); img_size = readFromGz(ram, img, EMU_RAM_SIZE, LOAD_RAM); assert(img_size >= 0); } else { FILE *fp = fopen(img, "rb"); if (fp == NULL) { printf("Can not open '%s'\n", img); assert(0); } fseek(fp, 0, SEEK_END); img_size = ftell(fp); if (img_size > EMU_RAM_SIZE) { img_size = EMU_RAM_SIZE; } fseek(fp, 0, SEEK_SET); ret = fread(ram, img_size, 1, fp); assert(ret == 1); fclose(fp); } #ifdef WITH_DRAMSIM3 #if !defined(DRAMSIM3_CONFIG) || !defined(DRAMSIM3_OUTDIR) #error DRAMSIM3_CONFIG or DRAMSIM3_OUTDIR is not defined #endif assert(dram == NULL); // dram = new ComplexCoDRAMsim3(DRAMSIM3_CONFIG, DRAMSIM3_OUTDIR); dram = new SimpleCoDRAMsim3(10); #endif pthread_mutex_init(&ram_mutex, 0); } void ram_finish() { munmap(ram, EMU_RAM_SIZE); #ifdef WITH_DRAMSIM3 dramsim3_finish(); #endif pthread_mutex_destroy(&ram_mutex); } extern "C" uint64_t ram_read_helper(uint8_t en, uint64_t rIdx) { if (en && rIdx >= EMU_RAM_SIZE / sizeof(uint64_t)) { rIdx %= EMU_RAM_SIZE / sizeof(uint64_t); } pthread_mutex_lock(&ram_mutex); uint64_t rdata = (en) ? ram[rIdx] : 0; pthread_mutex_unlock(&ram_mutex); return rdata; } extern "C" void ram_write_helper(uint64_t wIdx, uint64_t wdata, uint64_t wmask, uint8_t wen) { if (wen) { if (wIdx >= EMU_RAM_SIZE / sizeof(uint64_t)) { printf("ERROR: ram wIdx = 0x%lx out of bound!\n", wIdx); assert(wIdx < EMU_RAM_SIZE / sizeof(uint64_t)); } pthread_mutex_lock(&ram_mutex); ram[wIdx] = (ram[wIdx] & ~wmask) | (wdata & wmask); pthread_mutex_unlock(&ram_mutex); } } #ifdef WITH_DRAMSIM3 #include void dramsim3_finish() { delete dram; } #define MAX_AXI_DATA_LEN 8 // currently does not support masked read or write struct dramsim3_meta { uint8_t len; uint8_t size; uint8_t offset; uint8_t id; uint64_t data[MAX_AXI_DATA_LEN]; }; void axi_read_data(const axi_ar_channel &ar, dramsim3_meta *meta) { uint64_t address = ar.addr % EMU_RAM_SIZE; uint64_t beatsize = 1 << ar.size; uint8_t beatlen = ar.len + 1; uint64_t transaction_size = beatsize * beatlen; assert((transaction_size % sizeof(uint64_t)) == 0); // axi burst FIXED if (ar.burst == 0x0) { std::cout << "axi burst FIXED not supported!" << std::endl; assert(0); } // axi burst INCR else if (ar.burst == 1) { assert(transaction_size / sizeof(uint64_t) <= MAX_AXI_DATA_LEN); for (int i = 0; i < transaction_size / sizeof(uint64_t); i++) { meta->data[i] = ram[address / sizeof(uint64_t)]; address += sizeof(uint64_t); } } // axi burst WRAP else if (ar.burst == 2) { uint64_t low = (address / transaction_size) * transaction_size; uint64_t high = low + transaction_size; assert(transaction_size / sizeof(uint64_t) <= MAX_AXI_DATA_LEN); for (int i = 0; i < transaction_size / sizeof(uint64_t); i++) { if (address == high) { address = low; } meta->data[i] = ram[address / sizeof(uint64_t)]; address += sizeof(uint64_t); } } else { std::cout << "reserved arburst!" << std::endl; assert(0); } meta->len = beatlen; meta->size = beatsize; meta->offset = 0; meta->id = ar.id; } CoDRAMRequest *dramsim3_request(const axi_channel &axi, bool is_write) { uint64_t address = (is_write) ? axi.aw.addr : axi.ar.addr; dramsim3_meta *meta = new dramsim3_meta; // WRITE if (is_write) { meta->len = axi.aw.len + 1; meta->size = 1 << axi.aw.size; meta->offset = 0; meta->id = axi.aw.id; } else { axi_read_data(axi.ar, meta); } CoDRAMRequest *req = new CoDRAMRequest(); req->address = address; req->is_write = is_write; req->meta = meta; return req; } static CoDRAMResponse *wait_resp_r = NULL; static CoDRAMResponse *wait_resp_b = NULL; static CoDRAMRequest *wait_req_w = NULL; // currently only accept one in-flight read + one in-flight write static uint64_t raddr, roffset = 0, rlen; static uint64_t waddr, woffset = 0, wlen; void dramsim3_helper_rising(const axi_channel &axi) { // ticks DRAMsim3 according to CPU_FREQ:DRAM_FREQ dram->tick(); // read data fire: check the last read request if (axi_check_rdata_fire(axi)) { if (wait_resp_r == NULL) { printf("ERROR: There's no in-flight read request.\n"); assert(wait_resp_r != NULL); } dramsim3_meta *meta = static_cast(wait_resp_r->req->meta); meta->offset++; // check whether the last rdata response has finished if (meta->offset == meta->len) { delete meta; delete wait_resp_r->req; delete wait_resp_r; wait_resp_r = NULL; } } // read address fire: accept a new request if (axi_check_raddr_fire(axi)) { dram->add_request(dramsim3_request(axi, false)); } // the last write transaction is acknowledged if (axi_check_wack_fire(axi)) { if (wait_resp_b == NULL) { printf("ERROR: write response fire for nothing in-flight.\n"); assert(wait_resp_b != NULL); } // flush data to memory uint64_t waddr = wait_resp_b->req->address % EMU_RAM_SIZE; dramsim3_meta *meta = static_cast(wait_resp_b->req->meta); void *start_addr = ram + (waddr / sizeof(uint64_t)); memcpy(start_addr, meta->data, meta->len * meta->size); for (int i = 0; i < meta->len; i++) { // uint64_t address = wait_resp_b->req->address % EMU_RAM_SIZE; // ram[address / sizeof(uint64_t) + i] = meta->data[i]; // printf("flush write to memory[0x%ld] = 0x%lx\n", address) } delete meta; delete wait_resp_b->req; delete wait_resp_b; wait_resp_b = NULL; } // write address fire: accept a new write request if (axi_check_waddr_fire(axi)) { if (wait_req_w != NULL) { printf("ERROR: The last write request has not finished.\n"); assert(wait_req_w == NULL); } wait_req_w = dramsim3_request(axi, true); // printf("accept a new write request to addr = 0x%lx, len = %d\n", axi.aw.addr, axi.aw.len); } // write data fire: for the last write transaction if (axi_check_wdata_fire(axi)) { if (wait_req_w == NULL) { printf("ERROR: wdata fire for nothing in-flight.\n"); assert(wait_req_w != NULL); } dramsim3_meta *meta = static_cast(wait_req_w->meta); void *data_start = meta->data + meta->offset * meta->size / sizeof(uint64_t); axi_get_wdata(axi, data_start, meta->size); meta->offset++; // if this is the last beat if (meta->offset == meta->len) { assert(dram->will_accept(wait_req_w->address, true)); dram->add_request(wait_req_w); wait_req_w = NULL; } // printf("accept a new write data\n"); } } void dramsim3_helper_falling(axi_channel &axi) { // default branch to avoid wrong handshake axi.aw.ready = 0; axi.w.ready = 0; axi.b.valid = 0; axi.ar.ready = 0; axi.r.valid = 0; // RDATA: if finished, we try the next rdata response if (!wait_resp_r) wait_resp_r = dram->check_read_response(); // if there's some data response, put it onto axi bus if (wait_resp_r) { dramsim3_meta *meta = static_cast(wait_resp_r->req->meta); // printf("meta->size %d offset %d\n", meta->size, meta->offset*meta->size/sizeof(uint64_t)); void *data_start = meta->data + meta->offset*meta->size / sizeof(uint64_t); axi_put_rdata(axi, data_start, meta->size, meta->offset == meta->len - 1, meta->id); } // RADDR: check whether the read request can be accepted axi_addr_t raddr; if (axi_get_raddr(axi, raddr) && dram->will_accept(raddr, false)) { axi_accept_raddr(axi); // printf("try to accept read request to 0x%lx\n", raddr); } // WREQ: check whether the write request can be accepted // Note: block the next write here to simplify logic axi_addr_t waddr; if (wait_req_w == NULL && axi_get_waddr(axi, waddr) && dram->will_accept(waddr, false)) { axi_accept_waddr(axi); axi_accept_wdata(axi); // printf("try to accept write request to 0x%lx\n", waddr); } // WDATA: check whether the write data can be accepted if (wait_req_w != NULL) { axi_accept_wdata(axi); } // WRESP: if finished, we try the next write response if (!wait_resp_b) wait_resp_b = dram->check_write_response(); // if there's some write response, put it onto axi bus if (wait_resp_b) { dramsim3_meta *meta = static_cast(wait_resp_b->req->meta); axi_put_wack(axi, meta->id); } } #endif