#include "ooo_cpu.h"

#define IPREF_LOOKAHEAD 11	// Lookahead depth
#define IPREF_START_DEPTH 0	// Start prefetch injection from this depth

// PTB configuration
#define LOG_NUM_IPREF_PTB_SETS 11
#define NUM_IPREF_PTB_SETS (1 << LOG_NUM_IPREF_PTB_SETS)
#define NUM_IPREF_PTB_WAYS 14

// Partial tag length for PTB
#define IPREF_TAG_BITS 12
#define IPREF_TAG_MASK ((1 << IPREF_TAG_BITS) - 1)

// Lower target bits for prefetch target
#define IPREF_TARGET_BITS 14
#define IPREF_TARGET_MASK ((1 << IPREF_TARGET_BITS) - 1)

// Upper target pointer length for compressing prefetch target
#define IPREF_UPPER_TARGET_POINTER_BITS 5

// Number of dictionary entries
#define IPREF_UPPER_TARGET_DICTIONARY_SIZE (1 << IPREF_UPPER_TARGET_POINTER_BITS)

// Recent access filter size
#define IPREF_FILTER_SIZE 15

// PTB entry; size = 36 bits
class IPREF_PTBEntry {
   public:
   uint32_t tag;			// 12 bits
   uint32_t lower_target;		// 14 bits
   uint8_t  upper_target_pointer;	//  5 bits
   uint8_t  pht_counter;		//  2 bits
   uint8_t  rrpv;			//  2 bits
   bool     valid;			//  1 bit
};

IPREF_PTBEntry iprefPTB[NUM_IPREF_PTB_SETS][NUM_IPREF_PTB_WAYS];     // size = 2048x14x36 bits = 126 KB

// Dictionary entry; size = 50 bits
class IPREF_dictionaryEntry {
   public:
   uint64_t upper_target;	// 44 bits
   uint64_t lru;		//  5 bits
   bool     valid;		//  1 bit
};

IPREF_dictionaryEntry iprefDictionary[IPREF_UPPER_TARGET_DICTIONARY_SIZE];  // 32x50 bits = 1600 bits

// Allocate a new target in dictionary if not present
static uint8_t iprefDictionaryAllocate (uint64_t upper_target)
{
   uint8_t i;

   for (i=0; i<IPREF_UPPER_TARGET_DICTIONARY_SIZE; i++) {
      if (iprefDictionary[i].valid && (iprefDictionary[i].upper_target == upper_target)) {
         break;
      }
   }

   if (i == IPREF_UPPER_TARGET_DICTIONARY_SIZE) {  // Miss: replace
      for (i=0; i<IPREF_UPPER_TARGET_DICTIONARY_SIZE; i++) {
         if (!iprefDictionary[i].valid) break;
      }
      if (i == IPREF_UPPER_TARGET_DICTIONARY_SIZE) {
         // LRU replacement
         uint64_t maxlru = 0;
         int index = -1;
         for (i=0; i<IPREF_UPPER_TARGET_DICTIONARY_SIZE; i++) {
            if (iprefDictionary[i].lru > maxlru) {
               maxlru = iprefDictionary[i].lru;
               index = i;
            }
         }
         i = index;
      }
      iprefDictionary[i].upper_target = upper_target;
      iprefDictionary[i].valid = true;
   }

   // Update LRU state
   for (uint8_t j=0; j<IPREF_UPPER_TARGET_DICTIONARY_SIZE; j++) iprefDictionary[j].lru++;
   iprefDictionary[i].lru = 0;

   return i;
}

// Update LRU states in dictionary on a read
static void iprefDictionaryUpdateLRU (uint8_t read_index)
{
   for (uint8_t j=0; j<IPREF_UPPER_TARGET_DICTIONARY_SIZE; j++) iprefDictionary[j].lru++;
   iprefDictionary[read_index].lru = 0;
}

// Look up PTB and retrieve target and counter values
static bool iprefPTBLookup (uint64_t signature, uint64_t ip, uint64_t *target, uint8_t *counter)
{
   uint32_t iprefPTBIndex = (signature & (NUM_IPREF_PTB_SETS - 1));
   uint64_t iprefPTBTag = ip & IPREF_TAG_MASK;
   uint32_t i;

   for (i=0; i<NUM_IPREF_PTB_WAYS; i++) {
      if (iprefPTB[iprefPTBIndex][i].valid && (iprefPTB[iprefPTBIndex][i].tag == iprefPTBTag)) {
         // Hit
         (*target) = (iprefDictionary[iprefPTB[iprefPTBIndex][i].upper_target_pointer].upper_target<<IPREF_TARGET_BITS) | iprefPTB[iprefPTBIndex][i].lower_target;
         iprefDictionaryUpdateLRU (iprefPTB[iprefPTBIndex][i].upper_target_pointer);
         (*counter) = iprefPTB[iprefPTBIndex][i].pht_counter;
         iprefPTB[iprefPTBIndex][i].rrpv = 0;
         return true;
      }
   }

   return false;
}

// Allocate a new entry in PTB if not already present and taken
static void iprefPTBAllocate (uint64_t signature, uint64_t ip, uint64_t target, uint8_t taken)
{
   uint32_t iprefPTBIndex = (signature & (NUM_IPREF_PTB_SETS - 1));
   uint64_t iprefPTBTag = ip & IPREF_TAG_MASK;
   uint32_t i;

   for (i=0; i<NUM_IPREF_PTB_WAYS; i++) {
      if (iprefPTB[iprefPTBIndex][i].valid && (iprefPTB[iprefPTBIndex][i].tag == iprefPTBTag)) {
         // Hit
         if (taken) {
            iprefPTB[iprefPTBIndex][i].lower_target = target & IPREF_TARGET_MASK;
            iprefPTB[iprefPTBIndex][i].upper_target_pointer = iprefDictionaryAllocate(target >> IPREF_TARGET_BITS);
            if (iprefPTB[iprefPTBIndex][i].pht_counter < 3) iprefPTB[iprefPTBIndex][i].pht_counter++;
         }
         else {
            if (iprefPTB[iprefPTBIndex][i].pht_counter > 0) iprefPTB[iprefPTBIndex][i].pht_counter--;
         }
         iprefPTB[iprefPTBIndex][i].rrpv = 0;
         break;
      }
   }

   if (taken) { // Allocate only if taken
      if (i == NUM_IPREF_PTB_WAYS) {
         // Miss: replace
         // Victim search order:
         // (i) invalid entry
         // (ii) pht_counter == 0 (strongly not taken)
         // (iii) SRRIP
         for (i=0; i<NUM_IPREF_PTB_WAYS; i++) {
            if (!iprefPTB[iprefPTBIndex][i].valid) break;
         }
         if (i == NUM_IPREF_PTB_WAYS) {
            for (i=0; i<NUM_IPREF_PTB_WAYS; i++) {
               if (!iprefPTB[iprefPTBIndex][i].pht_counter) break;
            }
            if (i == NUM_IPREF_PTB_WAYS) {
               while (1) {
                  for (i=0; i<NUM_IPREF_PTB_WAYS; i++) {
                     if (iprefPTB[iprefPTBIndex][i].rrpv == 3) break;
                  }
                  if (i == NUM_IPREF_PTB_WAYS) {
                     for (i=0; i<NUM_IPREF_PTB_WAYS; i++) iprefPTB[iprefPTBIndex][i].rrpv++;
                  }
                  else break;
               }
            }
         }
         assert(i < NUM_IPREF_PTB_WAYS);
         iprefPTB[iprefPTBIndex][i].tag = iprefPTBTag;
         iprefPTB[iprefPTBIndex][i].lower_target = target & IPREF_TARGET_MASK;
         iprefPTB[iprefPTBIndex][i].upper_target_pointer = iprefDictionaryAllocate(target >> IPREF_TARGET_BITS);
         iprefPTB[iprefPTBIndex][i].pht_counter = 2;   // weakly taken
         iprefPTB[iprefPTBIndex][i].valid = true;
         iprefPTB[iprefPTBIndex][i].rrpv = 2;
      }
   }
}

// Recent access filter entry; size = 59 bits
class IPrefFilterEntry {
   public:
      bool     valid;	//  1 bit
      uint64_t tag;	// 58 bits
};

IPrefFilterEntry iprefFilter[IPREF_FILTER_SIZE];  // total size = 15x59 bits = 885 bits
uint8_t iprefFilterTailPtr;  // total size = 4 bits

// Search for an entry in the recent access filter
static bool iprefFilterLookup (uint64_t block_addr)
{
   for (int i=0; i<IPREF_FILTER_SIZE; i++) {
      if (iprefFilter[i].valid && (iprefFilter[i].tag == block_addr)) return true;
   }
   return false;
}

// Insert a new entry at the tail of the recent access  filter
static void iprefFilterInsert (uint64_t block_addr)
{
   iprefFilter[iprefFilterTailPtr].tag = block_addr;
   iprefFilter[iprefFilterTailPtr].valid = true;
   iprefFilterTailPtr = (iprefFilterTailPtr + 1) % IPREF_FILTER_SIZE;
}

uint64_t last_ip_block;  	// Last seen ip block address (58 bits)
bool last_ip_block_valid;       // 1 bit
uint32_t ipref_global_hist;  	// Block-grain global history (11 bits)
bool ipref_last_target_nonzero;	// True if last branch was predicted taken (1 bit)

void O3_CPU::l1i_prefetcher_initialize() 
{
   last_ip_block = 0;
   last_ip_block_valid = false;
   ipref_global_hist = 0;

   for (int i=0; i<NUM_IPREF_PTB_SETS; i++) for (int j=0; j<NUM_IPREF_PTB_WAYS; j++) iprefPTB[i][j].valid = false;
   for (int i=0; i<IPREF_FILTER_SIZE; i++) iprefFilter[i].valid = false;
   iprefFilterTailPtr = 0;

   ipref_last_target_nonzero = false;
}

void O3_CPU::l1i_prefetcher_branch_operate(uint64_t ip, uint8_t branch_type, uint64_t branch_target)
{
   uint64_t this_block;			// current block address (58 bits)
   bool ptbhit;                         // Set on a PTB hit (1 bit)
   uint64_t pred_target;                // Predicted prefetch target block (58 bits)
   uint8_t counter;                     // pattern history counter output value (2 bits)
   uint64_t ip_block;    		// staging block address (58 bits)
   uint32_t spec_ghist;         	// Speculative copy of block-grain global history (11 bits)

   this_block = ip >> LOG2_BLOCK_SIZE;

   if (last_ip_block_valid) {
      if (this_block != last_ip_block) {  // Skip over branches falling on the same cache block
         if (ipref_last_target_nonzero) {
            // Last dynamic branch was predicted taken
            // Last target block is not taken
            iprefPTBAllocate (ipref_global_hist ^ last_ip_block, last_ip_block, this_block, 0);
            ipref_global_hist = ipref_global_hist << 1;
         }
         else {
            if (this_block == (last_ip_block + 1)) {
               // Not taken
               iprefPTBAllocate (ipref_global_hist ^ last_ip_block, last_ip_block, this_block, 0);
               ipref_global_hist = ipref_global_hist << 1;
            }
            else {
               // Taken
               iprefPTBAllocate (ipref_global_hist ^ last_ip_block, last_ip_block, this_block, 1);
               ipref_global_hist = (ipref_global_hist << 1) | 1;
            }
         }
      }
   }

   if (branch_target) {  // Follow predicted taken target
      if ((branch_target >> LOG2_BLOCK_SIZE) != this_block) {  // Skip over if target and branch belong to the same cache block
         if ((branch_target >> LOG2_BLOCK_SIZE) == (this_block + 1)) {
            // Not taken
            iprefPTBAllocate (ipref_global_hist ^ this_block, this_block, branch_target >> LOG2_BLOCK_SIZE, 0);
            ipref_global_hist = ipref_global_hist << 1;
         }
         else {
            // Taken
            iprefPTBAllocate (ipref_global_hist ^ this_block, this_block, branch_target >> LOG2_BLOCK_SIZE, 1);
            ipref_global_hist = (ipref_global_hist << 1) | 1;
         }
      }

      // Inject prefetch for the block containing taken target
      if (!iprefFilterLookup (branch_target >> LOG2_BLOCK_SIZE) && (IPREF_START_DEPTH == 0)) {
         prefetch_code_line(branch_target);
         iprefFilterInsert (branch_target >> LOG2_BLOCK_SIZE);
      }

      // Look ahead prefetch following the taken branch target
      ip_block = branch_target >> LOG2_BLOCK_SIZE;
      spec_ghist = ipref_global_hist;			// Speculative copy of block-grain global history (11 bits)
      for (uint8_t i=0; i<IPREF_LOOKAHEAD; i++) {
         ptbhit = iprefPTBLookup (spec_ghist ^ ip_block, ip_block, &pred_target, &counter);
         if (!ptbhit || (counter < 2)) {
            // Predicted not taken
            pred_target = ip_block + 1;
            spec_ghist = spec_ghist << 1;
         }
         else {
            // Predicted taken
            spec_ghist = (spec_ghist << 1) | 1;
         }
         ip_block = pred_target;

         // Inject prefetch
         if (!iprefFilterLookup (pred_target) && ((i+1) >= IPREF_START_DEPTH)) {
            prefetch_code_line(pred_target << LOG2_BLOCK_SIZE);
            iprefFilterInsert (pred_target);
         }
      }
   }

   if (last_ip_block != this_block) {  // Skip over branches belonging to the same cache block
      // Inject prefetch for the block containing the current branch
      if (!iprefFilterLookup (this_block) && (IPREF_START_DEPTH == 0)) {
         prefetch_code_line(ip);
         iprefFilterInsert (this_block);
      }

      // Look ahead prefetch following the current block
      if (!branch_target) {
         ip_block = this_block;
         spec_ghist = ipref_global_hist;		// Speculative copy of block-grain global history (11 bits)
         for (uint8_t i=0; i<IPREF_LOOKAHEAD; i++) {
            ptbhit = iprefPTBLookup (spec_ghist ^ ip_block, ip_block, &pred_target, &counter);
            if (!ptbhit || (counter < 2)) {
               // Predicted not taken
               pred_target = ip_block + 1;
               spec_ghist = spec_ghist << 1;
            }
            else {
               // Predicted taken
               spec_ghist = (spec_ghist << 1) | 1;
            }
            ip_block = pred_target;

            // Inject prefetch
            if (!iprefFilterLookup (pred_target) && ((i+1) >= IPREF_START_DEPTH)) {
               prefetch_code_line(pred_target << LOG2_BLOCK_SIZE);
               iprefFilterInsert (pred_target);
            }
         }
      }
   }

   if (branch_target) {
      last_ip_block = branch_target >> LOG2_BLOCK_SIZE;
      ipref_last_target_nonzero = true;
   }
   else {
      last_ip_block = ip >> LOG2_BLOCK_SIZE;
      ipref_last_target_nonzero = false;
   }
   last_ip_block_valid = true;
}

void O3_CPU::l1i_prefetcher_cache_operate(uint64_t addr, uint8_t cache_hit, uint8_t prefetch_hit)
{
   // Update recent access filter on demand access
   if (!iprefFilterLookup (addr >> LOG2_BLOCK_SIZE)) iprefFilterInsert (addr >> LOG2_BLOCK_SIZE);
}

void O3_CPU::l1i_prefetcher_cycle_operate()
{

}

void O3_CPU::l1i_prefetcher_cache_fill(uint64_t v_addr, uint32_t set, uint32_t way, uint8_t prefetch, uint64_t evicted_v_addr)
{

}

void O3_CPU::l1i_prefetcher_final_stats()
{

}
