#include "ooo_cpu.h"

// Number of lookahead steps scheduled per trigger: a read scheduling FIFO entry starts with
// IPREF_LOOKAHEAD pending steps, or IPREF_LOOKAHEAD - 1 when the first step was already performed
#define IPREF_LOOKAHEAD 11	// Prefetch lookahead

// With the value 0 every depth check in this file passes, i.e. prefetches are injected at all depths
#define IPREF_START_DEPTH 0	// Start injecting prefetches from this lookahead depth

// PTB configuration: 2^11 = 2048 sets of 14 ways each.
// The set index is the low LOG_NUM_IPREF_PTB_SETS bits of the lookup signature.
#define LOG_NUM_IPREF_PTB_SETS 11
#define NUM_IPREF_PTB_SETS (1 << LOG_NUM_IPREF_PTB_SETS)
#define NUM_IPREF_PTB_WAYS 14

// Partial tag length in PTB entry (the tag is the low IPREF_TAG_BITS bits of the block address)
#define IPREF_TAG_BITS 12
#define IPREF_TAG_MASK ((1 << IPREF_TAG_BITS) - 1)

// Lower target length in PTB entry (low IPREF_TARGET_BITS bits of the target are stored in the entry itself)
#define IPREF_TARGET_BITS 14
#define IPREF_TARGET_MASK ((1 << IPREF_TARGET_BITS) - 1)

// Upper target pointer size in PTB entry (index into the upper target dictionary)
#define IPREF_UPPER_TARGET_POINTER_BITS 5

// Number of dictionary entries (one per possible pointer value)
#define IPREF_UPPER_TARGET_DICTIONARY_SIZE (1 << IPREF_UPPER_TARGET_POINTER_BITS)

// One way of a PTB set. Modelled hardware cost: 12 + 14 + 5 + 2 + 2 + 1 = 36 bits.
struct IPREF_PTBEntry {
   // Partial tag: low IPREF_TAG_BITS (12) bits of the source block address
   uint64_t tag;
   // Low IPREF_TARGET_BITS (14) bits of the target block address
   uint32_t lower_target;
   // Index (5 bits) of the dictionary entry holding the remaining upper target bits
   uint8_t  upper_target_pointer;
   // 2-bit saturating pattern history counter; values >= 2 are treated as "predict taken"
   uint8_t  pht_counter;
   // 2-bit replacement state; a way with rrpv == 3 is the replacement victim of last resort
   uint8_t  rrpv;
   // Set once the entry has been allocated (1 bit)
   bool     valid;
};

// The PTB itself: set-associative table indexed by the low bits of the lookup signature.
// File-scope object, so it starts out zero-initialized; valid bits are also cleared in l1i_prefetcher_initialize().
IPREF_PTBEntry iprefPTB[NUM_IPREF_PTB_SETS][NUM_IPREF_PTB_WAYS];     // 2048x14x36 bits = 126 KB

// One slot of the upper target dictionary. Modelled hardware cost: 44 + 5 + 1 = 50 bits.
struct IPREF_DictionaryEntry {
   // Target bits above the low IPREF_TARGET_BITS bits (44 bits)
   uint64_t upper_target;
   // LRU age: reset to 0 on use, incremented whenever any slot is used (5 bits in hardware)
   uint64_t lru;
   // Set once the slot holds an upper target (1 bit)
   bool     valid;
};

// Dictionary of upper target bits shared by all PTB entries; a PTB entry refers to a slot by index.
// File-scope object, so every slot starts out zero-initialized (invalid, age 0).
IPREF_DictionaryEntry iprefDictionary[IPREF_UPPER_TARGET_DICTIONARY_SIZE];  // 32x50 bits = 1600 bits

// Find or allocate the dictionary slot that holds the given upper target bits.
//
// upper_target: target with its low IPREF_TARGET_BITS bits already shifted out.
// Returns the index of the slot that now holds upper_target.
//
// Slot choice on a miss: the first invalid slot if there is one, otherwise the first slot
// with the largest LRU age. In all cases the returned slot becomes the most recently used
// one (age 0) and every other slot ages by one.
static uint32_t iprefDictionaryAllocate (uint64_t upper_target)
{
   const uint32_t num_slots = IPREF_UPPER_TARGET_DICTIONARY_SIZE;
   uint32_t slot = num_slots;   // num_slots means "not found yet"

   // Hit: the upper target is already present
   for (uint32_t k = 0; k < num_slots; k++) {
      if (iprefDictionary[k].valid && (iprefDictionary[k].upper_target == upper_target)) {
         slot = k;
         break;
      }
   }

   if (slot == num_slots) {
      // Miss: prefer a slot that has never been used
      for (uint32_t k = 0; k < num_slots; k++) {
         if (!iprefDictionary[k].valid) {
            slot = k;
            break;
         }
      }

      if (slot == num_slots) {
         // All slots valid: evict the first slot with the largest age.
         // Starting from slot 0 guarantees an in-range victim even if all ages are equal
         // (the search used to start from index -1 and could fall through to slot 255).
         slot = 0;
         for (uint32_t k = 1; k < num_slots; k++) {
            if (iprefDictionary[k].lru > iprefDictionary[slot].lru) slot = k;
         }
      }

      iprefDictionary[slot].upper_target = upper_target;
      iprefDictionary[slot].valid = true;
   }

   // Age everything, then mark the chosen slot as most recently used
   for (uint32_t k = 0; k < num_slots; k++) iprefDictionary[k].lru++;
   iprefDictionary[slot].lru = 0;

   return slot;
}

// Update LRU states of the dictionary on a read
static void iprefDictionaryUpdateLRUStates (uint8_t read_index)
{
   for (uint8_t j=0; j<IPREF_UPPER_TARGET_DICTIONARY_SIZE; j++) iprefDictionary[j].lru++;
   iprefDictionary[read_index].lru = 0;
}

// Look up PTB and retrieve prefetch target and pattern history counter
static bool iprefPTBLookup (uint64_t signature, uint64_t ip, uint64_t *target, uint8_t *counter)
{
   uint32_t iprefPTBIndex = (signature & (NUM_IPREF_PTB_SETS - 1));
   uint64_t iprefPTBTag = ip & IPREF_TAG_MASK;
   uint32_t i;

   for (i=0; i<NUM_IPREF_PTB_WAYS; i++) {
      if (iprefPTB[iprefPTBIndex][i].valid && (iprefPTB[iprefPTBIndex][i].tag == iprefPTBTag)) {
         // Hit
         (*target) = (iprefDictionary[iprefPTB[iprefPTBIndex][i].upper_target_pointer].upper_target<<IPREF_TARGET_BITS) | iprefPTB[iprefPTBIndex][i].lower_target;
         iprefDictionaryUpdateLRUStates (iprefPTB[iprefPTBIndex][i].upper_target_pointer);
         (*counter) = iprefPTB[iprefPTBIndex][i].pht_counter;
         iprefPTB[iprefPTBIndex][i].rrpv = 0;
         return true;
      }
   }

   // Miss
   return false;
}

// Allocate a new PTB entry if it is already not there and taken
static void iprefPTBAllocate (uint64_t signature, uint64_t ip, uint64_t target, uint8_t taken)
{
   uint32_t iprefPTBIndex = (signature & (NUM_IPREF_PTB_SETS - 1));
   uint64_t iprefPTBTag = ip & IPREF_TAG_MASK;
   uint32_t i;

   for (i=0; i<NUM_IPREF_PTB_WAYS; i++) {
      if (iprefPTB[iprefPTBIndex][i].valid && (iprefPTB[iprefPTBIndex][i].tag == iprefPTBTag)) {
         // Hit
         if (taken) {
            iprefPTB[iprefPTBIndex][i].lower_target = target & IPREF_TARGET_MASK;
            iprefPTB[iprefPTBIndex][i].upper_target_pointer = iprefDictionaryAllocate(target >> IPREF_TARGET_BITS);
            if (iprefPTB[iprefPTBIndex][i].pht_counter < 3) iprefPTB[iprefPTBIndex][i].pht_counter++;
         }
         else {
            if (iprefPTB[iprefPTBIndex][i].pht_counter > 0) iprefPTB[iprefPTBIndex][i].pht_counter--;
         }
         iprefPTB[iprefPTBIndex][i].rrpv = 0;
         break;
      }
   }

   if (taken) {
      if (i == NUM_IPREF_PTB_WAYS) {
         // Miss: replace and allocate
         for (i=0; i<NUM_IPREF_PTB_WAYS; i++) {
            if (!iprefPTB[iprefPTBIndex][i].valid) break;
         }
         if (i == NUM_IPREF_PTB_WAYS) {
            for (i=0; i<NUM_IPREF_PTB_WAYS; i++) {
               if (!iprefPTB[iprefPTBIndex][i].pht_counter) break;
            }
            if (i == NUM_IPREF_PTB_WAYS) {
               while (1) {
                  for (i=0; i<NUM_IPREF_PTB_WAYS; i++) {
                     if (iprefPTB[iprefPTBIndex][i].rrpv == 3) break;
                  }
                  if (i == NUM_IPREF_PTB_WAYS) {
                     for (i=0; i<NUM_IPREF_PTB_WAYS; i++) iprefPTB[iprefPTBIndex][i].rrpv++;
                  }
                  else break;
               }
            }
         }
         assert(i < NUM_IPREF_PTB_WAYS);
         iprefPTB[iprefPTBIndex][i].tag = iprefPTBTag;
         iprefPTB[iprefPTBIndex][i].lower_target = target & IPREF_TARGET_MASK;
         iprefPTB[iprefPTBIndex][i].upper_target_pointer = iprefDictionaryAllocate(target >> IPREF_TARGET_BITS);
         iprefPTB[iprefPTBIndex][i].pht_counter = 2;	// weakly taken
         iprefPTB[iprefPTBIndex][i].valid = true;
         iprefPTB[iprefPTBIndex][i].rrpv = 2;
      }
   }
}

// Number of entries in the recent access filter (the filter is overwritten circularly,
// so it remembers the last IPREF_FILTER_SIZE inserted block addresses)
#define IPREF_FILTER_SIZE 15

// One slot of the recent access filter. Modelled hardware cost: 1 + 58 = 59 bits.
struct IPrefFilterEntry {
   // Set once the slot has been written (1 bit)
   bool     valid;
   // Block address remembered by this slot (58 bits)
   uint64_t tag;
};

// Recent access filter storage: block addresses that were recently prefetched or demanded
IPrefFilterEntry iprefFilter[IPREF_FILTER_SIZE];  // total size = 15x59 bits = 885 bits
// Index of the slot that the next insertion overwrites; wraps around at IPREF_FILTER_SIZE
uint8_t iprefFilterTailPtr;  // total size = 4 bits

// Search for a block in the recent access filter
static bool iprefFilterLookup (uint64_t block_addr)
{
   for (int i=0; i<IPREF_FILTER_SIZE; i++) {
      if (iprefFilter[i].valid && (iprefFilter[i].tag == block_addr)) return true;
   }
   return false;
}

// Enqueue a new block address at the tail
static void iprefFilterInsert (uint64_t block_addr)
{
   iprefFilter[iprefFilterTailPtr].tag = block_addr;
   iprefFilter[iprefFilterTailPtr].valid = true;
   iprefFilterTailPtr = (iprefFilterTailPtr + 1) % IPREF_FILTER_SIZE;
}

// New lookahead requests are spread over the queues round-robin; each cycle the head of
// every queue may perform one lookahead step if a PTB read port is free
#define NUM_QUEUES 11		// Number of read scheduling FIFO queues

#define IPREF_FIFO_SIZE 13	// Number of entries in each read scheduling FIFO queue

// One pending lookahead request in a read scheduling FIFO queue.
// Modelled hardware cost: 11 + 58 + 4 = 73 bits.
struct IPrefFIFOEntry {
   // Speculative block-grain global history to use for the next PTB lookup (11 bits)
   uint32_t ghist;
   // Block address to use for the next PTB lookup (58 bits)
   uint64_t ip_block;
   // Number of lookahead steps still pending; 0 means the entry is idle (4 bits)
   uint8_t  count;
};

// Read scheduling FIFO queues holding lookahead requests that still have PTB lookups to perform
IPrefFIFOEntry iprefFIFO[NUM_QUEUES][IPREF_FIFO_SIZE];  // 11x13x73 = 10439 bits
uint32_t iprefFIFOWP[NUM_QUEUES];  	// Write pointer for each read scheduling FIFO queue; size = 4x11 bits = 44 bits
uint32_t iprefFIFORP[NUM_QUEUES];  	// Read pointer for each read scheduling FIFO queue; size = 4x11 bits = 44 bits
uint8_t fifoPointer;  			// Which FIFO to populate next (4 bits)

// The bank of a PTB access is the low bits of its set index, i.e. (ghist ^ block) & (NUM_PTB_BANKS - 1)
#define NUM_PTB_BANKS 8		// Number of PTB banks
#define NUM_RPORTS_PER_BANK 4	// Number of read ports per PTB bank

// Reset to NUM_RPORTS_PER_BANK at the start of every l1i_prefetcher_cycle_operate() call and
// decremented by each PTB lookup.
// NOTE(review): the counter takes the values 0..4, which needs 3 bits per bank - confirm the 2-bit budget below.
uint8_t rports_available[NUM_PTB_BANKS];  // How many read ports available per bank; size = 8x2 bits = 16 bits

#define NUM_WPORTS_PER_BANK 1	// Number of write ports per PTB bank

// Reset to NUM_WPORTS_PER_BANK at the start of every l1i_prefetcher_cycle_operate() call and
// decremented by each PTB write. The code uses each element as a counter, not as a bitmap.
uint8_t wports_available[NUM_PTB_BANKS];  // How many write ports available per bank

#define IPREF_WFIFO_SIZE 4	// Number of entries in each write scheduling FIFO queue

// One deferred PTB training write in a write scheduling FIFO queue.
// Modelled hardware cost = PTB index in a bank + PTB tag + target + taken + valid
//                        = 8 + 12 + 58 + 1 + 1 bits = 80 bits.
// The simulator keeps the full history and block address instead; the PTB set index
// (ghist ^ ip_block) and the tag (from ip_block) are derived when the write is performed.
struct IPrefWFIFOEntry {
   // Global history at the time the write was requested
   uint32_t ghist;
   // Source block address of the transition
   uint64_t ip_block;
   // Target block address of the transition
   uint64_t target;
   // Nonzero if the transition is to be trained as taken
   uint8_t  taken;
   // Set while the entry holds a write that has not been performed yet
   bool     valid;
};

// Per-bank write scheduling FIFO queues holding PTB writes that found no free write port
IPrefWFIFOEntry iprefWFIFO[NUM_PTB_BANKS][IPREF_WFIFO_SIZE];   // 8x4x80 = 2560 bits
uint32_t iprefWFIFOWP[NUM_PTB_BANKS];  // Write pointer for each write scheduling FIFO queue; size = 2x8 bits = 16 bits
uint32_t iprefWFIFORP[NUM_PTB_BANKS];  // Read pointer for each write scheduling FIFO queue; size = 2x8 bits = 16 bits

// State carried between calls of l1i_prefetcher_branch_operate():
// last_ip_block is the block of the previous call's branch target when that target was
// nonzero, otherwise the block of the previous call's ip
uint64_t last_ip_block;			// 58 bits
uint32_t ipref_global_hist;		// Block-grain global history (11 bits)
// False until l1i_prefetcher_branch_operate() has run once
bool last_ip_block_valid; 		// 1 bit
// True when the previous call had a nonzero branch target
bool ipref_last_target_nonzero;  	// 1 bit

void O3_CPU::l1i_prefetcher_initialize() 
{
   last_ip_block = 0;
   last_ip_block_valid = false;
   ipref_global_hist = 0;

   for (int i=0; i<NUM_IPREF_PTB_SETS; i++) for (int j=0; j<NUM_IPREF_PTB_WAYS; j++) iprefPTB[i][j].valid = false;
   for (int i=0; i<IPREF_FILTER_SIZE; i++) iprefFilter[i].valid = false;
   iprefFilterTailPtr = 0;

   ipref_last_target_nonzero = false;

   for (int i=0; i<NUM_QUEUES; i++) iprefFIFOWP[i] = 0;
   for (int i=0; i<NUM_QUEUES; i++) iprefFIFORP[i] = 0;
   for (int i=0; i<NUM_QUEUES; i++)for (int j=0; j<IPREF_FIFO_SIZE; j++) iprefFIFO[i][j].count = 0;
   fifoPointer = 0;

   for (int i=0; i<NUM_PTB_BANKS; i++) for (int j=0; j<IPREF_WFIFO_SIZE; j++) iprefWFIFO[i][j].valid = false;
   for (int i=0; i<NUM_PTB_BANKS; i++) iprefWFIFOWP[i] = 0;
   for (int i=0; i<NUM_PTB_BANKS; i++) iprefWFIFORP[i] = 0;
}

void O3_CPU::l1i_prefetcher_branch_operate(uint64_t ip, uint8_t branch_type, uint64_t branch_target)
{
   bool ptbhit;  				// Set on a PTB hit (1 bit)
   uint64_t ip_block, pred_target, this_block;	// 58+58+58 bits = 174 bits
   uint32_t spec_ghist;  			// Speculative copy of block-grain global history (11 bits)
   uint8_t counter;  				// 2 bits

   this_block = ip >> LOG2_BLOCK_SIZE;		// current block

   if (last_ip_block_valid) {
      if (this_block != last_ip_block) {
         if (ipref_last_target_nonzero) {
	    // Last dynamic branch was predicted taken
	    // The predicted taken target block is not taken
            if (wports_available[(ipref_global_hist ^ last_ip_block) & (NUM_PTB_BANKS - 1)]) {
	       // Write ports available in target PTB bank (least significant bits of set index is the bank number)
               iprefPTBAllocate (ipref_global_hist ^ last_ip_block, last_ip_block, this_block, 0);
               wports_available[(ipref_global_hist ^ last_ip_block) & (NUM_PTB_BANKS - 1)]--;
            }
            else {
 	       // Write ports not available in target PTB bank
	       // Need to enqueue this write in the write scheduling FIFO queue of the target PTB bank
               uint8_t bankid = (ipref_global_hist ^ last_ip_block) & (NUM_PTB_BANKS - 1);
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].ghist = ipref_global_hist;
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].ip_block = last_ip_block;
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].target = this_block;
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].taken = 0;
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].valid = true;
               iprefWFIFOWP[bankid] = (iprefWFIFOWP[bankid] + 1) % IPREF_WFIFO_SIZE;
            }
            ipref_global_hist = ipref_global_hist << 1;
         }
         else {
            if (this_block == (last_ip_block + 1)) {
               // Not taken
               if (wports_available[(ipref_global_hist ^ last_ip_block) & (NUM_PTB_BANKS - 1)]) {
                  // Write ports available in target PTB bank (least significant bits of set index is the bank number)
                  iprefPTBAllocate (ipref_global_hist ^ last_ip_block, last_ip_block, this_block, 0);
                  wports_available[(ipref_global_hist ^ last_ip_block) & (NUM_PTB_BANKS - 1)]--;
               }
               else {
                  // Write ports not available in target PTB bank
                  // Need to enqueue this write in the write scheduling FIFO queue of the target PTB bank
                  uint8_t bankid = (ipref_global_hist ^ last_ip_block) & (NUM_PTB_BANKS - 1);
                  iprefWFIFO[bankid][iprefWFIFOWP[bankid]].ghist = ipref_global_hist;
                  iprefWFIFO[bankid][iprefWFIFOWP[bankid]].ip_block = last_ip_block;
                  iprefWFIFO[bankid][iprefWFIFOWP[bankid]].target = this_block;
                  iprefWFIFO[bankid][iprefWFIFOWP[bankid]].taken = 0;
                  iprefWFIFO[bankid][iprefWFIFOWP[bankid]].valid = true;
                  iprefWFIFOWP[bankid] = (iprefWFIFOWP[bankid] + 1) % IPREF_WFIFO_SIZE;
               }
               ipref_global_hist = ipref_global_hist << 1;
            }
            else {
               // Taken
               if (wports_available[(ipref_global_hist ^ last_ip_block) & (NUM_PTB_BANKS - 1)]) {
                  // Write ports available in target PTB bank (least significant bits of set index is the bank number)
                  iprefPTBAllocate (ipref_global_hist ^ last_ip_block, last_ip_block, this_block, 1);
                  wports_available[(ipref_global_hist ^ last_ip_block) & (NUM_PTB_BANKS - 1)]--;
               }
               else {
                  // Write ports not available in target PTB bank
                  // Need to enqueue this write in the write scheduling FIFO queue of the target PTB bank
                  uint8_t bankid = (ipref_global_hist ^ last_ip_block) & (NUM_PTB_BANKS - 1);
                  iprefWFIFO[bankid][iprefWFIFOWP[bankid]].ghist = ipref_global_hist;
                  iprefWFIFO[bankid][iprefWFIFOWP[bankid]].ip_block = last_ip_block;
                  iprefWFIFO[bankid][iprefWFIFOWP[bankid]].target = this_block;
                  iprefWFIFO[bankid][iprefWFIFOWP[bankid]].taken = 1;
                  iprefWFIFO[bankid][iprefWFIFOWP[bankid]].valid = true;
                  iprefWFIFOWP[bankid] = (iprefWFIFOWP[bankid] + 1) % IPREF_WFIFO_SIZE;
               }
               ipref_global_hist = (ipref_global_hist << 1) | 1;
            }
         }
      }
   }

   if (branch_target) {
      if ((branch_target >> LOG2_BLOCK_SIZE) != this_block) {
         if ((branch_target >> LOG2_BLOCK_SIZE) == (this_block + 1)) {
            // Not taken
            if (wports_available[(ipref_global_hist ^ this_block) & (NUM_PTB_BANKS - 1)]) {
               // Write ports available in target PTB bank (least significant bits of set index is the bank number)
               iprefPTBAllocate (ipref_global_hist ^ this_block, this_block, branch_target >> LOG2_BLOCK_SIZE, 0);
               wports_available[(ipref_global_hist ^ this_block) & (NUM_PTB_BANKS - 1)]--;
            }
            else {
               // Write ports not available in target PTB bank
               // Need to enqueue this write in the write scheduling FIFO queue of the target PTB bank
               uint8_t bankid = (ipref_global_hist ^ this_block) & (NUM_PTB_BANKS - 1);
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].ghist = ipref_global_hist;
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].ip_block = this_block;
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].target = branch_target >> LOG2_BLOCK_SIZE;
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].taken = 0;
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].valid = true;
               iprefWFIFOWP[bankid] = (iprefWFIFOWP[bankid] + 1) % IPREF_WFIFO_SIZE;
            }
            ipref_global_hist = ipref_global_hist << 1;
         }
         else {
            // Taken
            if (wports_available[(ipref_global_hist ^ this_block) & (NUM_PTB_BANKS - 1)]) {
               // Write ports available in target PTB bank (least significant bits of set index is the bank number)
               iprefPTBAllocate (ipref_global_hist ^ this_block, this_block, branch_target >> LOG2_BLOCK_SIZE, 1);
               wports_available[(ipref_global_hist ^ this_block) & (NUM_PTB_BANKS - 1)]--;
            }
            else {
               // Write ports not available in target PTB bank
               // Need to enqueue this write in the write scheduling FIFO queue of the target PTB bank
               uint8_t bankid = (ipref_global_hist ^ this_block) & (NUM_PTB_BANKS - 1);
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].ghist = ipref_global_hist;
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].ip_block = this_block;
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].target = branch_target >> LOG2_BLOCK_SIZE;
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].taken = 1;
               iprefWFIFO[bankid][iprefWFIFOWP[bankid]].valid = true;
               iprefWFIFOWP[bankid] = (iprefWFIFOWP[bankid] + 1) % IPREF_WFIFO_SIZE;
            }
            ipref_global_hist = (ipref_global_hist << 1) | 1;
         }
      }

      // Inject prefetch to the block containing the branch target
      if (!iprefFilterLookup (branch_target >> LOG2_BLOCK_SIZE) && (IPREF_START_DEPTH == 0)) {
         prefetch_code_line(branch_target);
         iprefFilterInsert (branch_target >> LOG2_BLOCK_SIZE);
      }

      // Look ahead prefetch following the branch target
      ip_block = branch_target >> LOG2_BLOCK_SIZE;
      spec_ghist = ipref_global_hist;		// Speculative copy of block-grain global history
      if (rports_available[(spec_ghist ^ ip_block) & (NUM_PTB_BANKS - 1)]) {
         // Read ports available in target PTB bank (least significant bits of set index is the bank number)
         rports_available[(spec_ghist ^ ip_block) & (NUM_PTB_BANKS - 1)]--;
         ptbhit = iprefPTBLookup (spec_ghist ^ ip_block, ip_block, &pred_target, &counter);
         if (!ptbhit || (counter < 2)) {
            // Predicted not taken
            pred_target = ip_block + 1;
            spec_ghist = spec_ghist << 1;
         }
         else {
            // Predicted taken
            spec_ghist = (spec_ghist << 1) | 1;
         }
         ip_block = pred_target;

	 // Inject prefetch
         if (!iprefFilterLookup (pred_target) && (IPREF_START_DEPTH <= 1)) {
            prefetch_code_line(pred_target << LOG2_BLOCK_SIZE);
            iprefFilterInsert (pred_target);
         }
         // Remaining look ahead prefetches will be issued gradually from the read scheduling queues
	 // Set pending count of prefetches to IPREF_LOOKAHEAD - 1
         iprefFIFO[fifoPointer][iprefFIFOWP[fifoPointer]].count = IPREF_LOOKAHEAD - 1;
      }
      else {
         // Read port not available; all lookahead prefetches will be issued gradually from the read scheduling queues
         iprefFIFO[fifoPointer][iprefFIFOWP[fifoPointer]].count = IPREF_LOOKAHEAD;
      }
      // Set up the remaining fields of the read scheduling FIFO queue entry
      iprefFIFO[fifoPointer][iprefFIFOWP[fifoPointer]].ghist = spec_ghist;
      iprefFIFO[fifoPointer][iprefFIFOWP[fifoPointer]].ip_block = ip_block;
      iprefFIFOWP[fifoPointer] = (iprefFIFOWP[fifoPointer] + 1) % IPREF_FIFO_SIZE;
      fifoPointer = (fifoPointer + 1) % NUM_QUEUES;
   }

   if (last_ip_block != this_block) {
      // Inject prefetch for the current block
      if (!iprefFilterLookup (this_block) && (IPREF_START_DEPTH == 0)) {
         prefetch_code_line(ip);
         iprefFilterInsert (this_block);
      }
      if (!branch_target) {
         // Look ahead prefetch following the current block
         ip_block = this_block;
         spec_ghist = ipref_global_hist;	// Speculative copy of global history
         if (rports_available[(spec_ghist ^ ip_block) & (NUM_PTB_BANKS - 1)]) {
            // Read ports available in the target PTB bank
            rports_available[(spec_ghist ^ ip_block) & (NUM_PTB_BANKS - 1)]--;
            ptbhit = iprefPTBLookup (spec_ghist ^ ip_block, ip_block, &pred_target, &counter);
            if (!ptbhit || (counter < 2)) {
	       // Predicted not taken
               pred_target = ip_block + 1;
               spec_ghist = spec_ghist << 1;
            }
            else {
               // Predicted taken
               spec_ghist = (spec_ghist << 1) | 1;
            }
            ip_block = pred_target;

	    // Inject prefetch
            if (!iprefFilterLookup (pred_target) && (IPREF_START_DEPTH <= 1)) {
               prefetch_code_line(pred_target << LOG2_BLOCK_SIZE);
               iprefFilterInsert (pred_target);
            }
            // Remaining look ahead prefetches will be issued gradually from the read scheduling queues
            // Set pending count of prefetches to IPREF_LOOKAHEAD - 1
            iprefFIFO[fifoPointer][iprefFIFOWP[fifoPointer]].count = IPREF_LOOKAHEAD - 1;
         }
         else {
            // Read port not available; all lookahead prefetches will be issued gradually from the read scheduling queues
            iprefFIFO[fifoPointer][iprefFIFOWP[fifoPointer]].count = IPREF_LOOKAHEAD;
         }
         // Set up the remaining fields of the read scheduling FIFO queue entry
         iprefFIFO[fifoPointer][iprefFIFOWP[fifoPointer]].ghist = spec_ghist;
         iprefFIFO[fifoPointer][iprefFIFOWP[fifoPointer]].ip_block = ip_block;
         iprefFIFOWP[fifoPointer] = (iprefFIFOWP[fifoPointer] + 1) % IPREF_FIFO_SIZE;
         fifoPointer = (fifoPointer + 1) % NUM_QUEUES;
      }
   }

   if (branch_target) {
      last_ip_block = branch_target >> LOG2_BLOCK_SIZE;
      ipref_last_target_nonzero = true;
   }
   else {
      last_ip_block = ip >> LOG2_BLOCK_SIZE;
      ipref_last_target_nonzero = false;
   }
   last_ip_block_valid = true;
}

// Cache access hook: remembers the accessed block in the recent access filter so that
// later prefetch candidates for the same block are suppressed.
// cache_hit and prefetch_hit are not used by this prefetcher.
void O3_CPU::l1i_prefetcher_cache_operate(uint64_t addr, uint8_t cache_hit, uint8_t prefetch_hit)
{
   const uint64_t block = addr >> LOG2_BLOCK_SIZE;

   // Already remembered: nothing to do
   if (iprefFilterLookup (block)) return;

   iprefFilterInsert (block);
}

void O3_CPU::l1i_prefetcher_cycle_operate()
{
   // Each cycle, the pending PTB writes and reads are scheduled according to port availability

   uint64_t ip_block, spec_ghist, pred_target;  // 12+11+58 = 81 bits
   uint8_t  counter;  				// 2 bits
   bool     ptbhit;  				// 1 bit

   // Initialize write and read port availability
   for (int i=0; i<NUM_PTB_BANKS; i++) wports_available[i] = NUM_WPORTS_PER_BANK;
   for (int i=0; i<NUM_PTB_BANKS; i++) rports_available[i] = NUM_RPORTS_PER_BANK;

   // Write scheduling loop that iterates over the PTB banks
   for (uint8_t i=0; i<NUM_PTB_BANKS; i++) {
      if (iprefWFIFO[i][iprefWFIFORP[i]].valid) {  // Read pointer points to a valid entry
         assert(i == ((iprefWFIFO[i][iprefWFIFORP[i]].ghist ^ iprefWFIFO[i][iprefWFIFORP[i]].ip_block) & (NUM_PTB_BANKS - 1)));
         if (wports_available[i]) {
            // Write port available
            iprefPTBAllocate (iprefWFIFO[i][iprefWFIFORP[i]].ghist ^ iprefWFIFO[i][iprefWFIFORP[i]].ip_block, iprefWFIFO[i][iprefWFIFORP[i]].ip_block, iprefWFIFO[i][iprefWFIFORP[i]].target, iprefWFIFO[i][iprefWFIFORP[i]].taken);
            wports_available[i]--;
            iprefWFIFO[i][iprefWFIFORP[i]].valid = false;
            iprefWFIFORP[i] = (iprefWFIFORP[i] + 1) % IPREF_WFIFO_SIZE;
         }
      }
   }

   // Read scheduling loop that iterates over the read scheduling FIFO queues 
   for (uint8_t i=0; i<NUM_QUEUES; i++) {
      if (iprefFIFO[i][iprefFIFORP[i]].count) {  // Read pointer points to an entry with pending prefetches
         spec_ghist = iprefFIFO[i][iprefFIFORP[i]].ghist;
         ip_block = iprefFIFO[i][iprefFIFORP[i]].ip_block;
         if (rports_available[(spec_ghist ^ ip_block) & (NUM_PTB_BANKS - 1)]) {
            // Read ports available
            rports_available[(spec_ghist ^ ip_block) & (NUM_PTB_BANKS - 1)]--;
            ptbhit = iprefPTBLookup (spec_ghist ^ ip_block, ip_block, &pred_target, &counter);
            if (!ptbhit || (counter < 2)) {
               // Predicted not taken
               pred_target = ip_block + 1;
               spec_ghist = spec_ghist << 1;
            }
            else {
               spec_ghist = (spec_ghist << 1) | 1;
            }
            ip_block = pred_target;
           
            // Inject prefetch
            if (!iprefFilterLookup (pred_target) && ((IPREF_LOOKAHEAD - iprefFIFO[i][iprefFIFORP[i]].count + 1) >= IPREF_START_DEPTH)) {
               prefetch_code_line(pred_target << LOG2_BLOCK_SIZE);
               iprefFilterInsert (pred_target);
            }
            iprefFIFO[i][iprefFIFORP[i]].count--;	// Account for the prefetch
	    // If all prefetches are injected, advance read pointer to the next FIFO entry
            if (iprefFIFO[i][iprefFIFORP[i]].count == 0) iprefFIFORP[i] = (iprefFIFORP[i] + 1) % IPREF_FIFO_SIZE;
            else {
               iprefFIFO[i][iprefFIFORP[i]].ghist = spec_ghist;
               iprefFIFO[i][iprefFIFORP[i]].ip_block = ip_block;
            }
         }
      }
   }
}

// Cache fill hook: this prefetcher does nothing on a fill, so the body is empty
// and all parameters are unused.
void O3_CPU::l1i_prefetcher_cache_fill(uint64_t v_addr, uint32_t set, uint32_t way, uint8_t prefetch, uint64_t evicted_v_addr)
{

}

// Final statistics hook: this prefetcher collects no statistics, so the body is empty.
void O3_CPU::l1i_prefetcher_final_stats()
{

}
