intel
diff --git a/‎visa/BuildIR.h
Lines changed: 1 addition & 0 deletions b/‎visa/BuildIR.h
Lines changed: 1 addition & 0 deletions
diff --git a/‎visa/GraphColor.cpp
Lines changed: 45 additions & 1 deletion b/‎visa/GraphColor.cpp
Lines changed: 45 additions & 1 deletion
diff --git a/‎visa/GraphColor.h
Lines changed: 24 additions & 0 deletions b/‎visa/GraphColor.h
Lines changed: 24 additions & 0 deletions
@@ -641,6 +641,7 @@ class IR_Builder
     void createBuiltinDecls();
 
     G4_Declare* getSpillFillHeader();
+    // Reports whether spillFillHeader is currently set, i.e. whether it
+    // evaluates to true when converted to bool.
+    bool hasValidSpillFillHeader()
+    {
+        return static_cast<bool>(spillFillHeader);
+    }
 
     G4_Declare* getEUFusionWATmpVar();
 
 
@@ -7534,7 +7534,17 @@ bool GraphColor::regAlloc(
     if (reserveSpillReg)
     {
         failSafeIter = reserveSpillReg;
-        gra.determineSpillRegSize(spillRegSize, indrSpillRegSize);
+
+        if (kernel.getOption(vISA_NewFailSafeRA))
+        {
+            spillRegSize = gra.getNumReservedGRFs();
+            indrSpillRegSize = 0;
+        }
+        else
+        {
+            gra.determineSpillRegSize(spillRegSize, indrSpillRegSize);
+        }
+
         reserveSpillSize = spillRegSize + indrSpillRegSize;
         MUST_BE_TRUE(reserveSpillSize < kernel.getNumCalleeSaveRegs(), "Invalid reserveSpillSize in fail-safe RA!");
         totalGRFRegCount -= reserveSpillSize;
@@ -10908,6 +10918,36 @@ int GlobalRA::coloringRegAlloc()
                     loopSplit.run();
                 }
 
+                // Very few spills in this iter. Check if we can convert this to fail safe iter.
+                // By converting this iter to fail safe we can save (at least) 1 additional iter
+                // to allocate spilled temps. But converting to fail safe needs extra checks
+                // because no reserved GRF may exist at this point, so push/pop may need to
+                // succeed without any additional GRF.
+                if (!kernel.getOption(vISA_Debug) &&
+                    iterationNo >= 1 && kernel.getOption(vISA_NewFailSafeRA) && !reserveSpillReg &&
+                    coloring.getSpilledLiveRanges().size() <= BoundedRA::MaxSpillNumVars &&
+                    liveAnalysis.getNumSelectedVar() > BoundedRA::LargeProgramSize)
+                {
+                    // The conversion is allowed when any one of the conditions below holds.
+                    //
+                    // Stack call always has free GRF so it is safe to convert this iter to fail safe
+                    if (builder.usesStack() ||
+                        // If LSC has to be used for spill/fill then we need to ensure spillHeader is created.
+                        // This has to be a conjunction: written as (!useLsc || hasHeader) the term is
+                        // trivially true whenever LSC is not used, which short-circuits the scratch
+                        // range check below in exactly the case it is meant to guard.
+                        (useLscForNonStackCallSpillFill && builder.hasValidSpillFillHeader()) ||
+                        // If scratch is to be used then max spill offset must be within addressable range
+                        ((nextSpillOffset + BoundedRA::getNumPhyVarSlots(kernel)) < SCRATCH_MSG_LIMIT))
+                    {
+                        // Few ranges are spilled but this was not executed as a fail
+                        // safe iteration. However, the new fail safe RA implementation
+                        // is capable of doing push/pop. So for very few spills, we
+                        // insert push/pop to free up some GRFs rather than executing
+                        // a new RA iteration. When doing so, we mark this RA iteration
+                        // as fail safe.
+                        reserveSpillReg = true;
+                        coloring.markFailSafeIter(true);
+                        // No reserved GRFs: it is too late to reserve one after coloring.
+                        setNumReservedGRFsFailSafe(0);
+                    }
+                }
+
                 //Calculate the spill caused by send to decide if global splitting is required or not
                 for (auto spilled : coloring.getSpilledLiveRanges())
                 {
@@ -11037,6 +11077,10 @@ int GlobalRA::coloringRegAlloc()
 
                 if (!reserveSpillReg && !disableSpillCoalecse && builder.useSends())
                 {
+                    if (builder.getOption(vISA_RATrace))
+                    {
+                        std::cout << "\t--spill/fill cleanup\n";
+                    }
                     CoalesceSpillFills c(kernel, liveAnalysis, coloring, spillGRF, iterationNo, rpe, *this);
                     c.run();
                 }
 
@@ -611,6 +611,7 @@ namespace vISA
         G4_SrcRegRegion* getScratchSurface() const;
         LiveRange** getLRs() const { return lrs; }
         unsigned int getNumVars() const { return numVar; }
+        // Stores f in failSafeIter, marking (true) or unmarking (false) the
+        // current iteration as a fail safe iteration.
+        void markFailSafeIter(bool f)
+        {
+            failSafeIter = f;
+        }
     };
 
     struct BundleConflict
@@ -797,6 +798,8 @@ namespace vISA
         uint32_t numGRFSpill = 0;
         uint32_t numGRFFill = 0;
 
+        unsigned int numReservedGRFsFailSafe = BoundedRA::NOT_FOUND;
+
         bool spillFillIntrinUsesLSC(G4_INST* spillFillIntrin);
         void expandFillLSC(G4_BB* bb, INST_LIST_ITER& instIt);
         void expandSpillLSC(G4_BB* bb, INST_LIST_ITER& instIt);
@@ -1310,6 +1313,27 @@ namespace vISA
         {
             dontRemat.insert(inst);
         }
+
+        // Returns the number of GRFs reserved for the new fail safe mechanism.
+        //  - When the fail safe mechanism is invoked before coloring, the count is
+        //    expected to have been set explicitly (see setNumReservedGRFsFailSafe)
+        //    before this method is called.
+        //  - When a regular (i.e. non-fail safe) RA iteration spills very little, it
+        //    may be converted to fail safe with 0 reserved GRFs, as it is too late
+        //    to reserve a GRF after coloring.
+        // If no count has been recorded yet (BoundedRA::NOT_FOUND), a default is
+        // computed and cached in numReservedGRFsFailSafe: 1 when the kernel SIMD size
+        // equals the number of UD elements per GRF, otherwise 2.
+        unsigned int getNumReservedGRFs()
+        {
+            if (numReservedGRFsFailSafe != BoundedRA::NOT_FOUND)
+            {
+                return numReservedGRFsFailSafe;
+            }
+
+            if (kernel.getSimdSize() == kernel.numEltPerGRF<Type_UD>())
+            {
+                numReservedGRFsFailSafe = 1;
+            }
+            else
+            {
+                numReservedGRFsFailSafe = 2;
+            }
+            return numReservedGRFsFailSafe;
+        }
+
+        // Records num as the reserved-GRF count for fail safe RA; this is the value
+        // getNumReservedGRFs() returns unless num is BoundedRA::NOT_FOUND.
+        void setNumReservedGRFsFailSafe(unsigned int num) { numReservedGRFsFailSafe = num; }
     };
 
     inline G4_Declare* Interference::getGRFDclForHRA(int GRFNum) const