ChorusOS 5.0 Board Support Package Developer's Guide

Stuck Interrupts

A persistently asserted interrupt will severely affect system performance, almost certainly stalling a single-processor board. An interrupt handler therefore needs to be able to identify whether it has been called as a result of a hoax interrupt.

A hardened driver's interrupt handler will return a BUS_INTR_UNCLAIMED result unless it detects that the device legitimately asserted the interrupt. Conceptually, an interrupt is legitimate if the device actually requires the driver to do some useful work.

A hardened bus driver is able to detect whether an interrupt line is defective. It disables the defective interrupt line (through the bus controller) and notifies any attached child drivers by calling their event handler, specifying a BUS_INTR_DEFECTIVE event, and passing the child driver the interrupt identifier as an argument.

To detect a defective interrupt line, a bus driver should maintain a count of unclaimed interrupts for each interrupt line. The bus driver may count unclaimed interrupts occurring between two claimed interrupts, resetting the total when an interrupt is claimed. Alternatively, it may count the unclaimed interrupts occurring during a given, configurable period of time, resetting the counter on a time-out invocation. In both cases, if the counter reaches a predetermined, configurable watermark, the bus driver should consider the interrupt line defective. Note that, in such a model, all devices sharing the same interrupt line will fail if stuck interrupts are detected on that line.

Code Example 13-6 illustrates how stuck interrupts may be detected by both the bus and device driver interrupt handlers. The Raven handler counts consecutive unclaimed interrupts on each line, and raises a PCI_INTR_DEFECTIVE event when this count exceeds a configured value (maxUnclaimed). The driver's unmask routine also refuses to enable an interrupt line that has been declared defective.


Example 13-6 Raven interrupt handler

/*
 * The per-line unclaimed-interrupt counter doubles as the "defective"
 * marker: the all-ones value (uint32_f)-1 flags line `l` as defective.
 * `raven` is a pointer to the structure holding the unclaimed[] array;
 * both arguments are parenthesized so that arbitrary expressions may be
 * passed safely.
 */
#define IS_INTR_DEFECTIVE(raven, l)  ((raven)->unclaimed[(l)] == (uint32_f)-1)
#define SET_INTR_DEFECTIVE(raven, l) ((raven)->unclaimed[(l)]  = (uint32_f)-1)

    static void
unmask (PciIntrId intrId)
{
    PciIntr*   pciIntr = (PciIntr*)intrId;
    RavenData* raven   = pciIntr->devId->pciId;

        /*
         * A line flagged as defective is never unmasked here:
         * the request is silently ignored.
         */
    if (!IS_INTR_DEFECTIVE(raven, pciIntr->intrLine)) {
            /*
             * PCI interrupts are masked around the MPIC register
             * update and unmasked again once it is done.
             */
        raven->intrOps->mask(raven->intrId);
        OPIC_INTR_UNMASK(raven->mpicIoOps,
                         raven->mpicIoId,
                         pciIntr->intrLine);
        raven->intrOps->unmask(raven->intrId);
    }
}

    /*
     * Declare interrupt line `line` of `raven` as defective:
     * the line is masked at the interrupt controller, flagged as
     * defective, and every device attached to it is notified.
     */
    static void
intrDefective(RavenData* raven, uint32_f line)
{
   PciIntr* cur;
        /*
         * Mask the line at interrupt controller level and record it as
         * defective, with PCI interrupts masked for the duration.
         */
    raven->intrOps->mask(raven->intrId);
    OPIC_INTR_MASK(raven->mpicIoOps, raven->mpicIoId, line);
    SET_INTR_DEFECTIVE(raven, line);
    raven->intrOps->unmask(raven->intrId);
        /*
         * Walk the list of interrupts attached to this line and raise a
         * PCI_INTR_DEFECTIVE event on each device that registered an
         * event handler. The interrupt identifier is the event argument.
         */
   cur = raven->intr[line];
   while (cur) {
       PciDev* owner = cur->devId;

       if (owner->evtHandler) {
          owner->evtHandler(owner->cookie, PCI_INTR_DEFECTIVE, (PciIntrId)cur);
       }
       cur = cur->next;
   }
}
        /*
         * PowerPC external interrupts handler.
         * It is called from DKI after the context has been saved on the
         * stack. This handler manages the RAVEN internal MPIC which is
         * OpenPIC compliant.
         *
         * The handler acknowledges the pending vector, calls every device
         * handler attached to it with CPU external interrupts enabled,
         * completes the interrupt at the MPIC, and finally updates the
         * count of consecutive unclaimed interrupts for the line. When
         * that count exceeds raven->maxUnclaimed, the line is declared
         * defective through intrDefective().
         *
         * Always returns CPU_INTR_CLAIMED.
         */
    static CpuIntrStatus
intrHandler (RavenData* raven)
{
   uint32_f      vector;
   PciIntrStatus intrStatus;
   uint32_f      cpu          = mfspr_PIR ();  /* processor id register */
   PciIoOps*     mpicIoOps    = raven->mpicIoOps;
   PciIoId       mpicIoId     = raven->mpicIoId;
   CpuIntrOps*   intrOps      = raven->intrOps;
   CpuIntrId     intrId       = raven->intrId;
   PciIntr*      pciIntr;
   int           claimed      = 0;  /* handlers that claimed the interrupt */
   int           acknowledged = 0;  /* set if any handler acknowledged it  */
   int           defective;
       /*
        * Get vector to identify the interrupt source
        */
   vector = OPIC_INTR_ACKNOWLEDGE(mpicIoOps, mpicIoId, cpu);

       /*
        * Ignore spurious interrupt requests
        */
   if (vector == MPIC_SPURIOUS_INTR_VECTOR) {
      raven->spurious++;
      return CPU_INTR_CLAIMED;
   }

       /*
        * Enable external interrupts on CPU
        */
   intrOps->unmask(intrId);
       /*
        * Call device handlers attached to this interrupt vector.
        * The line may be shared, so the status of every handler is
        * taken into account, not only that of the last one called.
        */
   for (pciIntr = raven->intr[vector] ;
        pciIntr ;
        pciIntr = pciIntr->next) {
       intrStatus = pciIntr->intrHandler(pciIntr->intrCookie);
       if (intrStatus != PCI_INTR_UNCLAIMED) {
           claimed++;
       }
       if (intrStatus == PCI_INTR_ACKNOWLEDGED) {
           acknowledged = 1;
       }
   }
       /*
        * Disable external interrupts on CPU
        */
   intrOps->mask(intrId);

       /*
        * The all-ones counter value is the "defective" marker of the line
        * (the value tested by IS_INTR_DEFECTIVE).
        */
   defective = (raven->unclaimed[vector] == (uint32_f)-1);

   if (acknowledged) {
           /*
            * A device handler has already done:
            * - enable()
            * - ....
            * - disable()
            * So we just:
            * - reset task priority to re-enable lower priority interrupts
            * - unmask current interrupt (masked by disable()), unless the
            *   line has been declared defective and must stay masked.
            */
       OPIC_CURRENT_TASK_SET_PRIORITY(mpicIoOps, mpicIoId, cpu,
                                      OPIC_PRIORITY_MIN);
       if (!defective) {
           OPIC_INTR_UNMASK(mpicIoOps, mpicIoId,  vector);
       }
   } else {
           /*
            * Interrupt was just serviced by the handler.
            * Send a non-specific EOI command to open PIC
            */
       OPIC_INTR_EOI(mpicIoOps, mpicIoId, cpu);
   }

       /*
        * Update the unclaimed counter. A line already marked defective is
        * left alone: incrementing the marker would wrap it to 0 and
        * resetting it would clear it, in both cases silently dropping the
        * defective state.
        */
   if (!defective) {
       if (claimed == 0) {
               /*
                * Increment unclaimed counter and check against max.
                */
           if (++(raven->unclaimed[vector]) > raven->maxUnclaimed) {
               intrDefective(raven, vector);
           }
       } else {
           raven->unclaimed[vector] = 0; /* Reset unclaimed counter */
       }
   }

   return CPU_INTR_CLAIMED;
}

The dec21x4x interrupt handler, shown in Code Example 13-7, checks for unexpected interrupts by masking the value read from the interrupt status register (CSR5) with the driver's csr7 interrupt mask. If no bit remains set after masking, the interrupt is unexpected and is considered unclaimed.


Example 13-7 dec21x4x interrupt handler

    /*
     * dec21x4x interrupt handler.
     *
     * `cookie` is the driver's Dec21Data instance. Returns
     * PCI_INTR_UNCLAIMED when no interrupt selected by dec21->csr7 is
     * pending in CSR5, and PCI_INTR_CLAIMED otherwise.
     */
    static PciIntrStatus
intrHandler (void* cookie)
{
    Dec21Data* dec21 = (Dec21Data*)cookie;
    uint32_f   status;   /* CSR5 as read from the device    */
    uint32_f   pending;  /* CSR5 restricted to dec21->csr7  */

        /*
         * Read the current status and write it straight back, to
         * acknowledge all interrupt sources as soon as possible.
         */
    status = dec21->pciIoOps->load_32(dec21->pciIoId, CSR5);
    dec21->pciIoOps->store_32(dec21->pciIoId, CSR5, status);

#ifdef DEBUG_DEC21
    sysLog("%s: intrHandler csr5=0x%08x\n", dec21->path, status);
#endif
        /*
         * Keep only the interrupts selected by csr7
         */
    pending = status & dec21->csr7;

    if (pending != 0) {
            /*
             * Rx interrupt: mask it, then notify the client
             */
        if (pending & CSR5_RI) {
            CSR7_INTR_MASK(dec21, CSR7_RIE);
            dec21->clientOps->receiptNotify(dec21->clientCookie);
        }
            /*
             * Tx interrupt: mask it, then notify the client
             */
        if (pending & CSR5_TI) {
            CSR7_INTR_MASK(dec21, CSR7_TIE);
            dec21->clientOps->transmitNotify(dec21->clientCookie);
        }
            /*
             * Abnormal interrupt summary bit set: process errors
             */
        if (pending & CSR5_AIS) {
            intrErr(dec21, pending);
        }

        return PCI_INTR_CLAIMED;
    }

        /*
         * Nothing selected by csr7 is pending: not our interrupt
         */
    return PCI_INTR_UNCLAIMED;
}

Refer to the ChorusOS man pages, section 9DDI (Device Driver Interfaces), for details about bus interrupt handling interfaces.