Fault-tolerant cache coherence protocols for CMPs: Evaluation and trade-offs. Fernández-Pascual, R., García, J. M., Acacio, M. E., & Duato, J. Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 5374 LNCS:555-568, 2008.
abstract   bibtex   
One way of dealing with transient faults that will affect the interconnection network of future large-scale Chip Multiprocessor (CMP) systems is by extending the cache coherence protocol. Fault tolerance at the level of the cache coherence protocol has been proven to achieve very low performance overhead in absence of faults while being able to support very high fault rates. In this work, we compare two already proposed fault-tolerant cache coherence protocols in a common framework and present a new one based in the cache coherence protocol used in AMD Opteron processors. Also, we thoroughly evaluate the performance of the three protocols, show how to adjust the fault tolerance parameters of the protocols to achieve a desired level of fault tolerance and measure the overhead achieved to be able to support very high transient fault rates. © 2008 Springer Berlin Heidelberg.
% Reference-manager export. The citation key is taken from the entry's own
% citation_key field; bookkeeping fields (id, created, profile_id, ...) are
% non-standard and are ignored by bibliography styles.
@article{Fernandez-Pascual2008a,
 author = {Fernández-Pascual, Ricardo and García, José M. and Acacio, Manuel E. and Duato, José},
 title = {Fault-tolerant cache coherence protocols for {CMPs}: Evaluation and trade-offs},
 journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
 volume = {5374},
 pages = {555--568},
 year = {2008},
 type = {article},
 id = {f6819278-f39e-35ef-b249-264b0cae1570},
 created = {2020-12-28T19:42:13.197Z},
 file_attached = {false},
 profile_id = {510a24b0-13c9-315d-ad34-c763f18f9d3e},
 group_id = {b2013bd2-d1ee-3382-aeb3-a063d2537a44},
 last_modified = {2020-12-28T19:42:13.197Z},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 citation_key = {Fernandez-Pascual2008a},
 private_publication = {false},
 abstract = {One way of dealing with transient faults that will affect the interconnection network of future large-scale Chip Multiprocessor (CMP) systems is by extending the cache coherence protocol. Fault tolerance at the level of the cache coherence protocol has been proven to achieve very low performance overhead in absence of faults while being able to support very high fault rates. In this work, we compare two already proposed fault-tolerant cache coherence protocols in a common framework and present a new one based in the cache coherence protocol used in AMD Opteron processors. Also, we thoroughly evaluate the performance of the three protocols, show how to adjust the fault tolerance parameters of the protocols to achieve a desired level of fault tolerance and measure the overhead achieved to be able to support very high transient fault rates. © 2008 Springer Berlin Heidelberg.},
 bibtype = {article}
}
Downloads: 0