BibBase dehnavi, m

2025 (2)

SLoPe: Double-Pruned Sparse Plus Lazy Low-Rank Adapter Pretraining of LLMs. Mozaffari, M.; Yazdanbakhsh, A.; Zhang, Z.; and Dehnavi, M. M. In The Thirteenth International Conference on Learning Representations, ICLR 2025, Singapore, April 24-28, 2025, 2025. OpenReview.net

SLoPe: Double-Pruned Sparse Plus Lazy Low-Rank Adapter Pretraining of LLMs [link]

Paper link bibtex 1 download

@inproceedings{DBLP:conf/iclr/MozaffariYZD25,
  author     = {Mohammad Mozaffari and Amir Yazdanbakhsh and Zhao Zhang and Maryam Mehri Dehnavi},
  title      = {SLoPe: Double-Pruned Sparse Plus Lazy Low-Rank Adapter Pretraining of LLMs},
  booktitle  = {The Thirteenth International Conference on Learning Representations, {ICLR} 2025, Singapore, April 24-28, 2025},
  publisher  = {OpenReview.net},
  year       = {2025},
  url        = {https://openreview.net/forum?id=lqHv6dxBkj},
  timestamp  = {Thu, 15 May 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/MozaffariYZD25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

Adaptive Algebraic Reuse of Reordering in Cholesky Factorization with Dynamic Sparsity Pattern. Zarebavani, B.; Kaufman, D. M.; Levin, D. I. W.; and Dehnavi, M. M. CoRR, abs/2501.04011. 2025.

Adaptive Algebraic Reuse of Reordering in Cholesky Factorization with Dynamic Sparsity Pattern [link]

Paper doi link bibtex 2 downloads

@article{DBLP:journals/corr/abs-2501-04011,
  author     = {Behrooz Zarebavani and Danny M. Kaufman and David I. W. Levin and Maryam Mehri Dehnavi},
  title      = {Adaptive Algebraic Reuse of Reordering in Cholesky Factorization with Dynamic Sparsity Pattern},
  journal    = {CoRR},
  volume     = {abs/2501.04011},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2501.04011},
  doi        = {10.48550/ARXIV.2501.04011},
  eprinttype = {arXiv},
  eprint     = {2501.04011},
  timestamp  = {Tue, 18 Feb 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2501-04011.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

2024 (5)

Corrigendum to "Development of a knowledge-sharing parallel computing approach for calibrating distributed watershed hydrologic models" [Environ. Model. Software 164 (2023) 105708]. Asgari, M.; Yang, W.; Lindsay, J. B.; Shao, H.; Liu, Y.; de Queiroga Miranda, R.; and Dehnavi, M. M. Environ. Model. Softw., 175: 105822. 2024.

Paper doi link bibtex

@article{DBLP:journals/envsoft/AsgariYLSLMD24,
  author     = {Marjan Asgari and Wanhong Yang and John B. Lindsay and Hui Shao and Yongbo Liu and Rodrigo de Queiroga Miranda and Maryam Mehri Dehnavi},
  title      = {Corrigendum to ``Development of a knowledge-sharing parallel computing approach for calibrating distributed watershed hydrologic models'' [Environ. Model. Software 164 {(2023)} 105708]},
  journal    = {Environ. Model. Softw.},
  volume     = {175},
  pages      = {105822},
  year       = {2024},
  url        = {https://doi.org/10.1016/j.envsoft.2023.105822},
  doi        = {10.1016/J.ENVSOFT.2023.105822},
  timestamp  = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/envsoft/AsgariYLSLMD24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

SpEQ: Translation of Sparse Codes using Equivalences. Laird, A.; Liu, B.; Bjørner, N. S.; and Dehnavi, M. M. Proc. ACM Program. Lang., 8(PLDI): 1680–1703. 2024.

SpEQ: Translation of Sparse Codes using Equivalences [link]

Paper doi link bibtex

@article{DBLP:journals/pacmpl/LairdLBD24,
  author     = {Avery Laird and Bangtian Liu and Nikolaj S. Bj{\o}rner and Maryam Mehri Dehnavi},
  title      = {SpEQ: Translation of Sparse Codes using Equivalences},
  journal    = {Proc. {ACM} Program. Lang.},
  volume     = {8},
  number     = {{PLDI}},
  pages      = {1680--1703},
  year       = {2024},
  url        = {https://doi.org/10.1145/3656445},
  doi        = {10.1145/3656445},
  timestamp  = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/pacmpl/LairdLBD24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

A Framework for Fine-Grained Synchronization of Dependent GPU Kernels. Jangda, A.; Maleki, S.; Dehnavi, M. M.; Musuvathi, M.; and Saarikivi, O. In Grosser, T.; Dubach, C.; Steuwer, M.; Xue, J.; Ottoni, G.; and Pereira, F. M. Q., editor(s), IEEE/ACM International Symposium on Code Generation and Optimization, CGO 2024, Edinburgh, United Kingdom, March 2-6, 2024, pages 93–105, 2024. IEEE

A Framework for Fine-Grained Synchronization of Dependent GPU Kernels [link]

Paper doi link bibtex

@inproceedings{DBLP:conf/cgo/JangdaMDMS24,
  author     = {Abhinav Jangda and Saeed Maleki and Maryam Mehri Dehnavi and Madan Musuvathi and Olli Saarikivi},
  editor     = {Tobias Grosser and Christophe Dubach and Michel Steuwer and Jingling Xue and Guilherme Ottoni and Fernando Magno Quint{\~{a}}o Pereira},
  title      = {A Framework for Fine-Grained Synchronization of Dependent {GPU} Kernels},
  booktitle  = {{IEEE/ACM} International Symposium on Code Generation and Optimization, {CGO} 2024, Edinburgh, United Kingdom, March 2-6, 2024},
  pages      = {93--105},
  publisher  = {{IEEE}},
  year       = {2024},
  url        = {https://doi.org/10.1109/CGO57630.2024.10444873},
  doi        = {10.1109/CGO57630.2024.10444873},
  timestamp  = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/cgo/JangdaMDMS24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

SLoPe: Double-Pruned Sparse Plus Lazy Low-Rank Adapter Pretraining of LLMs. Mozaffari, M.; Yazdanbakhsh, A.; Zhang, Z.; and Dehnavi, M. M. CoRR, abs/2405.16325. 2024.

Paper doi link bibtex

@article{DBLP:journals/corr/abs-2405-16325,
  author     = {Mohammad Mozaffari and Amir Yazdanbakhsh and Zhao Zhang and Maryam Mehri Dehnavi},
  title      = {SLoPe: Double-Pruned Sparse Plus Lazy Low-Rank Adapter Pretraining of LLMs},
  journal    = {CoRR},
  volume     = {abs/2405.16325},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2405.16325},
  doi        = {10.48550/ARXIV.2405.16325},
  eprinttype = {arXiv},
  eprint     = {2405.16325},
  timestamp  = {Tue, 18 Jun 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2405-16325.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

SLiM: One-shot Quantized Sparse Plus Low-rank Approximation of LLMs. Mozaffari, M.; and Dehnavi, M. M. CoRR, abs/2410.09615. 2024.

SLiM: One-shot Quantized Sparse Plus Low-rank Approximation of LLMs [link]

Paper doi link bibtex

@article{DBLP:journals/corr/abs-2410-09615,
  author     = {Mohammad Mozaffari and Maryam Mehri Dehnavi},
  title      = {SLiM: One-shot Quantized Sparse Plus Low-rank Approximation of LLMs},
  journal    = {CoRR},
  volume     = {abs/2410.09615},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2410.09615},
  doi        = {10.48550/ARXIV.2410.09615},
  eprinttype = {arXiv},
  eprint     = {2410.09615},
  timestamp  = {Fri, 22 Nov 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2410-09615.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

2023 (7)

Development of a knowledge-sharing parallel computing approach for calibrating distributed watershed hydrologic models. Asgari, M.; Yang, W.; Lindsay, J. B.; Shao, H.; Liu, Y.; de Queiroga Miranda, R.; and Dehnavi, M. M. Environ. Model. Softw., 164: 105708. 2023.

Development of a knowledge-sharing parallel computing approach for calibrating distributed watershed hydrologic models [link]

Paper doi link bibtex 1 download

@article{DBLP:journals/envsoft/AsgariYLSLMD23,
  author     = {Marjan Asgari and Wanhong Yang and John B. Lindsay and Hui Shao and Yongbo Liu and Rodrigo de Queiroga Miranda and Maryam Mehri Dehnavi},
  title      = {Development of a knowledge-sharing parallel computing approach for calibrating distributed watershed hydrologic models},
  journal    = {Environ. Model. Softw.},
  volume     = {164},
  pages      = {105708},
  year       = {2023},
  url        = {https://doi.org/10.1016/j.envsoft.2023.105708},
  doi        = {10.1016/J.ENVSOFT.2023.105708},
  timestamp  = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/envsoft/AsgariYLSLMD23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

Register Tiling for Unstructured Sparsity in Neural Network Inference. Wilkinson, L.; Cheshmi, K.; and Dehnavi, M. M. Proc. ACM Program. Lang., 7(PLDI): 1995–2020. 2023.

Register Tiling for Unstructured Sparsity in Neural Network Inference [link]

Paper doi link bibtex 1 download

@article{DBLP:journals/pacmpl/WilkinsonCD23,
  author     = {Lucas Wilkinson and Kazem Cheshmi and Maryam Mehri Dehnavi},
  title      = {Register Tiling for Unstructured Sparsity in Neural Network Inference},
  journal    = {Proc. {ACM} Program. Lang.},
  volume     = {7},
  number     = {{PLDI}},
  pages      = {1995--2020},
  year       = {2023},
  url        = {https://doi.org/10.1145/3591302},
  doi        = {10.1145/3591302},
  timestamp  = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/pacmpl/WilkinsonCD23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

MKOR: Momentum-Enabled Kronecker-Factor-Based Optimizer Using Rank-1 Updates. Mozaffari, M.; Li, S.; Zhang, Z.; and Dehnavi, M. M. In Oh, A.; Naumann, T.; Globerson, A.; Saenko, K.; Hardt, M.; and Levine, S., editor(s), Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023, 2023.

MKOR: Momentum-Enabled Kronecker-Factor-Based Optimizer Using Rank-1 Updates [link]

Paper link bibtex 4 downloads

@inproceedings{DBLP:conf/nips/MozaffariLZD23,
  author     = {Mohammad Mozaffari and Sikan Li and Zhao Zhang and Maryam Mehri Dehnavi},
  editor     = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
  title      = {{MKOR:} Momentum-Enabled Kronecker-Factor-Based Optimizer Using Rank-1 Updates},
  booktitle  = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year       = {2023},
  url        = {http://papers.nips.cc/paper\_files/paper/2023/hash/39bc6e3cbf5a1991d33dc10ebff9a9cf-Abstract-Conference.html},
  timestamp  = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/MozaffariLZD23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

Runtime Composition of Iterations for Fusing Loop-carried Sparse Dependence. Cheshmi, K.; Strout, M.; and Dehnavi, M. M. In Arnold, D.; Badia, R. M.; and Mohror, K. M., editor(s), Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2023, Denver, CO, USA, November 12-17, 2023, pages 89:1–89:15, 2023. ACM

Runtime Composition of Iterations for Fusing Loop-carried Sparse Dependence [link]

Paper doi link bibtex 1 download

@inproceedings{DBLP:conf/sc/CheshmiSD23,
  author     = {Kazem Cheshmi and Michelle Strout and Maryam Mehri Dehnavi},
  editor     = {Dorian Arnold and Rosa M. Badia and Kathryn M. Mohror},
  title      = {Runtime Composition of Iterations for Fusing Loop-carried Sparse Dependence},
  booktitle  = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, {SC} 2023, Denver, CO, USA, November 12-17, 2023},
  pages      = {89:1--89:15},
  publisher  = {{ACM}},
  year       = {2023},
  url        = {https://doi.org/10.1145/3581784.3607097},
  doi        = {10.1145/3581784.3607097},
  timestamp  = {Mon, 24 Jun 2024 15:20:25 +0200},
  biburl     = {https://dblp.org/rec/conf/sc/CheshmiSD23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming, PPoPP 2023, Montreal, QC, Canada, 25 February 2023 - 1 March 2023. Dehnavi, M. M.; Kulkarni, M.; and Krishnamoorthy, S., editors. ACM. 2023.

Paper doi link bibtex 1 download

@proceedings{DBLP:conf/ppopp/2023,
  editor     = {Maryam Mehri Dehnavi and Milind Kulkarni and Sriram Krishnamoorthy},
  title      = {Proceedings of the 28th {ACM} {SIGPLAN} Annual Symposium on Principles and Practice of Parallel Programming, PPoPP 2023, Montreal, QC, Canada, 25 February 2023 - 1 March 2023},
  publisher  = {{ACM}},
  year       = {2023},
  url        = {https://doi.org/10.1145/3572848},
  doi        = {10.1145/3572848},
  isbn       = {979-8-4007-0015-6},
  timestamp  = {Wed, 22 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/ppopp/2023.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

A Framework for Fine-Grained Synchronization of Dependent GPU Kernels. Jangda, A.; Maleki, S.; Dehnavi, M. M.; Musuvathi, M.; and Saarikivi, O. CoRR, abs/2305.13450. 2023.

Paper doi link bibtex 2 downloads

@article{DBLP:journals/corr/abs-2305-13450,
  author     = {Abhinav Jangda and Saeed Maleki and Maryam Mehri Dehnavi and Madan Musuvathi and Olli Saarikivi},
  title      = {A Framework for Fine-Grained Synchronization of Dependent {GPU} Kernels},
  journal    = {CoRR},
  volume     = {abs/2305.13450},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.13450},
  doi        = {10.48550/ARXIV.2305.13450},
  eprinttype = {arXiv},
  eprint     = {2305.13450},
  timestamp  = {Mon, 05 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-13450.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

MKOR: Momentum-Enabled Kronecker-Factor-Based Optimizer Using Rank-1 Updates. Mozaffari, M.; Li, S.; Zhang, Z.; and Dehnavi, M. M. CoRR, abs/2306.01685. 2023.

Paper doi link bibtex 4 downloads

@article{DBLP:journals/corr/abs-2306-01685,
  author     = {Mohammad Mozaffari and Sikan Li and Zhao Zhang and Maryam Mehri Dehnavi},
  title      = {{MKOR:} Momentum-Enabled Kronecker-Factor-Based Optimizer Using Rank-1 Updates},
  journal    = {CoRR},
  volume     = {abs/2306.01685},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2306.01685},
  doi        = {10.48550/ARXIV.2306.01685},
  eprinttype = {arXiv},
  eprint     = {2306.01685},
  timestamp  = {Mon, 12 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-01685.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

2022 (7)

A review of parallel computing applications in calibrating watershed hydrologic models. Asgari, M.; Yang, W.; Lindsay, J. B.; Tolson, B. A.; and Dehnavi, M. M. Environ. Model. Softw., 151: 105370. 2022.

A review of parallel computing applications in calibrating watershed hydrologic models [link]

Paper doi link bibtex 1 download

@article{DBLP:journals/envsoft/AsgariYLTD22,
  author     = {Marjan Asgari and Wanhong Yang and John B. Lindsay and Bryan A. Tolson and Maryam Mehri Dehnavi},
  title      = {A review of parallel computing applications in calibrating watershed hydrologic models},
  journal    = {Environ. Model. Softw.},
  volume     = {151},
  pages      = {105370},
  year       = {2022},
  url        = {https://doi.org/10.1016/j.envsoft.2022.105370},
  doi        = {10.1016/J.ENVSOFT.2022.105370},
  timestamp  = {Wed, 27 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/envsoft/AsgariYLTD22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

Randomized Gossiping With Effective Resistance Weights: Performance Guarantees and Applications. Can, B.; Soori, S.; Aybat, N. S.; Dehnavi, M. M.; and Gürbüzbalaban, M. IEEE Trans. Control. Netw. Syst., 9(2): 524–536. 2022.

Randomized Gossiping With Effective Resistance Weights: Performance Guarantees and Applications [link]

Paper doi link bibtex 1 download

@article{DBLP:journals/tcns/CanSADG22,
  author     = {Bugra Can and Saeed Soori and Necdet Serhat Aybat and Maryam Mehri Dehnavi and Mert G{\"{u}}rb{\"{u}}zbalaban},
  title      = {Randomized Gossiping With Effective Resistance Weights: Performance Guarantees and Applications},
  journal    = {{IEEE} Trans. Control. Netw. Syst.},
  volume     = {9},
  number     = {2},
  pages      = {524--536},
  year       = {2022},
  url        = {https://doi.org/10.1109/TCNS.2022.3161201},
  doi        = {10.1109/TCNS.2022.3161201},
  timestamp  = {Thu, 25 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tcns/CanSADG22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

Combining Run-Time Checks and Compile-Time Analysis to Improve Control Flow Auto-Vectorization. Liu, B.; Laird, A.; Tsang, W. H.; Mahjour, B.; and Dehnavi, M. M. In Klöckner, A.; and Moreira, J., editor(s), Proceedings of the International Conference on Parallel Architectures and Compilation Techniques, PACT 2022, Chicago, Illinois, October 8-12, 2022, pages 439–450, 2022. ACM

Combining Run-Time Checks and Compile-Time Analysis to Improve Control Flow Auto-Vectorization [link]

Paper doi link bibtex 5 downloads

@inproceedings{DBLP:conf/IEEEpact/LiuLTMD22,
  author     = {Bangtian Liu and Avery Laird and Wai Hung Tsang and Bardia Mahjour and Maryam Mehri Dehnavi},
  editor     = {Andreas Kl{\"{o}}ckner and Jos{\'{e}} Moreira},
  title      = {Combining Run-Time Checks and Compile-Time Analysis to Improve Control Flow Auto-Vectorization},
  booktitle  = {Proceedings of the International Conference on Parallel Architectures and Compilation Techniques, {PACT} 2022, Chicago, Illinois, October 8-12, 2022},
  pages      = {439--450},
  publisher  = {{ACM}},
  year       = {2022},
  url        = {https://doi.org/10.1145/3559009.3569663},
  doi        = {10.1145/3559009.3569663},
  timestamp  = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/IEEEpact/LiuLTMD22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

HDagg: Hybrid Aggregation of Loop-carried Dependence Iterations in Sparse Matrix Computations. Zarebavani, B.; Cheshmi, K.; Liu, B.; Strout, M. M.; and Dehnavi, M. M. In 2022 IEEE International Parallel and Distributed Processing Symposium, IPDPS 2022, Lyon, France, May 30 - June 3, 2022, pages 1217–1227, 2022. IEEE

HDagg: Hybrid Aggregation of Loop-carried Dependence Iterations in Sparse Matrix Computations [link]

Paper doi link bibtex 2 downloads

@inproceedings{DBLP:conf/ipps/ZarebavaniCLSD22,
  author     = {Behrooz Zarebavani and Kazem Cheshmi and Bangtian Liu and Michelle Mills Strout and Maryam Mehri Dehnavi},
  title      = {HDagg: Hybrid Aggregation of Loop-carried Dependence Iterations in Sparse Matrix Computations},
  booktitle  = {2022 {IEEE} International Parallel and Distributed Processing Symposium, {IPDPS} 2022, Lyon, France, May 30 - June 3, 2022},
  pages      = {1217--1227},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/IPDPS53621.2022.00121},
  doi        = {10.1109/IPDPS53621.2022.00121},
  timestamp  = {Fri, 22 Jul 2022 11:14:30 +0200},
  biburl     = {https://dblp.org/rec/conf/ipps/ZarebavaniCLSD22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

Optimizing sparse computations jointly. Cheshmi, K.; Strout, M. M.; and Dehnavi, M. M. In Lee, J.; Agrawal, K.; and Spear, M. F., editor(s), PPoPP '22: 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, Seoul, Republic of Korea, April 2 - 6, 2022, pages 459–460, 2022. ACM

Optimizing sparse computations jointly [link]

Paper doi link bibtex 2 downloads

@inproceedings{DBLP:conf/ppopp/CheshmiSD22,
  author     = {Kazem Cheshmi and Michelle Mills Strout and Maryam Mehri Dehnavi},
  editor     = {Jaejin Lee and Kunal Agrawal and Michael F. Spear},
  title      = {Optimizing sparse computations jointly},
  booktitle  = {PPoPP '22: 27th {ACM} {SIGPLAN} Symposium on Principles and Practice of Parallel Programming, Seoul, Republic of Korea, April 2 - 6, 2022},
  pages      = {459--460},
  publisher  = {{ACM}},
  year       = {2022},
  url        = {https://doi.org/10.1145/3503221.3508439},
  doi        = {10.1145/3503221.3508439},
  timestamp  = {Mon, 07 Apr 2025 08:23:05 +0200},
  biburl     = {https://dblp.org/rec/conf/ppopp/CheshmiSD22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

Vectorizing Sparse Matrix Computations with Partially-Strided Codelets. Cheshmi, K.; Cetinic, Z.; and Dehnavi, M. M. In Wolf, F.; Shende, S.; Culhane, C.; Alam, S. R.; and Jagode, H., editor(s), SC22: International Conference for High Performance Computing, Networking, Storage and Analysis, Dallas, TX, USA, November 13-18, 2022, pages 32:1–32:15, 2022. IEEE

Vectorizing Sparse Matrix Computations with Partially-Strided Codelets [link]

Paper doi link bibtex 3 downloads

@inproceedings{DBLP:conf/sc/CheshmiCD22,
  author     = {Kazem Cheshmi and Zachary Cetinic and Maryam Mehri Dehnavi},
  editor     = {Felix Wolf and Sameer Shende and Candace Culhane and Sadaf R. Alam and Heike Jagode},
  title      = {Vectorizing Sparse Matrix Computations with Partially-Strided Codelets},
  booktitle  = {{SC22:} International Conference for High Performance Computing, Networking, Storage and Analysis, Dallas, TX, USA, November 13-18, 2022},
  pages      = {32:1--32:15},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/SC41404.2022.00037},
  doi        = {10.1109/SC41404.2022.00037},
  timestamp  = {Wed, 24 May 2023 16:17:06 +0200},
  biburl     = {https://dblp.org/rec/conf/sc/CheshmiCD22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

HyLo: A Hybrid Low-Rank Natural Gradient Descent Method. Mu, B.; Soori, S.; Can, B.; Gürbüzbalaban, M.; and Dehnavi, M. M. In Wolf, F.; Shende, S.; Culhane, C.; Alam, S. R.; and Jagode, H., editor(s), SC22: International Conference for High Performance Computing, Networking, Storage and Analysis, Dallas, TX, USA, November 13-18, 2022, pages 47:1–47:16, 2022. IEEE

HyLo: A Hybrid Low-Rank Natural Gradient Descent Method [link]

Paper doi link bibtex

@inproceedings{DBLP:conf/sc/MuSCGD22,
  author     = {Baorun Mu and Saeed Soori and Bugra Can and Mert G{\"{u}}rb{\"{u}}zbalaban and Maryam Mehri Dehnavi},
  editor     = {Felix Wolf and Sameer Shende and Candace Culhane and Sadaf R. Alam and Heike Jagode},
  title      = {HyLo: {A} Hybrid Low-Rank Natural Gradient Descent Method},
  booktitle  = {{SC22:} International Conference for High Performance Computing, Networking, Storage and Analysis, Dallas, TX, USA, November 13-18, 2022},
  pages      = {47:1--47:16},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/SC41404.2022.00052},
  doi        = {10.1109/SC41404.2022.00052},
  timestamp  = {Wed, 24 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sc/MuSCGD22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

2021 (5)

L-DQN: An Asynchronous Limited-Memory Distributed Quasi-Newton Method. Can, B.; Soori, S.; Dehnavi, M. M.; and Gürbüzbalaban, M. In 2021 60th IEEE Conference on Decision and Control (CDC), Austin, TX, USA, December 14-17, 2021, pages 2386–2393, 2021. IEEE

L-DQN: An Asynchronous Limited-Memory Distributed Quasi-Newton Method [link]

Paper doi link bibtex

@inproceedings{DBLP:conf/cdc/CanSDG21,
  author     = {Bugra Can and Saeed Soori and Maryam Mehri Dehnavi and Mert G{\"{u}}rb{\"{u}}zbalaban},
  title      = {{L-DQN:} An Asynchronous Limited-Memory Distributed Quasi-Newton Method},
  booktitle  = {2021 60th {IEEE} Conference on Decision and Control (CDC), Austin, TX, USA, December 14-17, 2021},
  pages      = {2386--2393},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/CDC45484.2021.9682985},
  doi        = {10.1109/CDC45484.2021.9682985},
  timestamp  = {Tue, 17 May 2022 15:53:17 +0200},
  biburl     = {https://dblp.org/rec/conf/cdc/CanSDG21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

TENGraD: Time-Efficient Natural Gradient Descent with Exact Fisher-Block Inversion. Soori, S.; Can, B.; Mu, B.; Gürbüzbalaban, M.; and Dehnavi, M. M. CoRR, abs/2106.03947. 2021.

TENGraD: Time-Efficient Natural Gradient Descent with Exact Fisher-Block Inversion [link]

Paper link bibtex 1 download

@article{DBLP:journals/corr/abs-2106-03947,
  author     = {Saeed Soori and Bugra Can and Baourun Mu and Mert G{\"{u}}rb{\"{u}}zbalaban and Maryam Mehri Dehnavi},
  title      = {TENGraD: Time-Efficient Natural Gradient Descent with Exact Fisher-Block Inversion},
  journal    = {CoRR},
  volume     = {abs/2106.03947},
  year       = {2021},
  url        = {https://arxiv.org/abs/2106.03947},
  eprinttype = {arXiv},
  eprint     = {2106.03947},
  timestamp  = {Thu, 10 Jun 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-03947.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

L-DQN: An Asynchronous Limited-Memory Distributed Quasi-Newton Method. Can, B.; Soori, S.; Dehnavi, M. M.; and Gürbüzbalaban, M. CoRR, abs/2108.09365. 2021.

Paper link bibtex

@article{DBLP:journals/corr/abs-2108-09365,
  author     = {Bugra Can and Saeed Soori and Maryam Mehri Dehnavi and Mert G{\"{u}}rb{\"{u}}zbalaban},
  title      = {{L-DQN:} An Asynchronous Limited-Memory Distributed Quasi-Newton Method},
  journal    = {CoRR},
  volume     = {abs/2108.09365},
  year       = {2021},
  url        = {https://arxiv.org/abs/2108.09365},
  eprinttype = {arXiv},
  eprint     = {2108.09365},
  timestamp  = {Fri, 27 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2108-09365.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

Composing Loop-carried Dependence with Other Loops. Cheshmi, K.; Strout, M. M.; and Dehnavi, M. M. CoRR, abs/2111.12238. 2021.

Composing Loop-carried Dependence with Other Loops [link]

Paper link bibtex

@article{DBLP:journals/corr/abs-2111-12238,
  author     = {Kazem Cheshmi and Michelle Mills Strout and Maryam Mehri Dehnavi},
  title      = {Composing Loop-carried Dependence with Other Loops},
  journal    = {CoRR},
  volume     = {abs/2111.12238},
  year       = {2021},
  url        = {https://arxiv.org/abs/2111.12238},
  eprinttype = {arXiv},
  eprint     = {2111.12238},
  timestamp  = {Fri, 26 Nov 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2111-12238.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

Differentiating-based Vectorization for Sparse Kernels. Cetinic, Z.; Cheshmi, K.; and Dehnavi, M. M. CoRR, abs/2111.12243. 2021.

Differentiating-based Vectorization for Sparse Kernels [link]

Paper link bibtex 2 downloads

@article{DBLP:journals/corr/abs-2111-12243,
  author     = {Zachary Cetinic and Kazem Cheshmi and Maryam Mehri Dehnavi},
  title      = {Differentiating-based Vectorization for Sparse Kernels},
  journal    = {CoRR},
  volume     = {abs/2111.12243},
  year       = {2021},
  url        = {https://arxiv.org/abs/2111.12243},
  eprinttype = {arXiv},
  eprint     = {2111.12243},
  timestamp  = {Fri, 26 Nov 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2111-12243.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

2020 (4)

NASOQ: numerically accurate sparsity-oriented QP solver. Cheshmi, K.; Kaufman, D. M.; Kamil, S.; and Dehnavi, M. M. ACM Trans. Graph., 39(4): 96. 2020.

NASOQ: numerically accurate sparsity-oriented QP solver [link]

Paper doi link bibtex 1 download

@article{DBLP:journals/tog/CheshmiKKD20,
  author     = {Kazem Cheshmi and Danny M. Kaufman and Shoaib Kamil and Maryam Mehri Dehnavi},
  title      = {{NASOQ:} numerically accurate sparsity-oriented {QP} solver},
  journal    = {{ACM} Trans. Graph.},
  volume     = {39},
  number     = {4},
  pages      = {96},
  year       = {2020},
  url        = {https://doi.org/10.1145/3386569.3392486},
  doi        = {10.1145/3386569.3392486},
  timestamp  = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/tog/CheshmiKKD20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

DAve-QN: A Distributed Averaged Quasi-Newton Method with Local Superlinear Convergence Rate. Soori, S.; Mishchenko, K.; Mokhtari, A.; Dehnavi, M. M.; and Gürbüzbalaban, M. In Chiappa, S.; and Calandra, R., editor(s), The 23rd International Conference on Artificial Intelligence and Statistics, AISTATS 2020, 26-28 August 2020, Online [Palermo, Sicily, Italy], volume 108, of Proceedings of Machine Learning Research, pages 1965–1976, 2020. PMLR

DAve-QN: A Distributed Averaged Quasi-Newton Method with Local Superlinear Convergence Rate [link]

Paper link bibtex

% The umlauts in the last author's name are LaTeX accent groups, not HTML entities.
% DAve-QN and Newton are brace-protected so sentence-casing styles keep their capitals.
@inproceedings{DBLP:conf/aistats/SooriMMDG20,
  author       = {Saeed Soori and
                  Konstantin Mishchenko and
                  Aryan Mokhtari and
                  Maryam Mehri Dehnavi and
                  Mert G{\"{u}}rb{\"{u}}zbalaban},
  editor       = {Silvia Chiappa and
                  Roberto Calandra},
  title        = {{DAve-QN:} {A} Distributed Averaged Quasi-{Newton} Method with Local Superlinear
                  Convergence Rate},
  booktitle    = {The 23rd International Conference on Artificial Intelligence and Statistics,
                  {AISTATS} 2020, 26-28 August 2020, Online [Palermo, Sicily, Italy]},
  series       = {Proceedings of Machine Learning Research},
  volume       = {108},
  pages        = {1965--1976},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v108/soori20a.html},
  timestamp    = {Mon, 29 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/SooriMMDG20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

ASYNC: A Cloud Engine with Asynchrony and History for Distributed Machine Learning. Soori, S.; Can, B.; Gürbüzbalaban, M.; and Dehnavi, M. M. In 2020 IEEE International Parallel and Distributed Processing Symposium (IPDPS), New Orleans, LA, USA, May 18-22, 2020, pages 429–439, 2020. IEEE

ASYNC: A Cloud Engine with Asynchrony and History for Distributed Machine Learning [link]

Paper doi link bibtex

% The umlauts in the third author's name are LaTeX accent groups, not HTML entities.
@inproceedings{DBLP:conf/ipps/SooriCGD20,
  author       = {Saeed Soori and
                  Bugra Can and
                  Mert G{\"{u}}rb{\"{u}}zbalaban and
                  Maryam Mehri Dehnavi},
  title        = {{ASYNC:} {A} Cloud Engine with Asynchrony and History for Distributed
                  Machine Learning},
  booktitle    = {2020 {IEEE} International Parallel and Distributed Processing Symposium
                  (IPDPS), New Orleans, LA, USA, May 18-22, 2020},
  pages        = {429--439},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/IPDPS47924.2020.00052},
  doi          = {10.1109/IPDPS47924.2020.00052},
  timestamp    = {Wed, 22 Jul 2020 15:53:25 +0200},
  biburl       = {https://dblp.org/rec/conf/ipps/SooriCGD20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

MatRox: modular approach for improving data locality in hierarchical (Mat)rix App(Rox)imation. Liu, B.; Cheshmi, K.; Soori, S.; Strout, M. M.; and Dehnavi, M. M. In Gupta, R.; and Shen, X., editor(s), PPoPP '20: 25th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, San Diego, California, USA, February 22-26, 2020, pages 389–402, 2020. ACM

MatRox: modular approach for improving data locality in hierarchical (Mat)rix App(Rox)imation [link]

Paper doi link bibtex 2 downloads

% MatRox, (Mat)rix and App(Rox)imation carry intentional mixed case; the braces
% keep sentence-casing bibliography styles from lowercasing them.
@inproceedings{DBLP:conf/ppopp/LiuCSSD20,
  author       = {Bangtian Liu and
                  Kazem Cheshmi and
                  Saeed Soori and
                  Michelle Mills Strout and
                  Maryam Mehri Dehnavi},
  editor       = {Rajiv Gupta and
                  Xipeng Shen},
  title        = {{MatRox:} modular approach for improving data locality in hierarchical
                  {(Mat)rix} {App(Rox)imation}},
  booktitle    = {PPoPP '20: 25th {ACM} {SIGPLAN} Symposium on Principles and Practice
                  of Parallel Programming, San Diego, California, USA, February 22-26,
                  2020},
  pages        = {389--402},
  publisher    = {{ACM}},
  year         = {2020},
  url          = {https://doi.org/10.1145/3332466.3374548},
  doi          = {10.1145/3332466.3374548},
  timestamp    = {Sun, 12 Jun 2022 19:46:08 +0200},
  biburl       = {https://dblp.org/rec/conf/ppopp/LiuCSSD20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

2019 (2)

Sparse computation data dependence simplification for efficient compiler-generated inspectors. Mohammadi, M. S.; Yuki, T.; Cheshmi, K.; Davis, E. C.; Hall, M. W.; Dehnavi, M. M.; Nandy, P.; Olschanowsky, C.; Venkat, A.; and Strout, M. M. In McKinley, K. S.; and Fisher, K., editor(s), Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI 2019, Phoenix, AZ, USA, June 22-26, 2019, pages 594–609, 2019. ACM

Sparse computation data dependence simplification for efficient compiler-generated inspectors [link]

Paper doi link bibtex 2 downloads

% Conference paper record (dblp export): PLDI 2019, pages 594--609.
@inproceedings{DBLP:conf/pldi/MohammadiYCDHDN19,
  author = {Mohammadi, Mahdi Soltan
            and Yuki, Tomofumi
            and Cheshmi, Kazem
            and Davis, Eddie C.
            and Hall, Mary W.
            and Dehnavi, Maryam Mehri
            and Nandy, Payal
            and Olschanowsky, Catherine
            and Venkat, Anand
            and Strout, Michelle Mills},
  editor = {McKinley, Kathryn S.
            and Fisher, Kathleen},
  title = {Sparse computation data dependence simplification for efficient compiler-generated inspectors},
  booktitle = {Proceedings of the 40th {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation, {PLDI} 2019, Phoenix, AZ, USA, June 22-26, 2019},
  pages = {594--609},
  publisher = {{ACM}},
  year = {2019},
  url = {https://doi.org/10.1145/3314221.3314646},
  doi = {10.1145/3314221.3314646},
  timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/pldi/MohammadiYCDHDN19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

ASYNC: Asynchronous Machine Learning on Distributed Systems. Soori, S.; Can, B.; Gürbüzbalaban, M.; and Dehnavi, M. M. CoRR, abs/1907.08526. 2019.

ASYNC: Asynchronous Machine Learning on Distributed Systems [link]

Paper link bibtex

% The umlauts in the third author's name are LaTeX accent groups, not HTML entities.
@article{DBLP:journals/corr/abs-1907-08526,
  author       = {Saeed Soori and
                  Bugra Can and
                  Mert G{\"{u}}rb{\"{u}}zbalaban and
                  Maryam Mehri Dehnavi},
  title        = {{ASYNC:} Asynchronous Machine Learning on Distributed Systems},
  journal      = {CoRR},
  volume       = {abs/1907.08526},
  year         = {2019},
  url          = {http://arxiv.org/abs/1907.08526},
  eprinttype   = {arXiv},
  eprint       = {1907.08526},
  timestamp    = {Wed, 24 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1907-08526.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

2018 (7)

CSTF: Large-Scale Sparse Tensor Factorizations on Distributed Platforms. Blanco, Z.; Liu, B.; and Dehnavi, M. M. In Proceedings of the 47th International Conference on Parallel Processing, ICPP 2018, Eugene, OR, USA, August 13-16, 2018, pages 21:1–21:10, 2018. ACM

CSTF: Large-Scale Sparse Tensor Factorizations on Distributed Platforms [link]

Paper doi link bibtex

% Conference paper record (dblp export): ICPP 2018, pages 21:1--21:10.
@inproceedings{DBLP:conf/icpp/BlancoLD18,
  author = {Blanco, Zachary
            and Liu, Bangtian
            and Dehnavi, Maryam Mehri},
  title = {{CSTF:} Large-Scale Sparse Tensor Factorizations on Distributed Platforms},
  booktitle = {Proceedings of the 47th International Conference on Parallel Processing, {ICPP} 2018, Eugene, OR, USA, August 13-16, 2018},
  pages = {21:1--21:10},
  publisher = {{ACM}},
  year = {2018},
  url = {https://doi.org/10.1145/3225058.3225133},
  doi = {10.1145/3225058.3225133},
  timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/icpp/BlancoLD18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

Reducing Communication in Proximal Newton Methods for Sparse Least Squares Problems. Soori, S.; Devarakonda, A.; Blanco, Z.; Demmel, J.; Gürbüzbalaban, M.; and Dehnavi, M. M. In Proceedings of the 47th International Conference on Parallel Processing, ICPP 2018, Eugene, OR, USA, August 13-16, 2018, pages 22:1–22:10, 2018. ACM

Reducing Communication in Proximal Newton Methods for Sparse Least Squares Problems [link]

Paper doi link bibtex

% The umlauts in the fifth author's name are LaTeX accent groups, not HTML entities.
% Newton is brace-protected so sentence-casing styles keep the proper noun capitalised.
@inproceedings{DBLP:conf/icpp/SooriDBDGD18,
  author       = {Saeed Soori and
                  Aditya Devarakonda and
                  Zachary Blanco and
                  James Demmel and
                  Mert G{\"{u}}rb{\"{u}}zbalaban and
                  Maryam Mehri Dehnavi},
  title        = {Reducing Communication in Proximal {Newton} Methods for Sparse Least
                  Squares Problems},
  booktitle    = {Proceedings of the 47th International Conference on Parallel Processing,
                  {ICPP} 2018, Eugene, OR, USA, August 13-16, 2018},
  pages        = {22:1--22:10},
  publisher    = {{ACM}},
  year         = {2018},
  url          = {https://doi.org/10.1145/3225058.3225131},
  doi          = {10.1145/3225058.3225131},
  timestamp    = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icpp/SooriDBDGD18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Sparsity-Aware Storage Format Selection. Cheshmi, K.; Cheshmi, L.; and Dehnavi, M. M. In 2018 International Conference on High Performance Computing & Simulation, HPCS 2018, Orleans, France, July 16-20, 2018, pages 1034–1037, 2018. IEEE

Sparsity-Aware Storage Format Selection [link]

Paper doi link bibtex

% The ampersand in the conference name is the LaTeX escape, not an HTML entity.
@inproceedings{DBLP:conf/ieeehpcs/CheshmiCD18,
  author       = {Kazem Cheshmi and
                  Leila Cheshmi and
                  Maryam Mehri Dehnavi},
  title        = {Sparsity-Aware Storage Format Selection},
  booktitle    = {2018 International Conference on High Performance Computing {\&}
                  Simulation, {HPCS} 2018, Orleans, France, July 16-20, 2018},
  pages        = {1034--1037},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/HPCS.2018.00162},
  doi          = {10.1109/HPCS.2018.00162},
  timestamp    = {Wed, 16 Oct 2019 14:14:54 +0200},
  biburl       = {https://dblp.org/rec/conf/ieeehpcs/CheshmiCD18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Extending Index-Array Properties for Data Dependence Analysis. Mohammadi, M. S.; Cheshmi, K.; Dehnavi, M. M.; Venkat, A.; Yuki, T.; and Strout, M. M. In Hall, M. W.; and Sundar, H., editor(s), Languages and Compilers for Parallel Computing - 31st International Workshop, LCPC 2018, Salt Lake City, UT, USA, October 9-11, 2018, Revised Selected Papers, volume 11882, of Lecture Notes in Computer Science, pages 78–93, 2018. Springer

Extending Index-Array Properties for Data Dependence Analysis [link]

Paper doi link bibtex

% Workshop paper record (dblp export): LCPC 2018, LNCS volume 11882, pages 78--93.
@inproceedings{DBLP:conf/lcpc/MohammadiCDVYS18,
  author = {Mohammadi, Mahdi Soltan
            and Cheshmi, Kazem
            and Dehnavi, Maryam Mehri
            and Venkat, Anand
            and Yuki, Tomofumi
            and Strout, Michelle Mills},
  editor = {Hall, Mary W.
            and Sundar, Hari},
  title = {Extending Index-Array Properties for Data Dependence Analysis},
  booktitle = {Languages and Compilers for Parallel Computing - 31st International Workshop, {LCPC} 2018, Salt Lake City, UT, USA, October 9-11, 2018, Revised Selected Papers},
  series = {Lecture Notes in Computer Science},
  volume = {11882},
  pages = {78--93},
  publisher = {Springer},
  year = {2018},
  url = {https://doi.org/10.1007/978-3-030-34627-0\_7},
  doi = {10.1007/978-3-030-34627-0\_7},
  timestamp = {Wed, 13 Nov 2019 15:26:10 +0100},
  biburl = {https://dblp.org/rec/conf/lcpc/MohammadiCDVYS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

ParSy: inspection and transformation of sparse matrix computations for parallelism. Cheshmi, K.; Kamil, S.; Strout, M. M.; and Dehnavi, M. M. In Proceedings of the International Conference for High Performance Computing, Networking, Storage, and Analysis, SC 2018, Dallas, TX, USA, November 11-16, 2018, pages 62:1–62:15, 2018. IEEE / ACM

ParSy: inspection and transformation of sparse matrix computations for parallelism [link]

Paper link bibtex

% ParSy is brace-protected so sentence-casing styles keep its internal capital.
@inproceedings{DBLP:conf/sc/CheshmiKSD18,
  author       = {Kazem Cheshmi and
                  Shoaib Kamil and
                  Michelle Mills Strout and
                  Maryam Mehri Dehnavi},
  title        = {{ParSy:} inspection and transformation of sparse matrix computations
                  for parallelism},
  booktitle    = {Proceedings of the International Conference for High Performance Computing,
                  Networking, Storage, and Analysis, {SC} 2018, Dallas, TX, USA, November
                  11-16, 2018},
  pages        = {62:1--62:15},
  publisher    = {{IEEE} / {ACM}},
  year         = {2018},
  url          = {http://dl.acm.org/citation.cfm?id=3291739},
  timestamp    = {Thu, 24 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/sc/CheshmiKSD18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Sparse Matrix Code Dependence Analysis Simplification at Compile Time. Mohammadi, M. S.; Cheshmi, K.; Gopalakrishnan, G.; Hall, M. W.; Dehnavi, M. M.; Venkat, A.; Yuki, T.; and Strout, M. M. CoRR, abs/1807.10852. 2018.

Sparse Matrix Code Dependence Analysis Simplification at Compile Time [link]

Paper link bibtex

% arXiv preprint record (dblp export): CoRR abs/1807.10852, 2018.
@article{DBLP:journals/corr/abs-1807-10852,
  author = {Mohammadi, Mahdi Soltan
            and Cheshmi, Kazem
            and Gopalakrishnan, Ganesh
            and Hall, Mary W.
            and Dehnavi, Maryam Mehri
            and Venkat, Anand
            and Yuki, Tomofumi
            and Strout, Michelle Mills},
  title = {Sparse Matrix Code Dependence Analysis Simplification at Compile Time},
  journal = {CoRR},
  volume = {abs/1807.10852},
  year = {2018},
  url = {http://arxiv.org/abs/1807.10852},
  eprinttype = {arXiv},
  eprint = {1807.10852},
  timestamp = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1807-10852.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

MatRox: A Model-Based Algorithm with an Efficient Storage Format for Parallel HSS-Structured Matrix Approximations. Liu, B.; Cheshmi, K.; Soori, S.; and Dehnavi, M. M. CoRR, abs/1812.07152. 2018.

MatRox: A Model-Based Algorithm with an Efficient Storage Format for Parallel HSS-Structured Matrix Approximations [link]

Paper link bibtex

% MatRox and the acronym HSS are brace-protected so sentence-casing styles keep their capitals.
@article{DBLP:journals/corr/abs-1812-07152,
  author       = {Bangtian Liu and
                  Kazem Cheshmi and
                  Saeed Soori and
                  Maryam Mehri Dehnavi},
  title        = {{MatRox:} {A} Model-Based Algorithm with an Efficient Storage Format
                  for Parallel {HSS}-Structured Matrix Approximations},
  journal      = {CoRR},
  volume       = {abs/1812.07152},
  year         = {2018},
  url          = {http://arxiv.org/abs/1812.07152},
  eprinttype   = {arXiv},
  eprint       = {1812.07152},
  timestamp    = {Tue, 01 Jan 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1812-07152.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

2017 (7)

Autotuning divide-and-conquer stencil computations. Natarajan, E. P.; Dehnavi, M. M.; and Leiserson, C. E. Concurr. Comput. Pract. Exp., 29(17). 2017.

Autotuning divide-and-conquer stencil computations [link]

Paper doi link bibtex 1 download

% Journal article record (dblp export): Concurr. Comput. Pract. Exp. 29(17), 2017.
@article{DBLP:journals/concurrency/NatarajanDL17,
  author = {Natarajan, Ekanathan Palamadai
            and Dehnavi, Maryam Mehri
            and Leiserson, Charles E.},
  title = {Autotuning divide-and-conquer stencil computations},
  journal = {Concurr. Comput. Pract. Exp.},
  volume = {29},
  number = {17},
  year = {2017},
  url = {https://doi.org/10.1002/cpe.4127},
  doi = {10.1002/CPE.4127},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/concurrency/NatarajanDL17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

A Unified Optimization Approach for Sparse Tensor Operations on GPUs. Liu, B.; Wen, C.; Sarwate, A. D.; and Dehnavi, M. M. In 2017 IEEE International Conference on Cluster Computing, CLUSTER 2017, Honolulu, HI, USA, September 5-8, 2017, pages 47–57, 2017. IEEE Computer Society

A Unified Optimization Approach for Sparse Tensor Operations on GPUs [link]

Paper doi link bibtex

% The acronym GPUs is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/cluster/LiuWSD17,
  author       = {Bangtian Liu and
                  Chengyao Wen and
                  Anand D. Sarwate and
                  Maryam Mehri Dehnavi},
  title        = {A Unified Optimization Approach for Sparse Tensor Operations on {GPUs}},
  booktitle    = {2017 {IEEE} International Conference on Cluster Computing, {CLUSTER}
                  2017, Honolulu, HI, USA, September 5-8, 2017},
  pages        = {47--57},
  publisher    = {{IEEE} Computer Society},
  year         = {2017},
  url          = {https://doi.org/10.1109/CLUSTER.2017.75},
  doi          = {10.1109/CLUSTER.2017.75},
  timestamp    = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/cluster/LiuWSD17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Sympiler: transforming sparse matrix codes by decoupling symbolic analysis. Cheshmi, K.; Kamil, S.; Strout, M. M.; and Dehnavi, M. M. In Mohr, B.; and Raghavan, P., editor(s), Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2017, Denver, CO, USA, November 12 - 17, 2017, pages 13, 2017. ACM

Sympiler: transforming sparse matrix codes by decoupling symbolic analysis [link]

Paper doi link bibtex

% Conference paper record (dblp export): SC 2017.
@inproceedings{DBLP:conf/sc/CheshmiKSD17,
  author = {Cheshmi, Kazem
            and Kamil, Shoaib
            and Strout, Michelle Mills
            and Dehnavi, Maryam Mehri},
  editor = {Mohr, Bernd
            and Raghavan, Padma},
  title = {Sympiler: transforming sparse matrix codes by decoupling symbolic analysis},
  booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, {SC} 2017, Denver, CO, USA, November 12 - 17, 2017},
  pages = {13},
  publisher = {{ACM}},
  year = {2017},
  url = {https://doi.org/10.1145/3126908.3126936},
  doi = {10.1145/3126908.3126936},
  timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/sc/CheshmiKSD17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

Power grid safety control via fine-grained multi-persona programmable logic controllers. Salles-Loustau, G.; Garcia, L.; Sun, P.; Dehnavi, M. M.; and Zonouz, S. A. In 2017 IEEE International Conference on Smart Grid Communications, SmartGridComm 2017, Dresden, Germany, October 23-27, 2017, pages 283–288, 2017. IEEE

Power grid safety control via fine-grained multi-persona programmable logic controllers [link]

Paper doi link bibtex

% Conference paper record (dblp export): SmartGridComm 2017, pages 283--288.
@inproceedings{DBLP:conf/smartgridcomm/Salles-LoustauG17,
  author = {Salles{-}Loustau, Gabriel
            and Garcia, Luis
            and Sun, Pengfei
            and Dehnavi, Maryam Mehri
            and Zonouz, Saman A.},
  title = {Power grid safety control via fine-grained multi-persona programmable logic controllers},
  booktitle = {2017 {IEEE} International Conference on Smart Grid Communications, SmartGridComm 2017, Dresden, Germany, October 23-27, 2017},
  pages = {283--288},
  publisher = {{IEEE}},
  year = {2017},
  url = {https://doi.org/10.1109/SmartGridComm.2017.8340684},
  doi = {10.1109/SMARTGRIDCOMM.2017.8340684},
  timestamp = {Mon, 17 Jul 2023 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/smartgridcomm/Salles-LoustauG17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

Sympiler: Transforming Sparse Matrix Codes by Decoupling Symbolic Analysis. Cheshmi, K.; Kamil, S.; Strout, M. M.; and Dehnavi, M. M. CoRR, abs/1705.06575. 2017.

Paper link bibtex

% arXiv preprint record (dblp export): CoRR abs/1705.06575, 2017.
@article{DBLP:journals/corr/CheshmiKSD17,
  author = {Cheshmi, Kazem
            and Kamil, Shoaib
            and Strout, Michelle Mills
            and Dehnavi, Maryam Mehri},
  title = {Sympiler: Transforming Sparse Matrix Codes by Decoupling Symbolic Analysis},
  journal = {CoRR},
  volume = {abs/1705.06575},
  year = {2017},
  url = {http://arxiv.org/abs/1705.06575},
  eprinttype = {arXiv},
  eprint = {1705.06575},
  timestamp = {Thu, 24 Nov 2022 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/CheshmiKSD17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

A Unified Optimization Approach for Sparse Tensor Operations on GPUs. Liu, B.; Wen, C.; Sarwate, A. D.; and Dehnavi, M. M. CoRR, abs/1705.09905. 2017.

Paper link bibtex

% The acronym GPUs is brace-protected so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/LiuWSD17,
  author       = {Bangtian Liu and
                  Chengyao Wen and
                  Anand D. Sarwate and
                  Maryam Mehri Dehnavi},
  title        = {A Unified Optimization Approach for Sparse Tensor Operations on {GPUs}},
  journal      = {CoRR},
  volume       = {abs/1705.09905},
  year         = {2017},
  url          = {http://arxiv.org/abs/1705.09905},
  eprinttype   = {arXiv},
  eprint       = {1705.09905},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/LiuWSD17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Avoiding Communication in Proximal Methods for Convex Optimization Problems. Soori, S.; Devarakonda, A.; Demmel, J.; Gürbüzbalaban, M.; and Dehnavi, M. M. CoRR, abs/1710.08883. 2017.

Avoiding Communication in Proximal Methods for Convex Optimization Problems [link]

Paper link bibtex

% The umlauts in the fourth author's name are LaTeX accent groups, not HTML entities.
@article{DBLP:journals/corr/abs-1710-08883,
  author       = {Saeed Soori and
                  Aditya Devarakonda and
                  James Demmel and
                  Mert G{\"{u}}rb{\"{u}}zbalaban and
                  Maryam Mehri Dehnavi},
  title        = {Avoiding Communication in Proximal Methods for Convex Optimization
                  Problems},
  journal      = {CoRR},
  volume       = {abs/1710.08883},
  year         = {2017},
  url          = {http://arxiv.org/abs/1710.08883},
  eprinttype   = {arXiv},
  eprint       = {1710.08883},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1710-08883.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

2015 (1)

Parallel finite element technique using Gaussian belief propagation. El-Kurdi, Y.; Dehnavi, M. M.; Gross, W. J.; and Giannacopoulos, D. Comput. Phys. Commun., 193: 38–48. 2015.

Parallel finite element technique using Gaussian belief propagation [link]

Paper doi link bibtex

% Gaussian is brace-protected so sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/cphysics/El-KurdiDGG15,
  author       = {Yousef El{-}Kurdi and
                  Maryam Mehri Dehnavi and
                  Warren J. Gross and
                  Dennis Giannacopoulos},
  title        = {Parallel finite element technique using {Gaussian} belief propagation},
  journal      = {Comput. Phys. Commun.},
  volume       = {193},
  pages        = {38--48},
  year         = {2015},
  url          = {https://doi.org/10.1016/j.cpc.2015.03.019},
  doi          = {10.1016/J.CPC.2015.03.019},
  timestamp    = {Fri, 21 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/cphysics/El-KurdiDGG15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

2014 (4)

Survey on Grid Resource Allocation Mechanisms. Qureshi, M. B.; Dehnavi, M. M.; Min-Allah, N.; Qureshi, M. S.; Hussain, H.; Rentifis, I.; Tziritas, N.; Loukopoulos, T.; Khan, S. U.; Xu, C.; and Zomaya, A. Y. J. Grid Comput., 12(2): 399–441. 2014.

Survey on Grid Resource Allocation Mechanisms [link]

Paper doi link bibtex

% Journal article record (dblp export): J. Grid Comput. 12(2), pages 399--441, 2014.
@article{DBLP:journals/grid/QureshiDMQHRTLKXZ14,
  author = {Qureshi, Muhammad Bilal
            and Dehnavi, Maryam Mehri
            and Min{-}Allah, Nasro
            and Qureshi, Muhammad Shuaib
            and Hussain, Hameed
            and Rentifis, Ilias
            and Tziritas, Nikos
            and Loukopoulos, Thanasis
            and Khan, Samee Ullah
            and Xu, Cheng{-}Zhong
            and Zomaya, Albert Y.},
  title = {Survey on Grid Resource Allocation Mechanisms},
  journal = {J. Grid Comput.},
  volume = {12},
  number = {2},
  pages = {399--441},
  year = {2014},
  url = {https://doi.org/10.1007/s10723-014-9292-9},
  doi = {10.1007/S10723-014-9292-9},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/grid/QureshiDMQHRTLKXZ14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

Evaluating multi-core and many-core architectures through accelerating the three-dimensional Lax-Wendroff correction stencil. You, Y.; Fu, H.; Song, S. L.; Dehnavi, M. M.; Gan, L.; Huang, X.; and Yang, G. Int. J. High Perform. Comput. Appl., 28(3): 301–318. 2014.

Evaluating multi-core and many-core architectures through accelerating the three-dimensional Lax-Wendroff correction stencil [link]

Paper doi link bibtex

% Lax-Wendroff is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/ijhpca/YouFSDGHY14,
  author       = {Yang You and
                  Haohuan Fu and
                  Shuaiwen Leon Song and
                  Maryam Mehri Dehnavi and
                  Lin Gan and
                  Xiaomeng Huang and
                  Guangwen Yang},
  title        = {Evaluating multi-core and many-core architectures through accelerating
                  the three-dimensional {Lax-Wendroff} correction stencil},
  journal      = {Int. J. High Perform. Comput. Appl.},
  volume       = {28},
  number       = {3},
  pages        = {301--318},
  year         = {2014},
  url          = {https://doi.org/10.1177/1094342014524807},
  doi          = {10.1177/1094342014524807},
  timestamp    = {Tue, 20 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ijhpca/YouFSDGHY14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

Designing a Heuristic Cross-Architecture Combination for Breadth-First Search. You, Y.; Bader, D. A.; and Dehnavi, M. M. In 43rd International Conference on Parallel Processing, ICPP 2014, Minneapolis, MN, USA, September 9-12, 2014, pages 70–79, 2014. IEEE Computer Society

Paper doi link bibtex

% Conference paper record (dblp export): ICPP 2014, pages 70--79.
@inproceedings{DBLP:conf/icpp/YouBD14,
  author = {You, Yang
            and Bader, David A.
            and Dehnavi, Maryam Mehri},
  title = {Designing a Heuristic Cross-Architecture Combination for Breadth-First Search},
  booktitle = {43rd International Conference on Parallel Processing, {ICPP} 2014, Minneapolis, MN, USA, September 9-12, 2014},
  pages = {70--79},
  publisher = {{IEEE} Computer Society},
  year = {2014},
  url = {https://doi.org/10.1109/ICPP.2014.16},
  doi = {10.1109/ICPP.2014.16},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/icpp/YouBD14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

MIC-SVM: Designing a Highly Efficient Support Vector Machine for Advanced Modern Multi-core and Many-Core Architectures. You, Y.; Song, S. L.; Fu, H.; Marquez, A.; Dehnavi, M. M.; Barker, K. J.; Cameron, K. W.; Randles, A. P.; and Yang, G. In 2014 IEEE 28th International Parallel and Distributed Processing Symposium, Phoenix, AZ, USA, May 19-23, 2014, pages 809–818, 2014. IEEE Computer Society

MIC-SVM: Designing a Highly Efficient Support Vector Machine for Advanced Modern Multi-core and Many-Core Architectures [link]

Paper doi link bibtex

% Conference paper record (dblp export): 2014 IEEE 28th IPDPS, pages 809--818.
@inproceedings{DBLP:conf/ipps/YouSFMDBCRY14,
  author = {You, Yang
            and Song, Shuaiwen Leon
            and Fu, Haohuan
            and Marquez, Andres
            and Dehnavi, Maryam Mehri
            and Barker, Kevin J.
            and Cameron, Kirk W.
            and Randles, Amanda Peters
            and Yang, Guangwen},
  title = {{MIC-SVM:} Designing a Highly Efficient Support Vector Machine for Advanced Modern Multi-core and Many-Core Architectures},
  booktitle = {2014 {IEEE} 28th International Parallel and Distributed Processing Symposium, Phoenix, AZ, USA, May 19-23, 2014},
  pages = {809--818},
  publisher = {{IEEE} Computer Society},
  year = {2014},
  url = {https://doi.org/10.1109/IPDPS.2014.88},
  doi = {10.1109/IPDPS.2014.88},
  timestamp = {Tue, 07 May 2024 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/ipps/YouSFMDBCRY14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

2013 (1)

Parallel Sparse Approximate Inverse Preconditioning on Graphic Processing Units. Dehnavi, M. M.; Fernandez, D. M.; Gaudiot, J.; and Giannacopoulos, D. D. IEEE Trans. Parallel Distributed Syst., 24(9): 1852–1862. 2013.

Parallel Sparse Approximate Inverse Preconditioning on Graphic Processing Units [link]

Paper doi link bibtex

% Journal article record (dblp export): IEEE Trans. Parallel Distributed Syst. 24(9), 2013.
@article{DBLP:journals/tpds/DehnaviFGG13,
  author = {Dehnavi, Maryam Mehri
            and Fernandez, David M.
            and Gaudiot, Jean{-}Luc
            and Giannacopoulos, Dennis D.},
  title = {Parallel Sparse Approximate Inverse Preconditioning on Graphic Processing Units},
  journal = {{IEEE} Trans. Parallel Distributed Syst.},
  volume = {24},
  number = {9},
  pages = {1852--1862},
  year = {2013},
  url = {https://doi.org/10.1109/TPDS.2012.286},
  doi = {10.1109/TPDS.2012.286},
  timestamp = {Fri, 02 Oct 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/tpds/DehnaviFGG13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}