<script src="https://bibbase.org/show?bib=http://mahdi.ch/writings/cheraghchi.bib&jsonp=1"></script>
<?php
$contents = file_get_contents("https://bibbase.org/show?bib=http://mahdi.ch/writings/cheraghchi.bib");
print_r($contents);
?>
<iframe src="https://bibbase.org/show?bib=http://mahdi.ch/writings/cheraghchi.bib"></iframe>
For more details see the documentation.
To the site owner:
Action required! Mendeley is changing its API. In order to keep using Mendeley with BibBase past April 14th, you need to:
@article{ref:CR23,
  author   = {Cheraghchi, Mahdi and Ribeiro, Jo\~{a}o},
  title    = {Simple Codes and Sparse Recovery with Fast Decoding},
  journal  = {SIAM Journal on Discrete Mathematics},
  volume   = {37},
  number   = {2},
  pages    = {612--631},
  year     = {2023},
  doi      = {10.1137/21M1465354},
  url      = {https://doi.org/10.1137/21M1465354},
  abstract = {Construction of error-correcting codes achieving a designated minimum distance parameter is a central problem in coding theory. In this work, we study a very simple construction of binary linear codes that correct a given number of errors $K$. Moreover, we design a simple, nearly optimal syndrome decoder for the code as well. The running time of the decoder is only logarithmic in the block length of the code and nearly linear in the number of errors $K$. This decoder can be applied to exact for-all sparse recovery over any field, improving upon previous results with the same number of measurements. Furthermore, computation of the syndrome from a received word can be done in nearly linear time in the block length. We also demonstrate an application of these techniques in nonadaptive group testing and construct simple explicit measurement schemes with $O(K^2 \log^2 N)$ tests and $O(K^3 \log^2 N)$ recovery time for identifying up to $K$ defectives in a population of size $N$.}
}
@inproceedings{ref:BCGR23,
  author    = {Huck Bennett and Mahdi Cheraghchi and Venkatesan Guruswami and Jo\~{a}o Ribeiro},
  title     = {Parameterized inapproximability of the minimum distance problem over all fields and the shortest vector problem in all $\ell_p$ norms},
  booktitle = {Proceedings of the 55th Annual ACM Symposium on Theory of Computing (STOC)},
  year      = 2023,
  url_Paper = {https://eccc.weizmann.ac.il/report/2022/156/},
  abstract  = {We prove that the Minimum Distance Problem (MDP) on linear codes over any fixed finite field and parameterized by the input distance bound is W[1]-hard to approximate within any constant factor. We also prove analogous results for the parameterized Shortest Vector Problem (SVP) on integer lattices. Specifically, we prove that SVP in the $\ell_p$ norm is W[1]-hard to approximate within any constant factor for any fixed $p > 1$ and W[1]-hard to approximate within a factor approaching 2 for $p = 1$. (These results all hold under randomized reductions.) These results answer the main questions left open (and asked) by Bhattacharyya, Bonnet, Egri, Ghoshal, Karthik C. S., Lin, Manurangsi, and Marx (Journal of the ACM, 2021) on the complexity of parameterized MDP and SVP. For MDP, they established similar hardness for binary linear codes and left the case of general fields open. For SVP in $\ell_p$ norms with $p > 1$, they showed inapproximability within some constant factor (depending on $p$) and left open showing such hardness for arbitrary constant factors. They also left open showing W[1]-hardness even of exact SVP in the $\ell_1$ norm.}
}
@article{ref:CHMY22,
  title     = {One-tape turing machine and branching program lower bounds for {MCSP}},
  author    = {Cheraghchi, Mahdi and Hirahara, Shuichi and Myrisiotis, Dimitrios and Yoshida, Yuichi},
  journal   = {Theory of Computing Systems},
  pages     = {1--32},
  year      = {2022},
  publisher = {Springer},
  doi       = {10.1007/s00224-022-10113-9},
  url_Link  = {https://link.springer.com/article/10.1007/s00224-022-10113-9},
  url_Paper = {https://eccc.weizmann.ac.il/report/2020/103/},
  abstract  = {For a size parameter $s:\mathbb{N}\to\mathbb{N}$, the Minimum Circuit Size Problem (denoted by $MCSP[s(n)]$) is the problem of deciding whether the minimum circuit size of a given function $f: \{0,1\}^n \to \{0,1\}$ (represented by a string of length $N := 2^n$) is at most a threshold $s(n)$. A recent line of work exhibited ``hardness magnification'' phenomena for MCSP: A very weak lower bound for MCSP implies a breakthrough result in complexity theory. For example, McKay, Murray, and Williams (STOC 2019) implicitly showed that, for some constant $\mu_1 > 0$, if $MCSP[2^{\mu_1\cdot n}]$ cannot be computed by a one-tape Turing machine (with an additional one-way read-only input tape) running in time $N^{1.01}$, then $P\neq NP$. In this paper, we present the following new lower bounds against one-tape Turing machines and branching programs: 1) A randomized two-sided error one-tape Turing machine (with an additional one-way read-only input tape) cannot compute $MCSP[2^{\mu_2\cdot n}]$ in time $N^{1.99}$, for some constant $\mu_2 > \mu_1$. 2) A non-deterministic (or parity) branching program of size $o(N^{1.5}/\log N)$ cannot compute MKTP, which is a time-bounded Kolmogorov complexity analogue of MCSP. This is shown by directly applying the Ne\v{c}iporuk method to MKTP, which previously appeared to be difficult.
These results are the first non-trivial lower bounds for MCSP and MKTP against one-tape Turing machines and non-deterministic branching programs, and essentially match the best-known lower bounds for any explicit functions against these computational models. The first result is based on recent constructions of pseudorandom generators for read-once oblivious branching programs (ROBPs) and combinatorial rectangles (Forbes and Kelley, FOCS 2018; Viola 2019). En route, we obtain several related results: 1)There exists a (local) hitting set generator with seed length $\widetilde{O}(\sqrt{N})$ secure against read-once polynomial-size non-deterministic branching programs on $N$-bit inputs. 2) Any read-once co-non-deterministic branching program computing MCSP must have size at least $2^{\widetilde{\Omega}(N)}$. }
}
@article{ref:CDRV22,
  author    = {Mahdi Cheraghchi and Joseph Downs and Jo\~{a}o Ribeiro and Alexandra Veliche},
  title     = {Mean-Based Trace Reconstruction over Oblivious Synchronization Channels},
  year      = 2022,
  journal   = {IEEE Transactions on Information Theory},
  url_Paper = {https://arxiv.org/abs/2102.09490},
  url_Link  = {https://ieeexplore.ieee.org/document/9743284},
  doi       = {10.1109/TIT.2022.3157383},
  abstract  = {Mean-based reconstruction is a fundamental, natural approach to worst-case trace reconstruction over channels with synchronization errors. It is known that $\exp(\Theta(n^{1/3}))$ traces are necessary and sufficient for mean-based worst-case trace reconstruction over the deletion channel, and this result was also extended to certain channels combining deletions and geometric insertions of uniformly random bits. In this work, we use a simple extension of the original complex-analytic approach to show that these results are examples of a much more general phenomenon. We introduce oblivious synchronization channels, which map each input bit to an arbitrarily distributed sequence of replications and insertions of random bits. This general class captures all previously considered synchronization channels. We show that for any oblivious synchronization channel whose output length follows a sub-exponential distribution either mean-based trace reconstruction is impossible or $\exp(O(n^{1/3}))$ traces suffice for this task.}
}
@article{9513255,
  author  = {Bui, Thach V. and Cheraghchi, Mahdi and Echizen, Isao},
  journal = {IEEE Transactions on Information Theory},
  title   = {Improved non-adaptive algorithms for threshold group testing with a gap},
  year    = {2021},
  doi     = {10.1109/TIT.2021.3104670}
}
@article{ref:jour:CR21,
  author    = {Mahdi Cheraghchi and Jo\~{a}o Ribeiro},
  title     = {Non-Asymptotic Capacity Upper Bounds for the Discrete-Time {Poisson} Channel with Positive Dark Current},
  journal   = {IEEE Communications Letters},
  year      = 2021,
  doi       = {10.1109/LCOMM.2021.3120706},
  url_Paper = {https://arxiv.org/abs/2010.14858}
}
@inproceedings{ref:conf:ACMTV21,
  author    = {Eric Allender and Mahdi Cheraghchi and Dimitrios Myrisiotis and Harsha Tirumala and Ilya Volkovich},
  title     = {One-way functions and a conditional variant of {MKTP}},
  booktitle = {Proceedings of the 41st conference on Foundations of Software Technology and Theoretical Computer Science (FSTTCS)},
  year      = 2021,
  url_Paper = {https://eccc.weizmann.ac.il/report/2021/009/}
}
@inproceedings{ref:CGM21,
  author    = {Mahdi Cheraghchi and Ryan Gabrys and Olgica Milenkovic},
  title     = {Semiquantitative Group Testing in at Most Two Rounds},
  year      = 2021,
  booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}},
  url_Paper = {https://arxiv.org/abs/2102.04519},
  url_Link  = {https://ieeexplore.ieee.org/document/9518270},
  doi       = {10.1109/ISIT45174.2021.9518270},
  abstract  = {Semiquantitative group testing (SQGT) is a pooling method in which the test outcomes represent bounded intervals for the number of defectives. Alternatively, it may be viewed as an adder channel with quantized outputs. SQGT represents a natural choice for Covid-19 group testing as it allows for a straightforward interpretation of the cycle threshold values produced by polymerase chain reactions (PCR). Prior work on SQGT did not address the need for adaptive testing with a small number of rounds as required in practice. We propose conceptually simple methods for $2$-round and nonadaptive SQGT that significantly improve upon existing schemes by using ideas on nonbinary measurement matrices based on expander graphs and list-disjunct matrices.}
}
@inproceedings{ref:CDRV21,
  author    = {Mahdi Cheraghchi and Joseph Downs and Jo\~{a}o Ribeiro and Alexandra Veliche},
  title     = {Mean-Based Trace Reconstruction over Practically any Replication-Insertion Channel},
  year      = 2021,
  booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}},
  url_Paper = {https://arxiv.org/abs/2102.09490},
  url_Link  = {https://ieeexplore.ieee.org/document/9518161},
  doi       = {10.1109/ISIT45174.2021.9518161},
  abstract  = {Mean-based reconstruction is a fundamental, natural approach to worst-case trace reconstruction over channels with synchronization errors. It is known that $\exp(O(n^{1/3}))$ traces are necessary and sufficient for mean-based worst-case trace reconstruction over the deletion channel, and this result was also extended to certain channels combining deletions and geometric insertions of uniformly random bits. In this work, we use a simple extension of the original complex-analytic approach to show that these results are examples of a much more general phenomenon: $\exp(O(n^{1/3}))$ traces suffice for mean-based worst-case trace reconstruction over any memoryless channel that maps each input bit to an arbitrarily distributed sequence of replications and insertions of random bits, provided the length of this sequence follows a sub-exponential distribution.}
}
@inproceedings{ref:BCN21,
  author    = {Thach V. Bui and Mahdi Cheraghchi and Thuc D. Nguyen},
  title     = {Improved Algorithms for Non-adaptive Group Testing with Consecutive Positives},
  year      = 2021,
  booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}},
  url_Paper = {https://arxiv.org/abs/2101.11294},
  url_Link  = {https://ieeexplore.ieee.org/document/9518277},
  doi       = {10.1109/ISIT45174.2021.9518277},
  abstract  = {The goal of group testing is to efficiently identify a few specific items, called positives, in a large population of items via tests. A test is an action on a subset of items that returns positive if the subset contains at least one positive and negative otherwise. In non-adaptive group testing, all tests are independent, can be performed in parallel, and represented as a measurement matrix. In this work, we consider non-adaptive group testing with consecutive positives in which the items are linearly ordered and the positives are consecutive in that order. We present two algorithms for efficiently identifying consecutive positives. In particular, without storing measurement matrices, we can identify up to $d$ consecutive positives with $2 \log_2{\frac{n}{d}} + 2d$ ($4 \log_2{\frac{n}{d}} + 2d$, resp.) tests in $O( \log_2^2{\frac{n}{d}} + d)$ ($O( \log_2{\frac{n}{d}} + d)$, resp.) time. These results significantly improve the state-of-the-art scheme in which it takes $5 \log_2{\frac{n}{d}} + 2d + 21$ tests to identify the positives in $O( \frac{n}{d} \log_2{\frac{n}{d}} + d^2)$ time with the measurement matrices associated with the scheme stored somewhere. }
}
@inproceedings{ref:CGJWX21,
  author    = {Mahdi Cheraghchi and Elena Grigorescu and Brendan Juba and Karl Wimmer and Ning Xie},
  title     = {List Learning with Attribute Noise},
  year      = 2021,
  booktitle = {Proceedings of The 24th International Conference on Artificial Intelligence and Statistics (AISTATS)},
  pages     = {2215--2223},
  editor    = {Banerjee, Arindam and Fukumizu, Kenji},
  volume    = {130},
  series    = {Proceedings of Machine Learning Research},
  month     = {13--15 Apr},
  publisher = {PMLR},
  url_Link  = {http://proceedings.mlr.press/v130/cheraghchi21a.html},
  url_Paper = {https://arxiv.org/abs/2006.06850},
  abstract  = {We introduce and study the model of list learning with attribute noise. Learning with attribute noise was introduced by Shackelford and Volper (COLT 1988) as a variant of PAC learning, in which the algorithm has access to noisy examples and uncorrupted labels, and the goal is to recover an accurate hypothesis. Sloan (COLT 1988) and Goldman and Sloan (Algorithmica 1995) discovered information-theoretic limits to learning in this model, which have impeded further progress. In this article we extend the model to that of list learning, drawing inspiration from the list-decoding model in coding theory, and its recent variant studied in the context of learning. On the positive side, we show that sparse conjunctions can be efficiently list learned under some assumptions on the underlying ground-truth distribution. On the negative side, our results show that even in the list-learning model, efficient learning of parities and majorities is not possible regardless of the representation used.}
}
@inproceedings{ref:conf:CHMY21,
  author    = {Mahdi Cheraghchi and Shuichi Hirahara and Dimitrios Myrisiotis and Yuichi Yoshida},
  title     = {One-tape Turing machine and branching program lower bounds for {MCSP}},
  booktitle = {Proceedings of the 38th {International Symposium on Theoretical Aspects of Computer Science (STACS)}},
  year      = 2021,
  keywords  = {Minimum Circuit Size Problem, One-Tape Turing Machines, Branching Programs, Lower Bounds, Pseudorandom Generators, Hitting Set Generators},
  doi       = {10.4230/LIPIcs.STACS.2021.23},
  url_Link  = {https://drops.dagstuhl.de/opus/volltexte/2021/13668/},
  url_Paper = {https://eccc.weizmann.ac.il/report/2020/103/},
  note      = {Invited to the special issue of Theory of Computing Systems.},
  abstract  = {For a size parameter $s:\mathbb{N}\to\mathbb{N}$, the Minimum Circuit Size Problem (denoted by $MCSP[s(n)]$) is the problem of deciding whether the minimum circuit size of a given function $f: \{0,1\}^n \to \{0,1\}$ (represented by a string of length $N := 2^n$) is at most a threshold $s(n)$. A recent line of work exhibited ``hardness magnification'' phenomena for MCSP: A very weak lower bound for MCSP implies a breakthrough result in complexity theory. For example, McKay, Murray, and Williams (STOC 2019) implicitly showed that, for some constant $\mu_1 > 0$, if $MCSP[2^{\mu_1\cdot n}]$ cannot be computed by a one-tape Turing machine (with an additional one-way read-only input tape) running in time $N^{1.01}$, then $P\neq NP$. In this paper, we present the following new lower bounds against one-tape Turing machines and branching programs: 1) A randomized two-sided error one-tape Turing machine (with an additional one-way read-only input tape) cannot compute $MCSP[2^{\mu_2\cdot n}]$ in time $N^{1.99}$, for some constant $\mu_2 > \mu_1$. 2) A non-deterministic (or parity) branching program of size $o(N^{1.5}/\log N)$ cannot compute MKTP, which is a time-bounded Kolmogorov complexity analogue of MCSP.
This is shown by directly applying the Ne\v{c}iporuk method to MKTP, which previously appeared to be difficult. These results are the first non-trivial lower bounds for MCSP and MKTP against one-tape Turing machines and non-deterministic branching programs, and essentially match the best-known lower bounds for any explicit functions against these computational models. The first result is based on recent constructions of pseudorandom generators for read-once oblivious branching programs (ROBPs) and combinatorial rectangles (Forbes and Kelley, FOCS 2018; Viola 2019). En route, we obtain several related results: 1)There exists a (local) hitting set generator with seed length $\widetilde{O}(\sqrt{N})$ secure against read-once polynomial-size non-deterministic branching programs on $N$-bit inputs. 2) Any read-once co-non-deterministic branching program computing MCSP must have size at least $2^{\widetilde{\Omega}(N)}$. }
}
@article{ref:CR21,
  author    = {Mahdi Cheraghchi and Jo\~{a}o Ribeiro},
  title     = {An Overview of Capacity Results for Synchronization Channels},
  journal   = {IEEE Transactions on Information Theory},
  doi       = {10.1109/TIT.2020.2997329},
  year      = {2021},
  volume    = {67},
  number    = {6},
  pages     = {3207--3232},
  url_Link  = {https://ieeexplore.ieee.org/document/9099526},
  url_Paper = {https://arxiv.org/abs/1910.07199},
  abstract  = {Synchronization channels, such as the well-known deletion channel, are surprisingly harder to analyze than memoryless channels, and they are a source of many fundamental problems in information theory and theoretical computer science. One of the most basic open problems regarding synchronization channels is the derivation of an exact expression for their capacity. Unfortunately, most of the classic information-theoretic techniques at our disposal fail spectacularly when applied to synchronization channels. Therefore, new approaches must be considered to tackle this problem. This survey gives an account of the great effort made over the past few decades to better understand the (broadly defined) capacity of synchronization channels, including both the main results and the novel techniques underlying them. Besides the usual notion of channel capacity, we also discuss the zero-error capacity of adversarial synchronization channels. }
}
@unpublished{ref:GPRRCGM20,
  author        = {Ryan Gabrys and Srilakshmi Pattabiraman and Vishal Rana and Jo\~{a}o Ribeiro and Mahdi Cheraghchi and Venkatesan Guruswami and Olgica Milenkovic},
  title         = {{AC-DC}: Amplification Curve Diagnostics for {Covid-19} Group Testing},
  year          = {2020},
  eprint        = {2011.05223},
  archivePrefix = {arXiv},
  primaryClass  = {q-bio.QM},
  note          = {arXiv:2011.05223},
  url_Paper     = {https://arxiv.org/abs/2011.05223},
  abstract      = {The first part of the paper presents a review of the gold-standard testing protocol for Covid-19, real-time, reverse transcriptase PCR, and its properties and associated measurement data such as amplification curves that can guide the development of appropriate and accurate adaptive group testing protocols. The second part of the paper is concerned with examining various off-the-shelf group testing methods for Covid-19 and identifying their strengths and weaknesses for the application at hand. The third part of the paper contains a collection of new analytical results for adaptive semiquantitative group testing with probabilistic and combinatorial priors, including performance bounds, algorithmic solutions, and noisy testing protocols. The probabilistic setting is of special importance as it is designed to be simple to implement by nonexperts and handle heavy hitters. The worst-case paradigm extends and improves upon prior work on semiquantitative group testing with and without specialized PCR noise models.}
}
@inproceedings{ref:CN20,
  author    = {Mahdi Cheraghchi and Vasileios Nakos},
  title     = {Combinatorial Group Testing Schemes with Near-Optimal Decoding Time},
  booktitle = {Proceedings of the 61st Annual {IEEE} Symposium on Foundations of Computer Science {(FOCS)}},
  year      = 2020,
  doi       = {10.1109/FOCS46700.2020.00115},
  url_Link  = {https://ieeexplore.ieee.org/document/9317982},
  url_Paper = {https://arxiv.org/abs/2006.08420},
  abstract  = {In the long-studied problem of combinatorial group testing, one is asked to detect a set of $k$ defective items out of a population of size $n$, using $m \ll n$ disjunctive measurements. In the non-adaptive setting, the most widely used combinatorial objects are disjunct and list-disjunct matrices, which define incidence matrices of test schemes. Disjunct matrices allow the identification of the exact set of defectives, whereas list disjunct matrices identify a small superset of the defectives. Apart from the combinatorial guarantees, it is often of key interest to equip measurement designs with efficient decoding algorithms. The most efficient decoders should run in sublinear time in $n$, and ideally near-linear in the number of measurements $m$. In this work, we give several constructions with an optimal number of measurements and near-optimal decoding time for the most fundamental group testing tasks, as well as for central tasks in the compressed sensing and heavy hitters literature. For many of those tasks, the previous measurement-optimal constructions needed time either quadratic in the number of measurements or linear in the universe size. Among our results are the following: a construction of disjunct matrices matching the best known construction in terms of the number of rows $m$, but achieving $O(m)$ decoding time.
a construction of list disjunct matrices with the optimal $m=O(k \log(n/k))$ number of rows and nearly linear decoding time in $m$; error-tolerant variations of the above constructions; a non-adaptive group testing scheme for the "for-each" model with $m=O(k \log n)$ measurements and $O(m)$ decoding time; a streaming algorithm for the "for-all" version of the heavy hitters problem in the strict turnstile model with near-optimal query time, as well as a "list decoding" variant obtaining also near-optimal update time and $O(k\log(n/k))$ space usage; an $\ell_2/\ell_2$ weak identification system for compressed sensing with nearly optimal sample complexity and nearly linear decoding time in the sketch length. Most of our results are obtained via a clean and novel approach which avoids list-recoverable codes or related complex techniques which were present in almost every state-of-the-art work on efficiently decodable constructions of such objects. }
}
@inproceedings{ref:LCGSW20,
  author    = {Fuchun Lin and Mahdi Cheraghchi and Venkatesan Guruswami and Reihaneh Safavi-Naini and Huaxiong Wang},
  title     = {Leakage-Resilient Secret Sharing in Non-compartmentalized Models},
  year      = 2020,
  booktitle = {Proceedings of the {Conference on Information-Theoretic Cryptography (ITC)}},
  pages     = {7:1--7:24},
  volume    = 163,
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  url_Link  = {https://drops.dagstuhl.de/opus/volltexte/2020/12112},
  url_Paper = {https://arxiv.org/abs/1902.06195},
  doi       = {10.4230/LIPIcs.ITC.2020.7},
  keywords  = {Leakage-resilient cryptography, Secret sharing scheme, Randomness extractor},
  abstract  = {Non-malleable secret sharing was recently proposed by Goyal and Kumar in independent tampering and joint tampering models for threshold secret sharing (STOC18) and secret sharing with general access structure (CRYPTO18). The idea of making secret sharing non-malleable received great attention and by now has generated many papers exploring new frontiers in this topic, such as multiple-time tampering and adding leakage resiliency to the one-shot tampering model. Non-compartmentalized tampering model was first studied by Agrawal et.al (CRYPTO15) for non-malleability against permutation composed with bit-wise independent tampering, and shown useful in constructing non-malleable string commitments. In spite of strong demands in application, there are only a few tampering families studied in non-compartmentalized model, due to the fact that compartmentalization (assuming that the adversary can not access all pieces of sensitive data at the same time) is crucial for most of the known techniques. We initiate the study of leakage-resilient secret sharing in the non-compartmentalized model. Leakage in leakage-resilient secret sharing is usually modelled as arbitrary functions with bounded total output length applied to each share or up to a certain number of shares (but never the full share vector) at one time.
Arbitrary leakage functions, even with one bit output, applied to the full share vector is impossible to resist since the reconstruction algorithm itself can be used to construct a contradiction. We allow the leakage functions to be applied to the full share vector (non-compartmentalized) but restrict to the class of affine leakage functions. The leakage adversary can corrupt several players and obtain their shares, as in normal secret sharing. The leakage adversary can apply arbitrary affine functions with bounded total output length to the full share vector and obtain the outputs as leakage. These two processes can be both non-adaptive and do not depend on each other, or both adaptive and depend on each other with arbitrary ordering. We use a generic approach that combines randomness extractors with error correcting codes to construct such leakage-resilient secret sharing schemes, and achieve constant information ratio (the scheme for non-adaptive adversary is near optimal). We then explore making the non-compartmentalized leakage-resilient secret sharing also non-malleable against tampering. We consider a tampering model, where the adversary can use the shares obtained from the corrupted players and the outputs of the global leakage functions to choose a tampering function from a tampering family $\mathcal{F}$. We give two constructions of such leakage-resilient non-malleable secret sharing for the case $\mathcal{F}$ is the bit-wise independent tampering and, respectively, for the case $\mathcal{F}$ is the affine tampering functions, the latter is non-compartmentalized tampering that subsumes the permutation composed with bit-wise independent tampering mentioned above. }
}
@inproceedings{ref:BCE20,
  author    = {Thach V. Bui and Mahdi Cheraghchi and Isao Echizen},
  title     = {Improved Non-adaptive algorithms for Threshold Group Testing with a Gap},
  year      = 2020,
  booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}},
  doi       = {10.1109/ISIT44484.2020.9174212},
  url_Link  = {https://ieeexplore.ieee.org/document/9174212},
  url_Paper = {https://arxiv.org/abs/2001.01008},
  abstract  = {The basic goal of threshold group testing is to identify up to $d$ defective items among a population of $n$ items, where $d$ is usually much smaller than $n$. The outcome of a test on a subset of items is positive if the subset has at least $u$ defective items, negative if it has up to $\ell$ defective items, where $0 \leq \ell < u$, and arbitrary otherwise. This is called threshold group testing. The parameter $g = u - \ell - 1$ is called \textit{the gap}. In this paper, we focus on the case $g > 0$, i.e., threshold group testing with a gap. Note that the results presented here are also applicable to the case $g = 0$; however, the results are not as efficient as those in related work. Currently, a few reported studies have investigated test designs and decoding algorithms for identifying defective items. Most of the previous studies have not been feasible because there are numerous constraints on their problem settings or the decoding complexities of their proposed schemes are relatively large. Therefore, it is compulsory to reduce the number of tests as well as the decoding complexity, i.e., the time for identifying the defective items, for achieving practical schemes. The work presented here makes five contributions. The first is a more accurate theorem for a non-adaptive algorithm for threshold group testing proposed by Chen and Fu.
The second is an improvement in the construction of disjunct matrices, which are the main tools for tackling (threshold) group testing and other tasks such as constructing cover-free families or learning hidden graphs. Specifically, we present a better upper bound on the number of tests for disjunct matrices compared with that in related work. The third and fourth contributions are a reduction in the number of tests and a reduction in the decoding time for identifying defective items in a noisy setting on test outcomes. The fifth contribution is a simulation on the number of tests of the resulting improvements for previous work and the proposed theorems. }
}
@article{ref:CGMR20,
  author    = {Mahdi Cheraghchi and Ryan Gabrys and Olgica Milenkovic and Jo\~{a}o Ribeiro},
  title     = {Coded Trace Reconstruction},
  year      = 2020,
  journal   = {IEEE Transactions on Information Theory},
  note      = {Preliminary version in Proceedings of {ITW 2019}.},
  doi       = {10.1109/TIT.2020.2996377},
  volume    = {66},
  number    = {10},
  pages     = {6084--6103},
  url_Link  = {https://ieeexplore.ieee.org/document/9097932},
  url_Paper = {https://arxiv.org/abs/1903.09992},
  abstract  = {Motivated by average-case trace reconstruction and coding for portable DNA-based storage systems, we initiate the study of \textit{coded trace reconstruction}, the design and analysis of high-rate efficiently encodable codes that can be efficiently decoded with high probability from few reads (also called \textit{traces}) corrupted by edit errors. Codes used in current portable DNA-based storage systems with nanopore sequencers are largely based on heuristics, and have no provable robustness or performance guarantees even for an error model with i.i.d. deletions and constant deletion probability. Our work is a first step towards the design of efficient codes with provable guarantees for such systems. We consider a constant rate of i.i.d. deletions, and perform an analysis of marker-based code-constructions. This gives rise to codes with redundancy $O(n/\log n)$ (resp. $O(n/\log\log n)$) that can be efficiently reconstructed from $\exp(O(\log^{2/3}n))$ (resp. $\exp(O(\log\log n)^{2/3})$) traces, where $n$ is the message length. Then, we give a construction of a code with $O(\log n)$ bits of redundancy that can be efficiently reconstructed from $\mathrm{poly}(n)$ traces if the deletion probability is small enough. Finally, we show how to combine both approaches, giving rise to an efficient code with $O(n/\log n)$ bits of redundancy which can be reconstructed from $\mathrm{poly}(\log n)$ traces for a small constant deletion probability. }
}
@ARTICLE{ref:CKLM20, title = {Circuit Lower Bounds for {MCSP} from Local Pseudorandom Generators}, author = {Mahdi Cheraghchi and Valentine Kabanets and Zhenjian Lu and Dimitrios Myrisiotis}, year = 2020, journal = {{ACM Transactions on Computation Theory (ToCT)}}, volume = 12, number = 3, articleno = 21, note = {Preliminary version in {Proceedings of ICALP 2019}.}, url_Paper = {https://eccc.weizmann.ac.il/report/2019/022}, url_Link = {https://dl.acm.org/doi/abs/10.1145/3404860}, doi = {10.1145/3404860}, abstract = {The Minimum Circuit Size Problem (MCSP) asks if a given truth table of a Boolean function $f$ can be computed by a Boolean circuit of size at most $\theta$, for a given parameter $\theta$. We improve several circuit lower bounds for MCSP, using pseudorandom generators (PRGs) that are local; a PRG is called \textit{local} if its output bit strings, when viewed as the truth table of a Boolean function, can be computed by a Boolean circuit of small size. We get new and improved lower bounds for MCSP that almost match the best-known lower bounds against several circuit models. Specifically, we show that computing MCSP, on functions with a truth table of length $N$, requires: 1) $N^{3-o(1)}$-size de Morgan formulas, improving the recent $N^{2-o(1)}$ lower bound by Hirahara and Santhanam (CCC 2017), 2) $N^{2-o(1)}$-size formulas over an arbitrary basis or general branching programs (no non-trivial lower bound was known for MCSP against these models), and 3) $2^{\Omega(N^{1/(d+1.01)})}$-size depth-$d$ $\mathrm{AC}^0$ circuits, improving the (implicit, in their work) exponential size lower bound by Allender et al. (SICOMP 2006). The $\mathrm{AC}^0$ lower bound stated above matches the best-known $\mathrm{AC}^0$ lower bound (for {\tt PARITY}) up to a small \textit{additive} constant in the depth. Also, for the special case of depth-$2$ circuits (i.e., CNFs or DNFs), we get an optimal lower bound of \(2^{{\Omega}(N)}\) for MCSP. 
}, keywords = {Minimum circuit size problem (MCSP), circuit lower bounds, pseudorandom generators (PRGs), constant-depth circuits, branching programs, local PRGs, de Morgan formulas} }
@INPROCEEDINGS{ref:conf:CGMR19, author = {Mahdi Cheraghchi and Ryan Gabrys and Olgica Milenkovic and Jo\~{a}o Ribeiro}, title = {Coded Trace Reconstruction}, year = 2019, booktitle = {Proceedings of the {IEEE Information Theory Workshop (ITW)}}, pages = {1--5}, note = {Extended version in {IEEE Transactions on Information Theory}.}, doi = {10.1109/ITW44776.2019.8989261}, url_Link = {https://ieeexplore.ieee.org/document/8989261}, url_Paper = {https://arxiv.org/abs/1903.09992}, abstract = {Motivated by average-case trace reconstruction and coding for portable DNA-based storage systems, we initiate the study of \textit{coded trace reconstruction}, the design and analysis of high-rate efficiently encodable codes that can be efficiently decoded with high probability from few reads (also called \textit{traces}) corrupted by edit errors. Codes used in current portable DNA-based storage systems with nanopore sequencers are largely based on heuristics, and have no provable robustness or performance guarantees even for an error model with i.i.d. deletions and constant deletion probability. Our work is a first step towards the design of efficient codes with provable guarantees for such systems. We consider a constant rate of i.i.d. deletions, and perform an analysis of marker-based code-constructions. This gives rise to codes with redundancy $O(n/\log n)$ (resp. $O(n/\log\log n)$) that can be efficiently reconstructed from $\exp(O(\log^{2/3}n))$ (resp. $\exp(O(\log\log n)^{2/3})$) traces, where $n$ is the message length. Then, we give a construction of a code with $O(\log n)$ bits of redundancy that can be efficiently reconstructed from $\mathrm{poly}(n)$ traces if the deletion probability is small enough. Finally, we show how to combine both approaches, giving rise to an efficient code with $O(n/\log n)$ bits of redundancy which can be reconstructed from $\mathrm{poly}(\log n)$ traces for a small constant deletion probability. } }
@INPROCEEDINGS{ref:conf:CKLM19, author = {Mahdi Cheraghchi and Valentine Kabanets and Zhenjian Lu and Dimitrios Myrisiotis}, title = {Circuit Lower Bounds for {MCSP} from Local Pseudorandom Generators}, year = 2019, booktitle = {Proceedings of the 46th {International Colloquium on Automata, Languages and Programming (ICALP)}}, pages = {39:1--39:14}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, volume = 132, note = {Extended version in {ACM Transactions on Computation Theory (ToCT)}.}, doi = {10.4230/LIPIcs.ICALP.2019.39}, url_Link = {https://drops.dagstuhl.de/opus/frontdoor.php?source_opus=10615}, url_Paper = {https://eccc.weizmann.ac.il/report/2019/022}, abstract = {The Minimum Circuit Size Problem (MCSP) asks if a given truth table of a Boolean function $f$ can be computed by a Boolean circuit of size at most $\theta$, for a given parameter $\theta$. We improve several circuit lower bounds for MCSP, using pseudorandom generators (PRGs) that are local; a PRG is called \textit{local} if its output bit strings, when viewed as the truth table of a Boolean function, can be computed by a Boolean circuit of small size. We get new and improved lower bounds for MCSP that almost match the best-known lower bounds against several circuit models. Specifically, we show that computing MCSP, on functions with a truth table of length $N$, requires: 1) $N^{3-o(1)}$-size de Morgan formulas, improving the recent $N^{2-o(1)}$ lower bound by Hirahara and Santhanam (CCC 2017), 2) $N^{2-o(1)}$-size formulas over an arbitrary basis or general branching programs (no non-trivial lower bound was known for MCSP against these models), and 3) $2^{\Omega(N^{1/(d+1.01)})}$-size depth-$d$ $\mathrm{AC}^0$ circuits, improving the (implicit, in their work) exponential size lower bound by Allender et al. (SICOMP 2006). 
The $\mathrm{AC}^0$ lower bound stated above matches the best-known $\mathrm{AC}^0$ lower bound (for {\tt PARITY}) up to a small \textit{additive} constant in the depth. Also, for the special case of depth-$2$ circuits (i.e., CNFs or DNFs), we get an optimal lower bound of \(2^{{\Omega}(N)}\) for MCSP. }, keywords = {minimum circuit size problem (MCSP), circuit lower bounds, pseudorandom generators (PRGs), local PRGs, de Morgan formulas, branching programs, constant depth circuits} }
@INPROCEEDINGS{ref:LSCW19, author = {Fuchun Lin and Reihaneh Safavi-Naini and Mahdi Cheraghchi and Huaxiong Wang}, title = {Non-Malleable Codes against Active Physical Layer Adversary}, year = 2019, doi = {10.1109/ISIT.2019.8849438}, booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}}, pages = {2753--2757}, url_Link = {https://ieeexplore.ieee.org/document/8849438}, abstract = {Non-malleable codes are randomized codes that protect coded messages against modification by functions in a tampering function class. These codes are motivated by providing tamper resilience in applications where a cryptographic secret is stored in a tamperable storage device and the protection goal is to ensure that the adversary cannot benefit from their physical tampering with the device. In this paper we consider nonmalleable codes for protection of secure communication against active physical layer adversaries. We define a class of functions that closely model tampering of communication by adversaries who can eavesdrop on a constant fraction of the transmitted codeword, and use this information to select a vector of tampering functions that will be applied to a second constant fraction of codeword components (possibly overlapping with the first set). We derive rate bounds for non-malleable codes for this function class and give a modular construction that adapts and provides new analysis for an existing construction in the new setting. We discuss our results and directions for future work. } }
@INPROCEEDINGS{ref:CR19b, author = {Mahdi Cheraghchi and Jo\~{a}o Ribeiro}, title = {Simple Codes and Sparse Recovery with Fast Decoding}, year = 2019, doi = {10.1109/ISIT.2019.8849702}, booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}}, pages = {156--160}, url_Link = {https://ieeexplore.ieee.org/document/8849702}, url_Paper = {https://arxiv.org/abs/1901.02852}, abstract = {Construction of error-correcting codes achieving a designated minimum distance parameter is a central problem in coding theory. A classical and algebraic family of error-correcting codes studied for this purpose are the BCH codes. In this work, we study a very simple construction of linear codes that achieve a given distance parameter $K$. Moreover, we design a simple, nearly optimal syndrome decoder for the code as well. The running time of the decoder is only logarithmic in the block length of the code, and nearly linear in the distance parameter $K$. This decoder can be applied to exact for-all sparse recovery over any field, improving upon previous results with the same number of measurements. Furthermore, computation of the syndrome from a received word can be done in nearly linear time in the block length. We also demonstrate an application of these techniques in non-adaptive group testing, and construct simple explicit measurement schemes with $O(K^2 \log^2 N)$ tests and $O(K^3 \log^2 N)$ recovery time for identifying up to $K$ defectives in a population of size $N$. } }
@INPROCEEDINGS{ref:LCGSW19, author = {Fuchun Lin and Mahdi Cheraghchi and Venkatesan Guruswami and Reihaneh Safavi-Naini and Huaxiong Wang}, title = {Secret Sharing with Binary Shares}, year = 2019, doi = {10.4230/LIPIcs.ITCS.2019.53}, booktitle = {Proceedings of the 10th {Innovations in Theoretical Computer Science Conference (ITCS)}}, pages = {53:1--53:20}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, volume = 124, url_Link = {https://drops.dagstuhl.de/opus/frontdoor.php?source_opus=10146}, url_Paper = {https://arxiv.org/abs/1808.02974}, abstract = {Shamir's celebrated secret sharing scheme provides an efficient method for encoding a secret of arbitrary length $\ell$ among any $N \leq 2^\ell$ players such that for a threshold parameter $t$, (i) the knowledge of any $t$ shares does not reveal any information about the secret and, (ii) any choice of $t+1$ shares fully reveals the secret. It is known that any such threshold secret sharing scheme necessarily requires shares of length $\ell$, and in this sense Shamir's scheme is optimal. The more general notion of ramp schemes requires the reconstruction of secret from any $t+g$ shares, for a positive integer gap parameter $g$. Ramp secret sharing scheme necessarily requires shares of length $\ell/g$. Other than the bound related to secret length $\ell$, the share lengths of ramp schemes can not go below a quantity that depends only on the gap ratio $g/N$. In this work, we study secret sharing in the extremal case of bit-long shares and arbitrarily small gap ratio $g/N$, where standard ramp secret sharing becomes impossible. We show, however, that a slightly relaxed but equally effective notion of semantic security for the secret, and negligible reconstruction error probability, eliminate the impossibility. Moreover, we provide explicit constructions of such schemes. 
One of the consequences of our relaxation is that, unlike standard ramp schemes with perfect secrecy, adaptive and non-adaptive adversaries need different analysis and construction. For non-adaptive adversaries, we explicitly construct secret sharing schemes that provide secrecy against any $\tau$ fraction of observed shares, and reconstruction from any $\rho$ fraction of shares, for any choices of $0 \leq \tau < \rho \leq 1$. Our construction achieves secret length $N(\rho-\tau-o(1))$, which we show to be optimal. For adaptive adversaries, we construct explicit schemes attaining a secret length $\Omega(N(\rho-\tau))$. We discuss our results and open questions. }, keywords = {Secret sharing scheme, Wiretap channel} }
@ARTICLE{ref:CR19:sticky, author = {Mahdi Cheraghchi and Jo\~{a}o Ribeiro}, title = {Sharp Analytical Capacity Upper Bounds for Sticky and Related Channels}, year = 2019, journal = {IEEE Transactions on Information Theory}, volume = 65, number = 11, pages = {6950--6974}, note = {Preliminary version in {Proceedings of Allerton 2018}.}, doi = {10.1109/TIT.2019.2920375}, url_Link = {https://ieeexplore.ieee.org/document/8727914}, url_Paper = {https://arxiv.org/abs/1806.06218}, abstract = {We study natural examples of binary channels with synchronization errors. These include the duplication channel, which independently outputs a given bit once or twice, and geometric channels that repeat a given bit according to a geometric rule, with or without the possibility of bit deletion. We apply the general framework of Cheraghchi (STOC 2018) to obtain sharp analytical upper bounds on the capacity of these channels. Previously, upper bounds were known via numerical computations involving the computation of finite approximations of the channels by a computer and then using the obtained numerical results to upper bound the actual capacity. While leading to sharp numerical results, further progress on the full understanding of the channel capacity inherently remains elusive using such methods. Our results can be regarded as a major step towards a complete understanding of the capacity curves. Quantitatively, our upper bounds sharply approach, and in some cases surpass, the bounds that were previously only known by purely numerical methods. Among our results, we notably give a completely analytical proof that, when the number of repetitions per bit is geometric (supported on $\{0,1,2,\dots\}$) with mean growing to infinity, the channel capacity remains substantially bounded away from $1$. } }
@ARTICLE{ref:Che19:del, author = {Mahdi Cheraghchi}, title = {Capacity Upper Bounds for Deletion-type Channels}, year = 2019, journal = {Journal of the {ACM}}, volume = 66, number = 2, month = {Mar}, articleno = 9, numpages = 79, note = {Preliminary version in Proceedings of {STOC 2018}.}, doi = {10.1145/3281275}, url_Link = {https://dl.acm.org/doi/10.1145/3281275}, url_Paper = {https://arxiv.org/abs/1711.01630}, abstract = {We develop a systematic approach, based on convex programming and real analysis, for obtaining upper bounds on the capacity of the binary deletion channel and, more generally, channels with i.i.d. insertions and deletions. Other than the classical deletion channel, we give a special attention to the Poisson-repeat channel introduced by Mitzenmacher and Drinea (IEEE Transactions on Information Theory, 2006). Our framework can be applied to obtain capacity upper bounds for any repetition distribution (the deletion and Poisson-repeat channels corresponding to the special cases of Bernoulli and Poisson distributions). Our techniques essentially reduce the task of proving capacity upper bounds to maximizing a univariate, real-valued, and often concave function over a bounded interval. The corresponding univariate function is carefully designed according to the underlying distribution of repetitions and the choices vary depending on the desired strength of the upper bounds as well as the desired simplicity of the function (e.g., being only efficiently computable versus having an explicit closed-form expression in terms of elementary, or common special, functions). Among our results, we show the following: 1) The capacity of the binary deletion channel with deletion probability $d$ is at most $(1-d) \log \phi$ for $d \geq 1/2$, and, assuming that the capacity function is convex, is at most $1-d \log(4/\phi)$ for $d<1/2$, where $\phi=(1+\sqrt{5})/2$ is the golden ratio. 
This is the first nontrivial capacity upper bound for any value of $d$ outside the limiting case $d \to 0$ that is fully explicit and proved without computer assistance. 2) We derive the first set of capacity upper bounds for the Poisson-repeat channel. Our results uncover further striking connections between this channel and the deletion channel, and suggest, somewhat counter-intuitively, that the Poisson-repeat channel is actually analytically simpler than the deletion channel and may be of key importance to a complete understanding of the deletion channel. 3) We derive several novel upper bounds on the capacity of the deletion channel. All upper bounds are maximums of efficiently computable, and concave, univariate real functions over a bounded domain. In turn, we upper bound these functions in terms of explicit elementary and standard special functions, whose maximums can be found even more efficiently (and sometimes, analytically, for example for $d=1/2$). }, keywords = {Coding theory, Synchronization error-correcting codes, Channel coding} }
@ARTICLE{ref:CR19:poi, author = {Mahdi Cheraghchi and Jo\~{a}o Ribeiro}, title = {Improved Upper Bounds and Structural Results on the Capacity of the Discrete-Time {Poisson} Channel}, year = 2019, journal = {IEEE Transactions on Information Theory}, volume = 65, number = 7, pages = {4052--4068}, note = {Preliminary version in Proceedings of {ISIT 2018}.}, doi = {10.1109/TIT.2019.2896931}, url_Link = {https://ieeexplore.ieee.org/document/8632953}, url_Paper = {https://arxiv.org/abs/1801.02745}, abstract = {New capacity upper bounds are presented for the discrete-time Poisson channel with no dark current and an average-power constraint. These bounds are a consequence of techniques developed for the seemingly unrelated problem of upper bounding the capacity of binary deletion and repetition channels. Previously, the best known capacity upper bound in the regime where the average-power constraint does not approach zero was due to Martinez (JOSA B, 2007), which is re-derived as a special case of the framework developed in this paper. Furthermore, this framework is carefully instantiated in order to obtain a closed-form bound that improves the result of Martinez everywhere. Finally, capacity-achieving distributions for the discrete-time Poisson channel are studied under an average-power constraint and/or a peak-power constraint and arbitrary dark current. In particular, it is shown that the support of the capacity-achieving distribution under an average-power constraint must only be countably infinite. This settles a conjecture of Shamai (IEE Proceedings I, 1990) in the affirmative. Previously, it was only known that the support must be an unbounded set. } }
@ARTICLE{ref:Che19:entropy, author = {Mahdi Cheraghchi}, title = {Expressions for the Entropy of Basic Discrete Distributions}, year = 2019, journal = {{IEEE Transactions on Information Theory}}, volume = 65, number = 7, pages = {3999--4009}, note = {Preliminary version in Proceedings of {ISIT 2018}.}, doi = {10.1109/TIT.2019.2900716}, url_Link = {https://ieeexplore.ieee.org/document/8648470}, url_Paper = {https://arxiv.org/abs/1708.06394}, abstract = {We develop a general method for computing logarithmic and log-gamma expectations of distributions. As a result, we derive series expansions and integral representations of the entropy for several fundamental distributions, including the Poisson, binomial, beta-binomial, negative binomial, and hypergeometric distributions. Our results also establish connections between the entropy functions and to the Riemann zeta function and its generalizations. } }
@ARTICLE{ref:BKCE19, author = {Thach V. Bui and Minoru Kuribayashi and Mahdi Cheraghchi and Isao Echizen}, title = {Efficiently Decodable Non-Adaptive Threshold Group Testing}, year = 2019, journal = {{IEEE Transactions on Information Theory}}, volume = 65, number = 9, pages = {5519--5528}, note = {Preliminary version in Proceedings of {ISIT 2018}.}, doi = {10.1109/TIT.2019.2907990}, url_Link = {https://ieeexplore.ieee.org/document/8676252}, url_Paper = {https://arxiv.org/abs/1712.07509}, abstract = {We consider non-adaptive threshold group testing for identification of up to $d$ defective items in a set of $n$ items, where a test is positive if it contains at least $2 \leq u \leq d$ defective items, and negative otherwise. The defective items can be identified using $t = O ( ( \frac{d}{u} )^u ( \frac{d}{d - u} )^{d-u} (u \log{\frac{d}{u}} + \log{\frac{1}{\epsilon}} ) \cdot d^2 \log{n} )$ tests with probability at least $1 - \epsilon$ for any $\epsilon > 0$ or $t = O ( ( \frac{d}{u} )^u ( \frac{d}{d -u} )^{d - u} d^3 \log{n} \cdot \log{\frac{n}{d}} )$ tests with probability 1. The decoding time is $t \times \mathrm{poly}(d^2 \log{n})$. This result significantly improves the best known results for decoding non-adaptive threshold group testing: $O(n\log{n} + n \log{\frac{1}{\epsilon}})$ for probabilistic decoding, where $\epsilon > 0$, and $O(n^u \log{n})$ for deterministic decoding. } }
@ARTICLE{ref:Che19:SS, author = {Mahdi Cheraghchi}, title = {Nearly optimal robust secret sharing}, journal = {Designs, Codes and Cryptography}, volume = 87, number = 8, pages = {1777--1796}, year = 2019, doi = {10.1007/s10623-018-0578-y}, url_Link = {https://link.springer.com/article/10.1007/s10623-018-0578-y}, keywords = {Coding and information theory, Cryptography, Algebraic coding theory}, abstract = {We prove that a known approach to improve Shamir's celebrated secret sharing scheme; i.e., adding an information-theoretic authentication tag to the secret, can make it robust for $n$ parties against any collusion of size $\delta n$, for any constant $\delta \in (0, 1/2)$. This result holds in the so-called ``non-rushing'' model in which the $n$ shares are submitted simultaneously for reconstruction. We thus obtain a fully explicit and robust secret sharing scheme in this model that is essentially optimal in all parameters including the share size which is $k(1+o(1)) + O(\kappa)$, where $k$ is the secret length and $\kappa$ is the security parameter. Like Shamir's scheme, in this modified scheme any set of more than $\delta n$ honest parties can efficiently recover the secret. Using algebraic geometry codes instead of Reed-Solomon codes, the share length can be decreased to a constant (only depending on $\delta$) while the number of shares $n$ can grow independently. In this case, when $n$ is large enough, the scheme satisfies the ``threshold'' requirement in an approximate sense; i.e., any set of $\delta n(1+\rho)$ honest parties, for arbitrarily small $\rho > 0$, can efficiently reconstruct the secret.}, note = {Preliminary version in Proceedings of {ISIT 2016.}}, url_Paper = {https://eprint.iacr.org/2015/951} }
@INPROCEEDINGS{ref:conf:CR18, author = {Mahdi Cheraghchi and Jo\~{a}o Ribeiro}, title = {Sharp Analytical Capacity Upper Bounds for Sticky and Related Channels}, year = 2018, booktitle = {Proceedings of the 56th {Annual Allerton Conference on Communication, Control, and Computing}}, pages = {1104--1111}, note = {Extended version in {IEEE Transactions on Information Theory}.}, doi = {10.1109/ALLERTON.2018.8636009}, url_Link = {https://ieeexplore.ieee.org/document/8636009}, url_Paper = {https://arxiv.org/abs/1806.06218}, abstract = {We study natural examples of binary channels with synchronization errors. These include the duplication channel, which independently outputs a given bit once or twice, and geometric channels that repeat a given bit according to a geometric rule, with or without the possibility of bit deletion. We apply the general framework of Cheraghchi (STOC 2018) to obtain sharp analytical upper bounds on the capacity of these channels. Previously, upper bounds were known via numerical computations involving the computation of finite approximations of the channels by a computer and then using the obtained numerical results to upper bound the actual capacity. While leading to sharp numerical results, further progress on the full understanding of the channel capacity inherently remains elusive using such methods. Our results can be regarded as a major step towards a complete understanding of the capacity curves. Quantitatively, our upper bounds sharply approach, and in some cases surpass, the bounds that were previously only known by purely numerical methods. Among our results, we notably give a completely analytical proof that, when the number of repetitions per bit is geometric (supported on $\{0,1,2,\dots\}$) with mean growing to infinity, the channel capacity remains substantially bounded away from $1$. } }
@INPROCEEDINGS{ref:conf:Che18:del, author = {Mahdi Cheraghchi}, title = {Capacity Upper Bounds for Deletion-type Channels}, year = 2018, booktitle = {Proceedings of the 50th {ACM Symposium on Theory of Computing (STOC)}}, pages = {493--506}, note = {Extended version in {Journal of the ACM}.}, doi = {10.1145/3188745.3188768}, url_Link = {https://dl.acm.org/doi/10.1145/3188745.3188768}, url_Paper = {https://arxiv.org/abs/1711.01630}, abstract = {We develop a systematic approach, based on convex programming and real analysis, for obtaining upper bounds on the capacity of the binary deletion channel and, more generally, channels with i.i.d. insertions and deletions. Other than the classical deletion channel, we give a special attention to the Poisson-repeat channel introduced by Mitzenmacher and Drinea (IEEE Transactions on Information Theory, 2006). Our framework can be applied to obtain capacity upper bounds for any repetition distribution (the deletion and Poisson-repeat channels corresponding to the special cases of Bernoulli and Poisson distributions). Our techniques essentially reduce the task of proving capacity upper bounds to maximizing a univariate, real-valued, and often concave function over a bounded interval. The corresponding univariate function is carefully designed according to the underlying distribution of repetitions and the choices vary depending on the desired strength of the upper bounds as well as the desired simplicity of the function (e.g., being only efficiently computable versus having an explicit closed-form expression in terms of elementary, or common special, functions). Among our results, we show the following: 1) The capacity of the binary deletion channel with deletion probability $d$ is at most $(1-d) \log \phi$ for $d \geq 1/2$, and, assuming that the capacity function is convex, is at most $1-d \log(4/\phi)$ for $d<1/2$, where $\phi=(1+\sqrt{5})/2$ is the golden ratio. 
This is the first nontrivial capacity upper bound for any value of $d$ outside the limiting case $d \to 0$ that is fully explicit and proved without computer assistance. 2) We derive the first set of capacity upper bounds for the Poisson-repeat channel. Our results uncover further striking connections between this channel and the deletion channel, and suggest, somewhat counter-intuitively, that the Poisson-repeat channel is actually analytically simpler than the deletion channel and may be of key importance to a complete understanding of the deletion channel. 3) We derive several novel upper bounds on the capacity of the deletion channel. All upper bounds are maximums of efficiently computable, and concave, univariate real functions over a bounded domain. In turn, we upper bound these functions in terms of explicit elementary and standard special functions, whose maximums can be found even more efficiently (and sometimes, analytically, for example for $d=1/2$). }, keywords = {Coding theory, Synchronization error-correcting codes, Channel coding} }
@INPROCEEDINGS{ref:conf:CR18:poi, author = {Mahdi Cheraghchi and Jo\~{a}o Ribeiro}, title = {Improved Upper Bounds on the Capacity of the Discrete-Time {Poisson} Channel}, year = 2018, booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}}, pages = {1769--1773}, note = {Extended version in {IEEE Transactions on Information Theory}.}, doi = {10.1109/ISIT.2018.8437514}, url_Link = {https://ieeexplore.ieee.org/document/8437514}, url_Paper = {https://arxiv.org/abs/1801.02745}, abstract = {New capacity upper bounds are presented for the discrete-time Poisson channel with no dark current and an average-power constraint. These bounds are a consequence of techniques developed for the seemingly unrelated problem of upper bounding the capacity of binary deletion and repetition channels. Previously, the best known capacity upper bound in the regime where the average-power constraint does not approach zero was due to Martinez (JOSA B, 2007), which is re-derived as a special case of the framework developed in this paper. Furthermore, this framework is carefully instantiated in order to obtain a closed-form bound that improves the result of Martinez everywhere. Finally, capacity-achieving distributions for the discrete-time Poisson channel are studied under an average-power constraint and/or a peak-power constraint and arbitrary dark current. In particular, it is shown that the support of the capacity-achieving distribution under an average-power constraint must only be countably infinite. This settles a conjecture of Shamai (IEE Proceedings I, 1990) in the affirmative. Previously, it was only known that the support must be an unbounded set. } }
@INPROCEEDINGS{ref:conf:Che18:entropy, author = {Mahdi Cheraghchi}, title = {Expressions for the Entropy of Binomial-Type Distributions}, year = 2018, booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}}, pages = {2520--2524}, note = {Extended version in {IEEE Transactions on Information Theory}.}, doi = {10.1109/ISIT.2018.8437888}, url_Link = {https://ieeexplore.ieee.org/document/8437888}, url_Paper = {https://arxiv.org/abs/1708.06394}, abstract = {We develop a general method for computing logarithmic and log-gamma expectations of distributions. As a result, we derive series expansions and integral representations of the entropy for several fundamental distributions, including the Poisson, binomial, beta-binomial, negative binomial, and hypergeometric distributions. Our results also establish connections between the entropy functions and to the Riemann zeta function and its generalizations. } }
@INPROCEEDINGS{ref:conf:BKCE18, author = {Thach V. Bui and Minoru Kuribayashi and Mahdi Cheraghchi and Isao Echizen}, title = {Efficiently Decodable Non-Adaptive Threshold Group Testing}, year = 2018, booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}}, note = {Extended version in {IEEE Transactions on Information Theory}.}, pages = {2584--2588}, doi = {10.1109/ISIT.2018.8437847}, url_Link = {https://ieeexplore.ieee.org/document/8437847}, url_Paper = {https://arxiv.org/abs/1712.07509}, abstract = {We consider non-adaptive threshold group testing for identification of up to $d$ defective items in a set of $n$ items, where a test is positive if it contains at least $2 \leq u \leq d$ defective items, and negative otherwise. The defective items can be identified using $t = O ( ( \frac{d}{u} )^u ( \frac{d}{d - u} )^{d-u} (u \log{\frac{d}{u}} + \log{\frac{1}{\epsilon}} ) \cdot d^2 \log{n} )$ tests with probability at least $1 - \epsilon$ for any $\epsilon > 0$ or $t = O ( ( \frac{d}{u} )^u ( \frac{d}{d -u} )^{d - u} d^3 \log{n} \cdot \log{\frac{n}{d}} )$ tests with probability 1. The decoding time is $t \times \mathrm{poly}(d^2 \log{n})$. This result significantly improves the best known results for decoding non-adaptive threshold group testing: $O(n\log{n} + n \log{\frac{1}{\epsilon}})$ for probabilistic decoding, where $\epsilon > 0$, and $O(n^u \log{n})$ for deterministic decoding. } }
@ARTICLE{ref:CCGG18, author = {Chandrasekaran, Karthekeyan and Cheraghchi, Mahdi and Gandikota, Venkata and Grigorescu, Elena}, title = {Local Testing of Lattices}, journal = {SIAM Journal on Discrete Mathematics}, volume = 32, number = 2, pages = {1265--1295}, year = 2018, doi = {10.1137/17M1110353}, note = {Preliminary version in Proceedings of {FSTTCS 2016}.}, url_Link = {https://epubs.siam.org/doi/10.1137/17M1110353}, url_Paper = {https://arxiv.org/abs/1608.00180}, abstract = {Testing membership in lattices is of practical relevance, with applications to integer programming, error detection in lattice-based communication, and cryptography. In this work, we initiate a systematic study of local testing for membership in lattices, complementing and building upon the extensive body of work on locally testable codes. In particular, we formally define the notion of local tests for lattices and present the following: 1. We show that in order to achieve low query complexity, it is sufficient to design $1$-sided nonadaptive canonical tests. This result is akin to, and based on, an analogous result for error-correcting codes due to [E. Ben-Sasson, P. Harsha, and S. Raskhodnikova, SIAM J. Comput., 35 (2005), pp. 1--21]. 2. We demonstrate upper and lower bounds on the query complexity of local testing for membership in code formula lattices. We instantiate our results for code formula lattices constructed from Reed--Muller codes to obtain nearly matching upper and lower bounds on the query complexity of testing such lattices. 3. We contrast lattice testing to code testing by showing lower bounds on the query complexity of testing low-dimensional lattices. This illustrates large lower bounds on the query complexity of testing membership in the well-known knapsack lattices. On the other hand, we show that knapsack lattices with bounded coefficients have low-query testers if the inputs are promised to lie in the span of the lattice. } }
@ARTICLE{ref:CGJWX18,
  author    = {Mahdi Cheraghchi and Elena Grigorescu and Brendan Juba and Karl Wimmer and Ning Xie},
  title     = {{AC0-MOD2} lower bounds for the {Boolean} Inner Product},
  journal   = {Journal of Computer and System Sciences},
  volume    = 97,
  pages     = {45--59},
  year      = 2018,
  doi       = {10.1016/j.jcss.2018.04.006},
  url_Link  = {http://www.sciencedirect.com/science/article/pii/S002200001830518X},
  keywords  = {Boolean analysis, Circuit complexity, Lower bounds},
  abstract  = {AC0-MOD2 circuits are AC0 circuits augmented with a layer of parity gates just above the input layer. We study AC0-MOD2 circuit lower bounds for computing the Boolean Inner Product functions. Recent works by Servedio and Viola (ECCC TR12-144) and Akavia et al. (ITCS 2014) have highlighted this problem as a frontier problem in circuit complexity that arose both as a first step towards solving natural special cases of the matrix rigidity problem and as a candidate for constructing pseudorandom generators of minimal complexity. We give the first superlinear lower bound for the Boolean Inner Product function against AC0-MOD2 of depth four or greater. Specifically, we prove a superlinear lower bound for circuits of arbitrary constant depth, and an $\tilde{\Omega}(n^2)$ lower bound for the special case of depth-4 AC0-MOD2.},
  note      = {Preliminary version in Proceedings of {ICALP 2016.}},
  url_Paper = {https://eccc.weizmann.ac.il//report/2015/030/}
}
@ARTICLE{ref:CI17, author = {Mahdi Cheraghchi and Piotr Indyk}, title = {Nearly Optimal Deterministic Algorithm for Sparse Walsh-Hadamard Transform}, year = 2017, volume = 13, number = 3, doi = {10.1145/3029050}, journal = {ACM Transactions on Algorithms}, month = mar, articleno = 34, numpages = 36, keywords = {Sparse recovery, sublinear time algorithms, pseudorandomness, explicit constructions, sketching, sparse Fourier transform}, url_Link = {https://dl.acm.org/doi/10.1145/3029050}, abstract = {For every fixed constant $\alpha > 0$, we design an algorithm for computing the $k$-sparse Walsh-Hadamard transform (i.e., Discrete Fourier Transform over the Boolean cube) of an $N$-dimensional vector $x \in \mathbb{R}^N$ in time $k^{1+\alpha} (\log N)^{O(1)}$. Specifically, the algorithm is given query access to $x$ and computes a $k$-sparse $\tilde{x} \in \mathbb{R}^N$ satisfying $\|\tilde{x} - \hat{x}\|_1 \leq c \|\hat{x} - H_k(\hat{x})\|_1$, for an absolute constant $c > 0$, where $\hat{x}$ is the transform of $x$ and $H_k(\hat{x})$ is its best $k$-sparse approximation. Our algorithm is fully deterministic and only uses non-adaptive queries to $x$ (i.e., all queries are determined and performed in parallel when the algorithm starts). An important technical tool that we use is a construction of nearly optimal and linear lossless condensers which is a careful instantiation of the GUV condenser (Guruswami, Umans, Vadhan, JACM 2009). Moreover, we design a deterministic and non-adaptive $\ell_1/\ell_1$ compressed sensing scheme based on general lossless condensers that is equipped with a fast reconstruction algorithm running in time $k^{1+\alpha} (\log N)^{O(1)}$ (for the GUV-based condenser) and is of independent interest. Our scheme significantly simplifies and improves an earlier expander-based construction due to Berinde, Gilbert, Indyk, Karloff, Strauss (Allerton 2008). 
Our methods use linear lossless condensers in a black box fashion; therefore, any future improvement on explicit constructions of such condensers would immediately translate to improved parameters in our framework (potentially leading to $k (\log N)^{O(1)}$ reconstruction time with a reduced exponent in the poly-logarithmic factor, and eliminating the extra parameter $\alpha$). By allowing the algorithm to use randomness, while still using non-adaptive queries, the running time of the algorithm can be improved to $\tilde{O}(k \log^3 N)$.}, note = {Preliminary version in Proceedings of {SODA 2016.}}, url_Paper = {https://eccc.weizmann.ac.il//report/2015/076/} }
@ARTICLE{ref:CG17, author = {Mahdi Cheraghchi and Venkatesan Guruswami}, title = {Non-malleable Coding Against Bit-Wise and Split-State Tampering}, year = 2017, volume = 30, pages = {191--241}, doi = {10.1007/s00145-015-9219-z}, journal = {Journal of Cryptology}, keywords = {Information theory, Tamper-resilient cryptography, Coding theory, Error detection, Randomness extractors}, url_Link = {https://link.springer.com/article/10.1007/s00145-015-9219-z}, abstract = {Non-malleable coding, introduced by Dziembowski, Pietrzak and Wichs (ICS 2010), aims for protecting the integrity of information against tampering attacks in situations where error-detection is impossible. Intuitively, information encoded by a non-malleable code either decodes to the original message or, in presence of any tampering, to an unrelated message. Non-malleable coding is possible against any class of adversaries of bounded size. In particular, Dziembowski et al. show that such codes exist and may achieve positive rates for any class of tampering functions of size at most $2^{2^{\alpha n}}$, for any constant $\alpha \in [0, 1)$. However, this result is existential and has thus attracted a great deal of subsequent research on explicit constructions of non-malleable codes against natural classes of adversaries. In this work, we consider constructions of coding schemes against two well-studied classes of tampering functions; namely, bit-wise tampering functions (where the adversary tampers each bit of the encoding independently) and the much more general class of split-state adversaries (where two independent adversaries arbitrarily tamper each half of the encoded sequence). We obtain the following results for these models. 1) For bit-tampering adversaries, we obtain explicit and efficiently encodable and decodable non-malleable codes of length $n$ achieving rate $1-o(1)$ and error (also known as "exact security") $\exp(-\tilde{\Omega}(n^{1/7}))$. 
Alternatively, it is possible to improve the error to $\exp(-\tilde{\Omega}(n))$ at the cost of making the construction Monte Carlo with success probability $1-\exp(-\Omega(n))$ (while still allowing a compact description of the code). Previously, the best known construction of bit-tampering coding schemes was due to Dziembowski et al. (ICS 2010), which is a Monte Carlo construction achieving rate close to $.1887$. 2) We initiate the study of \textit{seedless non-malleable extractors} as a natural variation of the notion of non-malleable extractors introduced by Dodis and Wichs (STOC 2009). We show that construction of non-malleable codes for the split-state model reduces to construction of non-malleable two-source extractors. We prove a general result on existence of seedless non-malleable extractors, which implies that codes obtained from our reduction can achieve rates arbitrarily close to $1/5$ and exponentially small error. In a separate recent work, the authors show that the optimal rate in this model is $1/2$. Currently, the best known explicit construction of split-state coding schemes is due to Aggarwal, Dodis and Lovett (ECCC TR13-081) which only achieves vanishing (polynomially small) rate. }, note = {Preliminary version in Proceedings of {TCC 2014.}}, url_Paper = {https://eccc.weizmann.ac.il//report/2013/121} }
@INPROCEEDINGS{ref:conf:CCGG16, author = {Chandrasekaran, Karthekeyan and Cheraghchi, Mahdi and Gandikota, Venkata and Grigorescu, Elena}, title = {Local Testing for Membership in Lattices}, booktitle = {36th {IARCS Annual Conference on Foundations of Software Technology and Theoretical Computer Science (FSTTCS)}}, pages = {46:1--46:14}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, year = 2016, volume = 65, doi = {10.4230/LIPIcs.FSTTCS.2016.46}, note = {Extended version in {SIAM Journal on Discrete Mathematics}.}, url_Link = {http://drops.dagstuhl.de/opus/volltexte/2016/6881}, url_Paper = {https://arxiv.org/abs/1608.00180}, abstract = {Testing membership in lattices is of practical relevance, with applications to integer programming, error detection in lattice-based communication, and cryptography. In this work, we initiate a systematic study of local testing for membership in lattices, complementing and building upon the extensive body of work on locally testable codes. In particular, we formally define the notion of local tests for lattices and present the following: 1. We show that in order to achieve low query complexity, it is sufficient to design $1$-sided nonadaptive canonical tests. This result is akin to, and based on, an analogous result for error-correcting codes due to [E. Ben-Sasson, P. Harsha, and S. Raskhodnikova, SIAM J. Comput., 35 (2005), pp. 1--21]. 2. We demonstrate upper and lower bounds on the query complexity of local testing for membership in code formula lattices. We instantiate our results for code formula lattices constructed from Reed--Muller codes to obtain nearly matching upper and lower bounds on the query complexity of testing such lattices. 3. We contrast lattice testing to code testing by showing lower bounds on the query complexity of testing low-dimensional lattices. This illustrates large lower bounds on the query complexity of testing membership in the well-known knapsack lattices. 
On the other hand, we show that knapsack lattices with bounded coefficients have low-query testers if the inputs are promised to lie in the span of the lattice. }, keywords = {Lattices, Property Testing, Locally Testable Codes, Complexity Theory} }
@INPROCEEDINGS{ref:conf:CGJWX16,
  author    = {Mahdi Cheraghchi and Elena Grigorescu and Brendan Juba and Karl Wimmer and Ning Xie},
  title     = {{AC0-MOD2} lower bounds for the {Boolean} Inner Product},
  booktitle = {Proceedings of the 43rd {International Colloquium on Automata, Languages, and Programming (ICALP 2016)}},
  pages     = {35:1--35:14},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  year      = 2016,
  volume    = 55,
  url_Link  = {http://drops.dagstuhl.de/opus/volltexte/2016/6315},
  doi       = {10.4230/LIPIcs.ICALP.2016.35},
  keywords  = {Boolean analysis, circuit complexity, lower bounds},
  abstract  = {AC0-MOD2 circuits are AC0 circuits augmented with a layer of parity gates just above the input layer. We study AC0-MOD2 circuit lower bounds for computing the Boolean Inner Product functions. Recent works by Servedio and Viola (ECCC TR12-144) and Akavia et al. (ITCS 2014) have highlighted this problem as a frontier problem in circuit complexity that arose both as a first step towards solving natural special cases of the matrix rigidity problem and as a candidate for constructing pseudorandom generators of minimal complexity. We give the first superlinear lower bound for the Boolean Inner Product function against AC0-MOD2 of depth four or greater. Specifically, we prove a superlinear lower bound for circuits of arbitrary constant depth, and an $\tilde{\Omega}(n^2)$ lower bound for the special case of depth-4 AC0-MOD2.},
  note      = {Extended version in {Journal of Computer and System Sciences}.},
  url_Paper = {https://eccc.weizmann.ac.il//report/2015/030/}
}
@INPROCEEDINGS{ref:conf:Che16:SS, author = {Mahdi Cheraghchi}, title = {Nearly optimal robust secret sharing}, booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}}, year = 2016, pages = {2509--2513}, doi = {10.1109/ISIT.2016.7541751}, url_Link = {https://ieeexplore.ieee.org/document/7541751}, keywords = {Coding and information theory, Cryptography, Algebraic coding theory}, abstract = {We prove that a known approach to improve Shamir's celebrated secret sharing scheme; i.e., adding an information-theoretic authentication tag to the secret, can make it robust for $n$ parties against any collusion of size $\delta n$, for any constant $\delta \in (0, 1/2)$. This result holds in the so-called ``non-rushing'' model in which the $n$ shares are submitted simultaneously for reconstruction. We thus obtain a fully explicit and robust secret sharing scheme in this model that is essentially optimal in all parameters including the share size which is $k(1+o(1)) + O(\kappa)$, where $k$ is the secret length and $\kappa$ is the security parameter. Like Shamir's scheme, in this modified scheme any set of more than $\delta n$ honest parties can efficiently recover the secret. Using algebraic geometry codes instead of Reed-Solomon codes, the share length can be decreased to a constant (only depending on $\delta$) while the number of shares $n$ can grow independently. In this case, when $n$ is large enough, the scheme satisfies the ``threshold'' requirement in an approximate sense; i.e., any set of $\delta n(1+\rho)$ honest parties, for arbitrarily small $\rho > 0$, can efficiently reconstruct the secret.}, note = {Extended version in {Designs, Codes and Cryptography.}}, url_Paper = {https://eprint.iacr.org/2015/951} }
@INPROCEEDINGS{ref:conf:CI16,
  author    = {Mahdi Cheraghchi and Piotr Indyk},
  title     = {Nearly Optimal Deterministic Algorithm for Sparse Walsh-Hadamard Transform},
  year      = 2016,
  booktitle = {Proceedings of the 27th {Annual ACM-SIAM Symposium on Discrete Algorithms (SODA)}},
  pages     = {298--317},
  numpages  = 20,
  keywords  = {Sparse recovery, sublinear time algorithms, pseudorandomness, explicit constructions, sketching, sparse Fourier transform},
  url_Link  = {https://dl.acm.org/doi/10.5555/2884435.2884458},
  abstract  = {For every fixed constant $\alpha > 0$, we design an algorithm for computing the $k$-sparse Walsh-Hadamard transform (i.e., Discrete Fourier Transform over the Boolean cube) of an $N$-dimensional vector $x \in \mathbb{R}^N$ in time $k^{1+\alpha} (\log N)^{O(1)}$. Specifically, the algorithm is given query access to $x$ and computes a $k$-sparse $\tilde{x} \in \mathbb{R}^N$ satisfying $\|\tilde{x} - \hat{x}\|_1 \leq c \|\hat{x} - H_k(\hat{x})\|_1$, for an absolute constant $c > 0$, where $\hat{x}$ is the transform of $x$ and $H_k(\hat{x})$ is its best $k$-sparse approximation. Our algorithm is fully deterministic and only uses non-adaptive queries to $x$ (i.e., all queries are determined and performed in parallel when the algorithm starts). An important technical tool that we use is a construction of nearly optimal and linear lossless condensers which is a careful instantiation of the GUV condenser (Guruswami, Umans, Vadhan, JACM 2009). Moreover, we design a deterministic and non-adaptive $\ell_1/\ell_1$ compressed sensing scheme based on general lossless condensers that is equipped with a fast reconstruction algorithm running in time $k^{1+\alpha} (\log N)^{O(1)}$ (for the GUV-based condenser) and is of independent interest. Our scheme significantly simplifies and improves an earlier expander-based construction due to Berinde, Gilbert, Indyk, Karloff, Strauss (Allerton 2008). Our methods use linear lossless condensers in a black box fashion; therefore, any future improvement on explicit constructions of such condensers would immediately translate to improved parameters in our framework (potentially leading to $k (\log N)^{O(1)}$ reconstruction time with a reduced exponent in the poly-logarithmic factor, and eliminating the extra parameter $\alpha$). By allowing the algorithm to use randomness, while still using non-adaptive queries, the running time of the algorithm can be improved to $\tilde{O}(k \log^3 N)$.},
  note      = {Extended version in {ACM Transactions on Algorithms}.},
  url_Paper = {https://eccc.weizmann.ac.il//report/2015/076/}
}
@ARTICLE{ref:CG16, author = {Mahdi Cheraghchi and Venkatesan Guruswami}, title = {Capacity of Non-Malleable Codes}, journal = {IEEE Transactions on Information Theory}, year = 2016, volume = 62, number = 3, pages = {1097--1118}, doi = {10.1109/TIT.2015.2511784}, url_Link = {https://ieeexplore.ieee.org/document/7365445}, keywords = {cryptography, coding theory, tamper-resilient storage, probabilistic method, information theory, error detection}, abstract = {Non-malleable codes, introduced by Dziembowski, Pietrzak and Wichs (ICS 2010), encode messages $s$ in a manner so that tampering the codeword causes the decoder to either output $s$ or a message that is independent of $s$. While this is an impossible goal to achieve against unrestricted tampering functions, rather surprisingly non-malleable coding becomes possible against every fixed family $\mathcal{F}$ of tampering functions that is not too large (for instance, when $|\mathcal{F}| \le 2^{2^{\alpha n}}$ for some $\alpha <1$ where $n$ is the number of bits in a codeword). In this work, we study the "capacity of non-malleable codes," and establish optimal bounds on the achievable rate as a function of the family size, answering an open problem from Dziembowski et al. (ICS 2010). Specifically, 1) We prove that for every family $\mathcal{F}$ with $|\mathcal{F}| \le 2^{2^{\alpha n}}$, there exist non-malleable codes against $\mathcal{F}$ with rate arbitrarily close to $1-\alpha$ (this is achieved w.h.p. by a randomized construction). 2) We show the existence of families of size $\exp(n^{O(1)} 2^{\alpha n})$ against which there is no non-malleable code of rate $1-\alpha$ (in fact this is the case w.h.p for a random family of this size). 3) We also show that $1-\alpha$ is the best achievable rate for the family of functions which are only allowed to tamper the first $\alpha n$ bits of the codeword, which is of special interest. 
As a corollary, this implies that the capacity of non-malleable coding in the split-state model (where the tampering function acts independently but arbitrarily on the two halves of the codeword, a model which has received some attention recently) equals $1/2$. We also give an efficient Monte Carlo construction of codes of rate close to $1$ with polynomial time encoding and decoding that is non-malleable against any fixed $c > 0$ and family $\mathcal{F}$ of size $2^{n^c}$, in particular tampering functions with say cubic size circuits. }, note = {Preliminary version in Proceedings of {ITCS 2014.}}, url_Paper = {https://eccc.weizmann.ac.il//report/2013/118} }
@INPROCEEDINGS{ref:conf:CG14b,
  author    = {Mahdi Cheraghchi and Venkatesan Guruswami},
  title     = {Non-malleable Coding Against Bit-Wise and Split-State Tampering},
  year      = 2014,
  booktitle = {Proceedings of Theory of Cryptography Conference {(TCC)}},
  pages     = {440--464},
  doi       = {10.1007/978-3-642-54242-8_19},
  keywords  = {Information theory, Tamper-resilient cryptography, Coding theory, Error detection, Randomness extractors},
  url_Link  = {https://link.springer.com/chapter/10.1007/978-3-642-54242-8_19},
  abstract  = {Non-malleable coding, introduced by Dziembowski, Pietrzak and Wichs (ICS 2010), aims for protecting the integrity of information against tampering attacks in situations where error-detection is impossible. Intuitively, information encoded by a non-malleable code either decodes to the original message or, in presence of any tampering, to an unrelated message. Non-malleable coding is possible against any class of adversaries of bounded size. In particular, Dziembowski et al. show that such codes exist and may achieve positive rates for any class of tampering functions of size at most $2^{2^{\alpha n}}$, for any constant $\alpha \in [0, 1)$. However, this result is existential and has thus attracted a great deal of subsequent research on explicit constructions of non-malleable codes against natural classes of adversaries. In this work, we consider constructions of coding schemes against two well-studied classes of tampering functions; namely, bit-wise tampering functions (where the adversary tampers each bit of the encoding independently) and the much more general class of split-state adversaries (where two independent adversaries arbitrarily tamper each half of the encoded sequence). We obtain the following results for these models. 1) For bit-tampering adversaries, we obtain explicit and efficiently encodable and decodable non-malleable codes of length $n$ achieving rate $1-o(1)$ and error (also known as "exact security") $\exp(-\tilde{\Omega}(n^{1/7}))$. Alternatively, it is possible to improve the error to $\exp(-\tilde{\Omega}(n))$ at the cost of making the construction Monte Carlo with success probability $1-\exp(-\Omega(n))$ (while still allowing a compact description of the code). Previously, the best known construction of bit-tampering coding schemes was due to Dziembowski et al. (ICS 2010), which is a Monte Carlo construction achieving rate close to $.1887$. 2) We initiate the study of \textit{seedless non-malleable extractors} as a natural variation of the notion of non-malleable extractors introduced by Dodis and Wichs (STOC 2009). We show that construction of non-malleable codes for the split-state model reduces to construction of non-malleable two-source extractors. We prove a general result on existence of seedless non-malleable extractors, which implies that codes obtained from our reduction can achieve rates arbitrarily close to $1/5$ and exponentially small error. In a separate recent work, the authors show that the optimal rate in this model is $1/2$. Currently, the best known explicit construction of split-state coding schemes is due to Aggarwal, Dodis and Lovett (ECCC TR13-081) which only achieves vanishing (polynomially small) rate. },
  note      = {Extended version in {Journal of Cryptology.}},
  url_Paper = {https://eccc.weizmann.ac.il//report/2013/121}
}
@INPROCEEDINGS{ref:conf:CG14a, author = {Mahdi Cheraghchi and Venkatesan Guruswami}, title = {Capacity of Non-Malleable Codes}, year = 2014, url_Link = {https://doi.org/10.1145/2554797.2554814}, doi = {10.1145/2554797.2554814}, booktitle = {Proceedings of the 5th {Conference on Innovations in Theoretical Computer Science (ITCS)}}, pages = {155--168}, numpages = 14, keywords = {cryptography, coding theory, tamper-resilient storage, probabilistic method, information theory, error detection}, abstract = {Non-malleable codes, introduced by Dziembowski, Pietrzak and Wichs (ICS 2010), encode messages $s$ in a manner so that tampering the codeword causes the decoder to either output $s$ or a message that is independent of $s$. While this is an impossible goal to achieve against unrestricted tampering functions, rather surprisingly non-malleable coding becomes possible against every fixed family $\mathcal{F}$ of tampering functions that is not too large (for instance, when $|\mathcal{F}| \le 2^{2^{\alpha n}}$ for some $\alpha <1$ where $n$ is the number of bits in a codeword). In this work, we study the "capacity of non-malleable codes," and establish optimal bounds on the achievable rate as a function of the family size, answering an open problem from Dziembowski et al. (ICS 2010). Specifically, 1) We prove that for every family $\mathcal{F}$ with $|\mathcal{F}| \le 2^{2^{\alpha n}}$, there exist non-malleable codes against $\mathcal{F}$ with rate arbitrarily close to $1-\alpha$ (this is achieved w.h.p. by a randomized construction). 2) We show the existence of families of size $\exp(n^{O(1)} 2^{\alpha n})$ against which there is no non-malleable code of rate $1-\alpha$ (in fact this is the case w.h.p for a random family of this size). 3) We also show that $1-\alpha$ is the best achievable rate for the family of functions which are only allowed to tamper the first $\alpha n$ bits of the codeword, which is of special interest. 
As a corollary, this implies that the capacity of non-malleable coding in the split-state model (where the tampering function acts independently but arbitrarily on the two halves of the codeword, a model which has received some attention recently) equals $1/2$. We also give an efficient Monte Carlo construction of codes of rate close to $1$ with polynomial time encoding and decoding that is non-malleable against any fixed $c > 0$ and family $\mathcal{F}$ of size $2^{n^c}$, in particular tampering functions with say cubic size circuits. }, note = {Extended version in IEEE Transactions on Information Theory.}, url_Paper = {https://eccc.weizmann.ac.il//report/2013/118} }
@ARTICLE{ref:CGV13, author = {Cheraghchi, Mahdi and Guruswami, Venkatesan and Velingker, Ameya}, title = {Restricted Isometry of {Fourier} Matrices and List Decodability of Random Linear Codes}, journal = {SIAM Journal on Computing}, volume = 42, number = 5, pages = {1888--1914}, year = 2013, doi = {10.1137/120896773}, keywords = {combinatorial list decoding, compressed sensing, {Gaussian} processes}, url_Link = {https://epubs.siam.org/doi/abs/10.1137/120896773}, abstract = {We prove that a random linear code over $\mathbb{F}_q$, with probability arbitrarily close to $1$, is list decodable at radius $1-1/q-\epsilon$ with list size $L=O(1/\epsilon^2)$ and rate $R=\Omega_q(\epsilon^2/(\log^3(1/\epsilon)))$. Up to the polylogarithmic factor in $1/\epsilon$ and constant factors depending on $q$, this matches the lower bound $L=\Omega_q(1/\epsilon^2)$ for the list size and upper bound $R=O_q(\epsilon^2)$ for the rate. Previously only existence (and not abundance) of such codes was known for the special case $q=2$ (Guruswami, H{\aa}stad, Sudan and Zuckerman, 2002). In order to obtain our result, we employ a relaxed version of the well known Johnson bound on list decoding that translates the \textit{average} Hamming distance between codewords to list decoding guarantees. We furthermore prove that the desired average-distance guarantees hold for a code provided that a natural complex matrix encoding the codewords satisfies the Restricted Isometry Property with respect to the Euclidean norm (RIP-2). For the case of random binary linear codes, this matrix coincides with a random submatrix of the Hadamard-Walsh transform matrix that is well studied in the compressed sensing literature. Finally, we improve the analysis of Rudelson and Vershynin (2008) on the number of random frequency samples required for exact reconstruction of $k$-sparse signals of length $N$. 
Specifically, we improve the number of samples from $O(k \log(N) \log^2(k) (\log k + \log\log N))$ to $O(k \log(N) \cdot \log^3(k))$. The proof involves bounding the expected supremum of a related Gaussian process by using an improved analysis of the metric defined by the process. This improvement is crucial for our application in list decoding.}, note = {Preliminary version in Proceedings of {SODA 2013.}}, url_Paper = {https://eccc.weizmann.ac.il//report/2012/082} }
@INPROCEEDINGS{ref:conf:CGV13, author = {Cheraghchi, Mahdi and Guruswami, Venkatesan and Velingker, Ameya}, title = {Restricted Isometry of {Fourier} Matrices and List Decodability of Random Linear Codes}, year = 2013, booktitle = {Proceedings of the 24th {Annual ACM-SIAM Symposium on Discrete Algorithms (SODA)}}, pages = {432--442}, numpages = 11, keywords = {combinatorial list decoding, compressed sensing, {Gaussian} processes}, url_Link = {https://dl.acm.org/doi/10.5555/2627817.2627848}, abstract = {We prove that a random linear code over $\mathbb{F}_q$, with probability arbitrarily close to $1$, is list decodable at radius $1-1/q-\epsilon$ with list size $L=O(1/\epsilon^2)$ and rate $R=\Omega_q(\epsilon^2/(\log^3(1/\epsilon)))$. Up to the polylogarithmic factor in $1/\epsilon$ and constant factors depending on $q$, this matches the lower bound $L=\Omega_q(1/\epsilon^2)$ for the list size and upper bound $R=O_q(\epsilon^2)$ for the rate. Previously only existence (and not abundance) of such codes was known for the special case $q=2$ (Guruswami, H{\aa}stad, Sudan and Zuckerman, 2002). In order to obtain our result, we employ a relaxed version of the well known Johnson bound on list decoding that translates the \textit{average} Hamming distance between codewords to list decoding guarantees. We furthermore prove that the desired average-distance guarantees hold for a code provided that a natural complex matrix encoding the codewords satisfies the Restricted Isometry Property with respect to the Euclidean norm (RIP-2). For the case of random binary linear codes, this matrix coincides with a random submatrix of the Hadamard-Walsh transform matrix that is well studied in the compressed sensing literature. Finally, we improve the analysis of Rudelson and Vershynin (2008) on the number of random frequency samples required for exact reconstruction of $k$-sparse signals of length $N$. 
Specifically, we improve the number of samples from $O(k \log(N) \log^2(k) (\log k + \log\log N))$ to $O(k \log(N) \cdot \log^3(k))$. The proof involves bounding the expected supremum of a related Gaussian process by using an improved analysis of the metric defined by the process. This improvement is crucial for our application in list decoding.}, note = {Extended version in {SIAM Journal on Computing (SICOMP).}}, url_Paper = {https://eccc.weizmann.ac.il//report/2012/082} }
@ARTICLE{ref:Che13, author = {Cheraghchi, Mahdi}, title = {Improved Constructions for Non-adaptive Threshold Group Testing}, year = 2013, volume = 67, url_Link = {https://link.springer.com/article/10.1007/s00453-013-9754-7}, url_Slides = {http://www.ima.umn.edu/videos/?id=1803}, doi = {10.1007/s00453-013-9754-7}, journal = {Algorithmica}, pages = {384--417}, keywords = {Threshold group testing, Lossless expanders, Combinatorial designs, Explicit constructions}, note = {Preliminary version in {Proceedings of ICALP 2010.}}, url_Paper = {https://arxiv.org/abs/1002.2244}, abstract = {The basic goal in combinatorial group testing is to identify a set of up to $d$ defective items within a large population of size $n \gg d$ using a pooling strategy. Namely, the items can be grouped together in pools, and a single measurement would reveal whether there are one or more defectives in the pool. The threshold model is a generalization of this idea where a measurement returns positive if the number of defectives in the pool reaches a fixed threshold $u > 0$, negative if this number is no more than a fixed lower threshold $\ell < u$, and may behave arbitrarily otherwise. We study non-adaptive threshold group testing (in a possibly noisy setting) and show that, for this problem, $O(d^{g+2} (\log d) \log(n/d))$ measurements (where $g := u-\ell-1$ and $u$ is any fixed constant) suffice to identify the defectives, and also present almost matching lower bounds. This significantly improves the previously known (non-constructive) upper bound $O(d^{u+1} \log(n/d))$. Moreover, we obtain a framework for explicit construction of measurement schemes using lossless condensers. The number of measurements resulting from this scheme is ideally bounded by $O(d^{g+3} (\log d) \log n)$. 
Using state-of-the-art constructions of lossless condensers, however, we obtain explicit testing schemes with $O(d^{g+3} (\log d) \mathrm{quasipoly}(\log n))$ and $O(d^{g+3+\beta}\mathrm{poly}(\log n))$ measurements, for arbitrary constant $\beta > 0$.} }
@article{ref:Che13:testing,
  author   = {Mahdi Cheraghchi},
  title    = {Noise-resilient group testing: Limitations and constructions},
  journal  = {Discrete Applied Mathematics},
  volume   = 161,
  number   = 1,
  pages    = {81--95},
  year     = 2013,
  doi      = {10.1016/j.dam.2012.07.022},
  note     = {Preliminary version in Proceedings of the {FCT 2009}. arXiv manuscript published in 2008.},
  keywords = {Group testing, Randomness condensers, Extractors, List decoding},
  url_Link = {http://www.sciencedirect.com/science/article/pii/S0166218X12003009},
  url_Paper = {https://arxiv.org/abs/0811.2609},
  abstract = {We study combinatorial group testing schemes for learning $d$-sparse boolean vectors using highly unreliable disjunctive measurements. We consider an adversarial noise model that only limits the number of false observations, and show that any noise-resilient scheme in this model can only approximately reconstruct the sparse vector. On the positive side, we take this barrier to our advantage and show that approximate reconstruction (within a satisfactory degree of approximation) allows us to break the information theoretic lower bound of $\tilde{\Omega}(d^2 \log n)$ that is known for exact reconstruction of $d$-sparse vectors of length $n$ via non-adaptive measurements, by a multiplicative factor $\tilde{\Omega}(d)$. Specifically, we give simple randomized constructions of non-adaptive measurement schemes, with $m=O(d \log n)$ measurements, that allow efficient reconstruction of $d$-sparse vectors up to $O(d)$ false positives even in the presence of $\delta m$ false positives and $O(m/d)$ false negatives within the measurement outcomes, for \textit{any} constant $\delta < 1$. We show that, information theoretically, none of these parameters can be substantially improved without dramatically affecting the others. Furthermore, we obtain several explicit constructions, in particular one matching the randomized trade-off but using $m = O(d^{1+o(1)} \log n)$ measurements. 
We also obtain explicit constructions that allow fast reconstruction in time $\mathrm{poly}(m)$, which would be sublinear in $n$ for sufficiently sparse vectors. The main tool used in our construction is the list-decoding view of randomness condensers and extractors. An immediate consequence of our result is an adaptive scheme that runs in only two non-adaptive \textit{rounds} and exactly reconstructs any $d$-sparse vector using a total $O(d \log n)$ measurements, a task that would be impossible in one round and fairly easy in $O(\log(n/d))$ rounds. }
}
@unpublished{ref:CGM12,
  author    = {Mahdi Cheraghchi and Anna G\'al and Andrew Mills},
  title     = {Correctness and Corruption of Locally Decodable Codes},
  year      = 2012,
  note      = {Manuscript.},
  url_Paper = {https://eccc.weizmann.ac.il//report/2012/172/},
  abstract  = {Locally decodable codes (LDCs) are error correcting codes with the extra property that it is sufficient to read just a small number of positions of a possibly corrupted codeword in order to recover any one position of the input. To achieve this, it is necessary to use randomness in the decoding procedures. We refer to the probability of returning the correct answer as the {\em correctness} of the decoding algorithm. Thus far, the study of LDCs has focused on the question of the tradeoff between their length and the query complexity of the decoders. Another natural question is what is the largest possible correctness, as a function of the amount of codeword corruption and the number of queries, regardless of the length of the codewords. Goldreich et al. (Computational Complexity 15(3), 2006) observed that for a given number of queries and fraction of errors, the correctness probability cannot be arbitrarily close to $1$. However, the quantitative dependence between the largest possible correctness and the amount of corruption $\delta$ has not been established before. We present several bounds on the largest possible correctness for LDCs, as a function of the amount of corruption tolerated and the number of queries used, regardless of the length of the code. Our bounds are close to tight. We also investigate the relationship between the amount of corruption tolerated by an LDC and its minimum distance as an error correcting code. Even though intuitively the two notions are expected to be related, we demonstrate that in general this is not the case. However, we show a close relationship between minimum distance and amount of corruption tolerated for linear codes over arbitrary finite fields, and for binary nonlinear codes. 
We use these results to strengthen the known bounds on the largest possible amount of corruption that can be tolerated by LDCs (with any nontrivial correctness better than random guessing) regardless of the query complexity or the length of the code. }
}
@inproceedings{ref:CKKL12,
  author    = {Cheraghchi, Mahdi and Klivans, Adam and Kothari, Pravesh and Lee, Homin K.},
  title     = {Submodular Functions Are Noise Stable},
  booktitle = {Proceedings of the 23rd {Annual ACM-SIAM Symposium on Discrete Algorithms (SODA)}},
  pages     = {1586--1592},
  numpages  = 7,
  year      = 2012,
  url_Link  = {https://dl.acm.org/doi/10.5555/2095116.2095242},
  url_Paper = {https://eccc.weizmann.ac.il//report/2011/090/},
  abstract  = {We show that all non-negative submodular functions have high \textit{noise-stability}. As a consequence, we obtain a polynomial-time learning algorithm for this class with respect to any product distribution on $\{-1,1\}^n$ (for any constant accuracy parameter $\epsilon$). Our algorithm also succeeds in the agnostic setting. Previous work on learning submodular functions required either query access or strong assumptions about the types of submodular functions to be learned (and did not hold in the agnostic setting). Additionally we give simple algorithms that efficiently release differentially private answers to all Boolean conjunctions and to all halfspaces with constant average error, subsuming and improving recent work due to Gupta, Hardt, Roth and Ullman (STOC 2011).}
}
@article{ref:CHIS12,
  author    = {Cheraghchi, Mahdi and H\r{a}stad, Johan and Isaksson, Marcus and Svensson, Ola},
  title     = {Approximating Linear Threshold Predicates},
  journal   = {ACM Transactions on Computation Theory (ToCT)},
  volume    = 4,
  number    = 1,
  articleno = 2,
  numpages  = 31,
  month     = mar,
  year      = 2012,
  doi       = {10.1145/2141938.2141940},
  note      = {Preliminary version in {Proceedings of APPROX 2010.}},
  keywords  = {linear threshold predicates, constraint satisfactory problems, approximation algorithms},
  url_Link  = {https://dl.acm.org/doi/10.1145/2141938.2141940},
  url_Paper = {https://eccc.weizmann.ac.il//report/2010/132},
  abstract  = {We study constraint satisfaction problems on the domain $\{-1,1\}$, where the given constraints are homogeneous linear threshold predicates. That is, predicates of the form $\mathrm{sgn}(w_1 x_1 + \cdots +w_n x_n)$ for some positive integer weights $w_1, \dots, w_n$. Despite their simplicity, current techniques fall short of providing a classification of these predicates in terms of approximability. In fact, it is not easy to guess whether there exists a homogeneous linear threshold predicate that is approximation resistant or not. The focus of this paper is to identify and study the approximation curve of a class of threshold predicates that allow for non-trivial approximation. Arguably the simplest such predicate is the majority predicate $\mathrm{sgn}(x_1+ \cdots + x_n)$, for which we obtain an almost complete understanding of the asymptotic approximation curve, assuming the Unique Games Conjecture. Our techniques extend to a more general class of "majority-like" predicates and we obtain parallel results for them. In order to classify these predicates, we introduce the notion of \textit{Chow-robustness} that might be of independent interest.}
}
@article{ref:CKMS12,
  author    = {Mahdi Cheraghchi and Amin Karbasi and Soheil Mohajer and Venkatesh Saligrama},
  title     = {Graph-Constrained Group Testing},
  journal   = {IEEE Transactions on Information Theory},
  volume    = 58,
  number    = 1,
  pages     = {248--262},
  year      = 2012,
  doi       = {10.1109/TIT.2011.2169535},
  note      = {Preliminary version in Proceedings of {ISIT 2010}.},
  url_Link  = {https://ieeexplore.ieee.org/document/6121984},
  url_Paper = {https://arxiv.org/abs/1001.1445},
  abstract  = {Non-adaptive group testing involves grouping arbitrary subsets of $n$ items into different pools. Each pool is then tested and defective items are identified. A fundamental question involves minimizing the number of pools required to identify at most $d$ defective items. Motivated by applications in network tomography, sensor networks and infection propagation, a variation of group testing problems on graphs is formulated. Unlike conventional group testing problems, each group here must conform to the constraints imposed by a graph. For instance, items can be associated with vertices and each pool is any set of nodes that must be path connected. In this paper, a test is associated with a random walk. In this context, conventional group testing corresponds to the special case of a complete graph on $n$ vertices. For interesting classes of graphs a rather surprising result is obtained, namely, that the number of tests required to identify $d$ defective items is substantially similar to what is required in conventional group testing problems, where no such constraints on pooling is imposed. Specifically, if $T(n)$ corresponds to the mixing time of the graph $G$, it is shown that with $m=O(d^2T^2(n)\log(n/d))$ non-adaptive tests, one can identify the defective items. Consequently, for the Erd\H{o}s-R\'enyi random graph $G(n,p)$, as well as expander graphs with constant spectral gap, it follows that $m=O(d^2\log^3n)$ non-adaptive tests are sufficient to identify $d$ defective items. 
Next, a specific scenario is considered that arises in network tomography, for which it is shown that $m=O(d^3\log^3n)$ non-adaptive tests are sufficient to identify $d$ defective items. Noisy counterparts of the graph constrained group testing problem are considered, for which parallel results are developed. We also briefly discuss extensions to compressive sensing on graphs. }
}
@article{ref:CDS12,
  author    = {Mahdi Cheraghchi and Fr\'ed\'eric Didier and Amin Shokrollahi},
  title     = {Invertible Extractors and Wiretap Protocols},
  journal   = {IEEE Transactions on Information Theory},
  volume    = 58,
  number    = 2,
  pages     = {1254--1274},
  year      = 2012,
  doi       = {10.1109/TIT.2011.2170660},
  note      = {Preliminary version in Proceedings of {ISIT 2009}.},
  keywords  = {Wiretap Channel, Extractors, Network Coding, Active Intrusion, Exposure Resilient Cryptography},
  url_Link  = {https://ieeexplore.ieee.org/document/6035782},
  url_Paper = {https://arxiv.org/abs/0901.2120},
  abstract  = {A wiretap protocol is a pair of randomized encoding and decoding functions such that knowledge of a bounded fraction of the encoding of a message reveals essentially no information about the message, while knowledge of the entire encoding reveals the message using the decoder. In this paper, the notion of efficiently invertible extractors is studied and it is shown that a wiretap protocol can be constructed from such an extractor. Then, invertible extractors for symbol-fixing, affine, and general sources are constructed and used to create wiretap protocols with asymptotically optimal trade-offs between their rate (ratio of the length of the message versus its encoding) and resilience (ratio of the observed positions of the encoding and the length of the encoding). The results are further applied to create wiretap protocols for challenging communication problems, such as active intruders who change portions of the encoding, network coding, and intruders observing arbitrary Boolean functions of the encoding. As a by-product of the constructions, new explicit extractors for a restricted family of affine sources over large fields is obtained (that in particular generalizes the notion of symbol-fixing sources) which is of independent interest. These extractors are able to extract the entire source entropy with zero error. }
}
@inproceedings{ref:conf:Che11,
  author    = {Mahdi Cheraghchi},
  title     = {Coding-Theoretic Methods for Sparse Recovery},
  booktitle = {Proceedings of the 49th {Annual Allerton Conference on Communication, Control, and Computing}},
  pages     = {909--916},
  year      = 2011,
  doi       = {10.1109/Allerton.2011.6120263},
  url_Link  = {https://ieeexplore.ieee.org/document/6120263},
  url_Paper = {https://arxiv.org/abs/1110.0279},
  abstract  = { We review connections between coding-theoretic objects and sparse learning problems. In particular, we show how seemingly different combinatorial objects such as error-correcting codes, combinatorial designs, spherical codes, compressed sensing matrices and group testing designs can be obtained from one another. The reductions enable one to translate upper and lower bounds on the parameters attainable by one object to another. We survey some of the well-known reductions in a unified presentation, and bring some existing gaps to attention. New reductions are also introduced; in particular, we bring up the notion of minimum "L-wise distance" of codes and show that this notion closely captures the combinatorial structure of RIP-2 matrices. Moreover, we show how this weaker variation of the minimum distance is related to combinatorial list-decoding properties of codes. }
}
@article{ref:CHKV11,
  author    = {Mahdi Cheraghchi and Ali Hormati and Amin Karbasi and Martin Vetterli},
  title     = {Group Testing With Probabilistic Tests: Theory, Design and Application},
  journal   = {IEEE Transactions on Information Theory},
  volume    = 57,
  number    = 10,
  pages     = {7057--7067},
  year      = 2011,
  doi       = {10.1109/TIT.2011.2148691},
  note      = {Preliminary version in Proceedings of the {Allerton 2009}.},
  url_Link  = {https://ieeexplore.ieee.org/document/5759087},
  url_Paper = {https://arxiv.org/abs/0909.3508},
  abstract  = {Detection of defective members of large populations has been widely studied in the statistics community under the name "group testing", a problem which dates back to World War II when it was suggested for syphilis screening. There, the main interest is to identify a small number of infected people among a large population using \textit{collective samples}. In viral epidemics, one way to acquire collective samples is by sending agents inside the population. While in classical group testing, it is assumed that the sampling procedure is fully known to the reconstruction algorithm, in this work we assume that the decoder possesses only \textit{partial} knowledge about the sampling process. This assumption is justified by observing the fact that in a viral sickness, there is a chance that an agent remains healthy despite having contact with an infected person. Therefore, the reconstruction method has to cope with two different types of uncertainty; namely, identification of the infected population and the partially unknown sampling procedure. In this work, by using a natural probabilistic model for "viral infections", we design non-adaptive sampling procedures that allow successful identification of the infected population with overwhelming probability $1-o(1)$. We propose both probabilistic and explicit design procedures that require a "small" number of agents to single out the infected individuals. 
More precisely, for a contamination probability $p$, the number of agents required by the probabilistic and explicit designs for identification of up to $k$ infected members is bounded by $m = O(k^2 (\log n) / p^2)$ and $m = O(k^2 (\log^2 n) / p^2)$, respectively. In both cases, a simple decoder is able to successfully identify the infected population in time $O(mn)$. }
}
@inproceedings{ref:conf:Che10,
  author    = {Mahdi Cheraghchi},
  title     = {Derandomization and Group Testing},
  booktitle = {Proceedings of the 48th {Annual Allerton Conference on Communication, Control, and Computing}},
  pages     = {991--997},
  year      = 2010,
  doi       = {10.1109/ALLERTON.2010.5707017},
  url_Link  = {https://ieeexplore.ieee.org/document/5707017},
  url_Paper = {https://arxiv.org/abs/1010.0433},
  abstract  = { The rapid development of derandomization theory, which is a fundamental area in theoretical computer science, has recently led to many surprising applications outside its initial intention. We will review some recent such developments related to combinatorial group testing. In its most basic setting, the aim of group testing is to identify a set of "positive" individuals in a population of items by taking groups of items and asking whether there is a positive in each group. In particular, we will discuss explicit constructions of optimal or nearly-optimal group testing schemes using "randomness-conducting" functions. Among such developments are constructions of error-correcting group testing schemes using randomness extractors and condensers, as well as threshold group testing schemes from lossless condensers. }
}
@inproceedings{ref:conf:CHIS10,
  author    = {Cheraghchi, Mahdi and H\r{a}stad, Johan and Isaksson, Marcus and Svensson, Ola},
  title     = {Approximating Linear Threshold Predicates},
  booktitle = {Proceedings of {APPROX}},
  pages     = {110--123},
  year      = 2010,
  doi       = {10.1007/978-3-642-15369-3_9},
  note      = {Extended version in {ACM Transactions on Computation Theory (ToCT).}},
  keywords  = {linear threshold predicates, constraint satisfactory problems, approximation algorithms},
  url_Link  = {https://link.springer.com/chapter/10.1007/978-3-642-15369-3_9},
  url_Paper = {https://eccc.weizmann.ac.il//report/2010/132},
  abstract  = {We study constraint satisfaction problems on the domain $\{-1,1\}$, where the given constraints are homogeneous linear threshold predicates. That is, predicates of the form $\mathrm{sgn}(w_1 x_1 + \cdots +w_n x_n)$ for some positive integer weights $w_1, \dots, w_n$. Despite their simplicity, current techniques fall short of providing a classification of these predicates in terms of approximability. In fact, it is not easy to guess whether there exists a homogeneous linear threshold predicate that is approximation resistant or not. The focus of this paper is to identify and study the approximation curve of a class of threshold predicates that allow for non-trivial approximation. Arguably the simplest such predicate is the majority predicate $\mathrm{sgn}(x_1+ \cdots + x_n)$, for which we obtain an almost complete understanding of the asymptotic approximation curve, assuming the Unique Games Conjecture. Our techniques extend to a more general class of "majority-like" predicates and we obtain parallel results for them. In order to classify these predicates, we introduce the notion of \textit{Chow-robustness} that might be of independent interest.}
}
@inproceedings{ref:conf:Che10:threshold,
  author     = {Cheraghchi, Mahdi},
  title      = {Improved Constructions for Non-adaptive Threshold Group Testing},
  booktitle  = {Proceedings of the 37th {International Colloquium on Automata, Languages and Programming (ICALP)}},
  pages      = {552--564},
  year       = 2010,
  doi        = {10.1007/978-3-642-14165-2_47},
  note       = {Extended version in {Algorithmica.}},
  keywords   = {Threshold group testing, Lossless expanders, Combinatorial designs, Explicit constructions},
  url_Link   = {https://link.springer.com/chapter/10.1007/978-3-642-14165-2_47},
  url_Slides = {http://www.ima.umn.edu/videos/?id=1803},
  url_Paper  = {https://arxiv.org/abs/1002.2244},
  abstract   = {The basic goal in combinatorial group testing is to identify a set of up to $d$ defective items within a large population of size $n \gg d$ using a pooling strategy. Namely, the items can be grouped together in pools, and a single measurement would reveal whether there are one or more defectives in the pool. The threshold model is a generalization of this idea where a measurement returns positive if the number of defectives in the pool reaches a fixed threshold $u > 0$, negative if this number is no more than a fixed lower threshold $\ell < u$, and may behave arbitrarily otherwise. We study non-adaptive threshold group testing (in a possibly noisy setting) and show that, for this problem, $O(d^{g+2} (\log d) \log(n/d))$ measurements (where $g := u-\ell-1$ and $u$ is any fixed constant) suffice to identify the defectives, and also present almost matching lower bounds. This significantly improves the previously known (non-constructive) upper bound $O(d^{u+1} \log(n/d))$. Moreover, we obtain a framework for explicit construction of measurement schemes using lossless condensers. The number of measurements resulting from this scheme is ideally bounded by $O(d^{g+3} (\log d) \log n)$. 
Using state-of-the-art constructions of lossless condensers, however, we obtain explicit testing schemes with $O(d^{g+3} (\log d) \mathrm{quasipoly}(\log n))$ and $O(d^{g+3+\beta} \mathrm{poly}(\log n))$ measurements, for arbitrary constant $\beta > 0$.}
}
@inproceedings{ref:conf:CKMS10,
  author    = {Mahdi Cheraghchi and Amin Karbasi and Soheil Mohajer and Venkatesh Saligrama},
  title     = {Graph-constrained group testing},
  booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}},
  pages     = {1913--1917},
  year      = 2010,
  doi       = {10.1109/ISIT.2010.5513397},
  note      = {Extended version in IEEE Transactions on Information Theory.},
  url_Link  = {https://ieeexplore.ieee.org/document/5513397},
  url_Paper = {https://arxiv.org/abs/1001.1445},
  abstract  = {Non-adaptive group testing involves grouping arbitrary subsets of $n$ items into different pools and identifying defective items based on tests obtained for each pool. Motivated by applications in network tomography, sensor networks and infection propagation we formulate non-adaptive group testing problems on graphs. Unlike conventional group testing problems each group here must conform to the constraints imposed by a graph. For instance, items can be associated with vertices and each pool is any set of nodes that must be path connected. In this paper we associate a test with a random walk. In this context conventional group testing corresponds to the special case of a complete graph on $n$ vertices. For interesting classes of graphs we arrive at a rather surprising result, namely, that the number of tests required to identify $d$ defective items is substantially similar to that required in conventional group testing problems, where no such constraints on pooling is imposed. Specifically, if $T(n)$ corresponds to the mixing time of the graph $G$, we show that with $m=O(d^2T^2(n)\log(n/d))$ non-adaptive tests, one can identify the defective items. Consequently, for the Erd\H{o}s-R\'enyi random graph $G(n,p)$, as well as expander graphs with constant spectral gap, it follows that $m=O(d^2\log^3n)$ non-adaptive tests are sufficient to identify $d$ defective items. 
We next consider a specific scenario that arises in network tomography and show that $m=O(d^3\log^3n)$ non-adaptive tests are sufficient to identify $d$ defective items. We also consider noisy counterparts of the graph constrained group testing problem and develop parallel results for these cases. }
}
@phdthesis{ref:Che10:thesis,
  author      = {Mahdi Cheraghchi},
  title       = {Applications of Derandomization Theory in Coding},
  school      = {EPFL},
  institution = {IIF},
  publisher   = {EPFL},
  address     = {Lausanne, Switzerland},
  pages       = 210,
  year        = 2010,
  doi         = {10.5075/epfl-thesis-4767},
  url_Link    = {http://infoscience.epfl.ch/record/149074},
  url_Paper   = {https://arxiv.org/abs/1107.4709},
  abstract    = {Randomized techniques play a fundamental role in theoretical computer science and discrete mathematics, in particular for the design of efficient algorithms and construction of combinatorial objects. The basic goal in derandomization theory is to eliminate or reduce the need for randomness in such randomized constructions. Towards this goal, numerous fundamental notions have been developed to provide a unified framework for approaching various derandomization problems and to improve our general understanding of the power of randomness in computation. Two important classes of such tools are pseudorandom generators and randomness extractors. Pseudorandom generators transform a short, purely random, sequence into a much longer sequence that looks random, while extractors transform a weak source of randomness into a perfectly random one (or one with much better qualities, in which case the transformation is called a randomness condenser). In this thesis, we explore some applications of the fundamental notions in derandomization theory to problems outside the core of theoretical computer science, and in particular, certain problems related to coding theory. First, we consider the wiretap channel problem which involves a communication system in which an intruder can eavesdrop a limited portion of the transmissions. We utilize randomness extractors to construct efficient and information-theoretically optimal communication protocols for this model. Then we consider the combinatorial group testing problem. 
In this classical problem, one aims to determine a set of defective items within a large population by asking a number of queries, where each query reveals whether a defective item is present within a specified group of items. We use randomness condensers to explicitly construct optimal, or nearly optimal, group testing schemes for a setting where the query outcomes can be highly unreliable, as well as the threshold model where a query returns positive if the number of defectives pass a certain threshold. Next, we use randomness condensers and extractors to design ensembles of error-correcting codes that achieve the information-theoretic capacity of a large class of communication channels, and then use the obtained ensembles for construction of explicit capacity achieving codes. Finally, we consider the problem of explicit construction of error-correcting codes on the Gilbert-Varshamov bound and extend the original idea of Nisan and Wigderson to obtain a small ensemble of codes, mostly achieving the bound, under suitable computational hardness assumptions.}
}
@inproceedings{ref:conf:CHKV09,
  author    = {Mahdi Cheraghchi and Ali Hormati and Amin Karbasi and Martin Vetterli},
  title     = {Compressed sensing with probabilistic measurements: A group testing solution},
  booktitle = {Proceedings of the 47th {Annual Allerton Conference on Communication, Control, and Computing}},
  pages     = {30--35},
  year      = 2009,
  doi       = {10.1109/ALLERTON.2009.5394829},
  note      = {Extended version in {IEEE Transactions on Information Theory}.},
  url_Link  = {https://ieeexplore.ieee.org/document/5394829},
  url_Paper = {https://arxiv.org/abs/0909.3508},
  abstract  = {Detection of defective members of large populations has been widely studied in the statistics community under the name "group testing", a problem which dates back to World War II when it was suggested for syphilis screening. There, the main interest is to identify a small number of infected people among a large population using \textit{collective samples}. In viral epidemics, one way to acquire collective samples is by sending agents inside the population. While in classical group testing, it is assumed that the sampling procedure is fully known to the reconstruction algorithm, in this work we assume that the decoder possesses only \textit{partial} knowledge about the sampling process. This assumption is justified by observing the fact that in a viral sickness, there is a chance that an agent remains healthy despite having contact with an infected person. Therefore, the reconstruction method has to cope with two different types of uncertainty; namely, identification of the infected population and the partially unknown sampling procedure. In this work, by using a natural probabilistic model for "viral infections", we design non-adaptive sampling procedures that allow successful identification of the infected population with overwhelming probability $1-o(1)$. We propose both probabilistic and explicit design procedures that require a "small" number of agents to single out the infected individuals. 
More precisely, for a contamination probability $p$, the number of agents required by the probabilistic and explicit designs for identification of up to $k$ infected members is bounded by $m = O(k^2 (\log n) / p^2)$ and $m = O(k^2 (\log^2 n) / p^2)$, respectively. In both cases, a simple decoder is able to successfully identify the infected population in time $O(mn)$. }
}
@inproceedings{ref:conf:Che09:testing,
  author    = {Mahdi Cheraghchi},
  title     = {Noise-resilient group testing: Limitations and constructions},
  booktitle = {Proceedings of the 17th {International Symposium on Fundamentals of Computation Theory (FCT)}},
  pages     = {62--73},
  year      = 2009,
  doi       = {10.1007/978-3-642-03409-1_7},
  note      = {Extended version in {Discrete Applied Mathematics}. arXiv manuscript published in 2008.},
  keywords  = {Group testing, Randomness condensers, Extractors, List decoding},
  url_Link  = {https://link.springer.com/chapter/10.1007/978-3-642-03409-1_7},
  url_Paper = {https://arxiv.org/abs/0811.2609},
  abstract  = {We study combinatorial group testing schemes for learning $d$-sparse boolean vectors using highly unreliable disjunctive measurements. We consider an adversarial noise model that only limits the number of false observations, and show that any noise-resilient scheme in this model can only approximately reconstruct the sparse vector. On the positive side, we take this barrier to our advantage and show that approximate reconstruction (within a satisfactory degree of approximation) allows us to break the information theoretic lower bound of $\tilde{\Omega}(d^2 \log n)$ that is known for exact reconstruction of $d$-sparse vectors of length $n$ via non-adaptive measurements, by a multiplicative factor $\tilde{\Omega}(d)$. Specifically, we give simple randomized constructions of non-adaptive measurement schemes, with $m=O(d \log n)$ measurements, that allow efficient reconstruction of $d$-sparse vectors up to $O(d)$ false positives even in the presence of $\delta m$ false positives and $O(m/d)$ false negatives within the measurement outcomes, for \textit{any} constant $\delta < 1$. We show that, information theoretically, none of these parameters can be substantially improved without dramatically affecting the others. 
Furthermore, we obtain several explicit constructions, in particular one matching the randomized trade-off but using $m = O(d^{1+o(1)} \log n)$ measurements. We also obtain explicit constructions that allow fast reconstruction in time $\mathrm{poly}(m)$, which would be sublinear in $n$ for sufficiently sparse vectors. The main tool used in our construction is the list-decoding view of randomness condensers and extractors. An immediate consequence of our result is an adaptive scheme that runs in only two non-adaptive \textit{rounds} and exactly reconstructs any $d$-sparse vector using a total $O(d \log n)$ measurements, a task that would be impossible in one round and fairly easy in $O(\log(n/d))$ rounds. }
}
@inproceedings{ref:conf:Che09:capacity,
  author    = {Mahdi Cheraghchi},
  title     = {Capacity achieving codes from randomness conductors},
  booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}},
  pages     = {2639--2643},
  year      = 2009,
  doi       = {10.1109/ISIT.2009.5205931},
  keywords  = {Capacity achieving codes, Randomness extractors, Lossless condensers, Code ensembles, Concatenated codes},
  url_Link  = {https://ieeexplore.ieee.org/abstract/document/5205931},
  url_Paper = {https://arxiv.org/abs/0901.1866},
  abstract  = {We establish a general framework for construction of small ensembles of capacity achieving linear codes for a wide range of (not necessarily memoryless) discrete symmetric channels, and in particular, the binary erasure and symmetric channels. The main tool used in our constructions is the notion of randomness extractors and lossless condensers that are regarded as central tools in theoretical computer science. Same as random codes, the resulting ensembles preserve their capacity achieving properties under any change of basis. Using known explicit constructions of condensers, we obtain specific ensembles whose size is as small as polynomial in the block length. By applying our construction to Justesen's concatenation scheme (Justesen, 1972) we obtain explicit capacity achieving codes for BEC (resp., BSC) with almost linear time encoding and almost linear time (resp., quadratic time) decoding and exponentially small error probability. }
}
@inproceedings{ref:conf:CDS09,
  author    = {Mahdi Cheraghchi and Fr\'ed\'eric Didier and Amin Shokrollahi},
  title     = {Invertible Extractors and Wiretap Protocols},
  booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}},
  year      = {2009},
  pages     = {1934--1938},
  note      = {Extended version in IEEE Transactions on Information Theory.},
  doi       = {10.1109/ISIT.2009.5205583},
  url_Link  = {https://ieeexplore.ieee.org/document/5205583},
  url_Paper = {https://arxiv.org/abs/0901.2120},
  abstract  = {A wiretap protocol is a pair of randomized encoding and decoding functions such that knowledge of a bounded fraction of the encoding of a message reveals essentially no information about the message, while knowledge of the entire encoding reveals the message using the decoder. In this paper, the notion of efficiently invertible extractors is studied and it is shown that a wiretap protocol can be constructed from such an extractor. Then, invertible extractors for symbol-fixing, affine, and general sources are constructed and used to create wiretap protocols with asymptotically optimal trade-offs between their rate (ratio of the length of the message versus its encoding) and resilience (ratio of the observed positions of the encoding and the length of the encoding). The results are further applied to create wiretap protocols for challenging communication problems, such as active intruders who change portions of the encoding, network coding, and intruders observing arbitrary Boolean functions of the encoding. As a by-product of the constructions, new explicit extractors for a restricted family of affine sources over large fields is obtained (that in particular generalizes the notion of symbol-fixing sources) which is of independent interest. These extractors are able to extract the entire source entropy with zero error.},
  keywords  = {Wiretap Channel, Extractors, Network Coding, Active Intrusion, Exposure Resilient Cryptography},
}
@inproceedings{ref:conf:ACS09,
  author    = {Ehsan Ardestanizadeh and Mahdi Cheraghchi and Amin Shokrollahi},
  title     = {Bit precision analysis for compressed sensing},
  booktitle = {Proceedings of the {IEEE International Symposium on Information Theory (ISIT)}},
  year      = {2009},
  pages     = {1--5},
  doi       = {10.1109/ISIT.2009.5206076},
  url_Link  = {https://ieeexplore.ieee.org/document/5206076},
  url_Paper = {https://arxiv.org/abs/0901.2147},
  abstract  = {This paper studies the stability of some reconstruction algorithms for compressed sensing in terms of the \textit{bit precision}. Considering the fact that practical digital systems deal with discretized signals, we motivate the importance of the total number of accurate bits needed from the measurement outcomes in addition to the number of measurements. It is shown that if one uses a $2k \times n $ Vandermonde matrix with roots on the unit circle as the measurement matrix, $O(\ell + k \log \frac{n}{k})$ bits of precision per measurement are sufficient to reconstruct a $k$-sparse signal $x \in \mathbb{R}^n$ with \textit{dynamic range} (i.e., the absolute ratio between the largest and the smallest nonzero coefficients) at most $2^\ell$ within $\ell$ bits of precision, hence identifying its correct support. Finally, we obtain an upper bound on the total number of required bits when the measurement matrix satisfies a restricted isometry property, which is in particular the case for random Fourier and Gaussian matrices. For very sparse signals, the upper bound on the number of required bits for Vandermonde matrices is shown to be better than this general upper bound.},
}
@inproceedings{ref:conf:CS09,
  author    = {Mahdi Cheraghchi and Amin Shokrollahi},
  title     = {Almost-Uniform Sampling of Points on High-Dimensional Algebraic Varieties},
  booktitle = {Proceedings of the 26th {International Symposium on Theoretical Aspects of Computer Science (STACS)}},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {3},
  year      = {2009},
  pages     = {277--288},
  doi       = {10.4230/LIPIcs.STACS.2009.1817},
  url_Link  = {http://drops.dagstuhl.de/opus/volltexte/2009/1817},
  url_Paper = {https://arxiv.org/abs/0902.1254},
  abstract  = {We consider the problem of uniform sampling of points on an algebraic variety. Specifically, we develop a randomized algorithm that, given a small set of multivariate polynomials over a sufficiently large finite field, produces a common zero of the polynomials almost uniformly at random. The statistical distance between the output distribution of the algorithm and the uniform distribution on the set of common zeros is polynomially small in the field size, and the running time of the algorithm is polynomial in the description of the polynomials and their degrees provided that the number of the polynomials is a constant.},
  keywords  = {Uniform sampling, Algebraic varieties, Randomized algorithms, Computational complexity},
}
@inproceedings{ref:conf:CSW06,
  author    = {Mahdi Cheraghchi and Amin Shokrollahi and Avi Wigderson},
  title     = {Computational Hardness and Explicit Constructions of Error Correcting Codes},
  booktitle = {Proceedings of the 44th {Annual Allerton Conference on Communication, Control, and Computing}},
  year      = {2006},
  pages     = {1173--1179},
  url_Paper = {https://infoscience.epfl.ch/record/101078},
  abstract  = {We outline a procedure for using pseudorandom generators to construct binary codes with good properties, assuming the existence of sufficiently hard functions. Specifically, we give a polynomial time algorithm, which for every integers $n$ and $k$, constructs polynomially many linear codes of block length $n$ and dimension $k$, most of which achieving the Gilbert-Varshamov bound. The success of the procedure relies on the assumption that the exponential time class of $\mathrm{E} := \mathrm{DTIME}[2^{O(n)}]$ is not contained in the sub-exponential space class $\mathrm{DSPACE}[2^{o(n)}]$. The methods used in this paper are by now standard within computational complexity theory, and the main contribution of this note is observing that they are relevant to the construction of optimal codes. We attempt to make this note self contained, and describe the relevant results and proofs from the theory of pseudorandomness in some detail.},
}
@unpublished{ref:Che05,
  author   = {Mahdi Cheraghchi},
  title    = {On Matrix Rigidity and the Complexity of Linear Forms},
  year     = {2005},
  note     = {ECCC Technical Report TR05-070.},
  abstract = {The rigidity function of a matrix is defined as the minimum number of its entries that need to be changed in order to reduce the rank of the matrix to below a given parameter. Proving a strong enough lower bound on the rigidity of a matrix implies a nontrivial lower bound on the complexity of any linear circuit computing the set of linear forms associated with it. However, although it is shown that most matrices are rigid enough, no explicit construction of a rigid family of matrices is known. We review the concept of rigidity and some of its interesting variations as well as several notable works related to that. We also show the existence of highly rigid matrices constructed by evaluation of bivariate polynomials over a finite field.},
  keywords = {Matrix Rigidity, Low Level Complexity, Circuit Complexity, Linear Forms},
}
@mastersthesis{ref:Che05:thesis,
  author      = {Mahdi Cheraghchi},
  title       = {Locally Testable Codes},
  school      = {EPFL},
  institution = {IIF},
  publisher   = {EPFL},
  address     = {Lausanne, Switzerland},
  year        = {2005},
  url_Paper   = {https://eccc.weizmann.ac.il//static/books/Locally_Testable_Codes},
  abstract    = {Error correcting codes are combinatorial objects that allow reliable recovery of information in presence of errors by cleverly augmenting the original information with a certain amount of redundancy. The availability of efficient means of error detection is considered as a fundamental criterion for error correcting codes. Locally testable codes are families of error correcting codes that are highly robust against transmission errors and in addition provide super-efficient (sublinear time) probabilistic algorithms for error detection. In particular, the error detection algorithm probes the received sequence only at a small (or even constant) number of locations. There seems to be a trade-off between the amount of redundancy and the number of probes for the error detection procedure in locally testable codes. Even though currently best constructions allow reduction of redundancy to a nearly linear amount, it is not clear whether this can be further reduced to linear while preserving a constant number of probes. We study the formal notion of locally testable codes and survey several major results in this area. We also investigate closely related concepts, and in particular, polynomial low-degree tests and probabilistically checkable proofs.},
  keywords    = {Error Correcting Codes, Locally Testable Codes, Probabilistically Checkable Proofs (PCP), Low-Degree Tests, Hardness of Approximation},
}