\documentclass[11pt,fleqn]{article} \usepackage{cs294,epsf, overpic, xypic} \header{7}{2/7/07}{Simon's Algorithm + QFT} \begin{document} \maketitle \def\BC{\C} \subsection{} Recall that our basic primitive for designing quantum algorithms is Fourier sampling: prepare some quantum state $\ket{\psi} = \sum_x \alpha_x \ket{x}$ on $n$ qubits; perform a Hadamard transform, resulting in the superposition $\sum_x \beta_x \ket{x}$; now measure to sample $x$ with probability $|\beta_x|^2$. The point is that classically it is difficult to simulate the effects of the quantum interference, and therefore to determine for which strings $x$ there is constructive interference and which are therefore output with high probability. Today we will introduce two innovations that will prepare us for the quantum algorithm for factoring. 1) A more sophisticated method for preparing the initial superposition $\ket{\psi} = \sum_x \alpha_x \ket{x}$. 2) Generalizing the Fourier transform from the Hadamard transform (which is a Fourier transform over the group $Z_2^n$) to a Fourier transform over the group $Z_N$. \subsection{Setting up a random pre-image state} Suppose we're given a classical circuit for a $k$-to-$1$ function $f:\left\{ 0,1\right\} ^{n}\rightarrow \left\{ 0,1\right\} ^{n}$. We will show how to set up the quantum state $\ket{\phi} = 1/\sqrt{k} \sum_{x: f(x) = a} \ket{x}$. Here $a$ is uniformly random among all $a$ in the image of $f$. The algorithm uses two registers, both with $n$ qubits. \ The registers are initialized to the basis state $\left| 0\cdots0\right\rangle \left| 0\cdots0\right\rangle $. \ We then perform the Hadamard transform $H_{2^{n}}$\ on the first register, producing the superposition% \[ \frac{1}{2^{n/2}}% {\displaystyle\sum\limits_{x\in\left\{ 0,1\right\} ^{n}}} \left| x\right\rangle \left| 0\cdots0\right\rangle . 
\] Then, we compute $f\left( x\right)$\ through the oracle $C_f$ and store the result in the second register, obtaining the state% \[ \frac{1}{2^{n/2}}% {\displaystyle\sum\limits_{x\in\left\{ 0,1\right\} ^{n}}} \left| x\right\rangle \left| f\left( x\right) \right\rangle . \] The second register is not modified after this step. Thus we may invoke the principle of safe storage and assume that the second register is measured at this point. Let $a$ be the result of measuring the second register. Then $a$ is a random element in the range of $f$, and according to the rules of partial measurement, the state of the first register is a superposition over exactly those values of $x$ that are consistent with the measured contents of the second register, i.e. $$\ket{\phi} = 1/\sqrt{k} \sum_{x: f(x) = a} \ket{x}$$ \subsection{Simon's Problem} Suppose we are given a $2$-to-$1$ function $f:\left\{ 0,1\right\} ^{n}\rightarrow \left\{ 0,1\right\} ^{n}$, specified by a black box, with the promise that there is an $a \in \{0,1\}^n$ with $a \neq 0^n$ such that \begin{itemize} \item For all $x$, $f(x+a) = f(x)$. \item If $f(x) = f(y)$ then either $x = y$ or $y = x + a$. \end{itemize} The challenge is to determine $a$. It is intuitively obvious that this is a difficult task for a classical probabilistic computer. We will show an efficient quantum algorithm. \subsection{Simon's Algorithm\label{simon}} \begin{figure} \centering \begin{overpic}[scale=.7]{294-2.eps} \put(-10,40){$|0^n\>$}\put(-10,10){$|0^n\>$}\put(20,40){$H_{2^n}$} \put(50,30){$C_f$}\put(75,10){$|f(x)\>$}\put(75,40){$H_{2^n}$} \put(105,40){$|y\>$} \end{overpic} \caption{Simon's algorithm} \end{figure} 1. Use $f$ to set up a random pre-image state $$\ket{\phi} = 1/\sqrt{2} \ket{z} + 1/\sqrt{2} \ket{z+a}$$ where $z$ is a random $n$-bit string. 2. Perform a Hadamard transform $H^{\otimes n}$. 
After step 2 we obtain a superposition% \[% {\displaystyle\sum\limits_{y\in\left\{ 0,1\right\} ^{n}}} \alpha_{y}\left| y\right\rangle \] where% \[ \alpha_{y}=\frac{1}{\sqrt{2}}\frac{1}{2^{n/2}}\left( -1\right) ^{y\cdot z}+\frac{1}{\sqrt{2}}\frac{1}{2^{n/2}}\left( -1\right) ^{y\cdot\left( z\oplus a\right) }=\frac{1}{2^{\left( n+1\right) /2}}\left( -1\right) ^{y\cdot z}\left[ 1+\left( -1\right) ^{y\cdot a}\right] . \] There are now two cases. \ For each $y$, if $y\cdot a=1$, then $\alpha_{y}=0$, whereas if $y\cdot a=0$, then% \[ \alpha_{y}=\frac{\pm1}{2^{\left( n-1\right) /2}}. \] So when we observe the first register, with certainty we'll see a $y$ such that $y\cdot a=0$. Hence, the output of the measurement is a random $y$ such that $y\cdot a=0$. Furthermore, each $y$ such that $y\cdot a=0$\ has an equal probability of occurring. \ Therefore what we've managed to learn is an equation% \begin{equation}\label{equ1} y_{1}a_{1}\oplus \cdots\oplus y_{n}a_{n}=0 \end{equation} where $y=\left( y_{1},\ldots,y_{n}\right) $\ is chosen uniformly at random from $\left\{ 0,1\right\} ^{n}$. \ Now, that isn't enough information to determine $a$, but assuming that $y\neq0$, it reduces the number of possibilities for $a$ by half. It should now be clear how to proceed. \ We run the algorithm over and over, accumulating more and more equations of the form in (\ref{equ1}). Then, once we have enough of these equations, we solve them using Gaussian elimination to obtain a unique value of $a$. \ But how many equations is enough? 
From linear algebra, we know that $a$ is uniquely determined once we have $n-1$ linearly independent equations---in other words, $n-1$ equations% \begin{align*} y^{\left( 1\right) }\cdot a & \equiv0\left( \operatorname{mod}2\right) \\ & \vdots\\ y^{\left( n-1\right) }\cdot a & \equiv0\left( \operatorname{mod}2\right) \end{align*} such that the set $\left\{ y^{\left( 1\right) },\ldots,y^{\left( n-1\right) }\right\} $ is linearly independent in the vector space $Z_{2}^{n}$. \ Thus, our strategy will be to lower-bound the probability that $n-1$ equations returned by the algorithm are linearly independent. Suppose we already have $k$ linearly independent equations, with associated vectors $y^{\left( 1\right) },\ldots,y^{\left( k\right) }$. \ The vectors then span a subspace $S\subseteq Z_{2}^{n}$ of size $2^{k}$, consisting of all vectors of the form% \[ b_{1}y^{\left( 1\right) }+\cdots+b_{k}y^{\left( k\right) }% \] with $b_{1},\ldots,b_{k}\in\left\{ 0,1\right\} $. \ Now suppose we learn a new equation with associated vector $y^{\left( k+1\right) }$. \ This equation will be independent of all the previous equations provided that $y^{\left( k+1\right) }$\ lies \textit{outside} of $S$. Since $y^{\left( k+1\right) }$ is uniformly distributed over the $2^{n-1}$ vectors satisfying $y\cdot a=0$, and $S$ is contained in that set, this happens with probability $(2^{n-1}-2^{k})/2^{n-1}=1-2^{k+1-n}$. \ \ So the probability that $n-1$ equations returned by the algorithm are all linearly independent is at least the product% \[ \left( 1-\frac{1}{2^{n}}\right) \times\left( 1-\frac{1}{2^{n-1}}\right) \times\cdots\times\left( 1-\frac{1}{4}\right) \times\left(1-\frac{1}{2}\right). \] Can we lower-bound this expression? \ Trivially, it's at least% \[% {\displaystyle\prod\limits_{k=1}^{\infty}} \left( 1-\frac{1}{2^{k}}\right) \approx0.28879; \] the infinite product here is related to something in analysis called a $q$-series. \ Another way to look at the constant $0.28879\ldots$\ is this: it is the limit, as $n$ goes to infinity, of the probability that an $n\times n$\ random matrix over $Z_{2}$\ is invertible. 
But we don't need heavy-duty analysis to show that the product has a constant lower bound. We use the inequality $(1-a)(1-b)=1-a-b+ab>1-(a+b)$, if $a,b\in (0,1)$. \ We just need to multiply the product out, ignore monomials involving two or more $\frac{1}{2^{k}}$\ terms multiplied together (which only increase the product), and observe that the product is lower-bounded by% \[ \left[ 1-\left( \frac{1}{2^{n}}+\frac{1}{2^{n-1}}+\cdots+\frac{1}{4}\right) \right] \cdot\frac{1}{2}\geq\frac{1}{4}. \] We conclude that we can determine $a$ with constant probability of error after repeating the algorithm $O\left( n\right) $ times. \ So the number of queries to $f$ used by Simon's algorithm is $O\left( n\right) $. \ The number of computation steps, though, is at least the number of steps needed to solve a system of linear equations, and the best known upper bound for this is $O\left( n^{2.376}\right) $, due to Coppersmith and Winograd. \subsection{Classical lower bound} We are going to prove that any classical probabilistic algorithm needs exponential time to solve this problem. Suppose that $a$ is chosen uniformly at random from $\left\{ 0,1\right\} ^{n}-\left\{ 0^{n}\right\} $. \ Now consider a classical probabilistic algorithm that's already made $k$ queries, to inputs $x_{1},\ldots,x_{k}$. \ We want to know how much information the algorithm could have obtained about $a$, given those queried pairs $(x_i,f(x_i))$. On the one hand, there might be a pair of inputs $x_{i},x_{j}$\ (with $1\leq i<j\leq k$) such that $f\left( x_{i}\right) =f\left( x_{j}\right) $. \ In this case, the algorithm already has enough information to determine $a$: $a=x_{i}\oplus x_{j}$. On the other hand, suppose no such pair $x_{i},x_{j}$\ exists. Then the queried $f(x_i)$'s are distinct and $a$ is none of the $\dbinom{k}{2}$ values $x_i\oplus x_j$. 
The probability that the next query will succeed is at most \[ \frac{k}{2^n-1-\dbinom{k}{2}} \] because there are at least $2^n-1-\dbinom{k}{2}$ possible values of $a$ remaining when the $(k+1)$-th query is made, and for the query to succeed $f(x_{k+1})$ must equal one of the previously observed values $f(x_i)$, $i\in \{1,\ldots,k\}$. Taking the sum over all $k\in \{1,\ldots,m\}$, we get \[ \sum_{k=1}^m \frac{k}{2^n-1-\dbinom{k}{2}} \le \sum_{k=1}^m \frac{k}{2^n-k^2}\le \frac{m^2}{2^n-m^2} \] In order to have a constant probability of success, we must choose $m=\Omega (2^{n/2})$. Hence, any classical probabilistic algorithm has to run in exponential time to get a correct answer with probability larger than a constant. \section*{Fourier transform on $\Z_N$} Let $f$ be a complex-valued function on $\Z_N$. Then its Fourier transform is \begin{equation*} \hat{f}(t)=\frac{1}{\sqrt{N}}\sum_{x\in\Z_N} f(x)w^{xt} \end{equation*} where $w=\exp(2\pi i/N)$. Let $B_1$ be the standard basis for $\C^{\Z_N}$ consisting of vectors $f_i(j)=\delta_{i,j}$. In the standard basis the matrix for the Fourier transform is \begin{equation*}FT_N= \begin{pmatrix} 1&1&1&1&\cdots&1\\ 1&w&w^2&w^3&\cdots&w^{N-1}\\ 1&w^2&w^4&w^6&\cdots&w^{2N-2}\\ 1&w^3&w^6&w^9&\cdots&w^{3N-3}\\ \vdots&\vdots&\vdots&\vdots&\ddots&\vdots\\ 1&w^{N-1}&w^{2N-2}&w^{3N-3}&\cdots&w^{(N-1)(N-1)} \end{pmatrix} \end{equation*} where the $(i,j)$'th entry of $FT_N$ is $w^{ij}$. \section*{Classical fast Fourier transform} Straightforward multiplication of the vector $f$ by $FT_N$ would take $\Omega(N^2)$ steps because multiplication of $f$ by each row requires $N$ multiplications. However, there is an algorithm known as the fast Fourier transform (FFT) that performs the Fourier transform in $O(N\log N)$ operations. In our presentation of the FFT we shall restrict ourselves to the case $N=2^n$. 
Let $B_2$ be a basis for $\C^{\Z_N}$ consisting of vectors \begin{equation*} f_i(j)= \begin{cases} \delta_{2i,j},&i\in\{0,1,\dotsc,N/2-1\},\\ \delta_{2i-N+1,j},&i\in\{N/2,N/2+1,\dotsc,N-1\}, \end{cases} \end{equation*} i.e., the vectors of the standard basis sorted by the least-significant bit. Then as a map from $B_2$ to $B_1$ the Fourier transform has the matrix representation % Fast and dirty way to create such a matrix. What is fast and beautiful way? \vspace{14pt}\begin{equation*} \begin{tabular}{c} \makebox[0pt]{\raisebox{14pt}[0pt][0pt]{\phantom{xxx}bit \#}} $j$\\$j+N/2$ \end{tabular} \left(\begin{tabular}{c|c} \makebox[0pt]{\raisebox{14pt}[0pt][0pt]{\phantom{xxx}$2k$}}$w^{2jk}$ &\makebox[0pt]{\raisebox{14pt}[0pt][0pt]{\phantom{xxxxxx}$2k+1$}}$w^{2jk}w^j$\\\hline$w^{2jk}$&$w^{2jk}w^j$ \end{tabular}\right)= \begin{pmatrix} FT_{N/2}&w^j FT_{N/2}\\ FT_{N/2}&-w^j FT_{N/2} \end{pmatrix}. \end{equation*} Hence, \begin{equation*} \left(\begin{tabular}{c|c} $w^{2jk}$ &$w^{2jk}w^j$\\\hline$w^{2jk}$&$w^{2jk}w^j$ \end{tabular}\right)\left(\begin{tabular}{c}$v_0$\\\hline $v_1$\end{tabular}\right)=\begin{pmatrix}FT_{N/2}v_0+w^j FT_{N/2}v_1\\FT_{N/2}v_0-w^jFT_{N/2}v_1\end{pmatrix}. \end{equation*} This representation gives a recursive algorithm for computing the Fourier transform in time $T(N)=2T(N/2)+O(N)=O(N\log N)$. 
As a circuit the algorithm can be implemented as \begin{figure} %% placeholder %[u].[dr]!C*\frm{\{} \restore \begin{xy} \xymatrix{ &\ar@{-}[r]& & & & &*\txt{identity}\ar@{->}@/_1.5pc/[dl]\\ *[r]{v_0}&\save [u].[dr]!C*\frm{\{} \restore\ar@{-}[r]& & FT_{N/2}& &*++[o][F]{\times 1}\ar@*{[|(3)]}@{-}[l]\ar@*{[|(3)]}@{-}[rr]\ar@*{[|(3)]}@{-}[rrddd]& &*++[o][F]{+}\ar@*{[|(3)]}@{-}[r]&\\ &\ar@{-}[r]& & & &\\ &\ar@{-}[r]& & & &\\ *[r]{v_1}&\save [u].[dr]!C*\frm{\{} \restore\ar@{-}[r]& & FT_{N/2}& &*++[o][F]{\times w^j}\ar@*{[|(3)]}@{-}[l]\ar@*{[|(3)]}@{-}[rr]\ar@*{[|(3)]}@{-}[rruuu]& &*++[o][F]{-}\ar@*{[|(3)]}@{-}[r]&\\ &\ar@{-}[r]& & & & &*\txt{multiplication by $w^j$}\ar@{->}@/^1.5pc/[ul]\\ \save"1,3"."3,5"*[F]\frm{}\restore \save"4,3"."6,5"*[F]\frm{}\restore} \end{xy} \caption{A circuit for classical fast Fourier transform}\label{fig:class} \end{figure} \section*{Quantum Fourier transform} Let $N=2^n$. Suppose a quantum state $\ket{\phi}$ on $n$ qubits is given as $\ket{\phi}=\sum_{j=0}^{N-1} \alpha_j\ket{j}$. Let the Fourier transform of $\ket{\phi}$ be $FT_N\ket{\phi}=\sum_{j=0}^{N-1}\beta_j \ket{j}$ where \begin{equation*} FT_N\begin{pmatrix}\alpha_0\\\alpha_1\\\vdots\\\alpha_{N-1}\end{pmatrix}= \begin{pmatrix}\beta_0\\\beta_1\\\vdots\\\beta_{N-1}\end{pmatrix}. \end{equation*} The map $FT_N\colon\ket{\phi}\mapsto\sum_{j=0}^{N-1}\beta_j\ket{j}$ is unitary (see the proof below), and is called the quantum Fourier transform (QFT). A natural question arises whether it can be efficiently implemented quantumly. The answer is that it can be implemented by a circuit of size $O(\log^2 N)$. However, this does not constitute an exponential speed-up over the classical algorithm because the result of the quantum Fourier transform is a superposition of states that can only be accessed through measurement, and any measurement can extract at most $n=\log N$ bits of information. 
A quantum circuit for quantum Fourier transform is \begin{figure} \begin{xy} \xymatrix{ &\ar@{-}[rr]^{n\text{'th bit}}& & & & & &*++[o][F]{R_n}\ar@{-}[ll]\ar@{-}[ddd]\ar@{-}[rrr] & & &\\ *[r]\txt<5pc>{most significant bits}&\save [u].[dr]!C*\frm{\{} \restore\ar@{-}[rr]^{n-1\text{'th bit}}& & & QFT_{N/2}& & & &*++[o][F]{R_{n-1}} \ar@{-}[lll]\ar@{-}[dd]\ar@{-}[rr] & &\\ &\ar@{.}[rr]& & & & & & & & &\ar@{.}[lllll]\\ *[r]\txt<5pc>{least significant bit}& & & & & & &*[r]{\bullet}&*[r]{\bullet} &*+[F]{H}\ar@{-}[r]\ar@{-}[llllllll]& \save"1,4"."3,6"*[F]\frm{}\restore } \end{xy} \caption{Circuit for quantum Fourier transform} \end{figure} where $R_K$ is the controlled phase shift by angle $2\pi/2^K$ whose matrix is \begin{equation*} R_K=\begin{pmatrix}1&0&0&0\\0&1&0&0\\0&0&1&0\\0&0&0&e^{2\pi i/2^K}\end{pmatrix}. \end{equation*} In the circuit above the quantum Fourier transform on $n-1$ qubits corresponds to the two Fourier transforms $FT_{N/2}$ in figure~\ref{fig:class}. The controlled phase shifts correspond to multiplications by $w^j$ in the classical circuit. Finally, the Hadamard gate at the very end corresponds to the summation. \section*{Properties of Fourier transform} \begin{itemize} \item $FT_N$ is unitary. Proof: the inner product of the $i$'th and $j$'th column of $FT_N$ where $i\neq j$ is \begin{equation*} \frac{1}{N}\sum_{k\in \Z_N} w^{ik} \overline{w^{jk}}=\frac{1}{N}\sum_{k\in\Z_N} w^{ik-jk}=\frac{1}{N}\sum_{k\in\Z_N} (w^{i-j})^k=\frac{1}{N}\frac{w^{N(i-j)}-1}{w^{i-j}-1}=\frac{1}{N}\frac{1-1}{w^{i-j}-1} \end{equation*} which is zero because $w^{i-j}\neq 1$ due to $i\neq j$. The norm of the $i$'th column is \begin{equation*} \sqrt{\frac{1}{N}\sum_{k\in \Z_N} w^{ik} \overline{w^{ik}}}=\sqrt{\frac{1}{N}\sum_{k\in\Z_N}1}=1. \end{equation*} \item $FT^{-1}_N$ is $FT_N$ with $w$ replaced by $w^{-1}$. Proof: since $FT_N$ is unitary we have $FT^{-1}_N=FT_N^*$. Since $FT_N$ is symmetric and $\bar{w}=w^{-1}$, the result follows. 
\item Fourier transform sends translation into phase rotation, and vice versa. More precisely, if we let the translation be $T_l\colon \ket{x}\mapsto \ket {x+l\pmod N}$ and the phase rotation be $P_k\colon \ket{x}\mapsto w^{kx}\ket{x}$, then $FT_N T_l P_k=P_l T_{-k}FT_N$. Proof: by linearity it suffices to prove this for a vector of the form $\ket{x}$. We have \begin{align*} FT_N T_l P_k \ket{x}&=FT_N w^{kx}\ket{x+l\pmod N}=\frac{1}{\sqrt{N}}w^{kx}\sum_{y\in\Z_N} w^{y(x+l)}\ket{y}\\ \intertext{and by making the substitution $y=y'-k$} &=\frac{1}{\sqrt{N}}\sum_{y'\in\Z_N} w^{xy'}w^{(y'-k)l}\ket{y'-k}=P_lT_{-k}\frac{1}{\sqrt{N}}\sum_{y'\in\Z_N} w^{xy'}\ket{y'}\\ &=P_lT_{-k}FT_N\ket{x}. \end{align*} Corollary: applying $FT_N$ followed by Fourier sampling to $\ket{\phi}$ is equivalent to applying it to $T_l\ket{\phi}$, since the phase factor $P_l$ does not affect the measurement probabilities. \item Suppose $r\mid N$. Let $\ket{\phi}=\frac{1}{\sqrt{N/r}}\sum_{j=0}^{N/r-1} \ket{jr}$. Then $FT_N \ket{\phi}=\frac{1}{\sqrt{r}}\sum_{i=0}^{r-1} \ket{i\frac{N}{r}}$. Proof: the amplitude of $\ket{i\frac{N}{r}}$ is \begin{align*} \frac{1}{\sqrt{N}}\frac{1}{\sqrt{N/r}}\sum_{j=0}^{N/r-1} w^{(jr)(iN/r)}=\frac{\sqrt{r}}{N}\sum_{j=0}^{N/r-1}1=\frac{1}{\sqrt{r}} \end{align*} Since $FT_N$ is unitary, the norm of $FT_N\ket{\phi}$ has to be equal to the norm of $\ket{\phi}$ which is $1$. However the orthogonal projection of $FT_N\ket{\phi}$ on the space spanned by vectors of the form $\ket{i\frac{N}{r}}$ has norm $1$. Therefore $FT_N\ket{\phi}$ lies in that space. If we apply the corollary above to $\ket{\phi}$ we conclude that the result of Fourier sampling of $T_l\ket{\phi}=\frac{\sqrt{r}}{\sqrt{N}}\sum_{j=0}^{N/r-1} \ket{jr+l}$ is a random multiple of $N/r$. \end{itemize} \end{document}