From 9bd7675dfe5f6684231d447fa1e7190b0bf2d302 Mon Sep 17 00:00:00 2001
From: "Collin J. Doering"
Date: Tue, 16 Oct 2018 23:39:34 -0400
Subject: [PATCH] Initial commit

Signed-off-by: Collin J. Doering
---
 .gitignore                     |   9 ++
 Makefile                       |  11 ++
 README.md                      |   5 +
 code/Mult.asm                  |  47 ++++++
 code/MultNaive.asm             |  25 +++
 mult-optimization-analysis.tex | 283 +++++++++++++++++++++++++++++++++
 6 files changed, 380 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 README.md
 create mode 100644 code/Mult.asm
 create mode 100644 code/MultNaive.asm
 create mode 100644 mult-optimization-analysis.tex

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2299d8c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+# LaTeX files
+*.aux
+*.log
+*.pdf
+*.toc
+
+# Editor specific files
+auto/
+*~
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f5c229f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,11 @@
+PDFLATEX=/usr/bin/pdflatex
+MASTER=mult-optimization-analysis.tex
+
+.PHONY: doc
+doc:
+	$(PDFLATEX) -file-line-error $(MASTER)
+
+.PHONY: clean
+clean:
+	@rm -rf auto
+	@find . -regex '.*\.\(aux\|log\|out\|pdf\|toc\)' -exec rm {} +
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..cc2610c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,5 @@
+# Multiplication Algorithm Analysis on Hack
+
+Given an implementation of the Hack computer detailed in the nand-to-tetris course, this
+repository provides an in-depth analysis of a simple swap-then-multiply algorithm in
+comparison to the naive implementation using repeated addition.
diff --git a/code/Mult.asm b/code/Mult.asm
new file mode 100644
index 0000000..b75c703
--- /dev/null
+++ b/code/Mult.asm
@@ -0,0 +1,47 @@
+// Multiplies R0 and R1 and stores the result in R2.
+// (R0, R1, R2 refer to RAM[0], RAM[1], and RAM[2], respectively.)
+
+        @2              // Load RAM[2] address into A register
+        M=0             // Set RAM[2] to 0
+        @1              // Load RAM[1] address into A register
+        D=M             // Set D register to RAM[1]
+        @0              // Load RAM[0] address into A register
+        D=D-M           // Subtract RAM[0] from RAM[1] and store the result in D register
+        @SWAP
+        D;JLT           // If RAM[1] < RAM[0] goto SWAP so the smaller value drives the loop
+        @LOOPCOND
+        0;JMP           // Otherwise jump to the loop condition
+
+(SWAP)
+        @0              // Load RAM[0] address into A register
+        D=M             // Set D register to RAM[0]
+        @3              // Load RAM[3] address into A register
+        M=D             // Set RAM[3] to RAM[0] (temporary storage)
+        @1              // Load RAM[1] address into A register
+        D=M             // Set D register to RAM[1]
+        @0              // Load RAM[0] address into A register
+        M=D             // Set RAM[0] to register D (RAM[1])
+        @3              // Load RAM[3] address into A register
+        D=M             // Set D register to RAM[3]
+        @1              // Load RAM[1] address into A register
+        M=D             // Set RAM[1] to register D (RAM[3])
+
+(LOOPCOND)
+        @0              // Load RAM[0] address into A register
+        D=M             // Set D register to RAM[0]
+        @LOOP
+        D;JGT           // If D > 0 goto LOOP
+        @END
+        0;JMP           // Otherwise (D <= 0) goto END
+(LOOP)
+        @1              // Load RAM[1] address into A register
+        D=M             // Set D register to RAM[1]
+        @2              // Load RAM[2] address into A register
+        M=D+M           // Add content of D register to RAM[2] and store result in RAM[2]
+        @0              // Load RAM[0] address into A register
+        M=M-1           // Subtract one from RAM[0]
+        @LOOPCOND
+        0;JMP           // Unconditionally jump back to the loop condition
+(END)
+        @END            // Terminate with an infinite loop, as is idiomatic on Hack
+        0;JMP
diff --git a/code/MultNaive.asm b/code/MultNaive.asm
new file mode 100644
index 0000000..2a7c43b
--- /dev/null
+++ b/code/MultNaive.asm
@@ -0,0 +1,25 @@
+// Multiplies R0 and R1 and stores the result in R2.
+// (R0, R1, R2 refer to RAM[0], RAM[1], and RAM[2], respectively.)
+
+        @2              // Load RAM[2] address into A register
+        M=0             // Set RAM[2] to 0
+
+(LOOPCOND)
+        @0              // Load RAM[0] address into A register
+        D=M             // Set D register to RAM[0]
+        @LOOP
+        D;JGT           // If D > 0 goto LOOP
+        @END
+        0;JMP           // Otherwise (D <= 0) goto END
+(LOOP)
+        @1              // Load RAM[1] address into A register
+        D=M             // Set D register to RAM[1]
+        @2              // Load RAM[2] address into A register
+        M=D+M           // Add content of D register to RAM[2] and store result in RAM[2]
+        @0              // Load RAM[0] address into A register
+        M=M-1           // Subtract one from RAM[0]
+        @LOOPCOND
+        0;JMP           // Unconditionally jump back to the loop condition
+(END)
+        @END            // Terminate with an infinite loop, as is idiomatic on Hack
+        0;JMP
diff --git a/mult-optimization-analysis.tex b/mult-optimization-analysis.tex
new file mode 100644
index 0000000..bb7bcf5
--- /dev/null
+++ b/mult-optimization-analysis.tex
@@ -0,0 +1,283 @@
+\documentclass[a4paper,10pt,fleqn]{article}
+
+\title{Analysis and Comparison of Two Multiplication Algorithms for the Hack Computer}
+\author{Collin J. Doering}
+
+\usepackage{amsmath,amssymb,fullpage,listings,xcolor,colortbl,tabu,pgfplots}
+
+% Adjust margin (right and left)
+%\addtolength{\textwidth}{2cm}
+%\addtolength{\hoffset}{-1cm}
+
+% Adjust margin (top and bottom)
+%\addtolength{\textheight}{2cm}
+%\addtolength{\voffset}{-1cm}
+
+\newtheorem{theorem}{Theorem}[section]
+\newtheorem{lemma}[theorem]{Lemma}
+\newtheorem{proposition}[theorem]{Proposition}
+\newtheorem{corollary}[theorem]{Corollary}
+\newtheorem{definition}{Definition}
+
+\begin{document}
+
+\maketitle
+
+\begin{abstract}
+  This article introduces two simple multiplication algorithms, written for the \emph{Hack}
+  computer in \emph{Hack} assembly. Thereafter, a thorough comparison of the number of
+  instructions required by each multiplication algorithm is given.
+\end{abstract}
+
+\tableofcontents
+\clearpage
+
+\section{Introduction}
+
+The \emph{Hack} computer, as specified by the book for \emph{Nand to
+  Tetris}\footnote{\label{nandtotetrisbook}The Elements of Computing Systems by Noam Nisan and
+  Shimon Schocken}, has two registers. The \emph{A} register is used to store addresses and
+data, whereas the \emph{D} register is used solely to store data. The \emph{M} register (which
+isn't technically a register, but acts like one) is used to access and modify \emph{RAM[A]},
+where \emph{A} is the value currently contained in the \emph{A} register. The ALU (Arithmetic
+Logic Unit) within the CPU unfortunately does not come with a circuit for
+multiplication\footnote{Among other features modern computers are expected to have, such as
+  floating point registers and operations.}, so multiplication must be implemented in
+software. Two ways of doing so are given here, followed by an analysis of each algorithm and
+finally a comparison of the number of \emph{Hack} machine instructions required by either
+algorithm.
+
+As most readers will already know, multiplication of natural numbers is simply repeated
+addition. This premise is used in both multiplication algorithms given in the upcoming section.
+Given formally:
+
+\begin{equation*}
+\forall a, b \in \mathbb{N} : a \cdot b = \sum_{i=1}^{b} a = \sum_{i=1}^{a} b
+\end{equation*}
+
+\subsection{Conventions}
+Throughout the following analyses $a$ and $b$ will refer to the decimal values of \emph{RAM[0]}
+and \emph{RAM[1]} respectively.
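+For example, if \emph{RAM[0]} initially holds $3$ and \emph{RAM[1]} holds $5$, then $a = 3$ and
+$b = 5$, and a correct multiplication program must leave
+\begin{equation*}
+a \cdot b = \sum_{i=1}^{3} 5 = 5 + 5 + 5 = 15
+\end{equation*}
+in \emph{RAM[2]}.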
+
+\section{Naive Implementation}
+\label{naive_section}
+Using the idea defined in the introduction, namely that multiplication of natural numbers is
+repeated addition, we set out to implement a program in \emph{Hack} assembly that models this
+behavior. Immediately we have a choice of whether to perform $a$ additions of $b$ or $b$
+additions of $a$. We settle on the former, but the choice is arbitrary. To be clear, we will
+add \emph{RAM[1]} to a running total in \emph{RAM[2]}, \emph{RAM[0]} times. A possible
+implementation is as follows.
+
+\subsection{Hack Assembly}
+\lstinputlisting[numbers=left,frame=L,breaklines=true,xleftmargin=\parindent]{code/MultNaive.asm}
+
+\subsection{Analysis}
+\label{naive_analysis}
+Beginning the analysis of the program at the first non-comment line, it is clear two
+instructions are run to initialize \emph{RAM[2]} to zero. Then follows a loop condition and an
+accompanying loop body. The loop condition is checked/executed $a + 1$ times, $a$ of which
+execute the loop body upon their completion.
+\newline
+
+\begin{gather*}
+  \text{Let } M_{naive} : \mathbb{N} \times \mathbb{N} \to \mathbb{N} \\
+  \begin{split}
+    M_{naive}(a,b) & = \underbrace{2}_{\text{initialize \emph{RAM[2]}}} + \underbrace{a(\overbrace{4}^{\text{condition}} + \overbrace{8}^{\text{body}})}_{\text{loop condition and body run $a$ times}} + \underbrace{6}_{\text{last run of the loop condition}} \\
+    & = 12a + 8
+  \end{split}
+\end{gather*}
+
+\subsubsection{Concluding Comments}
+After analysis of the naive implementation of multiplication in \emph{Hack} assembly, it is
+clear that the number of instructions required to compute $a \cdot b$ depends only on $a$: the
+larger $a$ is, the more instructions are required, regardless of $b$. This is problematic
+because multiplication is a commutative operation, so one would expect that regardless of the
+order of the inputs it performs a similar, if not identical, number of instructions. That is,
+$M_{naive}(a,b) \approx M_{naive}(b,a)$. This, however, is not the case. For example:
+\begin{equation*}
+\forall x \in \mathbb{N}, x > 0 : M_{naive}(x,0) > M_{naive}(0,x) \because 12x + 8 > 8
+\end{equation*}
+
+It becomes clear that in the case when $a > b$ our naive implementation will end up executing
+the loop body and condition instructions an additional $a - b$ times. Optimally we would like
+to check for this case and swap the values of $a$ and $b$. This corresponds to swapping the
+values contained within \emph{RAM[0]} and \emph{RAM[1]}, and is detailed in the following
+section.
+
+\section{Swapping Implementation}
+\label{swap_section}
+As mentioned at the end of the last section, in the case of $a > b$ the naive implementation
+will execute many unnecessary instructions. To avoid this, we instead check whether $a > b$
+and, if so, swap the two values before computing $a \cdot b$ by repeated addition as before. A
+possible implementation is as follows.
+
+\subsection{Hack Assembly}
+\lstinputlisting[numbers=left,frame=L,breaklines=true,xleftmargin=\parindent]{code/Mult.asm}
+
+\subsection{Analysis}
+\label{swap_analysis}
+Similarly to the naive algorithm outlined previously, the swapping implementation takes the
+same two instructions to initialize \emph{RAM[2]} to zero. Thereafter it executes another six
+instructions in the $a > b$ case and another eight in the $a \leq b$ case, for totals of eight
+and ten instructions respectively before the swap or the loop is reached. These instructions
+compute $b - a$ in order to determine whether $a > b$, finally making the appropriate jump.
+In the case $a > b$, that is, the swap case, an additional 12 instructions are executed to
+perform the swap of \emph{RAM[0]} and \emph{RAM[1]}, using \emph{RAM[3]} as temporary storage.
+Following the swap, the repeated addition of $a$, $b$ times occurs just as in the naive
+implementation, with the same cost per iteration; that is, the loop condition runs $b + 1$
+times, $b$ of which execute the loop body upon their completion.
+\begin{align*}
+  \text{Let } & M_{\leq} : \mathbb{N} \times \mathbb{N} \to \mathbb{N} \\
+  & M_{>} : \mathbb{N} \times \mathbb{N} \to \mathbb{N} \\
+  & M_{swap} : \mathbb{N} \times \mathbb{N} \to \mathbb{N}
+\end{align*}
+where $M_{\leq}(a,b)$ and $M_{>}(a,b)$ give the number of instructions executed in the
+$a \leq b$ and $a > b$ cases respectively, and $M_{swap}(a,b)$ gives the number of instructions
+executed in either case.
+
+\begin{align*}
+  \begin{split}
+    M_{\leq}(a,b) & = \underbrace{10}_{\text{initialize program}} + \underbrace{a(4 + 8) + 6}_{\text{loop (same \# of instructions as naive algorithm)}} \\
+    & = 12a + 16 \\
+    M_{>}(a,b) & = \underbrace{8}_{\text{initialize program}} + \underbrace{12}_{\text{swap \emph{RAM[0]} and \emph{RAM[1]}}} + \underbrace{b(4 + 8) + 6}_{\text{loop (same \# of instructions as naive algorithm)}} \\
+    & = 12b + 26
+  \end{split} \\
+  M_{swap}(a,b) & =
+  \begin{cases}
+    M_{>}(a,b) & \text{if } a > b \quad \text{(swap case)} \\
+    M_{\leq}(a,b) & \text{if } a \leq b \quad \text{(otherwise)}
+  \end{cases}
+\end{align*}
+
+\section{Comparison of Algorithms}
+Following the analyses given in Sections \ref{naive_analysis} and \ref{swap_analysis}, we need
+to find the difference in the number of instructions executed by each algorithm. Here we can
+choose to define the difference as either the number of instructions executed by the swap
+implementation minus the number executed by the naive implementation, or vice versa; that is,
+$D(a,b) = M_{swap}(a,b) - M_{naive}(a,b)$ or $D(a,b) = M_{naive}(a,b) - M_{swap}(a,b)$
+respectively. The choice is arbitrary and simply changes the meaning of the function's output,
+specifically its sign. Below we have chosen the former, so a negative output means the swap
+implementation executed fewer instructions and a positive output means it executed more.
+
+\begin{align*}
+  \text{Let } D & : \mathbb{N} \times \mathbb{N} \to \mathbb{Z} \\
+  D(a,b) & =
+  \begin{cases}
+    M_{>}(a,b) - M_{naive}(a,b) & \text{if } a > b \\
+    M_{\leq}(a,b) - M_{naive}(a,b) & \text{if } a \leq b
+  \end{cases} \\
+  & =
+  \begin{cases}
+    (12b + 26) - (12a + 8) & \text{if } a > b \\
+    (12a + 16) - (12a + 8) & \text{if } a \leq b
+  \end{cases} \\
+  & =
+  \begin{cases}
+    12(b - a) + 18 & \text{if } a > b \\
+    8 & \text{if } a \leq b
+  \end{cases}
+\end{align*}
+
+Notice that in the case $a \leq b$ the swap implementation actually takes 8 more instructions
+than the naive implementation. However, in the case $a > b$, $b - a$ is always negative, and
+whenever $b - a < -1$ we have $D(a,b) < 0$, which indicates that the swap implementation takes
+fewer instructions in the majority of the $a > b$ cases. The only instance where this is not
+the case is $b - a = -1$, where the swap implementation instead costs an extra 6 instructions.
+
+\subsection{Average Difference}
+
+Now that we have a function $D(a,b)$ that gives the difference in the number of instructions
+required to compute $a \cdot b$, we can proceed to determine the average difference; that is,
+how many instructions, on average, are saved or gained by using the swap implementation
+instead of the naive implementation.
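+Before deriving this average in closed form, it can be evaluated directly by brute force over
+all pairs $0 \leq a, b \leq n$ for a small bound $n$. A minimal sketch in Python is given
+below; it is illustrative only (the helper names are ours and the snippet is not part of the
+\emph{Hack} toolchain), and it checks the brute-force average against the closed form for
+$Avg(n)$ derived below.
+\begin{lstlisting}[language=Python,frame=L,breaklines=true,xleftmargin=\parindent]
+# Difference in executed instructions, M_swap(a,b) - M_naive(a,b), as derived above
+def D(a, b):
+    return 12 * (b - a) + 18 if a > b else 8
+
+# Brute-force average of D(a,b) over all pairs 0 <= a, b <= n
+def avg_direct(n):
+    total = sum(D(a, b) for a in range(n + 1) for b in range(n + 1))
+    return total / (n + 1) ** 2
+
+# Closed form derived at the end of this section
+def avg_closed(n):
+    return (8 + 9 * n - 2 * n ** 2) / (n + 1)
+
+for n in (0, 1, 3, 10, 50):
+    assert abs(avg_direct(n) - avg_closed(n)) < 1e-9
+
+print(avg_direct(3))      # 4.25: for small n the swap version costs slightly more on average
+print(avg_closed(32767))  # approx. -65523 over the full range of positive Hack words
+\end{lstlisting}
+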
+This average depends on the largest natural number the algorithms will be used with. On the
+\emph{Hack} computer, whose words are 16-bit two's complement values, the largest
+representable positive number is $2^{15} - 1 = 32767$, though we will calculate the general
+case where $n \in \mathbb{N}$.
+\begin{figure}[h!]
+  \centering
+  \begin{tabu}{ c | [2pt]c | c | c | c | c}
+    $(a,b)$ & $0$ & $1$ & $2$ & $3$ & \ldots \\ \tabucline[2pt]{-}
+    $0$ & \cellcolor{green!25}$(0,0)_{\leq}$ & \cellcolor{green!25}$(0,1)_{\leq}$ & \cellcolor{green!25}$(0,2)_{\leq}$ & \cellcolor{green!25}$(0,3)_{\leq}$ & \cellcolor{green!25}\ldots \\ \hline
+    $1$ & \cellcolor{blue!25}$(1,0)_{>}$ & \cellcolor{green!25}$(1,1)_{\leq}$ & \cellcolor{green!25}$(1,2)_{\leq}$ & \cellcolor{green!25}$(1,3)_{\leq}$ & \cellcolor{green!25}\ldots \\ \hline
+    $2$ & \cellcolor{blue!25}$(2,0)_{>}$ & \cellcolor{blue!25}$(2,1)_{>}$ & \cellcolor{green!25}$(2,2)_{\leq}$ & \cellcolor{green!25}$(2,3)_{\leq}$ & \cellcolor{green!25}\ldots \\ \hline
+    $3$ & \cellcolor{blue!25}$(3,0)_{>}$ & \cellcolor{blue!25}$(3,1)_{>}$ & \cellcolor{blue!25}$(3,2)_{>}$ & \cellcolor{green!25}$(3,3)_{\leq}$ & \cellcolor{green!25}\ldots \\ \hline
+    $\vdots$ & \cellcolor{blue!25}$\vdots$ & \cellcolor{blue!25}$\vdots$ & \cellcolor{blue!25}$\vdots$ & \cellcolor{blue!25}$\ddots$ & \cellcolor{green!25}$\ddots$ \\
+  \end{tabu}
+  \,
+  \begin{tabu}{ c | [2pt]c | c | c | c | c}
+    $b - a$ & $0$ & $1$ & $2$ & $3$ & \ldots \\ \tabucline[2pt]{-}
+    $0$ & \cellcolor{green!25}$0$ & \cellcolor{green!25}$1$ & \cellcolor{green!25}$2$ & \cellcolor{green!25}$3$ & \cellcolor{green!25}\ldots \\ \hline
+    $1$ & \cellcolor{blue!25}$-1$ & \cellcolor{green!25}$0$ & \cellcolor{green!25}$1$ & \cellcolor{green!25}$2$ & \cellcolor{green!25}\ldots \\ \hline
+    $2$ & \cellcolor{blue!25}$-2$ & \cellcolor{blue!25}$-1$ & \cellcolor{green!25}$0$ & \cellcolor{green!25}$1$ & \cellcolor{green!25}\ldots \\ \hline
+    $3$ & \cellcolor{blue!25}$-3$ & \cellcolor{blue!25}$-2$ & \cellcolor{blue!25}$-1$ & \cellcolor{green!25}$0$ & \cellcolor{green!25}\ldots \\ \hline
+    $\vdots$ & \cellcolor{blue!25}$\vdots$ & \cellcolor{blue!25}$\vdots$ & \cellcolor{blue!25}$\vdots$ & \cellcolor{blue!25}$\ddots$ & \cellcolor{green!25}$\ddots$ \\
+  \end{tabu}
+  \caption{Tables showing combinations of $(a,b)$ and of $b - a$, where blue cells indicate $a > b$ and green cells indicate $a \leq b$}
+  \label{fig:tables}
+\end{figure}
+
+Let $S = \{(x,y) \in \mathbb{N} \times \mathbb{N} : x \leq n, y \leq n\}$, where $S$ represents
+all possible inputs to either multiplication algorithm; that is, all possible pairs of natural
+numbers smaller than or equal to $n$.
+\begin{equation}
+  \label{eq:avg}
+  Avg(n) = \frac{\sum_{(a,b) \in S} D(a,b)}{(n + 1)^2}
+\end{equation}
+To find the average difference we need to compute Equation \ref{eq:avg}. From the tables shown
+in Figure \ref{fig:tables}, we can see the numerator of Equation \ref{eq:avg} can be broken
+into two different sums, one for each case $a \leq b$ and $a > b$, as follows.
+\begin{align*}
+  a \leq b & \implies D(a,b) = 8 \\
+  & \implies \sum_{(a,b) \in S : a \leq b} D(a,b) = 8 \cdot \sum_{i=1}^{n+1} i && \because \quad \left\vert{\{(a,b) \in S : a \leq b\}}\right\vert = \sum_{i=1}^{n+1} i \\
+  a > b & \implies D(a,b) = 12(b - a) + 18 \\
+  & \implies \sum_{(a,b) \in S : a > b} D(a,b) = \sum_{i=1}^{n} i(12(i - n - 1) + 18) && \because \sum_{(a,b) \in S : a > b} (b - a) = \sum_{i=1}^{n} i(i - n - 1)
+\end{align*}
+
+Finally, this leads to the following definition of the average difference function:
+\begin{align*}
+\text{Let } Avg & : \mathbb{N} \to \mathbb{Q} \\
+Avg(n) & = \frac{\sum_{(a,b) \in S} D(a,b)}{(n + 1)^2} \\
+& = \frac{8 \cdot \sum_{i=1}^{n+1} i + \sum_{i=1}^{n} i(12(i-n-1) + 18)}{(n + 1)^{2}} \\
+& = \frac{8 \cdot \frac{1}{2}(n + 1)(n + 2) + \sum_{i=1}^{n} i(12i - 12n + 6)}{(n + 1)^{2}} \\
+& = \frac{4(n + 1)(n + 2) + \sum_{i=1}^{n} (12i^{2} - 12ni + 6i)}{(n + 1)^{2}} \\
+& = \frac{4(n + 1)(n + 2) + 12 (\sum_{i=1}^{n} i^{2}) - 12n(\sum_{i=1}^{n} i) + 6(\sum_{i=1}^{n} i)}{(n + 1)^{2}} \\
+& = \frac{4(n + 1)(n + 2) + \frac{1}{6} \cdot 12n(n + 1)(2n + 1) - \frac{1}{2} \cdot 12n^{2}(n + 1) + \frac{1}{2} \cdot 6n(n + 1)}{(n + 1)^{2}} \\
+& = \frac{(n + 1)(4(n + 2) + 2n(2n + 1) - 6n^{2} + 3n)}{(n + 1)^{2}} \\
+& = \frac{4n + 8 + 4n^{2} + 2n - 6n^{2} + 3n}{n+1} \\
+& = \frac{8 + 9n - 2n^{2}}{n+1}
+\end{align*}
+
+\section{Conclusion}
+Now that we have defined an average difference function, we can finally quantify the
+difference in the number of instructions executed by either algorithm. In Figure
+\ref{fig:avg_graph}, a graph of the $Avg(n)$ function is given, where it is visually clear
+that $Avg(n)$ is decreasing. Formally, rewriting
+\begin{equation*}
+Avg(n) = \frac{8 + 9n - 2n^{2}}{n+1} = 11 - 2n - \frac{3}{n + 1}
+\end{equation*}
+shows that $Avg(n)$ is strictly decreasing over the natural numbers and that
+$\lim_{n \to \infty} Avg(n) = -\infty$. This means that the larger $n$ is, the more
+instructions the swap implementation saves on average. For our \emph{Hack} computer, where the
+largest representable positive number is $2^{15} - 1 = 32767$, the swap implementation saves
+on average approximately $65523$ instructions per multiplication, since
+$Avg(32767) \approx -65523$.
+
+\begin{figure}
+  \centering
+  \begin{tikzpicture}
+    \begin{axis}[
+      axis x line=center,
+      axis y line=left,
+      xmin=0,xmax=32,
+      ymin=-50,ymax=12,
+      xlabel=$n$,
+      ylabel={$Avg(n)$}]
+      \addplot [domain=0:32, samples=64, color=blue] {(8 + 9*x - 2*x^2)/(x+1)};
+    \end{axis}
+  \end{tikzpicture}
+  \caption{Graph of the average difference function}
+  \label{fig:avg_graph}
+\end{figure}
+
+In closing, utilizing the swap implementation of the multiplication algorithm given in Section
+\ref{swap_section} is significantly better than using the naive implementation given in
+Section \ref{naive_section}. The idea of iterating over the smaller of $a$ and $b$ applies
+generally to any multiplication routine based on repeated addition, though this is not
+rigorously proven in this article.
+
+\end{document}
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: t
+%%% End: