Commit 8c6adbca authored by Ben Glocker's avatar Ben Glocker
Browse files

tutorials 5-7

parent ecec7a38
......@@ -615,7 +615,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {
"collapsed": true
},
......@@ -651,7 +651,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {
"collapsed": true
},
......@@ -685,7 +685,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
......@@ -707,14 +707,14 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[973886, 409460, 337342, 398719, 273231, 700388, 243130, 347759, 604817, 319332, 721668, 8793, 485780, 541846, 42575, 77128, 47963, 528511, 342950, 552766]\n"
"[658042, 242361, 863569, 787057, 225873, 237945, 236740, 938816, 355375, 141558, 892528, 211345, 581149, 33164, 158996, 838642, 498178, 151876, 1675, 23323]\n"
]
}
],
......
%% Cell type:markdown id: tags:
# CO202 - Software Engineering - Algorithms
# CO202 - Algorithms 2
%% Cell type:markdown id: tags:
## Tutorial on Dynamic Programming: Fibonacci
......@@ -71,12 +71,12 @@
#### Test data
%% Cell type:code id: tags:
``` python
data = range(1,20)
#data = range(1,30)
#data = range(1,20)
data = range(1,30)
#data = range(1,1000,10)
#data = range(1,10000,100)
#data = range(1,100000,1000)
```
......@@ -122,10 +122,18 @@
plt.ylabel('time (/s)')
plt.xlim(0)
plt.ylim(0)
```
%%%% Output: execute_result
(0, 0.6281491025812429)
%%%% Output: display_data
![](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAFQNJREFUeJzt3X2QXXd93/H3R7Jdsi4txFYIta1du1GGcaih8dakjUvpNG4EZUal8RDJGx466WztYELadIqpMpQ24zZNUibTicFZiCcw2tTjKU8qYyICQ4vTQNDK49jYrkHYehzABgJYEcUIffvHvTq+Wla7d1d79u69+37N7Nx7fue3536PjnQ/Oud3HlJVSJIEsGnQBUiS1g9DQZLUMBQkSQ1DQZLUMBQkSQ1DQZLUMBQkSQ1DQZLUMBQkSY0LBl3Acl166aU1MTEx6DIkaagcOHDga1W1Zal+QxcKExMTzM3NDboMSRoqSQ7308/DR5KkhqEgSWoYCpKkhqEgSWq0GgpJtid5LMnBJLedo88rkjyQ5OEk/7vNeiRJi2vt7KMkm4E7gBuAY8D+JHur6pGePs8D3gVsr6ojSX6krXokSUtrc0/hOuBgVT1eVc8AdwM75vW5CfhgVR0BqKonW6xHkrSENkPhMuBoz/SxbluvHween+R/JTmQ5PUt1iNJWsKgL167ALgW+EfADwGfSfLZqvpCb6ck08A0wNatW9e8SEnaKNrcUzgOXNEzfXm3rdcxYF9V/WVVfQ34NPCS+QuqqpmqmqyqyS1blrxKW5K0Qm2Gwn5gW5Irk1wE7AT2zuvzEeD6JBckGQNeBjzaYk2SpEW0FgpVdQq4FdhH54v+nqp6OMnNSW7u9nkU+CPgQeBzwHur6vNt1SRJQ2F2FiYmYNOmzuvs7Jp9dKpqzT5sNUxOTpY3xJM0smZnYXoaTp58tm1sDGZmYGpqxYtNcqCqJpfq5xXNkrSe7N59diBAZ3r37jX5eENBktaTI0eW177KDAVJWk/Oddr9Gp2ObyhI0npy++2dMYReY2Od9jVgKEjSejI11RlUHh+HpPN6noPMyzHoK5olSfNNTa1ZCMznnoIkqWEoSJIahoIkqWEoSJIahoIkqWEoSJIahoIkqWEoSJIahoIkqWEoSJIahoIkqWEoSJIahoIkqWEoSJIahoIkqWEoSJIahoIkqWEoSJIahoIkqWEoSJIarYZCku1JHktyMMltC8x/RZJvJXmg+/P2NuuRJC3ugrYWnGQzcAdwA3AM2J9kb1U9Mq/rfVX16rbqkCT1r809heuAg1X1eFU9A9wN7Gjx8yRJ56nNULgMONozfazbNt/fS/Jgko8l+YkW65EkLaG1w0d9uh/YWlUnkrwK+DCwbX6nJNPANMDWrVvXtkJJ2kDa3FM4DlzRM315t61RVd+uqhPd9/cCFya5dP6CqmqmqiaranLLli0tlixJG1ubobAf2JbkyiQXATuBvb0dkvxoknTfX9et5+st1iRJWkRrh4+q6lSSW4F9wGbgrqp6OMnN3fl3AjcCtyQ5BXwH2FlV1VZNkqTFZdi+gycnJ2tubm7QZUjSUElyoKoml+rnFc2SpIahIElqGAqSpIahIElqGAqSpIahIElqGAqSpIahIElqGAqSpIahIElqGAqSpIahIElqGAqSpIahIElqGAqSpIahIElqGAqSpIahIElqGAqSpIahIElqGAqSpIahIElqGAqSpIahIElqGAqSpIahIElqtBoKSbYneSzJwSS3LdLv7yQ5leTGNuuRJC2utVBIshm4A3glcDWwK8nV5+j3X4CPt1WLJKk/be4pXAccrKrHq+oZ4G5gxwL93gx8AHiyxVokSX1oMxQuA472TB/rtjWSXAa8Bnj3YgtKMp1kLsncU089teqFSpI6Bj3Q/DvAW6vq9GKdqmqmqiaranLLli1rVJokbTwXtLjs48AVPdOXd9t6TQJ3JwG4FHhVklNV9eEW65IknUObobAf2JbkSjphsBO4qbdDVV155n2SPwA+aiBI0uC0FgpVdSrJrcA+YDNwV1
U9nOTm7vw72/psSdLKtLmnQFXdC9w7r23BMKiqN7ZZiyRpaYMeaJYkrSOGgiSpYShIkhqGgiSp0fdAc5LnA38D+A5waKkLziRJw2fRUEjy14E3AbuAi4CngOcAL0jyWeBdVfWp1quUJK2JpfYU/gfwfuDvV9U3e2ckuRZ4XZKrqur32ypQkrR2Fg2FqrphkXkHgAOrXpEkaWD6GmhO8tNJLu6+/4Uk70wy3m5pkqS11u/ZR+8GTiZ5CfCrwJfoHFaSJI2QfkPhVFUVnYfk/G5V3QE8t72yJEmD0O8pqU8neRvwC8DLk2wCLmyvLEnSIPS7p/DzwHeBX6yqr9B5NsJvtVaVJGkglrpOYR/wR8DHquqdZ9qr6giOKUjSyFlqT+ENwF8A70hyf5J3J9lx5kwkSdJoWTQUquorVfUHVbWTzqMz3w9cC3w8ySeS/Nu1KFKSRsLsLExMwKZNndfZ2UFX9AP6vvdR915Hn+n+vD3JpcDPtlWYJI2U2VmYnoaTJzvThw93pgGmpgZX1zyL7ikk+bUkP7zQvKr6GvCVJK9upTJJGiW7dz8bCGecPNlpX0eW2lN4CPifSf4fcD/P3hBvG/BS4BPAf2q1QkkaBUeOLK99QJa699FHgI8k2Qb8NPBC4NvAHmC6qr7TfomSNAK2bu0cMlqofR3pa0yhqr4IfLHlWiRpdN1++9ljCgBjY532dcQnr0nSWpiagpkZGB+HpPM6M7OuBplhGWcfSZLO09TUuguB+dxTkCQ1+n2ewo8n+WSSz3enr0nya+2WJklaa/3uKbwHeBvwPYCqehDYudQvJdme5LEkB5PctsD8HUkeTPJAkrkk1y+neEnS6up3TGGsqj6XpLft1GK/kGQzcAdwA3AM2J9kb1U90tPtk8Deqqok1wD3AC/qu3pJ0qrqd0/ha0n+JlAASW4EvrzE71wHHKyqx6vqGeBuOg/paVTVie7DewAuPrN8SdJg9Lun8CZgBnhRkuPAE3QeuLOYy4CjPdPHgJfN75TkNcB/Bn4E+Cd91iNJakG/F689DvxM95bZm6rq6dUqoKo+BHwoycuBXwd+Zn6fJNPANMDWdXb1nySNkr5CIcnzgNcDE8AFZ8YWquqXF/m148AVPdOXd9sWVFWfTnJVkku7N9vrnTdDZ0+FyclJDzFJUkv6PXx0L/BZOjfIO93n7+wHtiW5kk4Y7ARu6u2Q5MeAL3UHmn8S+CvA1/tcviRplfUbCs+pqn+9nAVX1akktwL7gM3AXVX1cJKbu/PvBH4OeH2S7wHfAX6+Z+BZkrTG0s93cJJ/BZwAPgp890x7VX2jvdIWNjk5WXNzc2v9sZI01JIcqKrJpfr1u6fwDPBbwG6ePW20gKtWVp4kaT3qNxR+Ffix+QPAkqTR0u/FaweBk0v2kiQNtX73FP4SeCDJpzh7TGGxU1IlSUOm31D4cPdHkjTC+r2i+X1tFyJJGrxFQyHJPVX12iQPscDN6qrqmtYqkyStuaX2FN7SfX1124VIkgZv0bOPqurM7bF/qaoO9/4Av9R+eZKktdTvKak3LND2ytUsRJI0eEuNKdxCZ4/gqiQP9sx6LvB/2ixMkrT2lhpT+EPgY3QegtP7jOWnB3HfI0lSuxYNhar6FvAtYNfalCNJGqR+xxQkSRuAoSBJahgKkqSGoSBJahgKkqSGoSBJahgKknQ+ZmdhYgI2beq8zs4OuqLz0u/zFCRJ883OwvQ0nOw+mPLw4c40wNTU4Oo6D+4pSNJK7d79bCCccfJkp31IGQqStFJHjiyvfQgYCpK0Ulu3Lq99CBgKkrRSt98OY2Nnt42NddqHlKEgSSs1NQUzMzA+DknndWZmaAeZoeVQSLI9yWNJDia5bYH5U0keTPJQkj9N8pI265GkVTc1BYcOwenTndchDgRoMRSSbAbuoPOEtquBXUmuntftCeAfVNXfAn4dmGmrHknS0trcU7gOOFhVj1fVM8DdwI7eDlX1p1X1F93JzwKXt1iPJG
kJbYbCZcDRnulj3bZz+UU6T3mTJA3IuriiOck/pBMK159j/jQwDbB1iE/1kqT1rs09hePAFT3Tl3fbzpLkGuC9wI6q+vpCC6qqmaqarKrJLVu2tFKsJKndUNgPbEtyZZKLgJ3A3t4OSbYCHwReV1VfaLEWSVIfWjt8VFWnktwK7AM2A3dV1cNJbu7OvxN4O3AJ8K4kAKeqarKtmiRJi0tVDbqGZZmcnKy5ublBlyFJQyXJgX7+0+0VzZKkhqEgSWoYCpKkhqEgSWoYCpKkhqEgSWoYCpKkhqEgSWoYCpKkhqEgSWoYCpKkhqEgSWoYCpKkhqEgSWoYCpKkhqEgSWoYCpKkhqEgSWoYCpKkhqEgSWoYCpKkhqEgSWoYCpKkhqEgSWoYCpKkhqEgSfPNzsLEBGza1HmdnR10RWum1VBIsj3JY0kOJrltgfkvSvKZJN9N8m/arEWS+jI7C9PTcPgwVHVep6c3TDC0FgpJNgN3AK8ErgZ2Jbl6XrdvAL8M/HZbdUjSsuzeDSdPnt128mSnfQNoc0/hOuBgVT1eVc8AdwM7ejtU1ZNVtR/4Xot1SFL/jhxZXvuIaTMULgOO9kwf67YtW5LpJHNJ5p566qlVKU6SFrR16/LaR8xQDDRX1UxVTVbV5JYtWwZdjqRRdvvtMDZ2dtvYWKd9A2gzFI4DV/RMX95tk6T1a2oKZmZgfBySzuvMTKd9A2gzFPYD25JcmeQiYCewt8XPk6RzW85pplNTcOgQnD7ded0ggQBwQVsLrqpTSW4F9gGbgbuq6uEkN3fn35nkR4E54K8Bp5P8CnB1VX27rbokbUBnTjM9c1bRmdNMYUN94fcjVTXoGpZlcnKy5ubmBl2GpGEyMdEJgvnGxzt7AhtAkgNVNblUv6EYaJak87LBTzNdDkNB0ujb4KeZLoehIGn0bfDTTJfDUJA0+jb4aabL0drZR5K0rkxNGQJ9cE9BktQwFCRJDUNBktQwFCQNrw38hLS2ONAsaTh564pWuKcgaTht8CektcVQkDScvHVFKwwFScPJW1e0wlCQNJy8dUUrDAVJw8lbV7TCs48kDS9vXbHq3FOQtL547cFAuacgaf3w2oOBc09B0vrhtQcDZyhIWj+89mDgDAVJ7et3nMBrDwbOUJDUrjPjBIcPQ9Wz4wQLBYPXHgycoSCpXcsZJ/Dag4EzFCStTL+HhJY7TjA1BYcOwenTnVcDYU0ZCpKe1e8X/XIOCTlOMFRaDYUk25M8luRgktsWmJ8k/607/8EkP9lmPdK6t5wv5X4v8Grji345h4QcJxguVdXKD7AZ+BJwFXAR8OfA1fP6vAr4GBDgp4A/W2q51157bS1lzy331fjmoxW+X+Obj9aeW+6z7yJ9B/359j3TcU/tufCNNc4Tnb48UXsufGPVnj0r67fcvuPjtYddZ/dlV9X4+A/2TRbum5z/n9eezkcmndeFSh31vstZZr+Auernu7ufTiv5Af4usK9n+m3A2+b1+T1gV8/0Y8ALF1vuUqGw55b7aowT1fmvTudnjBML/iW07+A/3749fS9588J9L3nzivotuy83LdyXm85vuXuqxsbq7L5j58ywDd93OctcjvUQCjcC7+2Zfh3wu/P6fBS4vmf6k8DkYstdKhTGNx896w/zzM/45qP2XaDvoD/fvj19eWLhvjyxon7L7rucWi95euG+lzz9g33Hf7AfLLwDYt/lLXM5+g2FdPquviQ3Atur6l90p18HvKyqbu3p81HgN6rqT7rTnwTeWlVz85Y1DXRvgMKLgc+f+5Ovvfbc8w4cWN99nwK2rHENo7pea9XXbdZeDW31Xe/rtpxlnuVS4GuLzB+vqi2LzAfavSHeceCKnunLu23L7UNVzQAzAEnmqmpydUtdHzrrdnjk1m1U1wtGd91Gdb1gdNdttb4b2zz7aD+wLcmVSS4CdgJ75/XZC7y+exbSTwHfqqovt1iTJGkRre0pVNWpJLcC++iciXRXVT2c5Obu/D
uBe+mcgXQQOAn887bqkSQtrdXnKVTVvXS++Hvb7ux5X8CblrnYmVUobb0a1XUb1fWC0V23UV0vGN11W5X1am2gWZI0fLzNhSSpMVShsNRtM4ZVkkNJHkryQJK5pX9j/UpyV5Ink3y+p+2Hk/xxki92X58/yBpX6hzr9o4kx7vb7oEkrxpkjSuR5Iokn0rySJKHk7yl2z7U222R9RqFbfacJJ9L8ufddfsP3fbz3mZDc/goyWbgC8ANwDE6ZzftqqpHBlrYKkhyiM5Fe4udYzwUkrwcOAG8v6pe3G37TeAbVfUb3TB/flW9dZB1rsQ51u0dwImq+u1B1nY+kryQzp0E7k/yXOAA8E+BNzLE222R9Xotw7/NAlxcVSeSXAj8CfAW4J9xnttsmPYUrgMOVtXjVfUMcDewY8A1aZ6q+jTwjXnNO4D3dd+/j84/zKFzjnUbelX15aq6v/v+aeBR4DKGfLstsl5Dr3uR8onu5IXdn2IVttkwhcJlwNGe6WOMyAamszE/keRA9+rtUfOCnutPvgK8YJDFtODN3bv83jVsh1jmSzIB/G3gzxih7TZvvWAEtlmSzUkeAJ4E/riqVmWbDVMojLLrq+qlwCuBN3UPU4yk7mnIw3HMsj/vpnMn4JcCXwb+62DLWbkkfxX4APArVfXt3nnDvN0WWK+R2GZV9f3u98blwHVJXjxv/oq22TCFQl+3xBhGVXW8+/ok8CE6h8pGyVe7x3fPHOd9csD1rJqq+mr3H+dp4D0M6bbrHpf+ADBbVR/sNg/9dltovUZlm51RVd8EPgVsZxW22TCFQj+3zRg6SS7uDoKR5GLgH7PoDf+G0l7gDd33bwA+MsBaVtWZf4Bdr2EIt1130PL3gUer6p09s4Z6u51rvUZkm21J8rzu+x+icwLO/2UVttnQnH0E0D117Hd49rYZQ//opiRX0dk7gM4V5n84zOuV5L8Dr6Bzx8avAv8e+DBwD7AVOAy8tqqGbsD2HOv2CjqHIQo4BPzLYbt/V5LrgfuAh4DT3eZ/R+f4+9But0XWaxfDv82uoTOQvJnOf+7vqar/mOQSznObDVUoSJLaNUyHjyRJLTMUJEkNQ0GS1DAUJEkNQ0GS1DAUJEkNQ0GS1DAUpPOUZCLJo0ne0723/ce7V5lKQ8dQkFbHNuCOqvoJ4JvAzw24HmlFDAVpdTxRVQ903x8AJgZYi7RihoK0Or7b8/77dO5jJQ0dQ0GS1DAUJEkN75IqSWq4pyBJahgKkqSGoSBJahgKkqSGoSBJahgKkqSGoSBJahgKkqTG/wfcadOMYLc2RwAAAABJRU5ErkJggg==)
%% Cell type:code id: tags:
``` python
```
......
This diff is collapsed.
%% Cell type:markdown id: tags:
# CO202 - Algorithms 2
%% Cell type:markdown id: tags:
## Tutorial on Randomised Algorithms: Approximating Pi
%% Cell type:code id: tags:
``` python
import numpy as np
def approximate_pi(samples):
    """Estimate pi from 2-D points with the Monte Carlo "circle in a square" method.

    The fraction of points lying inside the unit circle, multiplied by 4, is
    returned. For points drawn uniformly from the square [-1, 1] x [-1, 1]
    this fraction approximates pi / 4 (circle area over square area).

    Parameters
    ----------
    samples : array-like
        One point per row; the callers in this notebook pass shape (n, 2).

    Returns
    -------
    float
        ``4 * m / n`` where ``m`` is the number of rows whose squared norm is
        at most 1 and ``n`` is the number of rows; ``nan`` if there are no rows.
    """
    points = np.asarray(samples, dtype=float)
    n = len(points)
    if n == 0:
        # No samples, no estimate: avoid a division by zero.
        return float('nan')
    # x^2 + y^2 <= 1 holds exactly when sqrt(x^2 + y^2) <= 1, so the square
    # root is unnecessary.
    m = np.sum(np.sum(points * points, axis=1) <= 1)
    # 4.0 forces true division; plain 4*m/n would be truncated to an integer
    # on Python 2.
    return 4.0 * m / n
```
%% Cell type:code id: tags:
``` python
def monte_carlo_pi(num_samples, num_runs):
    """Average the pi estimate over several independent Monte Carlo runs.

    Each run draws ``num_samples`` points uniformly from the square
    [-1, 1) x [-1, 1) and feeds them to ``approximate_pi``; the mean of the
    ``num_runs`` estimates is returned.
    """
    estimates = (
        approximate_pi(np.random.rand(num_samples, 2) * 2 - 1)
        for _ in range(num_runs)
    )
    return sum(estimates) / num_runs


print(monte_carlo_pi(100,1000))
```
%% Cell type:markdown id: tags:
### Visualisation
%% Cell type:code id: tags:
``` python
%matplotlib inline
from matplotlib import pyplot as plt
# Draw uniform points in the square [-1, 1) x [-1, 1) at three sample sizes
# (drawn in the order 100, 1000, 10000).
samples_100, samples_1000, samples_10000 = (
    np.random.rand(count, 2) * 2 - 1 for count in (100, 1000, 10000)
)

# Boolean masks of the points whose distance from the origin is at most 1,
# and the matching subsets of points.
flag_100 = np.sqrt((samples_100 * samples_100).sum(axis=1)) <= 1
flag_1000 = np.sqrt((samples_1000 * samples_1000).sum(axis=1)) <= 1
flag_10000 = np.sqrt((samples_10000 * samples_10000).sum(axis=1)) <= 1
samples_in_circle_100 = samples_100[flag_100, :]
samples_in_circle_1000 = samples_1000[flag_1000, :]
samples_in_circle_10000 = samples_10000[flag_10000, :]

# Estimate of pi obtained from each sample set.
pi_100 = approximate_pi(samples_100)
pi_1000 = approximate_pi(samples_1000)
pi_10000 = approximate_pi(samples_10000)

# One panel per sample size: all points first, then the points inside the
# circle drawn over them in red.
fig, axs = plt.subplots(1, 3, figsize=(18,5))
panels = (
    (samples_100, samples_in_circle_100, '100 samples. ~pi: {}'.format(pi_100)),
    (samples_1000, samples_in_circle_1000, '1000 samples. ~pi: {}'.format(pi_1000)),
    (samples_10000, samples_in_circle_10000, '10000 samples. ~pi: {}'.format(pi_10000)),
)
for panel_ax, (all_points, inside_points, panel_title) in zip(axs, panels):
    panel_ax.scatter(all_points[:,0], all_points[:,1], marker='o')
    panel_ax.scatter(inside_points[:,0], inside_points[:,1], color='red')
    panel_ax.set_title(panel_title)
plt.show()
```
%% Cell type:markdown id: tags:
### Comparison
%% Cell type:code id: tags:
``` python
%matplotlib inline
from matplotlib import pyplot as plt
try:
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time. It is bound to the name
    # `clock` so existing uses of that name keep working.
    from time import perf_counter as clock
except ImportError:
    # Interpreters older than Python 3.3 only provide time.clock().
    from time import clock


def time_f(f):
    """Run ``f`` once and return how long the call took.

    Parameters
    ----------
    f : callable
        Called with no arguments; its return value is discarded.

    Returns
    -------
    float
        Elapsed time in seconds (not milliseconds), as measured by ``clock``.
    """
    before = clock()
    f()
    after = clock()
    return after - before
n = 20  # number of repeated measurements per configuration
t1, t2, t3, t4, t5 = [], [], [], [], []

# (timings list, samples per run, number of runs, marker colour, legend label).
# Every configuration draws 1,000,000 points in total.
timing_configs = (
    (t1, 100, 10000, 'r', '100/10000'),
    (t2, 1000, 1000, 'b', '1000/1000'),
    (t3, 10000, 100, 'y', '10000/100'),
    (t4, 100000, 10, 'g', '100000/10'),
    (t5, 1000000, 1, 'k', '1000000/1'),
)

# Time each configuration once per repetition, in the order t1..t5.
for _ in range(n):
    for run_times, samples_per_run, run_count, _colour, _label in timing_configs:
        run_times.append(time_f(lambda: monte_carlo_pi(samples_per_run, run_count)))

# Plot the running times of all sample/run configurations against each other.
for run_times, _samples, _runs, colour, label in timing_configs:
    plt.scatter(range(n), run_times, marker='x', c=colour, label=label)
plt.legend(loc='upper left')
plt.xlim((0, n))
plt.ylim((0, max(max(cfg[0]) for cfg in timing_configs)))
plt.xlabel('n')
plt.ylabel('time /s')
```
%% Cell type:code id: tags:
``` python
%matplotlib inline
from matplotlib import pyplot as plt
n = 20  # number of repeated estimates per configuration
e1, e2, e3, e4, e5 = [], [], [], [], []

# (errors list, samples per run, number of runs, marker colour, legend label).
error_configs = (
    (e1, 100, 10000, 'r', '100/10000'),
    (e2, 1000, 1000, 'b', '1000/1000'),
    (e3, 10000, 100, 'y', '10000/100'),
    (e4, 100000, 10, 'g', '100000/10'),
    (e5, 1000000, 1, 'k', '1000000/1'),
)

# Record the absolute error |pi - estimate| for each configuration, in the
# order e1..e5 within every repetition.
for _ in range(n):
    for abs_errors, samples_per_run, run_count, _colour, _label in error_configs:
        abs_errors.append(np.abs(np.pi - monte_carlo_pi(samples_per_run, run_count)))

# Plot the errors of all sample/run configurations against each other.
for abs_errors, _samples, _runs, colour, label in error_configs:
    plt.scatter(range(n), abs_errors, marker='x', c=colour, label=label)
plt.legend(loc='upper left')
plt.xlim((0, n))
plt.ylim((0, max(max(cfg[0]) for cfg in error_configs)))
plt.xlabel('n')
plt.ylabel('error')

# Mean absolute error per configuration, e1 first.
for abs_errors, _samples, _runs, _colour, _label in error_configs:
    print(np.mean(abs_errors))
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
# CO202 - Algorithms 2
%% Cell type:markdown id: tags:
## Tutorial on String Matching
%% Cell type:markdown id: tags:
### Naive String Matching
%% Cell type:code id: tags:
``` python
def naive_matcher(T,P):
    """Return every shift at which pattern ``P`` occurs in text ``T``.

    Brute force: each candidate shift ``s`` in ``0 .. len(T) - len(P)`` is
    checked character by character, stopping at the first mismatch.

    Returns a list of 0-based shifts in increasing order. An empty pattern
    matches at every shift ``0 .. len(T)``.
    """
    text_len, pattern_len = len(T), len(P)
    return [
        shift
        for shift in range(text_len - pattern_len + 1)
        if not any(P[j] != T[shift + j] for j in range(pattern_len))
    ]
```
%% Cell type:markdown id: tags:
### Knuth-Morris-Pratt
%% Cell type:code id: tags:
``` python
def prefix_function(P):
    """Compute the Knuth-Morris-Pratt prefix function of ``P``.

    Parameters
    ----------
    P : sequence
        Any indexable sequence whose elements can be compared with ``==``
        (typically a string).

    Returns
    -------
    list of int
        ``pi`` with ``len(pi) == len(P)``, where ``pi[q]`` is the length of
        the longest proper prefix of ``P[:q+1]`` that is also a suffix of it.
        An empty ``P`` yields ``[]``.
    """
    m = len(P)
    # Every entry starts at 0, so pi[0] needs no explicit assignment; writing
    # it would raise IndexError for an empty pattern.
    pi = [0] * m
    k = 0  # length of the border currently being extended
    for q in range(1, m):
        # Fall back to shorter borders until P[q] extends one (or none is left).
        while k > 0 and P[k] != P[q]:
            k = pi[k - 1]
        if P[k] == P[q]:
            k += 1
        pi[q] = k
    return pi
def kmp_matcher(T, P):
    """Find all occurrences of ``P`` in ``T`` with the Knuth-Morris-Pratt algorithm.

    Parameters
    ----------
    T : sequence
        Text to search.
    P : sequence
        Pattern to look for.

    Returns
    -------
    list of int
        0-based shifts, in increasing order, at which ``P`` occurs in ``T``.
        An empty pattern matches at every shift ``0 .. len(T)``, the same
        result ``naive_matcher`` gives.
    """
    shifts = []
    n = len(T)
    m = len(P)
    if m == 0:
        # The main loop indexes P[q] and would fail on an empty pattern.
        return list(range(n + 1))
    pi = prefix_function(P)
    q = 0  # number of pattern characters currently matched
    for i in range(n):
        # On a mismatch, fall back to the longest border of the matched prefix.
        while q > 0 and P[q] != T[i]:
            q = pi[q - 1]
        if P[q] == T[i]:
            q += 1
        if q == m:
            shifts.append(i - m + 1)
            # Continue from the longest border so overlapping matches are found.
            q = pi[q - 1]
    return shifts
```
%% Cell type:markdown id: tags:
### Boyer-Moore
%% Cell type:code id: tags:
``` python
def bcr_table(P):
    """Build the bad-character-rule table for pattern ``P``.

    Maps each character of ``P`` to the 1-based position of its last
    (rightmost) occurrence in the pattern.
    """
    return {char: position for position, char in enumerate(P, 1)}
def gsr_table(P):
    """Build the good-suffix-rule shift table for the string pattern ``P``.

    Entry ``j`` is the shift returned by ``find_suffix_shift`` for a mismatch
    at pattern index ``j`` (0-based), i.e. with bad character ``P[j]`` and
    already-matched suffix ``P[j+1:]``.
    """
    return [find_suffix_shift(P[j], P[j + 1:], P) for j in range(len(P))]
def find_suffix_shift(badchar, suffix, P):
    """Return the smallest good-suffix shift for a matched ``suffix`` of ``P``.

    Candidate shifts ``0, 1, ..., len(P)`` are tried in increasing order. A
    shift is accepted when both conditions hold:

    * ``suffix``, moved left by that shift from its position at the end of
      ``P``, agrees with ``P`` wherever it still overlaps the pattern
      (positions falling off the left end are treated as matching);
    * the character of ``P`` immediately before that re-occurrence differs
      from ``badchar``, or there is no such character.

    Parameters
    ----------
    badchar : str
        Pattern character at the mismatch position.
    suffix : str
        Part of the pattern to the right of the mismatch.
    P : str
        The whole pattern.

    Returns
    -------
    int or None
        The first accepted shift. For the arguments produced by ``gsr_table``
        this is between 1 and ``len(P)``. ``None`` only if no shift is
        accepted, which is possible only when ``suffix`` is longer than ``P``.
    """
    m = len(P)
    k = len(suffix)
    for shift in range(m + 1):
        # Index in P where the shifted copy of the suffix would start.
        start = m - shift - k
        # all() stops at the first mismatch instead of scanning the rest.
        aligned = all(
            start + i < 0 or suffix[i] == P[start + i]
            for i in range(k)
        )
        if aligned and (start <= 0 or P[start - 1] != badchar):
            return shift
def bm_matcher(T, P):
    """Find all occurrences of ``P`` in ``T`` with the Boyer-Moore algorithm.

    Uses both the bad-character rule (``bcr_table``) and the good-suffix rule
    (``gsr_table``); after each comparison the pattern moves by the larger of
    the two suggested shifts.

    Parameters
    ----------
    T : str
        Text to search.
    P : str
        Pattern to look for.

    Returns
    -------
    list of int
        0-based shifts, in increasing order, at which ``P`` occurs in ``T``.
        An empty pattern matches at every shift ``0 .. len(T)``, the same
        result ``naive_matcher`` gives.
    """
    shifts = []
    n = len(T)
    m = len(P)
    if m == 0:
        # gsr[0] does not exist for an empty pattern; handle it up front.
        return list(range(n + 1))
    bcr = bcr_table(P)
    gsr = gsr_table(P)
    s = 0
    while s <= n - m:
        # Compare right to left; j is the 1-based position of the mismatch,
        # or 0 when the whole pattern matched.
        j = m
        while j > 0 and P[j - 1] == T[s + j - 1]:
            j -= 1
        if j < 1:
            shifts.append(s)
            s += gsr[0]
        else:
            # A text character that never occurs in P has "last position" 0,
            # so the bad-character rule may move the pattern completely past
            # it (a shift of j).
            s += max(gsr[j - 1], j - bcr.get(T[s + j - 1], 0))
    return shifts
```
%% Cell type:markdown id: tags:
#### Boyer-Moore with BCR only
%% Cell type:code id: tags:
``` python
def bm_matcher_bcr(T, P):
    """Find all occurrences of ``P`` in ``T`` using Boyer-Moore with the
    bad-character rule only.

    Parameters
    ----------
    T : str
        Text to search.
    P : str
        Pattern to look for.

    Returns
    -------
    list of int
        0-based shifts, in increasing order, at which ``P`` occurs in ``T``.
    """
    shifts = []
    n = len(T)
    m = len(P)
    bcr = bcr_table(P)
    s = 0
    while s <= n - m:
        # Compare right to left; j is the 1-based position of the mismatch,
        # or 0 when the whole pattern matched.
        j = m
        while j > 0 and P[j - 1] == T[s + j - 1]:
            j -= 1
        if j < 1:
            shifts.append(s)
            s += 1
        else:
            # A text character that never occurs in P has "last position" 0,
            # so the pattern may move completely past it (a shift of j). The
            # max with 1 guards against a zero or negative shift when the
            # character's last occurrence lies at or right of the mismatch.
            s += max(1, j - bcr.get(T[s + j - 1], 0))
    return shifts
```
%% Cell type:markdown id: tags:
### Prefix Function String Matching
%% Cell type:code id: tags:
``` python
def pf_matcher(T, P):
    """Find all occurrences of ``P`` in ``T`` via the prefix function of
    ``P + separator + T``.

    A position in the combined sequence whose prefix-function value equals
    ``len(P)`` marks the end of an occurrence of ``P`` in ``T``.

    Parameters
    ----------
    T : sequence
        Text to search (typically a string).
    P : sequence
        Pattern to look for (typically a string).

    Returns
    -------
    list of int
        0-based shifts, in increasing order, at which ``P`` occurs in ``T``.
    """
    shifts = []
    n = len(T)
    m = len(P)
    # The separator must differ from every element of P and T. A literal
    # character such as '_' breaks when the inputs contain it (P='_', T='_'
    # was reported as no match), so a fresh object is used instead; it
    # compares equal only to itself.
    separator = object()
    P_T = list(P) + [separator] + list(T)
    pf = prefix_function(P_T)
    # T[t] sits at index m + 1 + t of P_T, so a match ending at index i of
    # P_T starts at shift i - 2*m in T.
    for i in range(2 * m, n + m + 1):
        if pf[i] == m:
            shifts.append(i - 2 * m)
    return shifts
```
%% Cell type:markdown id: tags:
### Tests
%% Cell type:markdown id: tags:
#### Preprocessing
%% Cell type:code id: tags:
``` python
# Show the three preprocessing tables for a sample pattern, one per line:
# KMP prefix function, bad-character table, good-suffix table.
P = 'abracadabra'
for build_table in (prefix_function, bcr_table, gsr_table):
    print(build_table(P))
```
%% Cell type:markdown id: tags:
#### Timer
%% Cell type:code id: tags:
``` python
import time
def time_f(f):
    """Run ``f`` once and return how long the call took.

    Parameters
    ----------
    f : callable
        Called with no arguments; its return value is discarded.

    Returns
    -------
    float
        Elapsed time in seconds.
    """
    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and fall back to clock() only on interpreters that predate it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    before = timer()
    f()
    after = timer()
    return after - before
```
%% Cell type:markdown id: tags:
#### Texts and Patterns
%% Cell type:code id: tags:
``` python
# Sample text/pattern pairs for the string-matching cells in this notebook.
# Each textN is paired with one or more patternN values.

# Short text with a single pattern.
text1 = 'here is a simple example'
pattern1 = 'example'
# Short text over a small alphabet.
text2 = 'abcaabaababaca'
pattern2 = 'aba'
# One text with three patterns; the third is the concatenation of the first two.
text3 = 'How much wood would a woodchuck chuck if a woodchuck could chuck wood?'
pattern3a = 'wood'
pattern3b = 'chuck'
pattern3c = 'woodchuck'
# Longer natural-language paragraphs.
text4 = 'Ready are you? What know you of ready? For eight hundred years have I trained Jedi. My own counsel will I keep on who is to be trained. A Jedi must have the deepest commitment, the most serious mind. This one a long time have I watched. All his life has he looked away... to the future, to the horizon. Never his mind on where he was. Hmm? What he was doing. Hmph. Adventure. Heh. Excitement. Heh. A Jedi craves not these things. You are reckless.'
pattern4 = 'Jedi'
text5 = "What an incredible Cinderella story, this unknown comes outta nowhere to lead the pack, at Augusta. He's on his final hole. He's about 455 yards away. He's gonna hit about a 2-iron, I think. Oh, he got all of that! The crowd is standing on its feet here at Augusta, the normally reserved Augusta crowd, going wild, for this young Cinderella. He's come outta nowhere. He's got about 350 yards left. He's gonna hit about a 5-iron, I expect, don't you think? He's got a beautiful backswing -- that's -- oh, he got all of that one! He's gotta be pleased with that. The crowd is just on its feet here. He's the Cinderella boy, uh -- tears in his eyes I guess, as he lines up this last shot, he's got about 195 yards left. And he's got about a -- it looks like he's got about an 8-iron. This crowd has gone deathly silent, the Cinderella story, outta nowhere. A former greenskeeper and now, about to become the Masters champion. It looks like a mirac- it's in the hole! It's in the hole!"
pattern5 = 'Cinderella'
# Presumably a pattern meant not to occur in the texts above (per its name);
# the benchmark cells offer it as an alternative to pattern5.
pattern_no_match = 'abracadabra'
```
%% Cell type:markdown id: tags:
#### Increasing text length
%% Cell type:code id: tags:
``` python
# Text/pattern pair to benchmark; assign pattern_no_match to P instead to
# time the alternative pattern.
T = text5
P = pattern5

# Running times of naive, KMP and Boyer-Moore (t1, t2, t3) and the matching
# text lengths (n), for the text repeated 1 to 99 times.
t1, t2, t3, n = [], [], [], []
for i in range(1,100):
    temp = T * i
    for timings, matcher in ((t1, naive_matcher), (t2, kmp_matcher), (t3, bm_matcher)):
        timings.append(time_f(lambda: matcher(temp,P)))
    n.append(len(temp))

# Echo the inputs used for this run.
for output_line in ('Text:', T, 'Pattern:', P):
    print(output_line)
```
%% Cell type:code id: tags:
``` python
%matplotlib inline
from matplotlib import pyplot as plt
# Running time against text length, one series per algorithm.
for timings, colour, label in (
    (t1, 'red', 'naive'),
    (t2, 'blue', 'knuth-morris-pratt'),
    (t3, 'green', 'boyer-moore'),
):
    plt.scatter(n, timings, marker='x', c=colour, label=label)
plt.legend(loc='upper left')
plt.xlabel('n')
plt.ylabel('time (/s)')
# Both axes start at 0 and end at the largest observed value.
plt.xlim((0, max(n)))
plt.ylim((0, max(t1 + t2 + t3)))
```
%% Cell type:markdown id: tags:
#### All five algorithms
%% Cell type:code id: tags:
``` python
# Text/pattern pair to benchmark; assign pattern5 to P instead to time the
# alternative pattern.
T = text5
P = pattern_no_match

# Running times of naive, KMP, Boyer-Moore, Boyer-Moore (BCR only) and the
# prefix-function matcher (t1..t5) and the matching text lengths (n), for the
# text repeated 1 to 99 times.
t1, t2, t3, t4, t5, n = [], [], [], [], [], []
matchers_by_series = (
    (t1, naive_matcher),
    (t2, kmp_matcher),
    (t3, bm_matcher),
    (t4, bm_matcher_bcr),
    (t5, pf_matcher),
)
for i in range(1,100):
    temp = T * i
    for timings, matcher in matchers_by_series:
        timings.append(time_f(lambda: matcher(temp,P)))
    n.append(len(temp))

# Echo the inputs used for this run.
for output_line in ('Text:', T, 'Pattern:', P):
    print(output_line)
```
%% Cell type:code id: tags:
``` python
%matplotlib inline
from matplotlib import pyplot as plt
# Running time against text length, one series per algorithm.
for timings, colour, label in (
    (t1, 'red', 'naive'),
    (t2, 'blue', 'knuth-morris-pratt'),
    (t3, 'green', 'boyer-moore'),
    (t4, 'cyan', 'boyer-moore-bcr'),
    (t5, 'orange', 'pf-matcher'),
):
    plt.scatter(n, timings, marker='x', c=colour, label=label)
plt.legend(loc='upper left')
plt.xlabel('n')
plt.ylabel('time (/s)')
# Both axes start at 0 and end at the largest observed value.
plt.xlim((0, max(n)))
plt.ylim((0, max(t1 + t2 + t3 + t4 + t5)))
```
%% Cell type:markdown id: tags:
#### Long text
%% Cell type:code id: tags:
``` python
# Read the whole file and drop every newline character. Lines are joined
# without a separator, so words at line breaks run together.
with open("holmes.txt", "r") as holmes_file:
    raw_text = holmes_file.read()
text_long = raw_text.replace('\n', '')
pattern_long = 'Watson'

print('Number of characters in text:')
print(len(text_long))
```
%% Cell type:code id: tags:
``` python
# Number of occurrences of the pattern found in the long text.
long_text_shifts = bm_matcher(text_long,pattern_long)
len(long_text_shifts)
```
%% Cell type:markdown id: tags:
#### Timings
%% Cell type:code id: tags:
``` python
print('Running time: Naive')
%timeit naive_matcher(text_long,pattern_long)
print('Running time: Knuth-Morris-Pratt')
%timeit kmp_matcher(text_long,pattern_long)
print('Running time: Boyer-Moore')
%timeit bm_matcher(text_long,pattern_long)
print('Running time: Boyer-Moore BCR')
%timeit bm_matcher_bcr(text_long,pattern_long)
print('Running time: PF-Matcher')
%timeit pf_matcher(text_long,pattern_long)
```
%% Cell type:code id: tags:
``` python