Commit 42b64aa0 authored by TerryZYH's avatar TerryZYH
Browse files

update

parent 37e8e713
This diff is collapsed.
......@@ -312,42 +312,6 @@
" self._helper(node.right,limitR)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "consistent-tractor",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAXNUlEQVR4nO3db4xc1Z3m8e+Tplk6wKR34h7+tE3MSiwzAQJGLUPGWRJQCIaBhaBo5SxLlCiRRQRSGCFngRdEvNiA1lKUzAyJ1yIooIUgtNjEywLGEkGEsAa3jcEY48jLMIttkBuIASdewM6zL+o6KYrq7tt2dZV98nykkm+dc27Vry6Hp6tv3eoj20RERLk+1usCIiJieiXoIyIKl6CPiChcgj4ionAJ+oiIwh3W6wLamTFjhmfPnt3rMiIiDhlr1659w/ZQu76DMuhnz57N6Ohor8uIiDhkSPqX8fpy6iYionAJ+oiIwiXoIyIKl6CPiChcgj4ionC1r7qR1AeMAttsX9zSJ+BHwEXA74Gv215X9c2v+vqA223f2qHaP+SBZ7exeOVmtu/czfGDAyy64GQumzM8HU8VEdFR051fU7m88jvAJuAv2vRdCJxU3c4CfgKcVf1wuA04H9gKrJG0wvaLB1R1iwee3cYNyzaw+4O9AGzbuZsblm0ASNhHxEGtG/lV69SNpJnA3wG3jzPkUuAuN6wGBiUdB8wFtth+2fb7wL3V2I5avHLzHw/SPrs/2MvilZs7/VRxCHv9+9/n9e9/v9dlRHxIN/Kr7jv6HwLfBY4ep38YeLXp/taqrV37We0eQNJCYCHACSecULOshu07d0+pPf48vbfppV6XEPER3civSd/RS7oY2GF77UTD2rR5gvaPNtpLbY/YHhkaavst3nEdPzgwpfaIiINFN/KrzqmbecC/l/QKjVMv50n67y1jtgKzmu7PBLZP0N5Riy44mYH+vg+1DfT3seiCkzv9VBERHdWN/Jo06G3fYHum7dnAAuAx2/+pZdgK4GtqOBt42/ZrwBrgJEknSjq82n9Fx6qvXDZnmFsuP43hwQEEDA8OcMvlp+WD2Ig46HUjv/b7j5pJugrA9hLgIRqXVm6hcXnlN6q+PZKuAVbSuLzyDtsbD7Todi6bM5xgj4hD0nTn15SC3vbjwOPV9pKmdgNXj7PPQzR+EERERA/km7EREYVL0EdEFC5BHxFRuAR9REThEvQREYVL0EdEFC5BHxFRuAR9REThEvQREYVL0EdEFC5BHxFRuAR9REThEvQREYVL0EdEFC5BHxFRuAR9REThJl14RNIRwBPAv6rG/w/b32sZswi4oukx/wYYsv1Wtdbsu8BeYI/tkc6VHxERk6mzwtR7wHm2d0nqB56U9LDt1fsG2F4MLAaQdAnw97bfanqMc22/0cnCIyKinkmDvlomcFd1t7+6eYJdvgr8/MBLi4iITqh1jl5Sn6T1wA5gle2nxxn3cWA+cH9Ts4FHJa2VtHCC51goaVTS6NjYWO0XEBERE6sV9Lb32j4DmAnMlXTqOEMvAX7dctpmnu0zgQuBqyWdM85zLLU9YntkaGio/iuIiIgJTemqG9s7gcdpvGtvZwEtp21sb6/+3QEsB+ZOtciIiNh/kwa9pCFJg9X2APBF4KU24z4BfB74RVPbkZKO3rcNfAl4oSOVR0RELXWuujkOuFNSH40fDPfZflDSVQC2l1Tjvgw8avt3TfseAyyXtO+57rH9SMeqj4iISdW56uZ5YE6b9iUt938G/Kyl7WXg9AOqMCIiDki+GRsRUbgEfURE4RL0ERGFS9BHRBQuQR8RUbgEfURE4RL0ERGFS9BHRBQuQR8RUbgEfURE4RL0ERGFS9BHRBQuQR8RUbgEfURE4RL0ERGFS9BHRBSuzlKCR0h6RtJzkjZKurnNmC9IelvS+up2U1PffEmbJW2RdH2nX0BEREyszlKC7wHn2d4lqR94UtLDtle3jPuV7YubG6
rlB28Dzge2AmskrbD9YieKj4iIyU36jt4Nu6q7/dXNNR9/LrDF9su23wfuBS7dr0ojImK/1DpHL6lP0npgB7DK9tNthn22Or3zsKRTqrZh4NWmMVurtnbPsVDSqKTRsbGx+q8gIiImVCvobe+1fQYwE5gr6dSWIeuAT9k+HfhH4IGqXe0ebpznWGp7xPbI0NBQnbIiIqKGKV11Y3sn8Dgwv6X9nX2nd2w/BPRLmkHjHfyspqEzge0HUG9ERExRnatuhiQNVtsDwBeBl1rGHCtJ1fbc6nHfBNYAJ0k6UdLhwAJgRUdfQURETKjOVTfHAXdWV9B8DLjP9oOSrgKwvQT4CvBtSXuA3cAC2wb2SLoGWAn0AXfY3jgdLyQiItpTI48PLiMjIx4dHe11GVGYh775T7z9sb/kiL/+616XEtHWjFlH8e/+w7/dr30lrbU90q4v34yNiChcnVM3EUU45f1nAPjUdf+xx5VEdFfe0UdEFC5BHxFRuAR9REThEvQREYVL0EdEFC5BHxFRuAR9REThEvQREYVL0EdEFC5BHxFRuAR9REThEvQREYVL0EdEFK7OClNHSHqmWvh7o6Sb24y5QtLz1e0pSac39b0iaYOk9ZLyR+YjIrqszp8pfg84z/YuSf3Ak5Ietr26acw/A5+3/VtJFwJLgbOa+s+1/Ubnyo6IiLomDfpqScBd1d3+6uaWMU813V1NYxHwiIg4CNQ6Ry+pT9J6YAewyvbTEwz/JvBw030Dj0paK2nhBM+xUNKopNGxsbE6ZUVERA21gt72Xttn0HinPlfSqe3GSTqXRtD/56bmebbPBC4ErpZ0zjjPsdT2iO2RoaGhqbyGiIiYwJSuurG9E3gcmN/aJ+kzwO3ApbbfbNpne/XvDmA5MHf/y42IiKmqc9XNkKTBansA+CLwUsuYE4BlwJW2f9PUfqSko/dtA18CXuhY9RERMak6V90cB9wpqY/GD4b7bD8o6SoA20uAm4BPAj+WBLDH9ghwDLC8ajsMuMf2I51/GRERMZ46V908D8xp076kaftbwLfajHkZOL21PSIiuiffjI2IKFyCPiKicAn6iIjCJegjIgqXoI+IKFyCPiKicAn6iIjCJegjIgqXoI+IKFyCPiKicAn6iIjCJegjIgqXoI+IKFyCPiKicAn6iIjCJegjIgpXZynBIyQ9I+k5SRsl3dxmjCT9g6Qtkp6XdGZT33xJm6u+6zv9AiK64YFntzHv1sc48fr/xbxbH+OBZ7f1uqSI2uosJfgecJ7tXZL6gSclPWx7ddOYC4GTqttZwE+As6rlB28Dzge2AmskrbD9YkdfRcQ0euDZbdywbAO7P9gLwLadu7lh2QYALpsz3MvSImqZ9B29G3ZVd/urm1uGXQrcVY1dDQxKOg6YC2yx/bLt94F7q7ERh4zFKzf/MeT32f3BXhav3NyjiiKmptY5ekl9ktYDO4BVtp9uGTIMvNp0f2vVNl57u+dYKGlU0ujY2FjN8iOm3/adu6fUHnGwqRX0tvfaPgOYCcyVdGrLELXbbYL2ds+x1PaI7ZGhoaE6ZUV0xfGDA1NqjzjYTOmqG9s7gceB+S1dW4FZTfdnAtsnaI84ZCy64GQG+vs+1DbQ38eiC07uUUURU1PnqpshSYPV9gDwReCllmErgK9VV9+cDbxt+zVgDXCSpBMlHQ4sqMZGHDIumzPMLZefxvDgAAKGBwe45fLT8kFsHDLqXHVzHHBndQXNx4D7bD8o6SoA20uAh4CLgC3A74FvVH17JF0DrAT6gDtsb+z8y4iYXpfNGU6wxyFr0qC3/Twwp037kqZtA1ePs/9DNH4QRERED+SbsRERhUvQR0QULkEfEVG4BH1EROES9BERhUvQR0QULkEfEVG4Ol+YOmTc/D838uL2d3pdRhykvv5aY25897/97x5XEtHep4//C753ySkdf9yigj5iIq8PzZp8UESB1PhS68FlZGTEo6OjvS4jIuKQIWmt7ZF2fTlHHx
FRuAR9REThEvQREYVL0EdEFC5BHxFRuAR9REThJr2OXtIs4C7gWOAPwFLbP2oZswi4oukx/wYYsv2WpFeAd4G9wJ7xLv+JiIjpUecLU3uA62yvk3Q0sFbSKtsv7htgezGwGEDSJcDf236r6THOtf1GJwuPiIh6Jj11Y/s12+uq7XeBTcBEi2d+Ffh5Z8qLiIgDNaVz9JJm01g/9ulx+j8OzAfub2o28KiktZIWTvDYCyWNShodGxubSlkRETGB2kEv6SgaAX6t7fH+ctglwK9bTtvMs30mcCFwtaRz2u1oe6ntEdsjQ0NDdcuKiIhJ1Ap6Sf00Qv5u28smGLqAltM2trdX/+4AlgNz96/UiIjYH5MGvSQBPwU22f7BBOM+AXwe+EVT25HVB7hIOhL4EvDCgRYdERH11bnqZh5wJbBB0vqq7UbgBADbS6q2LwOP2v5d077HAMsbPys4DLjH9iMdqDsiImqaNOhtPwmoxrifAT9raXsZOH0/a4uIiA7IN2MjIgqXoI+IKFyCPiKicAn6iIjCJegjIgqXoI+IKFyCPiKicAn6iIjCJegjIgqXoI+IKFyCPiKicAn6iIjCJegjIgqXoI+IKFyCPiKicHVWmJol6ZeSNknaKOk7bcZ8QdLbktZXt5ua+uZL2ixpi6TrO/0CIiJiYnVWmNoDXGd7XbUs4FpJq2y/2DLuV7Yvbm6Q1AfcBpwPbAXWSFrRZt+IiJgmk76jt/2a7XXV9rvAJmC45uPPBbbYftn2+8C9wKX7W2xEREzdlM7RS5oNzAGebtP9WUnPSXpY0ilV2zDwatOYrYzzQ0LSQkmjkkbHxsamUlZEREygdtBLOgq4H7jW9jst3euAT9k+HfhH4IF9u7V5KLd7fNtLbY/YHhkaGqpbVkRETKJW0EvqpxHyd9te1tpv+x3bu6rth4B+STNovIOf1TR0JrD9gKuOiIja6lx1I+CnwCbbPxhnzLHVOCTNrR73TWANcJKkEyUdDiwAVnSq+IiImFydq27mAVcCGyStr9puBE4AsL0E+ArwbUl7gN3AAtsG9ki6BlgJ9AF32N7Y2ZcQERETUSOPDy4jIyMeHR3tdRkREYcMSWttj7TryzdjIyIKl6CPiChcgj4ionAJ+oiIwiXoIyIKl6CPiChcgj4ionAJ+oiIwiXoIyIKl6CPiChcgj4ionAJ+oiIwiXoIyIKl6CPiChcgj4ionAJ+oiIwtVZSnCWpF9K2iRpo6TvtBlzhaTnq9tTkk5v6ntF0gZJ6yVlNZGIiC6rs5TgHuA62+skHQ2slbTK9otNY/4Z+Lzt30q6EFgKnNXUf67tNzpXdkRE1DVp0Nt+DXit2n5X0iZgGHixacxTTbusBmZ2uM6IiNhPUzpHL2k2MAd4eoJh3wQebrpv4FFJayUtnOCxF0oalTQ6NjY2lbIiImICdU7dACDpKOB+4Frb74wz5lwaQf+5puZ5trdL+itglaSXbD/Ruq/tpTRO+TAyMnLwrVgeEXGIqvWOXlI/jZC/2/ayccZ8BrgduNT2m/vabW+v/t0BLAfmHmjRERFRX52rbgT8FNhk+wfjjDkBWAZcafs3Te1HVh/gIulI4EvAC50oPCIi6qlz6mYecCWwQdL6qu1G4AQA20uAm4BPAj9u/Fxgj+0R4BhgedV2GHCP7Uc6+QIiImJida66eRLQJGO+BXyrTfvLwOkf3SMiIrol34yNiChcgj4ionAJ+oiIwiXoIyIKl6CPiChcgj4ionAJ+oiIwiXoIyIKl6CPiChcgj4ionAJ+oiIwiXoIyIKl6CPiChcgj4ionAJ+oiIwiXoIyIKN+nCI5JmAXcBxwJ/AJba/lHLGAE/Ai4Cfg983fa6qm9+1dcH3G771o6+gsoDz25j8crNbN+5m+MHB1h0wclcNmd4Op4qIqKjpju/6iwluAe4zva6av3XtZJW2X6xacyFwEnV7SzgJ8BZkvqA24Dzga3AGkkrWvY9YA88u40blm1g9wd7Ad
i2czc3LNsAkLCPiINaN/KrzlKCrwGvVdvvStoEDAPNYX0pcJdtA6slDUo6DpgNbKmWFETSvdXYjgb94pWb2f3BXs459r/yuyN2/rH97mfEL56r87MsIqI3dr23h08PG4Aj/98gT7z+XXZ/sJfFKzd3LOindI5e0mxgDvB0S9cw8GrT/a1V23jt7R57oaRRSaNjY2NTKYvtO3e3bf+DPaXHiYjotvFyarxc2x+13+5KOgq4H7jW9jut3W128QTtH220lwJLAUZGRqaU0McPDrBt526eeP27H2ofHhzg/uvPm8pDRUR01bxbH2Nbm1A/fnCgY89R6x29pH4aIX+37WVthmwFZjXdnwlsn6C9oxZdcDID/X0fahvo72PRBSd3+qkiIjqqG/k1adBXV9T8FNhk+wfjDFsBfE0NZwNvV+f21wAnSTpR0uHAgmpsR102Z5hbLj+N4cEBROOd/C2Xn5YPYiPioNeN/JInOY8t6XPAr4ANNC6vBLgROAHA9pLqh8E/AfNpXF75Dduj1f4XAT+kcXnlHbb/y2RFjYyMeHR0dH9eT0TEnyVJa22PtOurc9XNk7Q/1948xsDV4/Q9BDxUo86IiJgG+WZsREThEvQREYVL0EdEFC5BHxFRuEmvuukFSWPAv+zn7jOANzpYTqekrqlJXVOTuqamxLo+ZXuoXcdBGfQHQtLoeJcY9VLqmprUNTWpa2r+3OrKqZuIiMIl6CMiCldi0C/tdQHjSF1Tk7qmJnVNzZ9VXcWdo4+IiA8r8R19REQ0SdBHRBTukAl6SXdI2iHphXH6JekfJG2R9LykM5v65kvaXPVd3+W6rqjqeV7SU5JOb+p7RdIGSesldfTPddao6wuS3q6ee72km5r6enm8FjXV9IKkvZL+suqbzuM1S9IvJW2StFHSd9qM6focq1lX1+dYzbq6Psdq1tX1OSbpCEnPSHququvmNmOmb37ZPiRuwDnAmcAL4/RfBDxM4y9tng08XbX3Af8H+DfA4cBzwKe7WNffAv+62r5wX13V/VeAGT06Xl8AHmzT3tPj1TL2EuCxLh2v44Azq+2jgd+0vu5ezLGadXV9jtWsq+tzrE5dvZhj1Zw5qtrup7Ec69ndml+HzDt6208Ab00w5I8LlNteDexboHwu1QLltt8H9i1Q3pW6bD9l+7fV3dU0VtmadjWO13h6erxafBX4eaeeeyK2X7O9rtp+F9jER9c37vocq1NXL+ZYzeM1np4erxZdmWPVnNlV3e2vbq1Xwkzb/Dpkgr6GA16gvAu+SeMn9j4GHpW0VtLCHtTz2epXyYclnVK1HRTHS9LHaSxkc39Tc1eOl6TZwBwa77qa9XSOTVBXs67PsUnq6tkcm+x4dXuOSeqTtB7YAayy3bX5VXtx8EPAAS9QPp0knUvjf8LPNTXPs71d0l8BqyS9VL3j7YZ1NP42xi41VgF7ADiJg+R40fiV+te2m9/9T/vxknQUjf/xr7X9Tmt3m126MscmqWvfmK7PsUnq6tkcq3O86PIcs70XOEPSILBc0qm2mz+rmrb5VdI7+p4uUD4RSZ8Bbgcutf3mvnbb26t/dwDLafyK1hW239n3q6Qbq4D1S5rBQXC8Kgto+ZV6uo+XpH4a4XC37WVthvRkjtWoqydzbLK6ejXH6hyvStfnWPXYO4HHafw20Wz65lenPmzoxg2YzfgfLv4dH/4g45mq/TDgZeBE/vRBxildrOsEYAvwty3tRwJHN20/BczvYl3H8qcvzM0F/m917Hp6vKr+T9A4j39kt45X9drvAn44wZiuz7GadXV9jtWsq+tzrE5dvZhjwBAwWG0P0FiH++Juza9D5tSNpJ/T+BR/hqStwPdofKCB7SU01qW9iMaE/z3wjapvj6RrgJX8aYHyjV2s6ybgk8CPJQHsceOv0x1D49c3aPyHvMf2I12s6yvAtyXtAXYDC9yYVb0+XgBfBh61/bumXaf1eAHzgCuBDdV5VIAbaYRoL+dYnbp6Mc
fq1NWLOVanLuj+HDsOuFNSH40zKffZflDSVU11Tdv8yp9AiIgoXEnn6CMioo0EfURE4RL0ERGFS9BHRBQuQR8RUbgEfURE4RL0ERGF+//rO5KB90j+WwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"X = np.array([[1,2],[2,3],[3,4],[1,4],[3,2]])\n",
"tree = iTree(X,0,3)\n",
"tree.draw()\n",
"tree.pathLength([1,5])\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"id": "grave-modeling",
"metadata": {},
"outputs": [
......@@ -314,43 +314,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"id": "consistent-tractor",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"X = np.array([[1,2],[2,3],[3,4],[1,4],[3,2]])\n",
"tree = iTree(X,0,3)\n",
"tree.draw()\n",
"tree.pathLength([1,5])\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "toxic-paintball",
"metadata": {},
"outputs": [],
......@@ -400,7 +364,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "developing-warrant",
"metadata": {},
"outputs": [],
......@@ -448,7 +412,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "sound-exemption",
"metadata": {},
"outputs": [],
......@@ -642,32 +606,6 @@
{
"cell_type": "code",
"execution_count": 12,
"id": "younger-merchandise",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[[4, 4, 4, 4, 4],\n",
" [4, 4, 4, 4, 4],\n",
" [4, 4, 4, 4, 4],\n",
" [4, 4, 4, 4, 4],\n",
" [4, 4, 4, 4, 4],\n",
" [4, 4, 4, 4, 4]]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[[4]*5 for _ in range(6)]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "elegant-waste",
"metadata": {},
"outputs": [],
......@@ -2161,7 +2099,7 @@
"execution_count": 13,
"id": "circular-japan",
"metadata": {
"scrolled": false
"scrolled": true
},
"outputs": [
{
This diff is collapsed.
This diff is collapsed.
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import numpy as np
import matplotlib.pyplot as plt
import json
import os
import random
from sklearn.feature_selection import VarianceThreshold
from sklearn.utils import shuffle
from skmultiflow.drift_detection.adwin import ADWIN
import math
# # iForest ADWIN
# In[2]:
class Node:
    """One node of an isolation tree.

    Attributes:
        internal: True for a split node, False for a leaf.
        left, right: child nodes (None by default).
        sAtt: index of the attribute the node splits on.
        sVal: split value used together with ``sAtt``.
        size: value passed as ``size``; iTree.build stores the number of
            training rows that reached a leaf here.
    """

    def __init__(self, internal=True, left=None, right=None,
                 sAtt=None, sVal=None, size=None):
        # Node kind and leaf payload.
        self.internal, self.size = internal, size
        # Children.
        self.left, self.right = left, right
        # Split description.
        self.sAtt, self.sVal = sAtt, sVal
# In[3]:
def c(n):
    """Return the average path length of an unsuccessful BST search over n points.

    This is the normalisation term of the isolation-forest anomaly score:

        c(n) = 2*H(n-1) - 2*(n-1)/n   for n > 2
        c(2) = 1
        c(n) = 0                      for n <= 1

    The two small cases are handled explicitly: H() is a logarithmic
    approximation of the harmonic number, which is undefined at H(0) and
    badly off at H(1), so the general formula cannot be used for n <= 2.
    """
    if n <= 1:
        return 0.0
    if n == 2:
        return 1.0
    return 2*H(n-1)-(2*(n-1)/n)
def H(i):
    """Approximate the i-th harmonic number as ln(i) + 0.5772156649."""
    euler_constant = 0.5772156649
    return euler_constant + math.log(i)
# In[4]:
class iTree:
    """A single isolation tree built by recursive random partitioning.

    Attributes:
        X: the data the tree was built from (indexed as a 2-D NumPy array).
        l: the height limit passed to the constructor.
        X_cati: optional per-attribute flags; a truthy entry marks that
            attribute as categorical (split by equality instead of ``<``).
        root: root Node of the tree.
    """

    def __init__(self,X,e,l,X_cati=None):
        """
        INPUTS:
            X: input data
            e: current tree height
            l: height limit
            X_cati: optional per-attribute categorical flags
        """
        self.X = X
        self.l = l
        # Remembered so pathLength() can replay categorical splits even when
        # the caller does not pass X_cati again.
        self.X_cati = X_cati
        self.root = self.build(X,e,l,X_cati)

    @staticmethod
    def _is_categorical(X_cati, q):
        """Return True when attribute q is flagged categorical in X_cati."""
        # Explicit None/length checks instead of truthiness so that a NumPy
        # array of flags does not raise "truth value is ambiguous".
        return X_cati is not None and len(X_cati) > 0 and bool(X_cati[q])

    def build(self,X,e,l,X_cati=None):
        """Recursively build the subtree for rows X at height e.

        Returns a leaf Node once the height limit l is reached or at most one
        row is left; otherwise picks a random attribute and a random split
        value and recurses on both partitions.
        """
        if e>=l or len(X)<=1:
            return Node(internal=False,size=len(X))

        q = random.randint(0,len(X[0])-1)
        column = X[:,q]
        if self._is_categorical(X_cati, q):
            # Categorical: one observed value goes left, everything else right.
            p = random.choice(list(set(column)))
            left_mask = column==p
            right_mask = column!=p
        else:
            # Numeric: uniform split point between the observed min and max.
            p = random.uniform(min(column),max(column))
            left_mask = column<p
            right_mask = column>=p
        return Node(internal=True,
                    left=self.build(X[left_mask],e+1,l,X_cati),
                    right=self.build(X[right_mask],e+1,l,X_cati),
                    sAtt=q,
                    sVal=p)

    def pathLength(self,x,N=None,e=0,X_cati=None):
        """
        INPUTS:
            x: an instance
            N: a Node in the iTree (defaults to the root)
            e: current path length
            X_cati: categorical flags; defaults to the flags given at
                construction time
        OUTPUT:
            number of edges from N to the leaf reached by x, plus c(size)
            when that leaf holds more than one training row
        """
        node = self.root if N is None else N
        if X_cati is None:
            X_cati = self.X_cati
        depth = e
        while node.internal:
            a = node.sAtt
            if self._is_categorical(X_cati, a):
                go_left = x[a] == node.sVal
            else:
                go_left = x[a] < node.sVal
            node = node.left if go_left else node.right
            depth += 1
        if node.size>1:
            # Unresolved leaf: add the expected depth of the subtree that the
            # height limit cut off.
            return depth+c(node.size)
        return depth

    def draw(self):
        """Scatter-plot the 2-D training data and draw every split line."""
        if len(self.X[0])!=2:
            print('iTree.draw() function only support 2D data')
            return
        plt.figure()
        plt.scatter(self.X[:,0],self.X[:,1])
        limits = [[min(self.X[:,0]),max(self.X[:,0])],[min(self.X[:,1]),max(self.X[:,1])]]
        self._helper(self.root,limits)

    def _helper(self,node,limits):
        """Draw the split of node inside limits, then recurse into children.

        limits is [[x_min, x_max], [y_min, y_max]] of the region owned by
        node. Only attributes 0 and 1 are meaningful here (draw() is 2-D only).
        """
        if not node.internal:
            return
        val = node.sVal
        (x_lo, x_hi), (y_lo, y_hi) = limits
        if node.sAtt == 0:
            # Split on x: vertical segment spanning the region's y range.
            xs, ys = [val, val], [y_lo, y_hi]
            limitL = [[x_lo, val], [y_lo, y_hi]]
            limitR = [[val, x_hi], [y_lo, y_hi]]
        else:
            # Split on y: horizontal segment spanning the region's x range.
            xs, ys = [x_lo, x_hi], [val, val]
            limitL = [[x_lo, x_hi], [y_lo, val]]
            limitR = [[x_lo, x_hi], [val, y_hi]]
        plt.plot(xs, ys)
        self._helper(node.left,limitL)
        self._helper(node.right,limitR)
# In[6]:
import math
import random
class iForest:
    """An ensemble of isolation trees, each trained on a random subsample."""

    def __init__(self,X,t,phi,X_cati=None):
        """
        INPUTS:
            X: input data (2-D NumPy array, one row per sample)
            t: number of trees
            phi: subsampling size
            X_cati: optional per-attribute categorical flags, forwarded to
                every iTree
        OUTPUT:
            Forest: a set of t iTrees (stored in self.forest)
        """
        self.size = t
        self.n = phi
        # Kept so predict() traverses categorical splits the same way the
        # trees were built.
        self.X_cati = X_cati
        self.forest = []
        self._Train(X,t,phi,X_cati)

    def _Train(self,X,t,phi,X_cati):
        """Build t trees, each from phi rows drawn without replacement."""
        import warnings

        n_samples = X.shape[0]
        if phi > n_samples:
            # Sampling without replacement cannot draw more rows than exist;
            # fall back to using every row instead of crashing.
            warnings.warn(
                "phi (%s) is larger than the number of samples (%s); "
                "using phi=%s" % (phi, n_samples, n_samples))
            phi = n_samples
        if phi < 2:
            raise ValueError("subsampling size phi must be at least 2")
        # The score normaliser c(self.n) must match the size actually used.
        self.n = phi

        # l: height limit of iTrees
        l = math.ceil(math.log2(phi))
        for _ in range(t):
            # X_prime: subsample of X, used for training this iTree
            rows = np.random.choice(n_samples, phi, replace=False)
            X_prime = X[rows, :]
            self.forest.append(iTree(X_prime,0,l,X_cati))

    def predict(self,x):
        """Return the anomaly score 2**(-E/c(n)) of instance x.

        E is the path length of x averaged over all trees and n is the
        subsampling size.
        """
        h_sum = 0
        for tree in self.forest:
            h_sum += tree.pathLength(x, X_cati=self.X_cati)
        E = h_sum/self.size
        return 2**(-E/c(self.n))
# In[7]:
class ADWIN1:
    """Plain-list adaptive-window change detector.

    Every new value is appended to the window W. While some split of W into
    an older part W0 and a newer part W1 has means further apart than the
    cut threshold, the oldest value is dropped and the change flag is set.
    """

    def __init__(self,delta=0.02):
        self.W = []
        self.delta = delta
        self.change = False

    def add_element(self,x):
        """Append x, then shrink the window from the front while drift is seen."""
        self.change = False
        self.W.append(x)
        while len(self.W)>1 and self.driftTest():
            self.change = True
            del self.W[0]

    def detected_change(self):
        """Return whether the most recent add_element() call saw drift."""
        return self.change

    def driftTest(self):
        """Return True if any tested split W0|W1 satisfies |mean0-mean1| >= eps.

        For a cut after i elements: n0 = i, n1 = n - i, m = 1/(1/n0 + 1/n1),
        delta' = delta/n and eps = sqrt(ln(4/delta') / (2*m)).

        All cuts are evaluated at once from a prefix sum, so one call is
        linear in the window length instead of recomputing both means for
        every cut.
        """
        n = len(self.W)
        # Cut positions 1 .. n-2, exactly the range the loop version used.
        # NOTE(review): the cut at n-1 (newest element alone in W1) is never
        # tested -- confirm whether that is intended.
        cuts = np.arange(1, n-1)
        if cuts.size == 0:
            return False

        prefix = np.cumsum(np.asarray(self.W, dtype=float))
        n0 = cuts.astype(float)
        n1 = n - n0
        mu0 = prefix[cuts-1]/n0
        mu1 = (prefix[-1]-prefix[cuts-1])/n1

        m = 1/(1/n0+1/n1)
        delta_prime = self.delta/n
        epsilon_cut = np.sqrt(np.log(4/delta_prime)/(2*m))
        return bool(np.any(np.abs(mu0-mu1)>=epsilon_cut))
# In[8]:
class MADWIN:
    """Bounded adaptive-window change detector that rejects drifting values.

    Differences from ADWIN1 visible in this class:
      * the window holds at most max_size values (oldest dropped first);
      * only cuts leaving at least min_win_size values in W0 are tested;
      * on drift the NEWEST value is removed, so the window keeps its
        earlier contents;
      * detected_change() clears the flag when it is read.
    """

    def __init__(self,delta=0.02,min_win_size=50,max_size=250):
        self.W = []
        self.delta = delta
        self.min_win_size = min_win_size
        self.max_size = max_size
        self.change = False

    def add_element(self,x):
        """Append x; while drift is detected, flag it and drop the newest value."""
        if len(self.W)>=self.max_size:
            del self.W[0]
        self.change = False
        self.W.append(x)
        while len(self.W)>1 and self.driftTest():
            self.change = True
            del self.W[-1]

    def detected_change(self):
        """Return the change flag and reset it to False."""
        ans = self.change
        self.change = False
        return ans

    def driftTest(self):
        """Return True if any tested split W0|W1 satisfies |mean0-mean1| >= eps.

        For a cut after i elements: n0 = i, n1 = n - i, m = 1/(1/n0 + 1/n1),
        delta' = delta/n and eps = sqrt(ln(4/delta') / (2*m)).

        All cuts are evaluated at once from a prefix sum, so one call is
        linear in the window length instead of recomputing both means for
        every cut.
        """
        n = len(self.W)
        # Same cut range as range(min_win_size, n - min_win_size); a cut at 0
        # would leave W0 empty (division by zero), so it is skipped.
        # NOTE(review): the upper bound leaves W1 with at least
        # min_win_size + 1 values -- confirm whether that is intended.
        first_cut = max(1, self.min_win_size)
        cuts = np.arange(first_cut, n-self.min_win_size)
        if cuts.size == 0:
            return False

        prefix = np.cumsum(np.asarray(self.W, dtype=float))
        n0 = cuts.astype(float)
        n1 = n - n0
        mu0 = prefix[cuts-1]/n0
        mu1 = (prefix[-1]-prefix[cuts-1])/n1

        m = 1/(1/n0+1/n1)
        delta_prime = self.delta/n
        epsilon_cut = np.sqrt(np.log(4/delta_prime)/(2*m))
        return bool(np.any(np.abs(mu0-mu1)>=epsilon_cut))
# In[9]:
class Detector:
    """One ensemble member: variance filter + isolation forest + drift monitors.

    train() fits a VarianceThreshold selector and an iForest on the selected
    features. predict() scores one sample and feeds the score to two change
    detectors: the local MADWIN and the imported ADWIN.
    """

    def __init__(self,t,phi,delta=0.02,min_win_size=50):
        """
        INPUTS:
            t: iforest number of trees
            phi: iforest subsampling size
            delta: adwin threshold
            min_win_size: adwin minimal window size
        """
        self.t = t
        self.phi = phi
        self.delta = delta
        self.min_win_size = min_win_size
        self.selector = VarianceThreshold()
        # min_win_size is forwarded so the documented argument actually takes
        # effect (MADWIN's own default is also 50).
        self.madwin = MADWIN(delta=self.delta, min_win_size=self.min_win_size)
        self.adwin = ADWIN(delta=self.delta)
        self.feature_filter = None
        self.iforest = None

    def train(self,X):
        """Fit the feature selector on X and train the forest on the kept features."""
        X_prime = self.selector.fit_transform(X)
        # Boolean support mask; compared across detectors to spot feature drift.
        self.feature_filter = self.selector.get_support()
        self.iforest = iForest(X_prime, self.t, self.phi)

    def predict(self,x):
        """Score one sample.

        Returns (score, adwins): adwins is 1 when the ADWIN monitor reports a
        change and 0 otherwise; score is the iForest anomaly score, or -1
        when the MADWIN monitor reports a change for this sample.
        """
        # The selector expects a 2-D array; score the single transformed row.
        row = np.asarray(x).reshape((1,-1))
        xp = self.selector.transform(row).reshape((-1,))
        s = self.iforest.predict(xp)

        # NOTE(review): MADWIN sees the score scaled by 4 while ADWIN sees the
        # raw score -- presumably a sensitivity tweak; confirm.
        self.madwin.add_element(s*4)
        self.adwin.add_element(s)

        adwins = 1 if self.adwin.detected_change() else 0
        if self.madwin.detected_change():
            return -1, adwins
        return s, adwins
# In[172]:
class algorithm:
    """Windowed ensemble of Detectors over a data stream.

    Samples are buffered in current_window. When the buffer is full it
    becomes previous_window and is processed as a batch: either a new
    Detector is trained on it, or every sample in it is scored by the
    ensemble.
    """

    def __init__(self,e,window_size,t,phi,delta=0.02,min_win_size=50,thresh=0.7):
        """
        INPUT:
            e: number of detectors
            window_size: buffer size
            t: iforest number of trees
            phi: iforest subsampling size
            delta: adwin threshold
            min_win_size: adwin minimal window size
            thresh: abnormal threshold
        """
        self.e = e
        self.window_size = window_size
        self.t = t
        self.phi = phi
        self.delta = delta
        self.thresh = thresh
        self.min_win_size = min_win_size
        self.selector = VarianceThreshold()
        self.previous_window = []
        self.current_window = []
        self.ensemble = []

    def predict(self,x):
        """Buffer x and, when a window has just filled, process that window.

        Returns (output, var). Both are empty lists while the buffer is still
        filling. After a full window they have one entry per sample of that
        window: output holds 0 (mean valid score > thresh) or 1, and var
        holds the per-detector score arrays; both are all -1 when the window
        was used to train a new detector instead.

        x is always appended to the new buffer, including on the retrain
        paths, so no incoming sample is dropped.
        """
        output = []
        var = []
        if len(self.current_window)==self.window_size:
            self.previous_window = self.current_window
            self.current_window = []
            output, var = self._process_window()
        self.current_window.append(x)
        return output,var

    def _process_window(self):
        """Score previous_window with the ensemble, or retrain on it."""
        window = self.previous_window

        # feature selection:
        self.selector.fit(window)
        feature_filter = self.selector.get_support()
        if self._feature_drift_detection(feature_filter) or len(self.ensemble)==0:
            # feature drift occurs OR no existing detector in ensemble
            return self._retrain(window)

        output = []
        var = []
        for sample in window:
            scores = np.zeros((len(self.ensemble),))
            for i, detector in enumerate(self.ensemble):
                s,_ = detector.predict(sample)
                scores[i] = s
            var.append(scores)
            # A detector returns -1 when its drift monitor rejects the sample.
            valid_scores = scores[scores>=0]
            if len(valid_scores)==0:
                # Every detector rejected this sample: discard the partial
                # results and train a fresh detector on the whole window.
                return self._retrain(window)
            # NOTE(review): 0 is emitted for scores above thresh and 1
            # otherwise -- confirm this matches the label encoding in use.
            output.append(0 if np.mean(valid_scores)>self.thresh else 1)
        return output,var

    def _retrain(self,window):
        """Train a Detector on window, add it to the ensemble, return -1 markers."""
        detector = Detector(self.t,self.phi,self.delta,self.min_win_size)
        detector.train(window)
        # Keep at most e detectors: evict the oldest before adding.
        if self.ensemble and len(self.ensemble)>=self.e:
            del self.ensemble[0]
        self.ensemble.append(detector)
        return [-1]*len(window), [-1]*len(window)

    def _feature_drift_detection(self,feature_filter):
        """Return True when no detector in the ensemble uses this feature mask."""
        return not any(
            np.array_equal(detector.feature_filter,feature_filter)
            for detector in self.ensemble
        )
# In[190]:
def evaluation(prediction,label):
    """Print confusion counts and derived metrics for binary predictions.

    Entries where prediction is -1 are skipped. For the rest, a truthy
    prediction/label counts as positive. Metrics whose denominator is zero
    (e.g. precision with no predicted positives) are reported as 0.0 instead
    of raising ZeroDivisionError.

    INPUT:
        prediction: indexable of predictions (-1 = no prediction)
        label: indexable of ground-truth labels; its length drives the loop
    """
    def ratio(numerator, denominator):
        # Undefined ratios are reported as 0.0.
        return numerator/denominator if denominator else 0.0

    TP=0
    TN=0
    FP=0
    FN=0
    for i, truth in enumerate(label):
        predicted = prediction[i]
        if predicted == -1:
            continue
        if predicted:
            if truth:
                TP += 1
            else:
                FP += 1
        elif truth:
            FN += 1
        else:
            TN += 1

    ACC = ratio(TP+TN, TP+TN+FP+FN)
    Precision = ratio(TP, TP+FP)
    Recall = ratio(TP, TP+FN)
    F1 = ratio(2*Precision*Recall, Precision+Recall)
    # NOTE(review): FPR/TPR add 1 to numerator and denominator, so they are
    # not the textbook rates -- presumably smoothing; confirm.
    FPR = (FP+1)/(FP+TN+1)
    TPR = (TP+1)/(TP+FN+1)
    print('TP:',TP,'TN:',TN,'FP:',FP,'FN:',FN)
    print('ACC:',ACC,'Precision:',Precision,'Recall:',Recall,'F1:',F1)
    print('FPR:',FPR,'TPR:',TPR)
    return
# In[ ]:
This diff is collapsed.
This diff is collapsed.
This project repository contains 4 Jupyter notebook (.ipynb) files and 3 folders:
ipynbs:
Sample Run.ipynb:
Contains code for the algorithm and an execution example.
Simulated Experiments.ipynb:
Contains code, experiment results, and result analysis for the experiments that use simulated data streams.