-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAnomaly.py
107 lines (86 loc) · 2.88 KB
/
Anomaly.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# exercise 11.4.1
import numpy as np
from matplotlib.pyplot import (figure, imshow, bar, title, xticks, yticks, cm,
subplot, show)
from scipy.io import loadmat
from toolbox_02450 import gausKernelDensity
from sklearn.neighbors import NearestNeighbors
from initData import *
# Number of observations (N) and attributes (M) in the data matrix stdX,
# which is provided by the star import from initData.
N, M = np.shape(stdX)

### Gaussian kernel density estimator
# Cross-validate the kernel width by leave-one-out cross-validation
# (efficient implementation in the gausKernelDensity function).
# Candidate widths: the largest per-attribute variance scaled by 2^-10 .. 2^2.
widths = stdX.var(axis=0).max() * (2.0**np.arange(-10, 3))
logP = np.zeros(np.size(widths))
for i, w in enumerate(widths):
    print('Fold {:2d}, w={:f}'.format(i, w))
    density, log_density = gausKernelDensity(stdX, w)
    # Total log-density over all observations is the selection criterion.
    logP[i] = log_density.sum()

# Keep the width with the highest summed log-density.
val = logP.max()
ind = logP.argmax()
width = widths[ind]
print('Optimal estimated width is: {0}'.format(width))

# Evaluate the density for the estimated width
density, log_density = gausKernelDensity(stdX, width)

# Sort the densities in ascending order, so the lowest-density observations
# (the strongest outlier candidates) come first.
# NOTE(review): the ravel()/reshape(-1,) pair suggests gausKernelDensity
# returns a 2-D column vector -- confirm against toolbox_02450.
i = (density.argsort(axis=0)).ravel()
density = density[i].reshape(-1,)

# Plot the density estimate for the 20 lowest-density observations
fig = figure()
bar(range(20), density[:20])
title('Density estimate')
# Write the file before show(); show() may block until the window is closed.
fig.savefig('fig/densityEstimate.eps', format='eps', dpi=1200)
show()
# clf must be *called*; the bare attribute reference `fig.clf` does nothing.
fig.clf()
### K-neighbors density estimator
# Number of neighbors to use:
K = 5

# Find the K nearest neighbors of every observation.
# kneighbors() is called WITHOUT a query matrix on purpose: scikit-learn then
# does not count a point as its own neighbor. Querying with stdX itself would
# return every point as its own first neighbor at distance 0, leaving only
# K-1 real neighbors in the average below.
knn = NearestNeighbors(n_neighbors=K).fit(stdX)
D, i = knn.kneighbors()

# KNN density: inverse of the mean distance to the K nearest neighbors.
# NOTE(review): an observation with K exact duplicates has a zero mean
# distance and therefore an infinite density.
density = 1. / (D.sum(axis=1) / K)

# Sort the scores in ascending order (lowest density = most outlying first)
i = density.argsort()
density = density[i]

# Plot the KNN density for the 20 lowest-density observations
fig = figure()
bar(range(20), density[:20])
title('KNN density: Outlier score')
# Write the file before show(); show() may block until the window is closed.
fig.savefig('fig/knnDensity.eps', format='eps', dpi=1200)
show()
# clf must be *called*; the bare attribute reference `fig.clf` does nothing.
fig.clf()
### K-nearest neighbor average relative density
# Compute the average relative density: each observation's KNN density
# divided by the mean KNN density of its K nearest neighbors.
# kneighbors() is called without a query matrix so that a point is never
# returned as its own neighbor; D and i then hold exactly K real neighbors.
knn = NearestNeighbors(n_neighbors=K).fit(stdX)
D, i = knn.kneighbors()
density = 1. / (D.sum(axis=1) / K)
# density[i] has one row per observation with the densities of its K
# neighbors, so the sum over K entries is divided by K (a true mean).
avg_rel_density = density / (density[i].sum(axis=1) / K)

# Sort the average relative densities in ascending order
i_avg_rel = avg_rel_density.argsort()
avg_rel_density = avg_rel_density[i_avg_rel]

# Plot the average relative density for the 20 lowest-scoring observations
fig = figure()
bar(range(20), avg_rel_density[:20])
title('KNN average relative density: Outlier score')
# Write the file before show(); show() may block until the window is closed.
fig.savefig('fig/knnAvgDensity.eps', format='eps', dpi=1200)
show()
# clf must be *called*; the bare attribute reference `fig.clf` does nothing.
fig.clf()
### Distance to K'th nearest neighbor outlier score
K = 5

# Find the K nearest neighbors of every observation.
# kneighbors() is called without a query matrix so that a point is never
# returned as its own neighbor; column K-1 of D is then the distance to the
# K'th real neighbor rather than to the (K-1)'th.
knn = NearestNeighbors(n_neighbors=K).fit(stdX)
D, i = knn.kneighbors()

# Outlier score: distance to the K'th nearest neighbor
score = D[:, K-1]

# Sort the scores in descending order (largest distance = most outlying first)
i = score.argsort()
score = score[i[::-1]]

# Plot the K'th-neighbor distance for the 20 highest-scoring observations
fig = figure()
bar(range(20), score[:20])
# The title is built from K so it cannot drift from the neighbor actually used
# (it previously claimed "27th" while K was 5).
title('{0}th neighbor distance: Outlier score'.format(K))
# Write the file before show(); show() may block until the window is closed.
fig.savefig('fig/knnDistanceScore.eps', format='eps', dpi=1200)
show()
# clf must be *called*; the bare attribute reference `fig.clf` does nothing.
fig.clf()