-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTrmv_ut.py
106 lines (80 loc) · 3.02 KB
/
Trmv_ut.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Programmed by: Jasdev Singh
#
import flame
import laff as laff
def Trmv_ut_unb_var1(U, x):
    """
    Trmv_ut_unb_var1(matrix, vector)

    Overwrite x with U' * x (the transpose of U times x), producing one
    entry of x per iteration through a DOT-product update.

    U is swept from BOTTOM-RIGHT to TOP-LEFT and x from BOTTOM to TOP:
    the bottom-right quadrant of U and the bottom part of x start out
    with size 0 and gain one row/column per iteration.

    Nothing is returned; the result is merged back into x.
    """
    top_left, top_right, bot_left, bot_right = flame.part_2x2(U, 0, 0, 'BR')
    x_top, x_bot = flame.part_2x1(x, 0, 'BOTTOM')

    # Continue until the bottom-right quadrant has as many rows as U.
    while U.shape[0] > bot_right.shape[0]:
        # Split one row and one column off the top-left quadrant to expose
        # the current diagonal element and its neighbouring pieces.
        (A00, a01, A02,
         a10t, alpha11, a12t,
         A20, a21, A22) = flame.repart_2x2_to_3x3(top_left, top_right,
                                                 bot_left, bot_right,
                                                 1, 1, 'TL')

        # Split the matching single entry off the top part of x.
        (x_above,
         x_cur,
         x_below) = flame.repart_2x1_to_3x1(x_top,
                                           x_bot,
                                           1, 'TOP')

        # Update of the current entry, assuming the usual LAFF conventions
        # (scal scales its second argument in place, dots accumulates a dot
        # product into its third argument):
        #     x_cur := alpha11 * x_cur + a01' * x_above
        # The scaling must come first so that only the old x_cur is
        # multiplied by the diagonal element.
        laff.scal(alpha11, x_cur)
        laff.dots(a01, x_above, x_cur)

        # Fold the exposed row/column into the bottom-right quadrant.
        top_left, top_right, bot_left, bot_right = flame.cont_with_3x3_to_2x2(
            A00, a01, A02,
            a10t, alpha11, a12t,
            A20, a21, A22,
            'BR')

        # Fold the processed entry into the bottom part of x.
        x_top, x_bot = flame.cont_with_3x1_to_2x1(x_above,
                                                  x_cur,
                                                  x_below,
                                                  'BOTTOM')

    # Write the two parts back into the caller's vector.
    flame.merge_2x1(x_top, x_bot, x)
def Trmv_ut_unb_var2(U, x):
    """
    Trmv_ut_unb_var2(matrix, vector)

    Overwrite x with U' * x (the transpose of U times x), using one AXPY
    update per iteration.

    U is swept from BOTTOM-RIGHT to TOP-LEFT and x from BOTTOM to TOP:
    the bottom-right quadrant of U and the bottom part of x start out
    with size 0 and gain one row/column per iteration.

    Nothing is returned; the result is merged back into x.
    """
    top_left, top_right, bot_left, bot_right = flame.part_2x2(U, 0, 0, 'BR')
    x_top, x_bot = flame.part_2x1(x, 0, 'BOTTOM')

    # Continue until the bottom-right quadrant has as many rows as U.
    while U.shape[0] > bot_right.shape[0]:
        # Split one row and one column off the top-left quadrant to expose
        # the current diagonal element and its neighbouring pieces.
        (A00, a01, A02,
         a10t, alpha11, a12t,
         A20, a21, A22) = flame.repart_2x2_to_3x3(top_left, top_right,
                                                 bot_left, bot_right,
                                                 1, 1, 'TL')

        # Split the matching single entry off the top part of x.
        (x_above,
         x_cur,
         x_below) = flame.repart_2x1_to_3x1(x_top,
                                           x_bot,
                                           1, 'TOP')

        # Updates, assuming the usual LAFF conventions (axpy adds a scaled
        # vector into its third argument, scal scales its second argument
        # in place):
        #     x_below := x_cur * a12t' + x_below
        #     x_cur   := alpha11 * x_cur
        # The axpy has to run first because it needs the value of x_cur
        # from before it is scaled by the diagonal element.
        laff.axpy(x_cur, a12t, x_below)
        laff.scal(alpha11, x_cur)

        # Fold the exposed row/column into the bottom-right quadrant.
        top_left, top_right, bot_left, bot_right = flame.cont_with_3x3_to_2x2(
            A00, a01, A02,
            a10t, alpha11, a12t,
            A20, a21, A22,
            'BR')

        # Fold the processed entry into the bottom part of x.
        x_top, x_bot = flame.cont_with_3x1_to_2x1(x_above,
                                                  x_cur,
                                                  x_below,
                                                  'BOTTOM')

    # Write the two parts back into the caller's vector.
    flame.merge_2x1(x_top, x_bot, x)