diff --git "a/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/Cargo.lock" "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/Cargo.lock" new file mode 100644 index 0000000..d2a0ba4 --- /dev/null +++ "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/Cargo.lock" @@ -0,0 +1,201 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "bytemuck" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" + +[[package]] +name = "hw5" +version = "0.1.0" +dependencies = [ + "nalgebra", +] + +[[package]] +name = "matrixmultiply" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9380b911e3e96d10c1f415da0876389aaf1b56759054eeb0de7df940c456ba1a" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "nalgebra" +version = "0.33.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26aecdf64b707efd1310e3544d709c5c0ac61c13756046aaaba41be5c4f66a3b" +dependencies = [ + "approx", + "matrixmultiply", + "nalgebra-macros", + "num-complex", + "num-rational", + "num-traits", + "simba", + "typenum", +] + +[[package]] +name = "nalgebra-macros" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "254a5372af8fc138e36684761d3c0cdb758a4410e938babcff1c860ce14ddbfc" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "safe_arch" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3460605018fdc9612bce72735cba0d27efbcd9904780d44c7e3a9948f96148a" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "simba" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3a386a501cd104797982c15ae17aafe8b9261315b5d07e3ec803f2ea26be0fa" +dependencies = [ + "approx", + "num-complex", + "num-traits", + "paste", + "wide", +] + +[[package]] +name = "syn" +version = "2.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89275301d38033efb81a6e60e3497e734dfcc62571f2854bf4b16690398824c" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "wide" +version = "0.7.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b828f995bf1e9622031f8009f8481a85406ce1f4d4588ff746d872043e855690" +dependencies = [ + "bytemuck", + "safe_arch", +] diff --git "a/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/Cargo.toml" "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/Cargo.toml" new file mode 100644 index 0000000..55e577a --- /dev/null +++ "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/Cargo.toml" @@ -0,0 +1,7 @@ +[package] +name = "hw5" +version = "0.1.0" +edition = "2021" + +[dependencies] +nalgebra = "*" diff --git "a/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/hw5.typ" "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/hw5.typ" new file mode 100644 index 0000000..5578235 --- /dev/null +++ "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/hw5.typ" @@ -0,0 +1,96 @@ +#import "../../../template.typ": * +#show: note.with( + title: "作业5", + author: "YHTQ", + date: datetime.today().display(), + logo: none, + withOutlined : false, + withTitle : false, + withHeadingNumbering: false +) += 7. + 取初值 $x_0 = vec(1, 1)$,有: + $ + A^k x_0 &= (lambda I + mat(0, 1;0, 0))^k vec(1, 1)\ + &= (lambda^k I + k lambda^(k - 1) mat(0, 1;0, 0)) vec(1, 1)\ + &= lambda^k vec(1, 1) + k lambda^(k - 1) vec(1, 0)\ + &= lambda^(k-1) vec(lambda + k, lambda) + $ + 不难验证 $(A^k x_0) / norm(A^k x_0) -> vec(1, 0)$ + $ + B^k x_0 &= (lambda mat(1, 0;0, -1) + mat(0, 1;0, 0))^k vec(1, 1)\ + &= (lambda^k mat(1, 0;0, (-1)^k) + k lambda^(k - 1) mat(0, 1;0, 0)) vec(1, 1)\ + &= lambda^(k - 1) mat(lambda, k;0, (-1)^k lambda) vec(1, 1)\ + &= lambda^(k - 1) vec(lambda + k, (-1)^k lambda)\ + $ + 同样不难验证幂法收敛于 $vec(1, 0)$ += 9. + 求最佳的 $mu$ 等价于: + $ + min_mu max(abs((lambda_2 - mu)/(lambda_1 - mu)), abs((lambda_n - mu)/(lambda_1 - mu))) \ + s.t. abs(lambda_1 - mu) > abs(lambda_n - mu) and abs(lambda_1 - mu) > abs(lambda_2 - mu) + $ + - 首先,事实上约束条件是无用的,只要 $max(abs((lambda_2 - mu)/(lambda_1 - mu)), abs((lambda_n - mu)/(lambda_1 - mu))) < 1$ 则两个条件当然都满足。 + - 其次, $f(mu) := max(abs((lambda_2 - mu)/(lambda_1 - mu)), abs((lambda_n - mu)/(lambda_1 - mu)))$ 对于 $mu$ 在 $lambda_1$ 两侧是连续的,且除去 $lambda_2, lambda_1, abs((lambda_2 - mu)/(lambda_1 - mu)) = abs((lambda_n - mu)/(lambda_1 - mu))$ 的若干点外,函数都是单调的,不可能取最小值,只需计算: + $ + lim_(mu -> infinity) f(mu) = 1\ + lim_(mu -> lambda_1) f(mu) = +infinity\ + f(lambda_2) = (lambda_2 - lambda_n)/(lambda_1 - lambda_2)\ + f(lambda_n) = (lambda_2 - lambda_n)/(lambda_1 - lambda_n)\ + abs((lambda_2 - mu)/(lambda_1 - mu)) = abs((lambda_n - mu)/(lambda_1 - mu)) => abs(lambda_2 - mu) = abs(lambda_n - mu) => mu = (lambda_2 + lambda_n)/2\ + f((lambda_2 + lambda_n)/2) = abs((lambda_2 - lambda_n)/2) / abs(lambda_1 - (lambda_2 + lambda_n)/2)\ + $ + 而事实上,有: + $ + abs((lambda_2 - lambda_n)/2) / abs(lambda_1 - (lambda_2 + lambda_n)/2) = abs(lambda_2 - lambda_n) / abs((lambda_1 - lambda_2) + (lambda_1 - lambda_n)) = (lambda_2 - lambda_n) / ((lambda_1 - lambda_2) + (lambda_1 - lambda_n)) + $ + 不难看出 $f((lambda_2 + lambda_n)/2)$ 是函数的最小值,并且不难验证 $f((lambda_2 + lambda_n)/2) < 1$,因此约束条件也成立 += 11. + 取 $x_0 = vec(1, 0, 0)$,计算得近似特征向量: + $ + vec(0.7887, -0.5773, 0.2113) + $ += 14. + $ + A_0 = mat(1, 0;1, -1) = mat(sqrt(2)/2, sqrt(2)/2 ;sqrt(2)/2,-sqrt(2)/2) mat(sqrt(2), -sqrt(2)/2;0, sqrt(2)/2)\ + A_1 = mat(1/2, 3/2;1/2, -1/2)\ + A_2 = mat(1, 0;1, -1) + $ + 因此 QR 迭代不收敛 += 23 + #lemmaLinear[][ + 设 $A$ 是上三角矩阵,对角元为 $a_(i i)$,若对 $k$ 满足: + $ + a_(i i) != 0, i > k + $ + 则方程组: + $ + A x = 0 + $ + 的解等价于方程组: + $ + A_((k - 1) k) x = 0 + $ + 的解后面加上足够多的 $0$ + ] + #proof[ + 简单验证即可 + ] + 为了计算所有特征向量,只需对所有 $i$ 解: + $ + (A - a_(i i) I) x = 0 + $ + 不难看出其解空间维数为 $1$,解法如下: + - 对于任意 $i$,不难看出 $A - a_(i i) I$ 满足引理中 $k = i$ 条件,因此只需解一个形如: + $ + mat(a_(1 1), a_(1 2), ..., a_(1 (i - 1)), a_(1 i);0, a_(2 2), ..., a_(2 (i - 1)), a_(2 i);0, 0, ..., a_((i-1) (i-1)), a_((i-1) i)) x = 0 + $ + 的方程即可,其中对角元 $a_(i i)$ 非零。因为解空间维数为 $1$,只需找到一个解即可,不妨取 $x_i = -1$,方程组化为: + $ + mat(a_(1 1), a_(1 2), ..., a_(1 (i - 1));0, a_(2 2), ..., a_(2 (i - 1));0, 0, ..., a_((i-1) (i-1))) x = vec(a_(1 i), a_(2 i), ..., a_((i-1) i)) + $ + 按照解上三角线性方程组的解法解出即可。 + 总的复杂度约为: + $ + sum_(i = 1) ^ n (i-1)^2 approx 1/3 n^3 + $ \ No newline at end of file diff --git "a/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/output" "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/output" new file mode 100644 index 0000000..cb48de9 --- /dev/null +++ "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/output" @@ -0,0 +1,3 @@ +max root: -3.0000000000723883, iter: 25 +max root: 1.8793852414928818, iter: 112 +max root: -100.00000000010793, iter: 11 diff --git "a/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/src/main.rs" "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/src/main.rs" new file mode 100644 index 0000000..f3db567 --- /dev/null +++ "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/src/main.rs" @@ -0,0 +1,121 @@ +use std::borrow::Borrow; +use std::iter; + +use na::{DefaultAllocator, OMatrix, Vector}; +use nalgebra::{self as na, Dim, Matrix, OVector, Unit, U1, U3, U8}; +use nalgebra::Normed; + +// 根据多项式向量构造友阵,注意多项式为: +// p(x) = x^n + poly(0) * x^(n-1) + poly(1) * x^(n-2) + ... + poly(n-2) * x + poly(n-1) +fn get_companion_matrix (poly: &Vector) -> OMatrix +where + T: na::RealField + Copy, + D: Dim, + S: na::storage::Storage, + DefaultAllocator: na::allocator::Allocator +{ + let n = poly.nrows(); + let mut companion = OMatrix::::zeros_generic(D::from_usize(n), D::from_usize(n)); + for i in 0..n-1 { + companion[(i+1, i)] = T::one(); + } + for i in 0..n { + companion[(i, n-1)] = -poly[n - 1 - i]; + } + companion +} + +// 用幂法计算矩阵特征值 +struct PowerMethod<'a, T: na::RealField + Copy, D: Dim, S: na::storage::Storage> +where DefaultAllocator: na::allocator::Allocator +{ + name: &'a str, + matrix: Matrix, + x: Unit>, +} + +impl <'a, T: na::RealField + Copy, D: Dim, S: na::storage::Storage> PowerMethod<'a, T, D, S> +where DefaultAllocator: na::allocator::Allocator +{ + fn new(name: &'a str, matrix: Matrix, x: Unit>) -> Self { + Self { name, matrix, x } + } + fn next(self) -> PowerMethod<'a, T, D, S> { + let x1 = self.x.into_inner(); + let y = (&self.matrix) * (x1); + let x = Unit::new_normalize(y); + PowerMethod::new(self.name, self.matrix, x) + } + fn get_lambda(&self) -> T { + let x = self.x.as_ref(); + let y = (&self.matrix) * x; + y.ad_mul(&self.x.as_ref())[0] + } + fn run_until(self, max_iter: usize, tol: T) -> Result<(Self, usize), String> { + let mut lambda_o = self.get_lambda(); + let mut next = self; + for i in 0..max_iter { + next = next.next(); + let lambda = next.get_lambda(); + if (lambda - lambda_o).abs() < tol * lambda.abs() + { + return Ok((next, i)); + } + lambda_o = lambda; + } + Err(format!("{}: max_iter reached", next.name.to_string())) + } +} + +fn polynomial_to_string (poly: &Vector) -> String +where + T: na::RealField + Copy, + D: Dim, + S: na::storage::Storage, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator +{ + let n = poly.nrows(); + iter::once(format!("x^{}", n)) + .chain(poly.iter().enumerate().map(|(i, x)| { + if i == n - 1 { + format!("{}", x) + } else { + if *x == T::zero() { + "".to_string() + } else if *x == T::one() { + format!("x^{}", n - 1 - i) + } else { + format!("{}x^{}", x, n - 1 - i) + } + } + })) + .collect::>().join(" + ") +} + +fn get_max_root_of_polynomial (poly: &Vector) -> () +where + T: na::RealField + Copy, + D: Dim, + S: na::storage::Storage, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator +{ + let companion = get_companion_matrix(poly); + let x = Unit::new_normalize(OVector::::repeat_generic(poly.shape_generic().0, U1 ,T::one())); + let name = polynomial_to_string(poly); + let power_method = PowerMethod::new(name.as_str(), companion, x); + let (power_method, iter) = power_method.run_until(1000, T::from_f64(1e-10).unwrap()).unwrap(); + let lambda = power_method.get_lambda(); + println!("max root: {}, iter: {}", lambda, iter); +} + +fn main() { + get_max_root_of_polynomial( + &OVector::::from_vec(vec![1.0, -5.0, 3.0]) + ); + get_max_root_of_polynomial( + &OVector::::from_vec(vec![0.0, -3.0, -1.0]) + ); + get_max_root_of_polynomial( + &OVector::::from_vec(vec![101.0, 208.01, 10891.01, 9802.08, 79108.9, -99902.0, 790.0, -1000.0]) + ); +} diff --git "a/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/\344\270\212\346\234\272\346\212\245\345\221\212.typ" "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/\344\270\212\346\234\272\346\212\245\345\221\212.typ" new file mode 100644 index 0000000..041ea7f --- /dev/null +++ "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw5/\344\270\212\346\234\272\346\212\245\345\221\212.typ" @@ -0,0 +1,17 @@ +#import "../../../template.typ": * +#show: note.with( + title: "上机报告", + author: "YHTQ", + date: datetime.today().display(), + logo: none, + withOutlined : false, + withTitle : false, + withHeadingNumbering: false +) +#let output = read("output") +本次作业主要是利用幂法解多项式的模最大根,结果如下: +#block[ +#output +] + +其中迭代终止条件设置为 $abs(lambda_k - lambda_(k - 1)) < 10^(-8) abs(lambda_k)$,可以看到幂法以非常快的速度收敛,效果很好。 \ No newline at end of file diff --git "a/\350\256\241\347\256\227\346\226\271\346\263\225B/main.typ" "b/\350\256\241\347\256\227\346\226\271\346\263\225B/main.typ" index 4cc6ac1..d9d3426 100644 --- "a/\350\256\241\347\256\227\346\226\271\346\263\225B/main.typ" +++ "b/\350\256\241\347\256\227\346\226\271\346\263\225B/main.typ" @@ -49,15 +49,8 @@ $ 则称之为 $p$ 次渐进收敛 ] -= 数值代数 - == 基本知识 - 数值代数的基本问题: - + 线性方程组 - + 线性最小二乘 - + 矩阵特征值 - + 奇异值分析 - == 直接法解线性方程组 - === 三角形方程组和 LU 分解 += 直接法解线性方程组 + == 三角形方程组和 LU 分解 若 $L$ 是下三角矩阵,且对角线上没有零,则称 $L y = b$ 是三角方程组。不难发现,这样的方程组可以在 $O(n^2)$ 时间解出。自然的,如果方程组 $A x = y$ 可以分解成: $ A = L U\ @@ -815,5 +808,248 @@ // $ // 上面的不等式已经表明 $x^T D x != 0$,而若 $x^T D x < 0$,将有 $x^T L x < 0, lambda > 1, 1 - omega - lambda < 0, omega(1 + lambda) > 0$ ] += 特征值问题 + #definition[特征值和特征向量][ + 设 $A in CC^(n times n)$,若存在 $lambda in CC, x in CC^n, x != 0$ 使得: + $ + A x = lambda x + $ + 则称 $lambda$ 为 $A$ 的特征值,$x$ 为对应的特征向量。 - \ No newline at end of file + 我们记 $lambda(A)$ 为 $A$ 特征值的集合,称为 $A$ 的谱集。称特征值在特征多项式中的重数为代数重数 $n_i$,几何重数称为 $m_i$ + ] + 矩阵特征值当然可以通过特征多项式计算,然而高次多项式求根是很困难的问题,因此求特征值往往会通过迭代法进行。 + #proposition[][ + 设 $A = P B Inv(P)$,其中 $P$ 是非奇异矩阵,则 $A, B$ 有相同的特征值,特征向量由 $P$ 变换可得。 + ] + #theorem[Jordan 分解][ + 设 $A$ 有 $r$ 个互不相同的特征值 $lambda_1, lambda_2, ..., lambda_r$,其代数重数分别为 $n_1, n_2, ..., n_r$,则存在非奇异矩阵 $P$ 使得: + $ + P^(-1) A P = diag(J(lambda_1), J(lambda_2), ..., J(lambda_r)) + $ + 其中 $J(lambda_i) in CC^(n_i times n_i)$ 由 Jordan 块构成。 + ] + #theorem[Schur 分解][ + 设 $A in CC^(n times n)$,则存在酉矩阵 $Q$ 使得: + $ + Q^T A Q = T + $ + 其中 $T$ 是上三角矩阵。 + ] + 实践上,Jordan 分析并非不可行,但往往数值相当不稳定,而 Schur 分解更加稳定。 + == 幂法 + 如果我们只想要计算模最大的特征值,那么幂法是相当简单实用的方法。 + #algorithm[幂法][ + 任取初值 $x_0$ 使得 $norm(x_0) = 1$,令: + $ + x_(k + 1) = (A x_k) / norm(A x_k)\ + lambda_(k +1) = inner(x_(k + 1), A x_(k + 1)) + $ + 不断迭代即可。显然事实上有: + $ + x_k = (A^k x_0) / norm(A^k x_0)\ + $ + ] + #theorem[][ + 假设 $A$ 存在唯一模最大的特征值,其代数重数等于几何重数,且初始向量 $x_0$ 在 $lambda_1$ 的特征子空间的投影不为零,则在幂法中,$x_k$ 确实收敛于 $A$ 的模最大特征值对应的特征向量,而 $lambda_k$ 收敛于该特征值。 + ] + #proof[ + 不妨设: + $ + A = P diag(J(lambda_1), J(lambda_2), ..., J(lambda_r)) Inv(P) + $ + 其中 $abs(lambda_1) > abs(lambda_i), i != 1$则: + $ + A^k x_0 = P diag(J(lambda_1)^k, J(lambda_2)^k, ..., J(lambda_r)^k) Inv(P) x_0 + $ + #lemma[][ + 设 $J(lambda)$ 是 Jordan 块,$abs(lambda') > abs(lambda)$,则: + $ + (J(lambda) / lambda')^k -> 0 + $ + ] + 由引理,有: + $ + A^k x_0 = lambda_1^k P diag(I, (J(lambda_2) / lambda_1)^k, ..., (J(lambda_r)/ lambda_1)^k) Inv(P) x_0 \ + (A^k x_0)/norm(A^k x_0) = (P diag(I, (J(lambda_2) / lambda_1)^k, ..., (J(lambda_r)/ lambda_1)^k) Inv(P) x_0) / norm(P diag(I, (J(lambda_2) / lambda_1)^k, ..., (J(lambda_r)/ lambda_1)^k) Inv(P) x_0)\ + -> (P diag(I, 0, ..., 0) Inv(P) x_0)/norm(P diag(I, 0, ..., 0) Inv(P) x_0) + $ + 不难验证: + $ + A (P diag(I, 0, ..., 0) Inv(P) x_0) &= P diag(J(lambda_1), 0, 0, ..., 0) Inv(P) x_0 \ + &= lambda_1 P diag(I, 0, 0, ..., 0) Inv(P) x_0 + $ + 表明 $x_k$ 确实收敛于一个 $lambda_1$ 的特征向量,而 $lambda_k$ 收敛于 $lambda_1$ 是容易验证的。 + ] + #remark[][ + - 容易看出幂法是指数收敛的,取决于 $abs(lambda_2 / lambda_1)$ + - 幂法的运算简单,收敛也很快,但在 $abs(lambda_2 / lambda_1)$ 比较接近 $1$ 时收敛较慢,且一旦 @power_method 中条件不成立,则幂法无法收敛。例如实矩阵可能有两个共轭的特征值,它们的模相等,此时幂法可能不收敛。 + ] + #algorithm[带位移策略的幂法][ + 我们可以转而用幂法计算 $A' := A - sigma I$ 的特征值,此时需要保证: + - $lambda_1 - sigma$ 是 $A - sigma I$ 的最大特征值 + - $max_(i != 1) abs((lambda_i - sigma) / (lambda_1 - sigma))$ 尽可能小 + 这种加速很直观,但一般来说 $sigma$ 很难选取到合适的值。 + ] + #algorithm[带位移策略的反幂法][ + 如果将幂法用于 $Inv(A)$,就可以求出 $A$ 的模最小特征值: + - 选取初值 $x_0, sigma$ + - $x_(k + 1) = ((A - sigma I)^(-1) x_k)/norm((A - sigma I)^(-1) x_k)$ + 它可以收敛到与 $sigma$ 最近的特征值。如果能取得与某个 $lambda$ 比较近的 $sigma$,则算法收敛将很快。实践上,我们不会求逆,而是对矩阵: + $ + A - sigma I + $ + 作 LU 分解,然后再解线性方程组。 + ] + #algorithm[Rayleigh 商迭代][ + 在上面的方法中,我们希望 $sigma$ 离 $lambda$ 越近越好。事实上,我们可以动态地调整 $sigma$ + - 选取初值 $x_0$ 使得 $norm(x_0) = 1$ + - 在每步中,取: + $ + sigma_k = inner(x_k, A x_k) + $ + 令: + $ + x_(k + 1) = ((A - sigma_k I)^(-1) x_k)/norm((A - sigma_k I)^(-1) x_k) + $ + ] + #theorem[][ + 若 Rayleigh 商迭代收敛到某个单特征值,则其收敛速度至少是二次的。若矩阵 $A$ 是对称的,则收敛速度可以达到局部的三次收敛。 + ] + Rayleigh 商迭代有极快的收敛速度,然而每次要解不同的线性方程组,每步计算量更大。 + == QR 迭代 + #algorithm[QR 迭代][ + - 取初值 $A_1 = A$ + - 对矩阵 $A_k$ 做 $Q R$ 分解,有: + $ + A_(k) = Q_k R_k + $ + - 令: + $ + A_(k + 1) = R_k Q_k + $ + ] + #remark[][ + 事实上有: + $ + A_(k + 1) &= R_k Q_k \ + &= (Q_k^T Q_k) R_k Q_k\ + &= Q_k^T A_k Q_k\ + &= ...\ + &= Q_k^T Q_(k-1)^T ... Q_1^T A Q_1 ... Q_(k-1) Q_k + $ + ] + #let Qt = $tilde(Q)$ + #let Rt = $tilde(R)$ + 接下来,我们记: + $ + Qt_k = Q_1 ... Q_k\ + Rt_k = R_k ... R_1 + $ + 将有: + $ + A_(k + 1) &= Qt_k^T A Qt_k\ + Qt_k Rt_k &= Qt_(k - 1) Q_k R_k Rt_(k-1) = Qt_(k - 1) A_k Rt_(k-1)\ + &= Qt_(k -1) Qt_(k-1)^T A Qt_(k-1) Rt_(k-1)\ + &= A Qt_(k-1) Rt_(k-1)\ + &= ...\ + &= A^k + $ + 这表明 $Qt_k Rt_k e_1 = A^k e_1$,换言之按照幂法当 $k$ 充分大时,$Qt_k Rt_k$ 的第一列将收敛到 $A$ 的模最大特征值对应的特征向量。类似的,有: + $ + A Qt_k &= Qt_k A_(k +1) = Qt_k Q_(k + 1) R_(k + 1) = Qt_(k + 1) R_(k + 1)\ + Qt_(k + 1) &= A Qt_k Inv(R_(k + 1)) = Inv(((A Qt_k Inv(R_(k + 1)))^T))\ + &= Inv((A^T)) Qt_(k) R^T_(k + 1)\ + Qt_(k + 1) e_n &= Inv((A^T)) Qt_(k) R^T_(k + 1) e_n = c_n Inv((A^T)) Qt_(k) e_n\ + &= ... \ + &= c (A^T)^(-k) e_n + $ + 也就是说最后一列和反幂法的现象是一致的。 + #theorem[][ + 设 $A$ 具有 $n$ 个互不相等的特征值: + $ + abs(lambda_1) > abs(lambda_2) > ... > abs(lambda_n) + $ + 设 $n$ 阶方阵 $Y$ 的第 $i$ 行是 $lambda_i$ 对应的特征向量。若 $Y$ 有 LU 分解,则 QR 方法产生的 $A_m$ 对角线以下元素收敛到零,并有第 $i$ 个对角元收敛于 $lambda_i$ + ] + #proof[ + 令 $X = Inv(Y), Lambda = diag(lambda_i)$,则: + $ + A = X Lambda Y\ + A^m = X Lambda^m Y = X Lambda^m L U = X (Lambda^m L Lambda^(-m)) Lambda^m U + $ + 由条件,可以验证: + $ + Lambda^m L Lambda^(-m) -> I + $ + 再令 $X = Q R$,不妨设 $R$ 对角元均正,则: + $ + X (Lambda^m L Lambda^(-m)) Lambda^m U = Q (R Lambda^m L Lambda^(-m) Inv(R)) R Lambda^m U + $ + 当 $m$ 足够大时 $R Lambda^m L Lambda^(-m) Inv(R)$ 也非奇异,对其做 QR 分解得: + $ + (R Lambda^m L Lambda^(-m) Inv(R)) = Q_m R_m + $ + 且: + $ + Q_m -> I\ + R_m -> I + $ + 最终有: + $ + A^m = (Q Q_m)(R^m R Lambda^m U) + $ + 这是 $A^m$ 的一个 QR 分解,而由 QR 分解的唯一性: + $ + Q Q_m D_1 = Qt_m\ + D_1 R_m R Lambda^m U = Rt_m + $ + 其中 $D_1$ 是由 $1, -1$ 组成的对角阵。注意到 $A_m = Qt_m^T A Qt_m$,代入得: + $ + A_m = D_1 Q_m^T Q^T A Q Q_m D_1 = D_1 Q_m^T Q^T Q R Lambda Inv(R) Q^T Q Q_m D_1\ + = D_1 Q_m^T R Lambda Inv(R) Q_m D_1 + $ + 而 $Q_m -> I$,验证发现上式确实趋近于上三角矩阵,且对角元依次就是 $lambda_i$ + ] + #algorithm[带位移的 QR 方法][ + QR 方法也可以进行位移,也就是: + - $A_k - sigma_k I = Q_k R_k$ + - $A_(k + 1) = R_k Q_k + sigma_k I$ + 其中有: + $ + A_(k + 1) = Q_k^T Q_k R_k Q_k + sigma_k I = Q_k^T (A_k - sigma_k I) Q_k + sigma_k I = Q_k^T A_k Q_k + $ + 如何选择 $sigma_k$ 是一个需要考虑的问题。可以设想,若 $sigma_k$ 比较接近最小特征值,则收敛可以明显加速。因此实践中往往取 $sigma_k = A_(k (n n))$ + ] + 一旦矩阵有复特征值,我们就不能希望 QR 迭代收敛于上三角矩阵。此时我们需要借助: + #theorem[实 Schur 分解][ + 设 $A in RR^(n times n)$,则存在正交矩阵 $Q$ 使得: + $ + Q^T A Q = diag(R_1, R_2, ..., R_n) + $ + 其中 $R_i$ 要么是单个特征值,要么是由共轭特征值产生的 $2 times 2$ 块。 + ] + QR 分解和矩阵乘法的复杂度达到 $O(n^3)$,而若迭代进行 $n$ 次,复杂度达到 $O(n^4)$,这是通常不能接受的。 + #definition[上 Hessenberg 矩阵][ + 称矩阵 $H$ 为上 Hessenberg 矩阵,如果 $H_(i j) = 0, i > j + 1$,也就是次对角线以下都为零 + ] + #theorem[][ + 设 $A in RR^(n times n)$,则存在正交矩阵 $Q$ 使得: + $ + Q^T A Q = H + $ + 其中 $H$ 是上 Hessenberg 矩阵。 + ] + #algorithm[][ + 求解上 Hessenberg 分解的方法是: + - 先对 $A$ 的第一列做 Householder 变换,将第一列次对角线以下变为零,设变换矩阵为 $Q$ + - 令 $A_1 = Q A Q^T$,容易验证 $A_1$ 第一列次对角线以下元素为零 + - 再对 $A_1$ 的第一列做 Householder 变换,重复进行以上步骤即可 + ] + 计算量为约为 $14/3 n^3$ + #theorem[][ + - 设 $A$ 是非奇异的上 Hessenberg 矩阵,且 QR 分解后 $A = Q R$,则 $R Q$ 也是上 Hessenberg 矩阵。 + - 若 $A$ 是主对角线和次对角线上没有零的上 Hessenberg 矩阵,它是不可约的 + - 若 $A$ 是不可约的上 Hessenberg 矩阵,则 $R Q$ 也是不可约的上 Hessenberg 矩阵 + ] + 对上 Hessenberg 矩阵进行 QR 分解时,可以考虑利用 $n-1$ 个 Givens 变换,一次迭代计算量仅有 $O(n^2)$ \ No newline at end of file diff --git "a/\350\275\257\344\273\266\345\210\206\346\236\220/hw/2100012990-\351\203\255\345\255\220\350\215\200-\350\275\257\345\210\206\347\254\254\345\205\253\346\254\241\344\275\234\344\270\232.typ" "b/\350\275\257\344\273\266\345\210\206\346\236\220/hw/2100012990-\351\203\255\345\255\220\350\215\200-\350\275\257\345\210\206\347\254\254\345\205\253\346\254\241\344\275\234\344\270\232.typ" index d272844..b49b8a4 100644 --- "a/\350\275\257\344\273\266\345\210\206\346\236\220/hw/2100012990-\351\203\255\345\255\220\350\215\200-\350\275\257\345\210\206\347\254\254\345\205\253\346\254\241\344\275\234\344\270\232.typ" +++ "b/\350\275\257\344\273\266\345\210\206\346\236\220/hw/2100012990-\351\203\255\345\255\220\350\215\200-\350\275\257\345\210\206\347\254\254\345\205\253\346\254\241\344\275\234\344\270\232.typ" @@ -38,9 +38,14 @@ #let kill = "kill" 我们有: $ - f_2 compose f_1 (x) &= gen_2 union ((gen_1 union (x - kill_1)) - kill_2)\ + f_2 compose f_1 (x) &= (gen_2 union (gen_1 - kill_2)) union (x - (kill_1 union kill_2))\ f_1 sup f_2 (x) &= (gen_1 union (x - kill_1)) sect (gen_2 union (x - kill_2))\ &= (gen_1 sect gen_2) union (x - ((kill_1 - gen_2^C) union (kill_2 - gen_1^C))) $ + 因此仍可用二元组表示函数,并且有: + $ + (gen_2, kill_2) compose (gen_1, kill_1) &= (gen_2 union (gen_1 - kill_2), kill_1 union kill_2)\ + (gen_1, kill_1) sup (gen_2, kill_2) &= (gen_1 sect gen_2, (kill_1 - gen_2^C) union (kill_2 - gen_1^C)) + $ \ No newline at end of file