diff --git a/.gitignore b/.gitignore index f655f7e..355a333 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ **/**.pdf -忽略/** \ No newline at end of file +忽略/** +**/**.zip +**/target \ No newline at end of file diff --git a/template.typ b/template.typ index 62a8f49..7578dbe 100644 --- a/template.typ +++ b/template.typ @@ -36,6 +36,8 @@ #let elasticity(P, Q) = $((diff #Q)/(diff #P))/(#Q / #P)$ #let autoVec3(a, delim: "(" ) = $vec(#a _1, #a _2, #a _3, delim: delim)$ #let autoVecN(a, n, delim: "(" ) = $vec(#a _1, #a _2, dots.v, #a _#n, delim: delim)$ +#let autoVecNF(f, n, delim: "(" ) = $vec(#(f(1)), #(f(2)), dots.v, #(f(n)), delim: delim)$ +#let autoRVecNF(f, n ) = $(#(f(1)), #(f(2)), dots, #(f(n)))$ #let autoMat3(delim: "(", ..var) = { let varList = var.pos() let row(n) = varList.map(v => $#v _#n$) @@ -46,6 +48,15 @@ #let with = "with" #let andC = $" 且 "$ #let orC = $" 或 "$ +#let hb = $hat(bold(beta))$ +#let sb = $bold(beta)^star$ +#let bbeta = $bold(beta)$ +#let balpha = $bold(alpha)$ +#let bgamma = $bold(gamma)$ +#let by = $bold(y)$ +#let bx = $bold(x)$ +#let bu = $bold(u)$ +#let bv = $bold(v)$ #let inner(x, y) = $〈#x, #y〉$ #let HomoCoor = math.vec.with(delim: "[") @@ -383,7 +394,9 @@ // box(scale(160%, origin: bottom + right, sym.square.stroked)) //}) #let note(title: "Note title", author: "Name", logo: none, date: none, - preface: none, code_with_line_number: true, withOutlined: true, withTitle: true, withHeadingNumbering: true, body) = { + preface: none, code_with_line_number: true, withOutlined: true, withTitle: true, withHeadingNumbering: true, + withChapterNewPage: false, + body) = { // Set the document's basic properties. set document(author: (author, ), title: title) set page( @@ -413,7 +426,6 @@ "(" + (counter(heading).get() + (num,)).map(str).join(".") + ")") if withHeadingNumbering == true let headingfunc = (it => it) if withHeadingNumbering == false { - set math.equation(numbering: "(1)") } else { headingfunc = (it => { @@ -436,7 +448,13 @@ // ) // Set paragraph spacing. show par: set block(above: 1.2em, below: 1.2em) - + let headingfunc1 = it => it + if withChapterNewPage == true{ + headingfunc1 = it => { + pagebreak() + it + } + } if withTitle{ // Title page. // The page can contain a logo if you pass one with `logo: "logo.png"`. @@ -475,10 +493,12 @@ } else { outline(depth: 3, indent: true) } - - pagebreak() + if withChapterNewPage == false{ + pagebreak() // 补上目录的换页 + } } } + show heading.where(level: 1) : headingfunc1 // Main body @@ -486,7 +506,6 @@ set heading(numbering: "1.") set heading(numbering: none) if withHeadingNumbering == false - // Code show raw.where(block: false): box.with( fill: luma(240), diff --git "a/\346\225\260\347\220\206\351\200\273\350\276\221/main.typ" "b/\346\225\260\347\220\206\351\200\273\350\276\221/main.typ" index 02ecf2f..02e8129 100644 --- "a/\346\225\260\347\220\206\351\200\273\350\276\221/main.typ" +++ "b/\346\225\260\347\220\206\351\200\273\350\276\221/main.typ" @@ -211,99 +211,219 @@ #definition[蕴含][ 称 $calA$ 蕴含 $calB$,记作 $calA models calB$,若任何 $calA$ 的模型都是 $calB$ 的模型 ] - #pagebreak() -= 形式系统 - #definition[形式(演绎)系统][ - 形式系统指使用符号和规则来推导命题的系统,其中包含以下几个要素: - - 形式语言:用来表达命题的符号 - + 一个字符表 - + 由字符表中的字符组成的有限字符串的一个子集,其中的字符串称为 well-formed formula - - 公理:一组 well-formed formula - - 有限个推理规则集 - ] - #example[][ - 命题逻辑是形式系统,其中: - - 字符集是原子公式集以及 $~, ->, (, )$(括号作为技术性符号,未必需要) - - 公式集是所有的命题形式 - - 公理由一组公理模式(schema)来刻画,对于任何公式 $calA_1, calA_2, calA_3$ 都有: - + $calA_1 -> (calA_2 -> calA_1)$ - + $(calA_1 -> (calA_2 -> calA_3)) -> ((calA_1 -> calA_2) -> (calA_1 -> calA_3))$ - + $(not1 calA_1 -> not1 calA_2) -> (calA_2 -> calA_1)$ - - 推理规则包括: - + $calA, (calA -> calB) => calA$ #align(end, [分离规则(MP)]) - 当然命题逻辑定义为形式系统的方式并不唯一 - ] - #remark[][ - $calA$ 等并非 $calL_0$ 中语言,而是元语言中符号。在计算机系统中,会使用 quasi-quote 来将元语言中的符号转化为 $calL_0$ 中的符号,例如 $`calA`$ ,该课程中省略该过程。 - ] - 公理系统是一种证明论(proof theory),证明论中当然还有其他与公理系统等价的系统。Euclid 的几何学是一个公理系统,但是在 19 世纪被发现是不完备的。 - #definition[证明][ - #calL 中的一个证明是指一个公式序列 $calA_i, i = 1, 2, ..., n$,其中要么 $calA_i$ 是公理,要么可以从之前的公式通过推理规则推导出来。$calA_n$ 称为一个定理,亦称 $calA_n$ 可证。 - ] - #definition[演绎][ - 令 $Gamma$ 是 $L$ 中的公式集,若有一个公式序列 $calA_i, i = 1, 2, ..., n$,其中要么 $calA_i in L$,要么 $calA_i$ 是公理,要么可以从之前的公式通过推理规则推导出来。$calA_n$ 称为从 $Gamma$ 可演绎的,记作 $Gamma tack calA_n$。特别的,若 $calA$ 是定理,则有 $emptyset tack calA$ 也记作 $tack calA$ - ] - #theorem[演绎定理][ - 若 $Gamma union {calA} tack calB$,则 $Gamma tack calA -> calB$ - ] - #proof[ - 使用结构归纳法,对 $Gamma union {calA} tack calB$ 的一个演绎 $L$ 做归纳:\ - #pattern-match[ - match L with - - #align_left[#fA =>(此时 $calB$ 是公理),考虑:] - #deduction[ - + $calB := fA$ - + $calB -> (calA -> calB) := fA$ - + $calA -> calB := #MPb(1, 2)$ + == 形式系统 + === 语法 + #definition[形式(演绎)系统][ + 形式系统指使用符号和规则来推导命题的系统,其中包含以下几个要素: + - 形式语言:用来表达命题的符号 + + 一个字符表 + + 由字符表中的字符组成的有限字符串的一个子集,其中的字符串称为 well-formed formula + - 公理:一组 well-formed formula + - 有限个推理规则集 + ] + #example[][ + 命题逻辑是形式系统,其中: + - 字符集是原子公式集(至多可数)以及 $~, ->, (, )$(括号作为技术性符号,未必需要) + - 公式集是所有的命题形式 + - 公理由一组公理模式(schema)来刻画,对于任何公式 $calA_1, calA_2, calA_3$ 都有: + + $calA_1 -> (calA_2 -> calA_1)$ + + $(calA_1 -> (calA_2 -> calA_3)) -> ((calA_1 -> calA_2) -> (calA_1 -> calA_3))$ + + $(not1 calA_1 -> not1 calA_2) -> (calA_2 -> calA_1)$ + - 推理规则包括: + + $calA, (calA -> calB) => calA$ #align(end, [分离规则(MP)]) + 当然命题逻辑定义为形式系统的方式并不唯一 + ] + #remark[][ + $calA$ 等并非 $calL_0$ 中语言,而是元语言中符号。在计算机系统中,会使用 quasi-quote 来将元语言中的符号转化为 $calL_0$ 中的符号,例如 $`calA`$ ,该课程中省略该过程。 + ] + 公理系统是一种证明论(proof theory),证明论中当然还有其他与公理系统等价的系统。Euclid 的几何学是一个公理系统,但是在 19 世纪被发现是不完备的。 + #definition[证明][ + #calL 中的一个证明是指一个公式序列 $calA_i, i = 1, 2, ..., n$,其中要么 $calA_i$ 是公理,要么可以从之前的公式通过推理规则推导出来。$calA_n$ 称为一个定理,亦称 $calA_n$ 可证。 + ] + #definition[演绎][ + 令 $Gamma$ 是 $L$ 中的公式集,若有一个公式序列 $calA_i, i = 1, 2, ..., n$,其中要么 $calA_i in L$,要么 $calA_i$ 是公理,要么可以从之前的公式通过推理规则推导出来。$calA_n$ 称为从 $Gamma$ 可演绎的,记作 $Gamma tack calA_n$。特别的,若 $calA$ 是定理,则有 $emptyset tack calA$ 也记作 $tack calA$ + ] + #theorem[演绎定理][ + 若 $Gamma union {calA} tack calB$,则 $Gamma tack calA -> calB$ + ] + #proof[ + 使用结构归纳法,对 $Gamma union {calA} tack calB$ 的一个演绎 $L$ 做归纳:\ + #pattern-match[ + match L with + - #align_left[#fA =>(此时 $calB$ 是公理),考虑:] + #deduction[ + + $calB := fA$ + + $calB -> (calA -> calB) := fA$ + + $calA -> calB := #MPb(1, 2)$ + ] + - #align_left[#fS => ] + + #align_left[若 $B in Gamma$,则 $Gamma tack calA -> calB$ 显然] + + #align_left[若 $B = A$,只需使用 $tack calA -> calA$ 即可] + - #align_left[#MP("a", "b") => (此时 $b = a -> calB$)] + 由归纳法,有 $Gamma tack calA -> a, Gamma tack calA -> (a -> calB)$,只需证明: + $ + {calA -> a, calA -> (a -> calB)} tack calA -> calB + $ + 来自于: + #deduction[ + + $calA -> a := fS$ + + $calA -> (a -> calB) := fS$ + + $(calA -> (a -> calB)) -> ((calA -> a) -> (calA -> calB)) := fA$ + + #indent(1) $(calA -> a) -> (calA -> calB) := #MPb(3, 2)$ + + #indent(2) $calA -> calB := #MPb(4, 1)$ + ] ] - - #align_left[#fS => ] - + #align_left[若 $B in Gamma$,则 $Gamma tack calA -> calB$ 显然] - + #align_left[若 $B = A$,只需使用 $tack calA -> calA$ 即可] - - #align_left[#MP("a", "b") => (此时 $b = a -> calB$)] - 由归纳法,有 $Gamma tack calA -> a, Gamma tack calA -> (a -> calB)$,只需证明: + + ] + #theorem[演绎定理2][ + 若 $Gamma tack calA -> calB$,则 $Gamma union {calA} tack calB$ + ] + #example[假言三段论 HS/传递性][ + 证明: $ - {calA -> a, calA -> (a -> calB)} tack calA -> calB - $ - 来自于: - #deduction[ - + $calA -> a := fS$ - + $calA -> (a -> calB) := fS$ - + $(calA -> (a -> calB)) -> ((calA -> a) -> (calA -> calB)) := fA$ - + #indent(1) $(calA -> a) -> (calA -> calB) := #MPb(3, 2)$ - + #indent(2) $calA -> calB := #MPb(4, 1)$ + calA -> calB, calB -> calC tack calA -> calC + $ + ] + #proof[ + 容易证明 $calA, calA -> calB, calB -> calC tack calC$,应用演绎定理即可 + ] + #definition[][ + 设 $Gamma$ 是公式集: + - 若存在 $calA$ 使得 $Gamma tack calA, Gamma tack not1 calA$,则称 $Gamma$ 是不一致的,否则是一致的 + - 若公式 $calA$ 满足 $Gamma tack.not calA, Gamma tack.not not1 calA$,则称 $calA$ 与 $Gamma$ 独立 + - 若 $calA, not1 calA in Gamma$ 则 $Gamma tack$ 任何公式,则称形式系统平凡 + - 若 $Gamma' subset Gamma, Gamma' tack calA$,则 $Gamma tack calA$ ,则称形式系统单调 + ] + #proposition[][ + 公式集不一致当且仅当对于任意公式 $calA$,都有 $Gamma tack calA$ + ] + #definition[极大一致][ + - 称公式集 $Gamma$ 是极大一致(MC)的,如果它是极大的一致集。 + - 称 $Gamma' subset Gamma$ 是极大一致子集,如果 $Gamma'$ 是 $Gamma$ 中极大的一致子集 + ] + #theorem[][ + 若 $Gamma$ 是极大一致的,则对于任意 $calA$ 均有 $Gamma tack calA$ 或 $Gamma tack not1 calA$ + ] + #example[极大一致推理][ + 记 $Gamma tack_"MCS" calA$ ,如果对于任何极大一致子集 $Gamma'$,都有 $Gamma' tack calA$,该推理既不平凡也不单调 + ] + === 语义 + 这里,我们重新形式地引入之前的真值指派思想。它描述了命题逻辑的语义。 + #definition[赋值][ + 称一个从 $L$ 的公式到 ${T, F}$ 的函数 $v$ 为一个赋值,如果: + $ + v(not1 calA) = not1 v(calA)\ + v(calA -> calB) = T "if and only if" v(calA) = F "or" v(calB) = T + $ + ] + #definition[模型][ + 令 $v$ 是 $L$ 的一个赋值,若 $v(calA) = T$,则称 $v$ 是 $calA$ 的一个模型,或 $v$ 使 $calA$ 成真,亦称 $v$ 满足 $calA$,记作 $v models calA$ + ] + #definition[重言式][ + 若 $v models calA$ 对任意赋值 $v$ 都成立,则称 $calA$ 是重言式,记作 $models calA$ + ] + + == 完全性定理 + 前面的形式系统使用的是证明论的思想,使用 $tack$,是指纯粹语法的推断。而之前的章节从真值指派看待命题逻辑,使用的是模型论的思想,使用 $models$,指通过语义进行的分析。完全性定理即是说,这两种思想是等价的。 + #theorem[命题逻辑的可靠性(soundness)][ + 若 $ tack calA$,则 $ models calA$ + ] + #proof[ + 设 $ tack calA$ 的推理序列为 $L$ + #pattern-match[ + match L with + - #align_left[#fA => 逐一验证公理都是重言式即可] + - #align_left[#MP("a", "b") => 此时 $b = a -> calB$] + 由归纳法,$a, b$ 都是重言式,不难验证 $b space a : calB$ 当然也是重言式 ] ] + #theorem[命题逻辑的一致性][ + $L$ 是一致的 + ] + #proof[ + 假设 $L$ 不一致,则存在 $L$ 的公式 $calA$ 使得 $calA, not1 calA$ 都是定理,由 @proposition-soundness 可知 $models calA, models not1 calA$,这与赋值的定义矛盾。 + ] + #definition[扩充][ + - $L$ 的一个扩充是指修改或扩大 $L$ 的公理组,使得 $L$ 中的每一个公理仍然是公式得到的形式系统。 + - 称 $L$ 的一个扩充是一致的,若不存在 $L$ 的公式 $calA$ 使得 $calA, not1 calA$ 都是这个扩充的定理。显然扩充是一致的当且仅当不是所有的公式都是定理。 + ] + #proposition[][ + 设 $L^*$ 是一个扩充,$calA$ 不是 $L^*$ 中定理,则 $L^*$ 中增加 $not1 calA$ 得到的形式系统 $L^(**)$ 也是一致的。 + ] + #definition[][ + 称 $L$ 的一个扩充是完全的,如果每个公式 $calA$ ,都满足 $calA, not1 calA$ 其中至少有一个是定理。显然一致的完全扩充是一个极大的一致扩充。 + ] + #theorem[][ + 令 $L^*$ 是一致扩充,则存在 $L^*$ 的一致完全扩充。 + ] + #proof[ + 枚举所有公式 $calA_1, ..., calA_n, ...$,记: + - $A_0$ 是 $L^*$ 的公理 + - 将 $L^*$ 的公理扩充至 $A_i$ 后: + - 若 $calA_i$ 或 $not1 calA_i$ 是定理,则 $A_(i + 1) = A_i$ + - 否则,$A_(i + 1) = A_i union {not1 calA_i}$ + 由 @consistency-extension,每个 $A_i$ 产生的扩充都是一致的。取 $union.big A_i$ 作为新的公理集,则这个扩充是: + - 一致的,既然每个定理都只能用到有限个公理,因此若 $calA, not1 calA$ 都是定理,它们只能用到有限个公理,因此存在某个 $A_i$ 使得 $A_i tack calA, not1 calA$,矛盾 + - 完全的,显然每个公式都满足 $calA, not1 calA$ 其中至少有一个是定理 + ] + #proposition[][ + 若 $L^*$ 是 $L$ 的一个一致扩充,则存在一个赋值使得 $L^*$ 的每个定理取值均为 $T$ + ] + #proof[ + 设 $L'$ 是 $L^*$ 的一致完全扩充,定义赋值如下: + $ + v(calA) = T "if and only if" tack_(L^') calA + $ + $L'$ 的一致完全性保证了定义是合理的。 - ] - #theorem[演绎定理2][ - 若 $Gamma tack calA -> calB$,则 $Gamma union {calA} tack calB$ - ] - #example[假言三段论 HS/传递性][ - 证明: - $ - calA -> calB, calB -> calC tack calA -> calC - $ - ] - #proof[ - 容易证明 $calA, calA -> calB, calB -> calC tack calC$,应用演绎定理即可 - ] - #definition[][ - 设 $Gamma$ 是公式集: - - 若存在 $calA$ 使得 $Gamma tack calA, Gamma tack not1 calA$,则称 $Gamma$ 是不一致的,否则是一致的 - - 若公式 $calA$ 满足 $Gamma tack.not calA, Gamma tack.not not1 calA$,则称 $calA$ 与 $Gamma$ 独立 - - 若 $calA, not1 calA in Gamma$ 则 $Gamma tack$ 任何公式,则称形式系统平凡 - - 若 $Gamma' subset Gamma, Gamma' tack calA$,则 $Gamma tack calA$ ,则称形式系统单调 - ] - #proposition[][ - 公式集不一致当且仅当对于任意公式 $calA$,都有 $Gamma tack calA$ - ] - #definition[极大一致][ - - 称公式集 $Gamma$ 是极大一致(MC)的,如果它是极大的一致集。 - - 称 $Gamma' subset Gamma$ 是极大一致子集,如果 $Gamma'$ 是 $Gamma$ 中极大的一致子集 - ] - #theorem[][ - 若 $Gamma$ 是极大一致的,则对于任意 $calA$ 均有 $Gamma tack calA$ 或 $Gamma tack not1 calA$ - ] - #example[极大一致推理][ - 记 $Gamma tack_"MCS" calA$ ,如果对于任何极大一致子集 $Gamma'$,都有 $Gamma' tack calA$,该推理既不平凡也不单调 - ] \ No newline at end of file + 为了证明它是赋值,只需证明: + $ + v(calA -> calB) = T "if and only if" v(calA) = F "or" v(calB) = T + $ + 简单按定义验证即可。这个赋值就满足题目要求。 + ] + #theorem[命题逻辑的完全性(completeness)][ + 若 $Gamma models calA$,则 $Gamma tack calA$ + ] + #proof[ + 设 $calA$ 是重言式,若其不是定理,则由 @consistency-extension 将 $not1 calA$ 作为公理进行扩充得到一致系统,由 @consistency-extension-value 存在赋值 $v$ 使得 $L^*$ 中每个定理取值均为 $T$,特别的 $v(not1 calA) = T$,然而由重言式定义 $v(calA) = T$,矛盾。 + ] + #theorem[可判定性定理][ + $L$ 中的公式是否是定理是可判定的 + ] + #proof[ + 由完备性定理,只需枚举所有赋值即可。 + ] + == 直觉主义逻辑 + #definition[直觉主义逻辑][ + 直觉主义逻辑由如下资料定义: + - 字符集:公式集及 ${top(真), bot(假), and, or, ->}$ + - 公理模式: + #enum(numbering: + (nums => "(I" + str(nums) + ")") + + )[$calA -> (calB -> calA)$][ + $(calA -> (calB -> calC)) -> ((calA -> calB) -> (calA -> calC))$ + ][ + $(calA and calB) -> calA$ + ][ + $(calA and calB) -> calB$ + ][ + $calA -> (calB -> (calA and calB))$ + ][ + $calA -> (calA or calB)$ + ][ + $calB -> (calA or calB)$ + ][ + $(calA -> calC) -> ((calB -> calC) -> ((calA or calB) -> calC))$ + ][ + $(calA -> calC) -> ((calB -> calC) -> ((calB or calA) -> calC))$ + ][ + $bot -> calA$ + ] + - 推理规则:MP + ] + 主流数学仍是基于经典逻辑的,但是其中有些分支也应用了直觉主义逻辑。 + + + \ No newline at end of file diff --git "a/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/main.typ" "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/main.typ" index 5798db2..a870f9c 100644 --- "a/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/main.typ" +++ "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/main.typ" @@ -6,6 +6,7 @@ date: datetime.today().display(), logo: none, ) + = 前言 - 教师:吴磊(leiwu\@math.pku.edu.cn) - 成绩:作业 40% + 大作业 60%(两到三人组) @@ -38,6 +39,7 @@ 通过训练集得到的近似函数在训练集之外往往一定程度上有效,这被称为泛化。泛化的成立根本上依赖于问题本身具有的某些良好性质,例如连续性,可微性等等。然而随着提供的训练数据在高维空间中迅速变得稀疏,这些简单直观的性质提供的泛化能力往往会在维度升高时迅速衰减,这被称为维度灾难(curse of dimensionality)。 = 传统机器学习 == 线性回归 + #let hR = $hat(cal(R))$ 使用线性函数: $ f_i (x) = beta^T x + beta_0 @@ -46,7 +48,67 @@ $ f_i (x) = (beta^T, beta_0) vec(x, 1) $ - 使用均方误差时,它有解析解。 + 定义均方误差: + $ + hR_n (beta) = 1/n sum_(j=1)^n 1/2 (beta^T x_j - y_j)^2 = 1/(2 n) norm(vec(x_1^T, dots.v, x_n^T) beta - y) + $ + #algorithm[OLS 普通最小二乘][ + 求解: + $ + 0 = nabla hR_n (beta) + $ + 得到: + $ + 1/n sum_(j=1)^n (beta^T x_j - y_j) x_j &= 0\ + sum_(j=1)^n (beta^T x_j) x_j &= sum_(j=1)^n y_j x_j\ + sum_(j=1)^n x_j (beta^T x_j) &= sum_(j=1)^n y_j x_j\ + sum_(j=1)^n x_j x_j^T beta &= (x_1, ..., x_n) y\ + (sum_(j=1)^n x_j x_j^T) beta &= (x_1, ..., x_n) y\ + (x_1, ..., x_n) vec(x_1^T, dots.v, x_n^T) beta &= (x_1, ..., x_n) y\ + $ + 记 $X = vec(x_1^T, dots.v, x_n^T)$,上式化为: + $ + X^T X beta = X^T y + $ + 假设 $X$ 满秩,则上式有唯一解: + $ + hat(beta) = (X^T X)^(-1) X^T y + $ + 否则,需要找到一个最优解。例如许多时候我们希望找到一个最小范数的解,这被称为 Ridge Regression,也就是求解以下问题: + $ + min_(beta in RR^d) norm(beta) \ + s.t. X beta = y + $ + 其中 $n < d, X in RR^(n times d) "满秩", y in RR^n$,此时可以解得解析解: + $ + hb = X^T (X X^T)^(-1) by + $ + #proof[ + 由条件,方程显然有解。设 $bbeta = X^T bu + bv$,代入方程有: + $ + X (X^T bu + bv) &= by\ + bu &= (X X^T)^(-1) (by - X bv)\ + norm(bbeta)^2 &= (bu^T X + bv^T)(X^T bu + bv)\ + &= bu^T X X^T bu + bu^T X bv + bv^T X^T bu + bv^T bv\ + &= (by^T - bv^T X^T)(X X^T)^(-1)(by - X bv) + (by^T - bv^T X^T) (X X^T)^(-1) X bv + bv^T X^T (X X^T)^(-1) (by - X bv) + bv^T bv\ + &= bv^T (I - X^T (X X^T)^(-1) X) bv + by^T (X X^T)^(-1) by + $ + #lemmaLinear[][ + $I - X^T (X X^T)^(-1) X$ 是半正定矩阵 + ] + #proof[ + 令 $P = X^T (X X^T)^(-1) X$,注意到: + - $P^2 = P$ + - $P = P^T$ + 因此: + $ + (I - P)^2 = I - 2P + P^2 = I - P\ + x^T (I - P) x = x^T (I - P)^2 x = x^T (I -P)^T (I - P) x = norm((I- P) x) >= 0 + $ + ] + 由引理,显然上式取得最小值当且仅当 $bv = 0$,此时 $bbeta = x^T (X X^T)^(-1) by$ + ] + ] #example[][ 通常情况下数据是有噪声的,这时需要采用正则化方法,例如: - Ridge Regression:$min_(beta) sum_(i=1)^n (y_i - beta^T x_i)^2 + lambda norm(beta)_2^2$ diff --git "a/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/\344\275\234\344\270\232/hw2.typ" "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/\344\275\234\344\270\232/hw2.typ" new file mode 100644 index 0000000..555d875 --- /dev/null +++ "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/\344\275\234\344\270\232/hw2.typ" @@ -0,0 +1,243 @@ +#import "../../template.typ": proof, note, corollary, lemma, theorem, definition, example, remark +#import "../../template.typ": * +#import "../main.typ": * +#show: note.with( + title: "作业2", + author: "YHTQ", + date: datetime.today().display(), + logo: none, + withOutlined : false, + withTitle : false, +) +#set page(paper: "a3") +#let ltC = math.lt.tilde +#let empty = "" += #empty + + #let yi(i) = $bx_(#i)^T sb + epsilon$ + == #empty + $ + hb &= (X^T X)^(-1) X^T by\ + &= (X^T X)^(-1) X^T autoVecNF(yi, n)\ + &= (X^T X)^(-1) X^T (#autoVecNF(i => $bx_(#i)^T sb$, $n$) + #autoVecNF(i => $epsilon$, $n$))\ + &= (X^T X)^(-1) X^T (#autoVecNF(i => $bx_(#i)^T$, $n$) sb + #autoVecNF(i => $epsilon$, $n$))\ + &= (X^T X)^(-1) X^T (X sb + #autoVecNF(i => $epsilon$, $n$))\ + &= sb + (X^T X)^(-1) X^T #autoVecNF(i => $epsilon$, $n$)\ + $ + 因此: + $ + E(hb) = E(sb) + E((X^T X)^(-1) X^T #autoVecNF(i => $epsilon$, $n$)) = sb + E((X^T X)^(-1) X^T) E#autoVecNF(i => $epsilon$, $n$) = sb + $ + == #empty + #lemmaLinear[][ + 设 $X$ 是矩阵随机变量,则 $E(tr(X)) = tr(E(X))$ + ] + #proof[ + $ + E(tr(X)) = E(sum_i X_(i i)) = sum_i E(X_(i i)) = tr(E(X)) + $ + ] + $ + E(norm(hb - sb)^2) &= E(norm((X^T X)^(-1) X^T #autoVecNF(i => $epsilon$, $n$))^2)\ + &= 1/n E(((X^T X)^(-1) X^T #autoVecNF(i => $epsilon$, $n$))^T ((X^T X)^(-1) X^T #autoVecNF(i => $epsilon$, $n$)))\ + &= 1/n E(#autoRVecNF(i => $epsilon$, $n$) X (X^T X)^(-1) (X^T X)^(-1) X^T #autoVecNF(i => $epsilon$, $n$))\ + &= 1/n E(tr(#autoRVecNF(i => $epsilon$, $n$) X (X^T X)^(-1) (X^T X)^(-1) X^T #autoVecNF(i => $epsilon$, $n$)))\ + &= 1/n E(tr( X (X^T X)^(-1) (X^T X)^(-1) X^T #autoVecNF(i => $epsilon$, $n$) #autoRVecNF(i => $epsilon$, $n$) ))\ + &= 1/n tr(E( X (X^T X)^(-1) (X^T X)^(-1) X^T #autoVecNF(i => $epsilon$, $n$) #autoRVecNF(i => $epsilon$, $n$) ))\ + &= 1/n tr(E( X (X^T X)^(-1) (X^T X)^(-1) X^T) E( #autoVecNF(i => $epsilon$, $n$) #autoRVecNF(i => $epsilon$, $n$) ))\ + &= sigma^2 tr(E( X (X^T X)^(-1) (X^T X)^(-1) X^T))\ + &= sigma^2 E(tr( X (X^T X)^(-1) (X^T X)^(-1) X^T))\ + &= sigma^2 E(tr( (X^T X)^(-1) X^T X (X^T X)^(-1)))\ + &= sigma^2 E(tr (X^T X)^(-1))\ + &= sigma^2 tr(E (X^T X)^(-1))\ + $ + == #empty + 注意到 $X^T$ 是 $d times n$ 矩阵,其每一列服从 $N(0, I_d)$,因此有: + $ + X^T (X^T)^T = X^T X tilde W_d (I, n) + $ + 进而: + $ + (X^T X)^(-1) tilde W^(-1)_d (I, n) + $ + 由 $n > d$ 有: + $ + E((X^T X)^(-1)) = 1/(n - d - 1) I\ + tr(E((X^T X)^(-1))) = d/(n - d - 1) + $ += #empty + 由条件,方程显然有解。设 $bbeta = X^T bu + bv$,代入方程有: + $ + &X (X^T bu + bv) = by\ + bu &= (X X^T)^(-1) (by - X bv)\ + norm(bbeta)^2 &= (bu^T X + bv^T)(X^T bu + bv)\ + &= bu^T X X^T bu + bu^T X bv + bv^T X^T bu + bv^T bv\ + &= (by^T - bv^T X^T)(X X^T)^(-1)(by - X bv) + (by^T - bv^T X^T) (X X^T)^(-1) X bv + bv^T X^T (X X^T)^(-1) (by - X bv) + bv^T bv\ + &= bv^T (I - X^T (X X^T)^(-1) X) bv + by^T (X X^T)^(-1) by + $ + #lemmaLinear[][ + $I - X^T (X X^T)^(-1) X$ 是半正定矩阵 + ] + #proof[ + 令 $P = X^T (X X^T)^(-1) X$,注意到: + - $P^2 = P$ + - $P = P^T$ + 因此: + $ + (I - P)^2 = I - 2P + P^2 = I - P\ + x^T (I - P) x = x^T (I - P)^2 x = x^T (I -P)^T (I - P) x = norm((I- P) x) >= 0 + $ + ] + 由引理,显然上式取得最小值当且仅当 $bv = 0$,此时 $bbeta = x^T (X X^T)^(-1) by$ += + == + #let bb1 = $bbeta_1$ + #let bb2 = $bbeta_2$ + 注意到函数: + $ + funcDef(f, RR^d, RR, bbeta, norm(by - X bbeta)^2_2 + lambda norm(bbeta)_1^2) + $ + 是两个凸函数的和。假设 $bb1, bb2$ 是两个最小值点,应当有: + $ + f((bb1 + bb2)/2) >= (f(bb1) + f(bb2))/2 + $ + 结合凸性,这意味着: + $ + f((bb1 + bb2)/2) = (f(bb1) + f(bb2))/2 + $ + 而: + $ + norm(by - X ((bb1 + bb2)/2))^2_2 <= (norm(by - X bb1)^2_2 + norm(by - X bb2)^2_2)/2\ + norm((bb1 + bb2)/2)_1 <= (norm(bb1)_1 + norm(bb2)_1)/2 + $ + 由等式,两者分别取等号。对前式化简: + $ + norm(by - X ((bb1 + bb2)/2))^2_2 &= (norm(by - X bb1)^2_2 + norm(by - X bb2)^2_2)/2\ + 1/4 (bb1^T + bb2^T) X^T X (bb1 + bb2) - 1/2 (y^T (bb1 + bb2)X + X^T (bb1^T + bb2^T) y) + y^T y&= (norm(by - X bb1)^2_2 + norm(by - X bb2)^2_2)/2\ + (bb1^T - bb2^T) X^T X (bb1 - bb2) &= 0\ + (X(bb1 - bb2))^T X(bb1 - bb2) &= 0\ + X(bb1 - bb2) &= 0\ + $ + 表明 $norm(by - X bb1)^2_2 = norm(by - X bb2)^2_2$,自然将有 $norm(bb1)_1 = norm(bb2)_1 $ + == + #let p1(y) = $norm(by -X #y)^2_2$ + #let p2(y) = $norm(#y)_1$ + 设 $bb1, bb2$ 分别是 $S_1(lambda), S_2(phi(lambda))$ 的解,分别证明它们是对方的解。 + #lemmaLinear[][ $p1(bb1) = p1(bb2), p2(bb1) = p2(bb2)$] + #proof[ + 由 $S_2(phi(lambda))$ 的条件不难发现: + $ + p2(bb2) <= p2(bb1) + $ + 由 $bb2$ 是其最小值及 $p2(bb1) <= p2(bb1)$ 得: + $ + p1(bb2) <= p1(bb1) \ + lambda p1(bb2) <= lambda p1(bb1) \ + $ + 两不等式相加,得: + $ + p1(bb2) + lambda p2(bb2) <= p1(bb1) + lambda p2(bb1) + $ + 然而由于 $bb1$ 是 $S_1 (lambda)$ 的最小值,应当有: + $ + p1(bb2) + lambda p2(bb2) >= p1(bb1) + lambda p2(bb1) + $ + 因此以上不等式全部取等,命题得证。 + ] + 由引理,结论显然。 += #empty + 不难验证函数 $f(t) = 1/2(x - t)^2 + lambda^2/2 norm(t)_0$是仅有两段的分段可导函数,并且 $t -> infinity$ 时 $f(t) -> +infinity$,一定有最小值。可以得到 $f$ 的极小值点仅可能是 $0, x$,因此 $f$ 的最小值为: + $ + cases( + f(t) = lambda^2/2 "if" f(t) <= f(0), + f(0) = 1/2 x^2 "if" f(t) > f(0) + ) + $ + 化简即得所求 += #empty + #lemmaLinear[][ + $k(x, y) = f(x) f(y)$ 一定是半正定函数 + ] + #proof[ + #let fi(i) = $f(x_#i)$ + 不难发现: + $ + (f(x_i) f(x_j))_(i j) = #autoVecNF(fi, $n$) #autoRVecNF(fi, $n$) + $ + 这当然是半正定矩阵 + ] + == #empty + #let sum1 = $sum_(i j)$ + $ + cos(x - y) = cos x cos y + sin x sin y + $ + 由引理 $cos x cos y, sin x sin y$ 都半正定,因此 $cos(x - y)$ 也是半正定的 + == #empty + 同上 + == #empty + 注意到对应矩阵是柯西矩阵,其行列式为: + $ + det (1/(x_i + x_j)) = (product_(i > 1, j < i) (x_i - x_j)^2 )/(product_(i j) (x_i + x_j) ) >= 0 + $ + 同时,其主子式也有以上形式,不难看出: + - 若 $i != j => x_i != x_j$,则所有顺序主子式均正,矩阵正定 + - 否则,注意到 ${x | i != j => x_i != x_j}$ 在 $RR^n$ 中稠密,而半正定矩阵的极限仍然半正定,逼近即可。 + == #empty + $ + e^(-norm(x - y)_1) = product_k e^(-abs(x_(k))) e^(-abs(y_(k))) = (product_k e^(-abs(x_(k)))) (product_k e^(-abs(y_(k)))) + $ + 由引理是半正定的 + == #empty + 注意到 $k(x, y)$ 对应矩阵就是 $k_1 (x, y), k_2 (x, y)$ 对应矩阵的 Hadamard 积,因此由 Schur product theorem 确实是半正定的。 += #empty + == #empty + #let px = $phi(x\;omega)$ + #let pxj = $phi(x\;omega_j)$ + #let pxk = $phi(x\;omega_k)$ + #let py = $phi(y\;omega)$ + #let pyj = $phi(y\;omega_j)$ + #let pyk = $phi(y\;omega_k)$ + $ + E_W sqrt(E (k(x, y) - k_m (x, y))^2) &= E sqrt(E_(x, y) (E_omega (px py) - 1/m sum_j pxj pyj)^2)\ + &<= sqrt(E_(x, y, W) (E_omega (px py) - 1/m sum_j pxj pyj)^2)\ + &= sqrt(E_(x, y) ((E_omega (px py))^2 + E_W (1/m sum_j pxj pyj)^2 - 2 (E_omega (px py))^2))\ + &= sqrt(E_(x, y) ((E_omega (px py))^2 + (sum_(j) E_W (pxj pyj)^2 + sum_(j k) (E_W (pxj pyj))(E_W (pxk pyk)) )/m^2 - 2 (E_omega (px py))^2))\ + &= sqrt(E_(x, y) ( m E_omega (px py)^2 + (m^2-m)/2 (E_omega (px py))^2 )/m^2 - (E_omega (px py))^2)\ + &= sqrt(E_(x, y) ( m E_omega (px py)^2 - (m^2+m)/2 (E_omega (px py))^2 )/m^2 )\ + &<= sqrt(E_(x, y) ( E_omega (px py)^2 )/m )\ + &= sqrt(Q / m) + $ + == #empty + 从上一问可以得到: + $ + E epsilon_m (W) <= C^2/sqrt(m) + $ + 注意到 $epsilon_m (W)$ 事实上是 2- 范数,因此可以使用三角不等式: + $ + D_i epsilon_m (W) + &= epsilon_m (W_1) - epsilon_m (W_2) \ + &<= epsilon_m (W_1 - W_2)\ + &= sqrt(E_(x, y) (1/m (phi(x\;omega_1)phi(y\;omega_1) - phi(x\;omega_2)phi(y\;omega_2)))^2)\ + &<= sqrt(E_(x, y) (2 C^2/m)^2)\ + &= 2 C^2/m\ + + $ + 由 McDiarmid's inequality,有: + $ + P(abs(epsilon_m (W) - E epsilon_m (W)) >= t) <= 2 e^(-t^2/(2 sigma^2)) + $ + 其中 $sigma^2 = 1/4 sum_(i = 1)^m 4/m^2 C^4 = C^4/m$,因此: + $ + P(abs(epsilon_m (W) - E epsilon_m (W)) >= t) <= 2 e^(-(m t^2)/(2 C^4))\ + P(epsilon_m (W) >= C^2/sqrt(m) + t) <= P(abs(epsilon_m (W) - E epsilon_m (W)) >= t) <= 2 e^(-(m t^2)/(2 C^4))\ + $ + 令 $2 e^(-(m t^2)/(2 C^4)) = delta$,解得: + $ + - (m t^2)/(2 C^4) = ln (delta/2)\ + t = sqrt(2 C^4 ln (2/delta))/sqrt(m) + $ + 整理即得: + $ + P(epsilon_m (W) >= C^2/sqrt(m) (1 + 2 ln (2/delta))) <= delta + $ + 证毕 diff --git "a/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/Cargo.lock" "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/Cargo.lock" new file mode 100644 index 0000000..db79400 --- /dev/null +++ "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/Cargo.lock" @@ -0,0 +1,315 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "bytemuck" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94bbb0ad554ad961ddc5da507a12a29b14e4ae5bda06b19f575a3e6079d2e2ae" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "hw1" +version = "0.1.0" +dependencies = [ + "nalgebra", +] + +[[package]] +name = "libc" +version = "0.2.159" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" + +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "matrixmultiply" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9380b911e3e96d10c1f415da0876389aaf1b56759054eeb0de7df940c456ba1a" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "nalgebra" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c4b5f057b303842cf3262c27e465f4c303572e7f6b0648f60e16248ac3397f4" +dependencies = [ + "approx", + "matrixmultiply", + "nalgebra-macros", + "num-complex", + "num-rational", + "num-traits", + "rand", + "rand_distr", + "simba", + "typenum", +] + +[[package]] +name = "nalgebra-macros" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "254a5372af8fc138e36684761d3c0cdb758a4410e938babcff1c860ce14ddbfc" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "safe_arch" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3460605018fdc9612bce72735cba0d27efbcd9904780d44c7e3a9948f96148a" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "simba" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3a386a501cd104797982c15ae17aafe8b9261315b5d07e3ec803f2ea26be0fa" +dependencies = [ + "approx", + "num-complex", + "num-traits", + "paste", + "wide", +] + +[[package]] +name = "syn" +version = "2.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wide" +version = "0.7.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b828f995bf1e9622031f8009f8481a85406ce1f4d4588ff746d872043e855690" +dependencies = [ + "bytemuck", + "safe_arch", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git "a/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/Cargo.toml" "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/Cargo.toml" new file mode 100644 index 0000000..825d5ba --- /dev/null +++ "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/Cargo.toml" @@ -0,0 +1,7 @@ +[package] +name = "hw1" +version = "0.1.0" +edition = "2021" + +[dependencies] +nalgebra = { version = "*", features = ["rand"]} diff --git "a/\350\256\241\347\256\227\346\226\271\346\263\225B/hw/hw1.typ" "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/hw1.typ" similarity index 66% rename from "\350\256\241\347\256\227\346\226\271\346\263\225B/hw/hw1.typ" rename to "\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/hw1.typ" index d3cd09d..72b0eeb 100644 --- "a/\350\256\241\347\256\227\346\226\271\346\263\225B/hw/hw1.typ" +++ "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/hw1.typ" @@ -26,6 +26,7 @@ } } ``` + 计算量约为 $n^2$ = 4. 不难发现: $ @@ -58,4 +59,40 @@ & <= abs(a_(k k)) - (a_(k 1)) / (a_(1 1)) a_(1 k)\ & = a'_(k k)\ $ - 证毕 \ No newline at end of file + 证毕 += 10. + 无妨设 $a_(1 1) = 1$,并设: + $ + A = mat(1, a_(1 2)^T; a_(2 1), A_(2 2)) + $ + 则有: + $ + A_2 = A_(2 2) - a_(2 1) a_(1 2)^T + $ + 任取非零向量 $x$,则: + $ + x^T A_2 x = x^T (A_(2 2)) x - (x^T a_(2 1)) (x^T a_(1 2)) + $ + 而: + $ + (t, x^T) A vec(t, x) + &= (t, x) mat(1, a_(1 2)^T; a_(2 1), A_(2 2)) vec(t, x)\ + &= t^2 + (x^T a_(2 1) + a_(1 2)^T x) t + x^T A_(2 2) x + $ + 对于任何 $t$ 都正,换言之: + $ + 4 x^T A_(2 2) x > (x^T a_(2 1) + a_(1 2)^T x)^2 > 4 (x^T a_(2 1)) (x^T a_(1 2)) => x^T a_2 x > 0 + $ + 证毕 += 14. + 注意到: + $ + A X = e_j + $ + 的解 $X$ 就是 $Inv(A)$ 的第 $j$ 列,而已知 LU 分解,上面方程是容易解出的,因此 $Inv(A)$ 的 $i j$ 元也容易得到。 += 19. + 将等式 $A = L L^T$ 分块分解为: + $ + mat(A_i, B^T; B, C) = mat(L_i, 0; B, I) mat(L_i^T, B^T; 0, I) + $ + 立得 $A_i = L_i L_i^T$,而 $L_i$ 当然是对角元均正的下三角矩阵。 \ No newline at end of file diff --git "a/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/output.txt" "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/output.txt" new file mode 100644 index 0000000..4095119 --- /dev/null +++ "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/output.txt" @@ -0,0 +1,16 @@ +finish solving LU, x = [[1.0, 1.0000000000000002, 0.9999999999999996, 1.0000000000000009, 0.9999999999999982, 1.0000000000000036, 0.9999999999999929, 1.0000000000000142, 0.9999999999999716, 1.0000000000000568, 0.9999999999998863, 1.0000000000002274, 0.9999999999995453, 1.0000000000009095, 0.999999999998181, 1.000000000003638, 0.9999999999927245, 1.000000000014551, 0.999999999970898, 1.000000000058204, 0.9999999998835918, 1.0000000002328164, 0.9999999995343671, 1.0000000009312657, 0.9999999981374685, 1.000000003725063, 0.9999999925498742, 1.0000000149002517, 0.9999999701994966, 1.0000000596010068, 0.9999998807979864, 1.0000002384040272, 0.9999995231919456, 1.0000009536161087, 0.9999980927677825, 1.000003814464435, 0.99999237107113, 1.00001525785774, 0.9999694842845201, 1.0000610314309597, 0.9998779371380806, 1.0002441257238388, 0.9995117485523225, 1.0009765028953548, 0.9980469942092913, 1.0039060115814138, 0.9921879768371866, 1.01562404632557, 0.9687519073490876, 1.0624961853009154, 0.8750076294018072, 1.2499847411818346, 0.500030517694535, 1.9999389643781136, -0.9998779278249614, 4.99975585192486, -6.999511688949468, 16.99902331829793, -30.998046398191832, 64.99609184276756, -126.99217987107068, 256.9843444842836, -510.96862793713626, 1024.9370117485487, -2046.873046994202, 4096.742187976823, -8190.4687519073195, 16383.875007629336, -32764.50003051746, 65531.00012207008, -131055.00048828078, 262097.00195312407, -524127.00781249814, 1048001.0312499963, -2094975.1249999925, 4185857.499999985, -8355328.99999997, 16645128.99999994, -33028126.99999988, 65007744.99999977, -125821438.99999955, 234866688.99999917, -402628606.99999857, 536838144.9999981]] + m * x = [[7.0, 15.000000000000002, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.000000000000004, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.00000000745058, 15.000000029802322, 14.999999940395355, 15.0, 15.0, 14.0]] + b = [[7.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 14.0]] + L2 delta = 6.705522537231457e-8 +finish solving LU col, x = [[0.9999999999999999, 1.0000000000000002, 0.9999999999999993, 1.0000000000000016, 0.9999999999999967, 1.0000000000000069, 0.999999999999986, 1.0000000000000282, 0.9999999999999434, 1.0000000000001135, 0.9999999999997728, 1.0000000000004545, 0.9999999999990907, 1.0000000000018188, 0.9999999999963622, 1.0000000000072757, 0.9999999999854483, 1.0000000000291036, 0.9999999999417926, 1.000000000116415, 0.9999999997671696, 1.000000000465661, 0.9999999990686776, 1.000000001862645, 0.9999999962747099, 1.0000000074505804, 0.999999985098839, 1.0000000298023222, 0.9999999403953554, 1.0000001192092893, 0.9999997615814211, 1.0000004768371578, 0.9999990463256845, 1.000001907348631, 0.9999961853027379, 1.0000076293945241, 0.9999847412109517, 1.0000305175780966, 0.9999389648438068, 1.0001220703123863, 0.9997558593752274, 1.0004882812495453, 0.9990234375009095, 1.0019531249981803, 0.9960937500036413, 1.00781249999271, 0.9843750000146085, 1.031249999970669, 0.9375000000591169, 1.124999999879947, 0.7500000002473821, 1.4999999994761315, 1.1641525521355334e-9, 2.999999997206033, -2.999999992549421, 8.999999977648262, -14.999999925494201, 32.99999973177911, -62.99999898672107, 128.9999960660935, -254.99998450279247, 512.9999384880068, -1022.9997549057014, 2048.999021530153, -4094.996089935308, 8192.984367370624, -16382.93748474128, 32768.74996948268, -65533.99993896581, 131068.99987793349, -262126.99975587445, 524224.9995117787, -1048318.9990236765, 2096128.9980478298, -4190206.9960975675, 8372224.992202764, -16711678.984436044, 33292288.96899416, -66060286.93847661, 130023424.87890634, -251658238.76562515, 469762048.56250024, -805306366.2500002, 1073741824.0000002]] + m * x = [[6.999999999999999, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 14.999999999999998, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 14.999999999999773, 15.0, 15.0, 15.0, 14.999999999996362, 14.999999999992724, 14.999999999992724, 15.000000000014552, 15.0, 15.0, 14.999999999883585, 15.00000000023283, 15.0, 15.0, 14.999999998137355, 15.00000000372529, 15.0, 15.0, 14.999999970197678, 15.0, 15.0, 15.000000238418579, 14.0]] + b = [[7.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 14.0]] + L2 delta = 2.403102424134844e-7 +finish solving Cholesky, x = [[0.03534139337757466, 0.08041553926000304, 0.02619867359193721, 0.041626023483655725, 0.0723697506143703, 0.04445063122901202, 0.03538342865626058, 0.0340941270032824, 0.007887862406983245, 0.046792978753601805, 0.028111845119165844, 0.041004149452271356, 0.03725026220110166, 0.08755573072709014, 0.030794441549951736, 0.0245736429529061, 0.0003716276126492122, 0.07678272928415417, -0.0010099662722848463, 0.08313953322542045, 0.043682264501817725, 0.0008931689474933123, 0.018111513654630026, -0.006761718736668384, 0.08773760598412723, 0.08885354173056573, -0.002476363081352236, 0.01995120702878338, -0.004533102814770753, 0.04131388207422882, 0.02430340348295933, 0.022093234825975527, 0.08240884856169332, 0.008097055146664308, 0.026353703003460148, 0.026785033262465868, 0.08230821391905677, 0.013573506190998839, 0.06671747656621929, 0.009980087325276325, 0.04658820691767878, 0.08098685136810206, 0.04982840075726402, 0.07925963533749461, 0.0058350651338775275, 0.06169449027759675, 0.05512884799796731, 0.017322743368113805, 0.06984826084247024, 0.005856446009849362, 0.09503924442318275, 0.0016843069621432527, 0.00012039465331574768, 0.08635313196817142, -0.012725684090987188, 0.06846720592962971, 0.06445302716963025, -0.0009109828097855359, 0.03310744118019627, 0.009453673687311452, 0.075089522388855, 0.018087891366020632, 0.06045621203694237, 0.08378826202842497, 0.08149768883746936, 0.06521509131324148, 0.06089311205577097, 0.004429131212749789, 0.06948347551321568, 0.05032466458760423, 0.004168219382366817, 0.08830866337843012, -0.001855899509932966, 0.025763588947278617, 0.02996900748860722, 0.0518274419544246, 0.06961769611407236, 0.04684554865436761, 0.025070058349330574, 0.09555709580898376, 0.0026743466465205578, 0.08298386126701655, 0.07922537116572524, 0.07000124984941178, 0.08208002731859311, 0.036746447703153044, 0.028725413758466503, 0.051058032594878386, 0.004949908534704026, 0.011614348294556426, 0.055546732772401415, 0.032681309482593554, 0.06373856747777683, -0.008555224450610258, 0.04297201177949575, 0.032866440681389525, 0.011599992044786983, 0.0821213077681242, 0.08652859171941611, 0.03493771859532425]] + m * x = [[0.43382947303574965, 0.8656954595695423, 0.3840282986630309, 0.5148286590428648, 0.8097741608563708, 0.552259491560751, 0.4323790447949003, 0.38421256109606783, 0.15976572982671666, 0.5039294950621671, 0.3689155793975316, 0.4754036018429811, 0.5010625021903782, 0.9436020110219546, 0.42007378917951355, 0.27690249869166195, 0.10507264836355239, 0.767188954181906, 0.14982259978672616, 0.8740676304837374, 0.5208553471910911, 0.07072546763138088, 0.17524658675712518, 0.038231932272073416, 0.9594678828351696, 0.9737966602084323, 0.08404111794582675, 0.1925026043917108, 0.015934060955304665, 0.43290912141047677, 0.30644115172979763, 0.32764460030440795, 0.854278775589573, 0.18973310303179655, 0.2984191184437317, 0.37651224954717555, 0.8634406786440323, 0.28476075239526444, 0.6907283591784681, 0.21310655673666135, 0.5568490078701662, 0.9062851213559634, 0.6585304942782368, 0.8482598192660876, 0.19930477695386664, 0.6779088159078124, 0.6303057136253837, 0.2982045425215756, 0.7216617978026656, 0.22345196536414663, 0.9579331972038201, 0.11200270869793101, 0.08924138546347214, 0.8509260302440427, 0.027563496987929245, 0.7363994023749403, 0.7120864948161467, 0.08845064025197116, 0.3396171026794886, 0.2027337004421658, 0.7784367889418821, 0.3164246480860036, 0.7064382737638693, 0.9798365211586615, 0.9639802417163599, 0.7945417140256551, 0.6785753430837009, 0.17466789969648455, 0.7495885509325108, 0.5768983407716247, 0.1803155217897025, 0.885398953656735, 0.09551325722637907, 0.2857489974514604, 0.3772811057877754, 0.6178611231469254, 0.7948499517495158, 0.563143241007079, 0.39310322795665714, 0.9833153630856888, 0.2052844235412059, 0.9117383304824114, 0.9452388227736808, 0.8613178969784362, 0.9275479707384959, 0.47826991810859004, 0.37505861788269645, 0.5442556482419544, 0.11217146623647509, 0.1766401242526697, 0.5997629855011641, 0.4460983950761138, 0.6615117598097515, 0.021158334751170002, 0.4540313340257368, 0.38323641063817804, 0.23098766889738354, 0.919341661445445, 0.9823449435576096, 0.4359057776726586]] + b = [[0.4338294730357497, 0.8737370134955426, 0.3866481660222246, 0.5189912613912304, 0.8170111359178079, 0.5567045546836523, 0.43591738766052635, 0.38762197379639607, 0.160554516067415, 0.5086087929375274, 0.37172676390944825, 0.4795040167882083, 0.5047875284104885, 0.952357584094664, 0.4231532333345088, 0.2793598629869526, 0.10510981112481732, 0.7748672271103214, 0.14972160315949767, 0.8823815838062794, 0.5252235736412728, 0.07081478452613021, 0.17705773812258818, 0.03755576039840658, 0.9682416434335824, 0.9826820143814889, 0.08379348163769151, 0.19449772509458918, 0.015480750673827592, 0.43704050961789964, 0.3088714920780936, 0.3298539237870055, 0.8625196604457425, 0.190542808546463, 0.3010544887440777, 0.3791907528734222, 0.8716715000359382, 0.28611810301436436, 0.69740010683509, 0.214104565469189, 0.5615078285619343, 0.9143838064927736, 0.6635133343539634, 0.8561857827998371, 0.1998882834672544, 0.6840782649355721, 0.6358185984251804, 0.299936816858387, 0.7286466238869127, 0.22403760996513156, 0.9674371216461385, 0.11217113939414536, 0.08925342492880373, 0.85956134344086, 0.02629092857883053, 0.7432461229679033, 0.7185317975331097, 0.0883595419709926, 0.3429278467975083, 0.20367906781089695, 0.7859457411807675, 0.3182334372226058, 0.7124838949675636, 0.9882153473615041, 0.9721300106001071, 0.8010632231569794, 0.684664654289278, 0.17511081281775953, 0.7565368984838325, 0.5819308072303853, 0.18073234372793923, 0.8942298199945781, 0.09532766727538577, 0.2883253563461883, 0.3802780065366361, 0.623043867342368, 0.8018117213609232, 0.567827795872516, 0.3956102337915902, 0.9928710726665873, 0.20555185820585797, 0.9200367166091131, 0.9531613598902534, 0.8683180219633775, 0.9357559734703553, 0.4819445628789054, 0.37793115925854315, 0.5493614515014423, 0.1126664570899455, 0.17780155908212536, 0.6053176587784043, 0.4493665260243731, 0.6678856165575293, 0.020302812306108975, 0.45832853520368644, 0.386523054706317, 0.23214766810186227, 0.9275537922222575, 0.9909978027295514, 0.43939954953219107]] + L2 delta = 0.0517521712207962 +finish solving improved Cholesky, x = [[0.035341393377574665, 0.08041553926000303, 0.02619867359193721, 0.041626023483655725, 0.07236975061437032, 0.04445063122901202, 0.03538342865626058, 0.034094127003282404, 0.007887862406983245, 0.046792978753601805, 0.028111845119165847, 0.04100414945227137, 0.037250262201101676, 0.08755573072709015, 0.03079444154995174, 0.0245736429529061, 0.000371627612649212, 0.07678272928415417, -0.0010099662722848467, 0.08313953322542046, 0.04368226450181773, 0.0008931689474933127, 0.018111513654630026, -0.006761718736668387, 0.08773760598412725, 0.08885354173056573, -0.002476363081352239, 0.019951207028783384, -0.004533102814770753, 0.041313882074228814, 0.024303403482959337, 0.022093234825975527, 0.08240884856169334, 0.008097055146664308, 0.02635370300346015, 0.026785033262465868, 0.08230821391905678, 0.01357350619099884, 0.06671747656621929, 0.009980087325276325, 0.04658820691767879, 0.08098685136810206, 0.04982840075726403, 0.07925963533749462, 0.005835065133877526, 0.06169449027759676, 0.05512884799796731, 0.01732274336811381, 0.06984826084247026, 0.005856446009849362, 0.09503924442318276, 0.0016843069621432516, 0.00012039465331574808, 0.08635313196817142, -0.01272568409098719, 0.06846720592962971, 0.06445302716963025, -0.000910982809785538, 0.033107441180196276, 0.009453673687311454, 0.075089522388855, 0.018087891366020632, 0.060456212036942375, 0.08378826202842499, 0.08149768883746936, 0.06521509131324149, 0.06089311205577097, 0.0044291312127497885, 0.0694834755132157, 0.050324664587604234, 0.004168219382366816, 0.08830866337843013, -0.0018558995099329679, 0.02576358894727862, 0.02996900748860722, 0.0518274419544246, 0.06961769611407237, 0.04684554865436762, 0.02507005834933057, 0.09555709580898378, 0.002674346646520557, 0.08298386126701657, 0.07922537116572526, 0.07000124984941179, 0.08208002731859312, 0.03674644770315305, 0.028725413758466503, 0.0510580325948784, 0.004949908534704026, 0.011614348294556426, 0.055546732772401415, 0.03268130948259355, 0.06373856747777684, -0.00855522445061026, 0.04297201177949576, 0.032866440681389525, 0.011599992044786983, 0.08212130776812421, 0.08652859171941614, 0.034937718595324255]] + m * x = [[0.4338294730357497, 0.8656954595695421, 0.3840282986630309, 0.5148286590428648, 0.809774160856371, 0.552259491560751, 0.4323790447949003, 0.3842125610960679, 0.15976572982671666, 0.5039294950621671, 0.3689155793975316, 0.47540360184298125, 0.5010625021903783, 0.943602011021955, 0.42007378917951366, 0.27690249869166195, 0.10507264836355239, 0.767188954181906, 0.14982259978672616, 0.8740676304837375, 0.5208553471910912, 0.07072546763138088, 0.17524658675712518, 0.0382319322720734, 0.9594678828351698, 0.9737966602084323, 0.08404111794582672, 0.19250260439171085, 0.01593406095530466, 0.4329091214104767, 0.30644115172979774, 0.32764460030440795, 0.8542787755895732, 0.18973310303179658, 0.2984191184437317, 0.3765122495471756, 0.8634406786440325, 0.2847607523952645, 0.6907283591784681, 0.21310655673666135, 0.5568490078701662, 0.9062851213559635, 0.658530494278237, 0.8482598192660878, 0.19930477695386667, 0.6779088159078125, 0.6303057136253837, 0.29820454252157563, 0.7216617978026657, 0.22345196536414663, 0.9579331972038203, 0.11200270869793102, 0.08924138546347216, 0.8509260302440427, 0.027563496987929245, 0.7363994023749403, 0.7120864948161467, 0.08845064025197115, 0.33961710267948864, 0.2027337004421658, 0.7784367889418821, 0.3164246480860037, 0.7064382737638695, 0.9798365211586616, 0.9639802417163599, 0.7945417140256552, 0.6785753430837009, 0.17466789969648455, 0.749588550932511, 0.5768983407716247, 0.18031552178970253, 0.8853989536567352, 0.09551325722637907, 0.28574899745146043, 0.3772811057877754, 0.6178611231469255, 0.7948499517495159, 0.5631432410070791, 0.3931032279566571, 0.9833153630856889, 0.2052844235412059, 0.9117383304824115, 0.945238822773681, 0.8613178969784363, 0.927547970738496, 0.4782699181085901, 0.37505861788269645, 0.5442556482419545, 0.11217146623647509, 0.1766401242526697, 0.5997629855011641, 0.4460983950761137, 0.6615117598097516, 0.02115833475117001, 0.4540313340257368, 0.38323641063817804, 0.23098766889738356, 0.9193416614454453, 0.9823449435576098, 0.43590577767265865]] + b = [[0.4338294730357497, 0.8737370134955426, 0.3866481660222246, 0.5189912613912304, 0.8170111359178079, 0.5567045546836523, 0.43591738766052635, 0.38762197379639607, 0.160554516067415, 0.5086087929375274, 0.37172676390944825, 0.4795040167882083, 0.5047875284104885, 0.952357584094664, 0.4231532333345088, 0.2793598629869526, 0.10510981112481732, 0.7748672271103214, 0.14972160315949767, 0.8823815838062794, 0.5252235736412728, 0.07081478452613021, 0.17705773812258818, 0.03755576039840658, 0.9682416434335824, 0.9826820143814889, 0.08379348163769151, 0.19449772509458918, 0.015480750673827592, 0.43704050961789964, 0.3088714920780936, 0.3298539237870055, 0.8625196604457425, 0.190542808546463, 0.3010544887440777, 0.3791907528734222, 0.8716715000359382, 0.28611810301436436, 0.69740010683509, 0.214104565469189, 0.5615078285619343, 0.9143838064927736, 0.6635133343539634, 0.8561857827998371, 0.1998882834672544, 0.6840782649355721, 0.6358185984251804, 0.299936816858387, 0.7286466238869127, 0.22403760996513156, 0.9674371216461385, 0.11217113939414536, 0.08925342492880373, 0.85956134344086, 0.02629092857883053, 0.7432461229679033, 0.7185317975331097, 0.0883595419709926, 0.3429278467975083, 0.20367906781089695, 0.7859457411807675, 0.3182334372226058, 0.7124838949675636, 0.9882153473615041, 0.9721300106001071, 0.8010632231569794, 0.684664654289278, 0.17511081281775953, 0.7565368984838325, 0.5819308072303853, 0.18073234372793923, 0.8942298199945781, 0.09532766727538577, 0.2883253563461883, 0.3802780065366361, 0.623043867342368, 0.8018117213609232, 0.567827795872516, 0.3956102337915902, 0.9928710726665873, 0.20555185820585797, 0.9200367166091131, 0.9531613598902534, 0.8683180219633775, 0.9357559734703553, 0.4819445628789054, 0.37793115925854315, 0.5493614515014423, 0.1126664570899455, 0.17780155908212536, 0.6053176587784043, 0.4493665260243731, 0.6678856165575293, 0.020302812306108975, 0.45832853520368644, 0.386523054706317, 0.23214766810186227, 0.9275537922222575, 0.9909978027295514, 0.43939954953219107]] + L2 delta = 0.051752171220795486 diff --git "a/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/src/main.rs" "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/src/main.rs" new file mode 100644 index 0000000..3907e99 --- /dev/null +++ "b/\350\256\241\347\256\227\346\226\271\346\263\225B/code/hw1/src/main.rs" @@ -0,0 +1,452 @@ +extern crate nalgebra as na; +use std::{cell::OnceCell , usize}; + +use na::{zero, DefaultAllocator, OMatrix, OVector, Dim, Const}; + +type Matrix = OMatrix; +type Vector = OVector; + +fn max(a: T, b: T) -> T { + if a > b { + a + } else { + b + } +} + +fn r_eq (a: &Matrix, b: &Matrix) -> bool +where + T: na::RealField + Copy, + R: na::Dim, + C: na::Dim, + DefaultAllocator: na::allocator::Allocator +{ + (a - b).norm() < T::from_f64(0.1).unwrap() * max(a.norm(), b.norm()) +} +fn is_upper_triangular(m: &Matrix) -> bool +where + T: na::RealField + Copy, + R: na::Dim, + DefaultAllocator: na::allocator::Allocator +{ + for i in 0..(m.nrows() - 1) { + for j in 0..i { + if m[(i, j)].abs() > T::default_epsilon() { + return false; + } + } + } + true +} +fn is_lower_triangle(m: &Matrix) -> bool +where + T: na::RealField + Copy, + R: na::Dim, + DefaultAllocator: na::allocator::Allocator + { + for i in 0..(m.nrows() - 1) { + for j in (i + 1)..m.ncols() { + if m[(i, j)].abs() > T::default_epsilon() { + return false; + } + } + } + true +} + +// 解线性方程 m x = b,其中 m 为上三角矩阵 +fn solve_upper_linear_equation + ( + m: &Matrix, + b: &Vector + ) -> Vector +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + assert!(is_upper_triangular(m)); + let mut x: Vector = Matrix::>::zeros_generic(Dim::from_usize(m.nrows()), Dim::from_usize(1)); + for i in (0..m.nrows()).rev() { + let sum = (i + 1..m.nrows()).fold(zero(), |sum, j| sum + m[(i, j)] * x[j]); // sigma_(k = i + 1)^(m.nrows()) m(i, k) * x(k) + x[i] = (b[i] - sum) / m[(i, i)]; + } + x +} +// 解线性方程 m x = b,其中 m 为下三角矩阵 +fn solve_lower_linear_equation + ( + m: &Matrix, + b: &Vector + ) -> Vector +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + assert!(is_lower_triangle(m)); + let mut x: Vector = Vector::zeros_generic(Dim::from_usize(m.nrows()), Dim::from_usize(1)); + for i in 0..m.nrows() { + let sum = (0..i).fold(zero(), |sum, j| sum + m[(i, j)] * x[j]); // sigma_(k = 0)^(i - 1) m(i, k) * x(k) + x[i] = (b[i] - sum) / m[(i, i)]; + } + x +} +fn solve_linear_equation_ensures + ( + m: &Matrix, + b: &Vector, + x: &Vector + ) -> () +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + assert!(r_eq(&(m * x), b)); +} +// LU 分解,求 a = l * u,其中 l 为单位下三角矩阵,u 为上三角矩阵。该函数会消耗掉 a 的所有权。 +fn lu_decomposite + ( + mut a: Matrix + ) -> (Matrix, Matrix) +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + let mut l: Matrix = Matrix::identity_generic(Dim::from_usize(a.nrows()), Dim::from_usize(a.ncols())); + for i in 0..a.nrows() { + for j in (i + 1)..a.nrows() { + if a[(i, i)] == T::zero() { + panic!("zero pivot"); + } + l[(j, i)] = a[(j, i)] / a[(i, i)]; + for k in i..a.nrows() { + a[(j, k)] = a[(j, k)] - l[(j, i)] * a[(i, k)]; + } + } + } + (l, a) +} +fn lu_requires + ( + a: &Matrix + ) -> () +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + assert!(a.nrows() == a.ncols()); +} +fn lu_ensures + ( + a: &Matrix, + r: &(Matrix, Matrix) + ) -> () +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + let (l, u) = r; + assert!(l.nrows() == u.nrows()); + assert!(l.ncols() == u.ncols()); + assert!(is_upper_triangular(u)); + assert!(is_lower_triangle(l)); + for i in 0..l.nrows() { + assert_eq!(l[(i, i)], T::one()); + } + assert!(r_eq(&(l * u), a)); +} +// 列主元 LU 分解,返回值为 (l, u, p),其中 l 为单位下三角矩阵,u 为上三角矩阵,p 为置换矩阵。该函数会消耗掉 a 的所有权。 +fn lu_col_pivot +( + mut a: Matrix +) -> (Matrix, Matrix, Matrix) +where +T: na::RealField + Copy, +R: Dim, +DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + let mut l_temp: Matrix = Matrix::identity_generic(Dim::from_usize(a.nrows()), Dim::from_usize(a.ncols())); + let mut p: Matrix = Matrix::identity_generic(Dim::from_usize(a.nrows()), Dim::from_usize(a.ncols())); + for i in 0..a.nrows() { + let pivot_index = a.column(i).rows_range(i..a.nrows()).iamax() + i; // 选取列 i 中绝对值最大的元素所在的行 + a.swap_rows(i, pivot_index); + p.swap_rows(i, pivot_index); + l_temp.swap_columns(i, pivot_index); // 注意这里记录的是 P_i^(-1) L_i^(-1),而不是 L_i + for j in (i + 1)..a.nrows() { + if a[(i, i)] == T::zero() { + panic!("zero pivot in col pivot lu"); + } + let ratio = a[(j, i)] / a[(i, i)]; + // for k in 0..l_temp.ncols() { + // l_temp[(j, k)] = l_temp[(j, k)] + ratio * l_temp[(i, k)]; + // } + for k in 0..l_temp.nrows() { + l_temp[(k, i)] = l_temp[(k, i)] + ratio * l_temp[(k, j)]; + } + for k in i..a.ncols() { + a[(j, k)] = a[(j, k)] - ratio * a[(i, k)]; + } + } + } + ((p.clone()) * l_temp, a, p) +} +fn lu_col_ensures + ( + a: &Matrix, + r: &(Matrix, Matrix, Matrix) + ) -> () +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + let (l, u, p) = r; + assert!(l.nrows() == u.nrows()); + assert!(l.ncols() == u.ncols()); + assert!(is_upper_triangular(u)); + assert!(is_lower_triangle(l)); + for i in 0..l.nrows() { + assert_eq!(l[(i, i)], T::one()); + } + assert!(r_eq(&(p * a), &(l * u))); +} + +struct LU<'a, T, R> +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + a: &'a Matrix, + lu: OnceCell<(Matrix, Matrix)>, // 惰性计算 LU 分解,使用时再计算 + lu_col: OnceCell<(Matrix, Matrix, Matrix)>, // 惰性计算列主元 LU 分解,使用时再计算 +} +impl <'a, T, R> LU<'a, T, R> +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + fn new(a: &'a Matrix) -> Self { + Self { + a, + lu: OnceCell::new(), + lu_col: OnceCell::new(), + } + } + fn get_lu(&self) -> &(Matrix, Matrix) { + self.lu.get_or_init(|| lu_decomposite(self.a.clone())) + } + fn get_lu_col(&self) -> &(Matrix, Matrix, Matrix) { + self.lu_col.get_or_init(|| lu_col_pivot(self.a.clone())) + } + fn solve_linear_equation_lu(&self, b: &Vector) -> Vector { + // a x = b => l u x = b + let (l, u) = self.get_lu(); + let y = solve_lower_linear_equation(&l, b); + let x = solve_upper_linear_equation(&u, &y); + x + + } + fn solve_linear_equation_lu_col(&self, b: &Vector) -> Vector { + // a x = b => p a x = p b => l u x = p b + let (l, u, p) = self.get_lu_col(); + let y = solve_lower_linear_equation(&l, &(p * b)); // l y = p b + solve_upper_linear_equation(&u, &y) // u x = y + } +} + +fn cholesky_decomposite + ( + a: &Matrix + ) -> Matrix +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + let mut l: Matrix = Matrix::zeros_generic(Dim::from_usize(a.nrows()), Dim::from_usize(a.ncols())); + for i in 0..a.nrows() { + l[(i, i)] = (a[(i, i)] - (0..i).fold(T::zero(), |sum, j| sum + l[(i, j)] * l[(i, j)])).sqrt(); // l(i, i) = sqrt(a(i, i) - sigma_(k = 0)^(i - 1) l(i, k) ^ 2) + for j in 0..i { + let sum = (0..j).fold(T::zero(), |sum, k| sum + l[(i, k)] * l[(j, k)]); // sigma_(k = 0)^(j - 1) l(i, k) * l(j, k) + l[(i, j)] = (a[(i, j)] - sum) / l[(j, j)]; + } + } + l +} + +fn cholesky_ensures + ( + a: &Matrix, + l: &Matrix + ) -> () +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + assert!(l.nrows() == l.ncols()); + assert!(r_eq(&(l * l.transpose()), a)); +} + +fn improved_cholesky_decomposite + ( + a: &Matrix + ) -> (Matrix, Matrix) +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + let mut l: Matrix = Matrix::zeros_generic(Dim::from_usize(a.nrows()), Dim::from_usize(a.ncols())); + let mut d: Matrix = Matrix::zeros_generic(Dim::from_usize(a.nrows()), Dim::from_usize(a.ncols())); + for i in 0..a.nrows() { + d[(i, i)] = a[(i, i)] - (0..i).fold(T::zero(), |sum, k| sum + l[(i, k)] * l[(i, k)] * d[(k, k)]); // d(i, i) = a(i, i) - sigma_(k = 0)^(i - 1) l(i, k) ^ d(k, k) + l[(i, i)] = T::one(); + for j in 0..i { + let sum = (0..j).fold(T::zero(), |sum, k| sum + l[(i, k)] * l[(j, k)] * d[(k, k)]); // sigma_(k = 0)^(j - 1) l(i, k) * l(j, k) * d(k, k) + l[(i, j)] = (a[(i, j)] - sum) / d[(j, j)]; + } + } + (l, d) +} + +fn improved_cholesky_ensures + ( + a: &Matrix, + r: &(Matrix, Matrix) + ) -> () +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + let (l, d) = r; + assert!(l.nrows() == l.ncols()); + assert!(d.nrows() == d.ncols()); + assert!(is_lower_triangle(l)); + assert!(r_eq(&(l * d * l.transpose()), a)); +} + +struct Cholesky<'a, T, R> +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + a: &'a Matrix, + cholesky: OnceCell>, // 惰性计算 Cholesky 分解,使用时再计算 + improved_cholesky: OnceCell<(Matrix, Matrix)>, // 惰性计算改进 Cholesky 分解,使用时再计算 +} +impl <'a, T, R> Cholesky<'a, T, R> +where + T: na::RealField + Copy, + R: Dim, + DefaultAllocator: na::allocator::Allocator + na::allocator::Allocator, +{ + fn new(a: &'a Matrix) -> Self { + Self { + a, + cholesky: OnceCell::new(), + improved_cholesky: OnceCell::new(), + } + } + fn get_cholesky(&self) -> &Matrix { + self.cholesky.get_or_init(|| cholesky_decomposite(self.a)) + } + fn get_improved_cholesky(&self) -> &(Matrix, Matrix) { + self.improved_cholesky.get_or_init(|| improved_cholesky_decomposite(self.a)) + } + fn solve_linear_equation_cholesky(&self, b: &Vector) -> Vector { + // a x = b => l l^T x = b + let l = self.get_cholesky(); + let y = solve_lower_linear_equation(&l, b); + let x = solve_upper_linear_equation(&l.transpose(), &y); + x + } + fn solve_linear_equation_improved_cholesky(&self, b: &Vector) -> Vector { + // a x = b => l d l^T x = b + let (l, d) = self.get_improved_cholesky(); + let mut y = solve_lower_linear_equation(&l, b); + for i in 0..d.nrows() { + y[i] = y[i] / d[(i, i)]; + } + let x = solve_upper_linear_equation(&l.transpose(), &y); + x + } +} + +enum SubSign { + NonNegative, + Negative, +} + +fn signed_sub_of_usize(a: usize, b: usize) -> (usize, SubSign) { + if a > b { + (a - b, SubSign::NonNegative) + } else { + (b - a, SubSign::Negative) + } +} + +fn main() { + let matrix_in_exercise_1 = Matrix::, Const<84>>::from_fn( + |i, j| + match signed_sub_of_usize(i, j) { + (1, SubSign::NonNegative) => 8.0, + (0, _) => 6.0, + (1, SubSign::Negative) => 1.0, + _ => 0.0 + } + ); + // let matrix_in_exercise_1 = Matrix2::new( + // 1.0, 1.5, + // 20.0, 1.0 + // ); + let b_in_exercise_1 = Vector::>::from_fn( + |i, _| + match i { + 0 => 7.0, + 83 => 14.0, + _ => 15.0 + } + ); + lu_requires(&matrix_in_exercise_1); + let (l, u) = lu_decomposite(matrix_in_exercise_1.clone()); + lu_ensures(&matrix_in_exercise_1, &(l, u)); + let (lc, uc, pc) = lu_col_pivot(matrix_in_exercise_1.clone()); + lu_col_ensures(&matrix_in_exercise_1, &(lc, uc, pc)); + let lu = LU::new(&matrix_in_exercise_1); + let x = lu.solve_linear_equation_lu(&b_in_exercise_1); + println!("finish solving LU, x = {:?}\n m * x = {:?}\n b = {:?}\n L2 delta = {:?}", x, matrix_in_exercise_1 * x, b_in_exercise_1, (matrix_in_exercise_1 * x - b_in_exercise_1).norm()); + solve_linear_equation_ensures(&matrix_in_exercise_1, &b_in_exercise_1, &x); + let x_col = lu.solve_linear_equation_lu_col(&b_in_exercise_1); + solve_linear_equation_ensures(&matrix_in_exercise_1, &b_in_exercise_1, &x_col); + println!("finish solving LU col, x = {:?}\n m * x = {:?}\n b = {:?}\n L2 delta = {:?}", x_col, matrix_in_exercise_1 * x_col, b_in_exercise_1, (matrix_in_exercise_1 * x_col - b_in_exercise_1).norm()); + + let matrix_in_exercise_2 = Matrix::, Const<100>>::from_fn( + |i, j| + match signed_sub_of_usize(i, j) { + (1, _) => 1.0, + (0, _) => 10.0, + _ => 0.0 + } + ); + let b2 = Vector::>::new_random(); + let cholesky = Cholesky::new(&matrix_in_exercise_2); + cholesky_ensures(&matrix_in_exercise_2, cholesky.get_cholesky()); + improved_cholesky_ensures(&matrix_in_exercise_2, cholesky.get_improved_cholesky()); + let x_cholesky = cholesky.solve_linear_equation_cholesky(&b2); + println!("finish solving Cholesky, x = {:?}\n m * x = {:?}\n b = {:?}\n L2 delta = {:?}", x_cholesky, matrix_in_exercise_2 * x_cholesky, b2, (matrix_in_exercise_2 * x_cholesky - b2).norm()); + solve_linear_equation_ensures(&matrix_in_exercise_2, &b2, &x_cholesky); + let x_improved_cholesky = cholesky.solve_linear_equation_improved_cholesky(&b2); + println!("finish solving improved Cholesky, x = {:?}\n m * x = {:?}\n b = {:?}\n L2 delta = {:?}", x_improved_cholesky, matrix_in_exercise_2 * x_improved_cholesky, b2, (matrix_in_exercise_2 * x_improved_cholesky - b2).norm()); + solve_linear_equation_ensures(&matrix_in_exercise_2, &b2, &x_improved_cholesky); + +} diff --git "a/\350\256\241\347\256\227\346\226\271\346\263\225B/main.typ" "b/\350\256\241\347\256\227\346\226\271\346\263\225B/main.typ" index 5631ae7..6adde83 100644 --- "a/\350\256\241\347\256\227\346\226\271\346\263\225B/main.typ" +++ "b/\350\256\241\347\256\227\346\226\271\346\263\225B/main.typ" @@ -5,6 +5,7 @@ author: "YHTQ", date: datetime.today().display(), logo: none, + withChapterNewPage: true ) = 前言 - 授课教师:张磊 @@ -171,6 +172,127 @@ $ 因此 $D$ 与 $A$ 合同,导致其对角元都正。令 $D = T T^T$,则有: $ - + A = L T T^T L^T = (L T) (L T)^T $ + 注意到 $L$ 是单位下三角矩阵,不难验证 $L T$ 是对角元均正的下三角矩阵 ] + #algorithm[][ + 求 Cholesky 因子当然可以通过 LU 分解,但事实上有更快的办法。只需从: + $ + A = L L^T + $ + 便可得到等式: + $ + a_(i j) = sum_(p = 1)^j l_(i p) l_(j p) + $ + 这些等式呈三角形,因此可以依次回代动态规划求解。具体而言: + ```hs + choleskyij :: Matrix -> Map (Int, Int) Double -> Int -> Int -> Map (Int, Int) Double -- 计算 l_(i j),假设 i >= j + choleskyij A L i j = Map.insert L (i, j) lij + where lij = if i = j + then sqrt (A i i - sum [(L i p)^2 | p <- [1..j - 1]]) + else (A i j - sum [(L i p) * (L j p) | p <- [1..j - 1]]) / (L j j) + cholesky :: Matrix -> Matrix + cholesky A = foldl (choleskyij A) Map.empty [(i, j) | i <- [1..n], j <- [1..i]] + + ``` + 若只考虑乘加运算,运算量约为: + $ + sum_(i=1)^n sum_(j=1)^i 2j approx 1/3 n^3 + $ + // $ + // sum_(k=1)^n sum_(j = k + 1)^n sum_(r = j)^n 2 = sum_(r = 2)^n sum_(j = 2)^r sum_(k=1)^(j-1) 2 = sum_(r = 2)^n sum_(j = 2)^r 2(j - 1) ~ sum_(r = 2)^n r^2 ~ 1/3 n^3 + // $ + 之后,只需解: + $ + L y = b\ + L^T x = y + $ + 两个三角形方程组即可得到解。 + + 此外,该分解是稳定的,既然由: + $ + a_(i i) = sum_(p = 1)^(i - 1) l_(i p)^2 + $ + 不难得到: + $ + abs(l_(i j)) <= sqrt(a_(i i)) + $ + ] + #algorithm[改进的平方根法][ + 平方根法需要进行开方运算。为了避免开方,可以求如下形式的分解: + $ + A = L D L^T + $ + 其中 $L$ 是单位下三角矩阵,$D$ 是对角元均正的对角矩阵。(该分解称为 $L D L^T$ 分解)这样,方程变为: + $ + a_(i j) = sum_(k = 1)^(j - 1) l_(i k) d_k l_(j k) + l_(i j) d_j + $ + 反解出: + $ + d_j = a_(j j) - sum_(k = 1)^(j - 1) l_(j k)^2 d_k\ + l_(i j) = (a_(i j) - sum_(k = 1)^(j - 1) l_(i k) l_(j k) d_k) / d_j + $ + 同样进行动态规划即可。它的运算量也约为 $1/3 n^3$ + ] += 线性方程组的敏度分析 + == 向量范数与矩阵范数 + #definition[向量范数][ + 称满足: + - 正定性:$norm(x) >= 0, norm(x) = 0<=> x = 0$ + - 齐次性:$norm(a x) = a norm(x)$ + - 三角不等式:$norm(a + b) <= norm(a) + norm(b)$ + 的 $RR-$ 线性空间 $X$ 到 $RR$ 的函数为范数。常用范数包含: + - $p-$范数:$norm(x) = root(1/p, sum_i abs(x_i)^p)$ + - $infinity-$ 范数:$norm(x) = max_i abs(x_i)$ + - $0-$ "范数":$"card" {x_i | x_i != 0}$ (并不齐次) + ] + #definition[相容性][ + - 称矩阵范数相容,如果 $norm(A B) <= norm(A) norm(B)$ + - 称矩阵范数与向量范数相容,如果 $norm(A x) <= norm(A) norm(x)$ + 一般的,我们假定我们使用的范数都是相容的。 + ] + #theorem[][ + 给定 $RR^n$ 上向量范数,则可以定义矩阵范数: + $ + norm(A) = sup_(x != 0) (norm(A x))/norm(x) + $ + 它是 $RR^(m times n)$ 上的相容矩阵范数,也称为算子范数。它与向量范数相容。 + ] + #example[矩阵范数][ + 常用的矩阵范数包括: + - $1-$ 范数:$max_(j) sum_i a_(i j)$,也称列范数 + - $infinity-$ 范数:$max_(i) sum_j a_(i j)$,它是向量无穷范数的诱导 + - $2-$ 范数:$norm(A) = A^T A$ 最大特征值的平方根,也称为谱范数,它是向量 $2-$ 范数的诱导 + ] + #proposition[][ + 设 $A in RR^(m times n)$,则有: + - $norm(A)_2 = max_(norm(bx)_2 = 1, norm(by)_2 = 1) norm(by^T A bx)$ + - $norm(A)_2 = norm(A^T)_2 = sqrt(norm(A A^T)_2)$ + - 设 $U$ 是正交矩阵,则 $norm(U A)_2 = norm(A U) = norm(A)$ + ] + #definition[谱半径][ + 称矩阵的特征值的绝对值最大值为谱半径,记为 $rho(A)$ + ] + #theorem()[][ + - 对于任意矩阵范数,有 $norm(A) >= rho(A)$ + - 对于任意 $epsilon > 0$,存在矩阵范数使得 $norm(A) <= rho(A) + epsilon$ + ] + #proof[ + - 设 $lambda$ 是特征值且 $abs(lambda) = rho(A)$,$alpha$ 是对应的特征向量。将有: + $ + rho(A) norm(alpha e_1^T)= norm(rho(A) alpha e_1^T) = norm(A alpha e_1^T) <= norm(A) norm(alpha e_1^T) + $ + 两边消去即可。 + ] + #theorem[][ + 设 $A in CC^(n times m)$,则: + $ + lim_(n -> +infinity) A^n = 0 <=> rho(A) < 1 + $ + 更进一步,$lim_(n -> +infinity) A^n exists <=> rho(A) <= 1$ + ] + #proof[ + 使用 Jordan 分解即可。 + ] + diff --git "a/\350\275\257\344\273\266\345\210\206\346\236\220/hw/2100012990-\351\203\255\345\255\220\350\215\200-\350\275\257\345\210\206\347\254\254\345\233\233\346\254\241\344\275\234\344\270\232.typ" "b/\350\275\257\344\273\266\345\210\206\346\236\220/hw/2100012990-\351\203\255\345\255\220\350\215\200-\350\275\257\345\210\206\347\254\254\345\233\233\346\254\241\344\275\234\344\270\232.typ" new file mode 100644 index 0000000..f32ca92 --- /dev/null +++ "b/\350\275\257\344\273\266\345\210\206\346\236\220/hw/2100012990-\351\203\255\345\255\220\350\215\200-\350\275\257\345\210\206\347\254\254\345\233\233\346\254\241\344\275\234\344\270\232.typ" @@ -0,0 +1,43 @@ +#import "../../template.typ": proof, note, corollary, lemma, theorem, definition, example, remark +#import "../../template.typ": * +#show: note.with( + title: "作业4", + author: "YHTQ", + date: datetime.today().display(), + logo: none, + withOutlined : false, + withTitle : false, + withHeadingNumbering: false +) +#let Node = $"NodeValue"$ += 分析方向 + 反向,向控制流图的前驱进行 += 抽象域 + - 抽象值集合为程序中表达式集合的全体 + - $gamma(S)$ 表示从当前位置开始,繁忙表达式恰为 $S$ 的所有实际(有限)执行序列 + - 初始值:表达式全集(exit 节点的初始值为空集) += 转换函数 + $ + f_v (S) = (S - "exp_with_value_change"_v) union "exp_calculated"_v + $ + 其中: + - $"exp_with_value_change"_v$ 指所有与该语句中值被改变的变量相关的表达式 + - $"exp_calculated"_v$ 指所有在该语句中被计算(读取)的表达式 += 合并操作 + 所有后继节点的值取交,也即 $Node(v) := f_v (sect.big_(v' in "succ"(v)) Node(v'))$(方便起见,暂时认为零个集合的交是空集) += 正确性 + #lemmaLinear[][给定从某个节点 $v$ 开始的终止执行,则算法稳定后 $Node(v)$ 中的所有表达式都是该执行的繁忙表达式,即该表达式在其中变量被赋值前一定在执行中被读取。] + #proof[ + 设 $e in Node(v)$,由于算法的稳定,将有: + $ + Node(v) = (sect.big_(v' in "succ"(v)) Node(v') - "exp_with_value_change"_v) union "exp_calculated"_v + $ + 讨论: + - 若 $e in "exp_calculated"_v$,则 $e$ 在节点 $v$ 被读取,当然是繁忙表达式。 + - 否则,若 $e in sect.big_(v' in "succ"(v)) Node(v') - "exp_with_value_change"_v$,此时 $v$ 一定不是 exit 节点,假设该具体执行中下一个节点为 $v'$,由前式显然有: + $ + e in Node(v') - "exp_with_value_change"_v + $ + 递归证明 $e$ 是 $v'$ 节点开始的繁忙表达式,则上式同时表明 $e$ 的值没有在节点 $v$ 被修改,当然有 $e$ 是 $v$ 节点开始的繁忙表达式。 + 由于该执行序列在有限步终止(执行到 exit 节点),上面的递归证明可以终止。 + ] \ No newline at end of file diff --git "a/\350\275\257\344\273\266\345\210\206\346\236\220/main.typ" "b/\350\275\257\344\273\266\345\210\206\346\236\220/main.typ" index e31c1ab..848568a 100644 --- "a/\350\275\257\344\273\266\345\210\206\346\236\220/main.typ" +++ "b/\350\275\257\344\273\266\345\210\206\346\236\220/main.typ" @@ -6,6 +6,7 @@ date: datetime.today().display(), logo: none, ) +#let sup = math.union.sq = 前言 == 基本概念 给定软件系统,回答关于系统行为有关问题的技术称为软件分析技术。能否彻底避免软件中出现特定类型(例如内存泄漏)的缺陷?逻辑上而言这是不可能的,既然编程语言中总有无法证明的表达式 $T$,则以下程序: @@ -236,7 +237,56 @@ 给定程序,判断每个变量的值可能在哪些行被赋值。这与数据流分析很接近,其中具体域是所有程序行号,转换函数是在赋值语句处替换,其他语句不变。注意算法中需要初始将所有节点全部加入待更新节点,否则非赋值语句可能保持 $bot$ 不变,导致算法停止。 ] #example[可用表达式分析][ - 给定程序,如果从入口到某一位置 $p$ 的所有路径都计算了表达式 $e$,并且最后一次执行后没有改变与 $e$ 相关的变量,则称 $e$ 是 $p$ 处的一个可用表达式。这个问题中的具体域是程序中的所有表达式,转换函数是计算表达式时增加,变量赋值时删去相关的的表达式,分支合并时取交而非并。 + 给定程序,如果从入口到某一位置 $p$ 的所有路径都计算了表达式 $e$,并且最后一次执行后没有改变与 $e$ 相关的变量,则称 $e$ 是 $p$ 处的一个可用表达式。这个问题中的具体域是程序中的所有表达式,每个节点的初始值是程序表达式的全集,转换函数是计算表达式时增加,变量赋值时删去相关的的表达式,分支合并时取交而非并。 + ] + #example[活跃变量分析][ + 给定程序,如果语句 $s$ 执行前,一个变量的值在 $s$ 或后续的语句仍然使用,则称之为活跃变量。与之前问题的主要区别在于我们需要倒着程序的执行顺序进行分析,抽象值含义是从当前位置开始任意长度的具体执行序列的集合(注意由于程序会无限执行,在倒着分析的时候不能假设所有的执行序列都在最后的节点终止)初始值为空集,每个节点处先删去写入的变量,再加入读取的变量,分支合并时取并。 + ] + #remark[][ + 一般来说,数据流分析只能考虑从 entry 开始或者从 exit 结束的有限执行序列。对于可能无穷的执行序列,逻辑上无法分析。对于活跃变量分析,我们可以转而证明在任意位置结束的有限执行序列都正确,进而保证时间上的正确性。 + ] + 一般而言,数据流分析有以下步骤: + - 设计抽象域 + - 编写节点转换函数,注意可能需要处理复杂表达式,此时可以对递归的表达式结构进行分析,或者直接对三地址码进行分析。 + - 处理控制流路径的分叉和合并 + + 之前都没有讨论算法的终止性和合流性(节点选择顺序不影响结果),为此我们引入一套数据流分析的框架,并基于该框架进行证明。 + #definition[半格 semilattice][ + 称 $(S, sup)$ 为半格,如果满足: + - 幂等性:$x sup x = x$ + - 交换性:$x sup y = y sup x$ + - 非对称性:$$ + 不难验证,$x subset y := x sup y = y$ 是偏序关系。 + + 有最小值 $bot$ 的半格称为有界半格。 + + 称一个有界半格的高度指其中全序子集元素个数的最大值。 + ] + #theorem[不动点定理][ + 给定有限高度的有界半格 $S$ 和单调函数 $f: S -> S$,则 $bot, f(bot), ..., $ 一定在有限步停止在 $f$ 的最小不等点 + ] + #proof[ + - 首先证明序列终止。不难验证: + $ + bot subset f(bot) => f(bot) subset f(f(bot)) => ... + $ + 说明该序列是链,而有限高度意味着链有限长,进而最终稳定,而稳定的值当然是不动点。 + - 其次,为了证明它是最小不动点,取 $x$ 是一个不动点,则: + $ + bot subset x => f(bot) subset f(x) = x => ... + $ + 反复应用可得结论 + ] + #definition[数据流单调分析框架][ + 可以定义一般框架: + - 一个控制流图 $G$ + - 一个有限高度的有界半格 $S$ + - 一个 entry + - 一个单调函数 $f$ + 可以证明,对于该框架(对应的随机选取节点进行更新的算法称为工单算法),依次选取节点更新的算法称为轮询算法),容易证明轮询算法一定收敛到更新函数的最小不动点,而工单算法也会收敛到该点,因此一定收敛并且收敛到相同值 + ] + #definition[分配性][ + 假设转换函数 $f_v$ 满足 $f_v (x) sup f_v (y) = f_v (x sup y)$,则称满足分配性。满足分配性的数据流分析不会因为提前合并引入不精确性。由集合交/并操作构造的转换函数都满足分配性。 ] == 过程间分析 == 指针分析