From 1ec5a82bfe3f9e74e5c77502d22112f07109c36f Mon Sep 17 00:00:00 2001 From: yhtq <1414672068@qq.com> Date: Thu, 5 Dec 2024 15:41:46 +0800 Subject: [PATCH] 12.5 --- .gitignore | 3 +- ...\346\234\272\346\212\245\345\221\2122.typ" | 7 +- .../main.typ" | 71 +++- .../\344\275\234\344\270\232/ml-5_2-hw.typ" | 84 ++++ .../code6/.gitignore" | 163 +++++++ .../code6/README.md" | 1 + .../code6/pdm.lock" | 402 ++++++++++++++++++ .../code6/pyproject.toml" | 18 + .../code6/src/code6/main.py" | 205 +++++++++ .../code6/train_output" | 129 ++++++ .../main.typ" | 67 +++ .../\344\275\234\344\270\232/hw6.typ" | 141 ++++++ 12 files changed, 1285 insertions(+), 6 deletions(-) create mode 100644 "\346\225\260\347\220\206\351\200\273\350\276\221/\344\275\234\344\270\232/ml-5_2-hw.typ" create mode 100644 "\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/.gitignore" create mode 100644 "\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/README.md" create mode 100644 "\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/pdm.lock" create mode 100644 "\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/pyproject.toml" create mode 100644 "\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/src/code6/main.py" create mode 100644 "\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/train_output" create mode 100644 "\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/\344\275\234\344\270\232/hw6.typ" diff --git a/.gitignore b/.gitignore index fa23b12..047b2fc 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ 忽略/** **/**.zip **/target -.VSCodeCounter/** \ No newline at end of file +.VSCodeCounter/** +data/** \ No newline at end of file diff 
--git "a/\345\271\266\350\241\214\344\270\216\345\210\206\345\270\203\345\274\217\350\256\241\347\256\227/\344\270\212\346\234\272\346\212\245\345\221\212/\344\270\212\346\234\272\346\212\245\345\221\2122.typ" "b/\345\271\266\350\241\214\344\270\216\345\210\206\345\270\203\345\274\217\350\256\241\347\256\227/\344\270\212\346\234\272\346\212\245\345\221\212/\344\270\212\346\234\272\346\212\245\345\221\2122.typ" index e359aa9..6187964 100644 --- "a/\345\271\266\350\241\214\344\270\216\345\210\206\345\270\203\345\274\217\350\256\241\347\256\227/\344\270\212\346\234\272\346\212\245\345\221\212/\344\270\212\346\234\272\346\212\245\345\221\2122.typ" +++ "b/\345\271\266\350\241\214\344\270\216\345\210\206\345\270\203\345\274\217\350\256\241\347\256\227/\344\270\212\346\234\272\346\212\245\345\221\212/\344\270\212\346\234\272\346\212\245\345\221\2122.typ" @@ -167,7 +167,7 @@ GCC 编译器可以自动利用向量化指令完成某些运算,但需要非常多的额外信息,包括内存对齐,非别名等等。编译时开启 `-fopt-info-vec-all` 可以帮助我们知道向量化失败的原因。本次实现的初版矩阵乘法为: ```cpp template -Matrix mul_parallel (const Matrix& a, const Matrix& b) { + Matrix mul_parallel (const Matrix& a, const Matrix& b) { if constexpr (debug){ if (a.ncols() != b.nrows()) { fmt::print("Error: Matrix size not match!\n"); @@ -184,7 +184,7 @@ Matrix mul_parallel (const Matrix& a, const Matrix& b T* c_data = c.get(); const T* a_data = a.get(); const T* b_data = b.get(); - 以下 assume 可以帮助进行自动向量化 + // 以下 assume 可以帮助进行自动向量化 __builtin_assume_aligned(c_data, BYTE_ALIGNMENT); __builtin_assume_aligned(a_data, BYTE_ALIGNMENT); __builtin_assume_aligned(b_data, BYTE_ALIGNMENT); @@ -260,7 +260,7 @@ Matrix mul_parallel (const Matrix& a, const Matrix& b ``` 可以很大程度上避免忘记 gap 造成的 Bug === 循环展开 - 事实上,不难发现核心循环的最内层循环次数并不多,因此循环展开可能获得很大的收益。这里最开始担心编译器无法充分优化,手动写了一层展开: + 不难发现核心循环的最内层循环次数并不多,因此循环展开可能获得很大的收益。这里最开始担心编译器无法充分优化,手动写了一层展开: ```cpp // 由于一个 cache_line 事实上只有两个向量,我们手动展开 template @@ -507,4 +507,5 @@ Matrix mul_parallel (const Matrix& a, const Matrix& b - 线程数为 $1, 2, 4, 8$ 时,运行时间几乎线性下降,这和我们之前的分析是一致的。 - 线程数为 
$16$ 时,并没有获得线性加速。推测是因为服务器使用的是 12 个逻辑核的 E5-2650 v4,对于矩阵乘法这样运算非常密集的任务,超线程带来的性能提升当然比不上物理线程。 - $m = n = p = 2048$ 时,矩阵乘法花费约 400ms,性能已经比较好。但是可以看到,与 1024 的实验相比,缓存失效率暴增,说明还有很大的优化空间。 + - 每个实验中,分支预测失败率都非常低,这或许是编译器充分循环展开带来的收益。 - 每个实验中,平均一个 CPU 周期都完成了两条以上的指令,可见现代 CPU 为了尽可能提高性能,也做出了相当大的努力。 diff --git "a/\346\225\260\347\220\206\351\200\273\350\276\221/main.typ" "b/\346\225\260\347\220\206\351\200\273\350\276\221/main.typ" index bba1681..4b12d67 100644 --- "a/\346\225\260\347\220\206\351\200\273\350\276\221/main.typ" +++ "b/\346\225\260\347\220\206\351\200\273\350\276\221/main.typ" @@ -1270,7 +1270,7 @@ ] #proposition[][ 对于任何无穷基数 $m$,任何一致一阶系统都有基数为 $m$ 的模型 - ] + ] #theorem[Compactness][ 若一个一阶系统 $S$ 的公理集的任何有限子集都有模型,则 $S$ 有模型 ] @@ -1364,4 +1364,71 @@ + $1(1 1) = 1 := #transitivity-b(3, 4)$ ] ] - ] \ No newline at end of file + ] + == 一阶算术 + #definition[算术语言][ + 定义一阶语言,包含: + - 常元: $0$ + - 函项符: $s$(后继)$, +, *$ + - 谓词符: $=$ + 以及公理: + + N1: $not1 (s(x_1) = 0)$ + + N2: $s(x_1) = s(x_2) -> x_1 = x_2$ + + N3: $x_1 + 0 = x_1$ + + N4: $x_1 + s(x_2) = s(x_1 + x_2)$ + + N5: $x_1 * 1 = x_1$ + + N6: $x_1 * s(x_2) = x_1 * x_2 + x_1$ + + N7: $calA(0) -> (forall x_1 (calA(x_1) -> calA(s(x_1)))) -> (forall x_1 calA(x_1))$,其中 $x_1$ 在 $calA$ 中自由出现 + + 记 $0^((n))$ 代表 $0$ 的 $n$ 次后继。显然它是一个闭项。 + 一般的,称包含上面定理和公理产生的定理集的一阶理论为一阶算术。 + ] + 注意 N7 理论上不等价于通常的 Peano 公理中的数学归纳法,后者讨论的是所有自然数子集上的性质,这是不可枚举的。而 N7 只是公理模式,它是可数多个公理。 + #lemma[][ + $m = n <=> 0^((m)) = 0^((n))$ + ] + #proof[ + 使用归纳法即可 + ] + #proposition[][ + 任何 $NN$ 的模型都是无穷的。 + ] + 显然,通常的自然数就是一阶算术的一个模型。看起来,这可以表明 $NN$ 是一致的系统。然而,“通常的自然数”这个说法本身依赖于其他数学基础(如何定义自然数),这是不可靠的。事实上,如果这确实是标准模型,可以想象其他任何模型和标准模型几乎没有区别。因此只要 $NN$ 是一致的,它就几乎是完全的。 + #theorem[Godel 不完全性定理][ + 算术系统 $NN$ 是不完全的 + ] + 它的证明我们后面会介绍 + == 形式集合论 + 在朴素集论中,人们遇到了 Russel 悖论。为了消除 Russel 悖论,主要有两种思路。一种是 Russel 自己提出的类型论,但在数学中更常见的是 Zermelo-Fraenkel 公理集合论。 + #definition[一阶 ZF 语言][ + 定义 ZF 语言: + - 没有常元或函项符 + - 谓词符: $=, in$ + - 常用的缩写: + - $t_1 subset.eq t_2 := forall x_1 (x_1 in t_1 -> x in t_2)$ + - $t_1 subset t_2 
:= t_1 subset.eq t_2 and t_1 != t_2$ + - 公理: + + ZF1: $x_1 = x_2 <-> forall x_3 (x_3 in x_1 <-> x_3 in x_2)$(外延公理)(作为推论,可以证明 $x_1 = x_2 <-> x_1 subset.eq x_2 and x_2 subset.eq x_1$ + + ZF2: $exists x_1, forall x_2, not1 x_2 in x_1$(空集公理)(事实上,由 Z1 可以证明在规范模型中,空集是唯一的,因此往往记作 $emptyset$) + + ZF3: $exists x_3 forall x_4 (x_4 in x_3 <-> x_4 = x_1 or x_4 = x_2 )$(对集公理)(事实上,可以证明是存在唯一的,因此往往将其记作 ${x_1, x_2}$ + + ZF4: $exists x_2, forall x_3 (x_3 in x_2 <-> exists x_4 (x_4 in x_1 and x_3 in x_4))$(并集公理,存在一个集合是某个集合所有元素的并,进而可以定义 $t_1 union t_2 := union.big {t_1, t_2}$ + + ZF5: $exists x_2 (forall x_3, x_3 in x_2 <-> x_3 subset x_1)$(幂集公理,存在集合是某个集合的所有子集的集合) + + ZF6: $(forall x_1, exists_1 x_2 calA(x_1, x_2)) -> (forall x_3 exists x_4 forall x_5(x_5 in x_4 <-> exists x_6 (x_6 in x_3 and calA(x_6, x_5))))$(替换公理模式,相当于对于任意集合 $x$ 和"函项" $f$,都存在集合 $f(x)$ + + ZF7: $exists x_1 (emptyset in x_1 and (forall x_2 (x_2 in x_1 -> x_2 union {x_2} in x_1)))$(无穷公理,也就是无穷集(归纳集)存在) + ] + #remark[][ + - ZF 公理体系中没有非集合的个体,但事实上也可以加入非集合的个体,这种扩充往往称为 ZFA + ] + *假设 ZF 是一致的,也即存在模型*,则以它的规范模型为基础,可以建立所有数学的基础。 + + 除了 ZF 的公理外,还有一些已经被证明独立于 ZF (换言之,若 ZF 是一致的,则加上它们之后一致的)的公式,包括: + - 选择公理(等价于 Zorn 引理和良序定理) + - 连续统假设(CH) + + 人们往往认为,以集为数学基础是安全的。哪怕 ZF 系统被发现存在悖论,也能通过添加公理排除掉。如果我们把朴素集称为类,则可以将不是 ZFC 中的集合的类称为真类。NGB 公理集合论选择通过排除真类来避免 Russel 悖论。 + == 一致性问题 + 前面讨论一阶逻辑的一致性时,采用了朴素集合论的语言。然而 ZF 系统本身就在定义集合,再使用朴素集合论似乎有些循环。例如,@model-cardinality 给出 ZF 应该有可枚举的模型,而 ZF 中可以证明存在不可枚举的模型,听起来也十分荒谬。 + #proposition()[相对一致性][ + 设 $S^*$ 是 $S$ 的一个扩充,若 $S^*$ 是一致的,则 $S$ 是一致的 + ] + 一阶逻辑具有绝对一致性,然而一阶逻辑的扩充,例如 ZF 系统,是否具有某种的一致性仍然是一个未解问题。 \ No newline at end of file diff --git "a/\346\225\260\347\220\206\351\200\273\350\276\221/\344\275\234\344\270\232/ml-5_2-hw.typ" "b/\346\225\260\347\220\206\351\200\273\350\276\221/\344\275\234\344\270\232/ml-5_2-hw.typ" new file mode 100644 index 0000000..818e6fd --- /dev/null +++ "b/\346\225\260\347\220\206\351\200\273\350\276\221/\344\275\234\344\270\232/ml-5_2-hw.typ" @@ -0,0 +1,84 @@ +#import "../../template.typ": 
proof, note, corollary, lemma, theorem, definition, example, remark +#import "../../template.typ": * +#import "../main.typ": not1, True, False, infer +#import "../main.typ": * +#show: note.with( + title: "作业5_1", + author: "YHTQ", + date: datetime.today().display(), + logo: none, + withOutlined : false, + withTitle : false, + withHeadingNumbering: false +) +#set heading(numbering: + (..nums) => + { + let nums1 = nums.pos() + nums1.insert(0, 12) + numbering("1.1.(a)", ..nums1) + } +) += #empty + 首先证明: + $ + forall y (forall z (z < y -> calA(z)) -> calA(y)) tack calA(0) + $ + #deduction()[ + + $not1 (z < x) :=$ 基本结论 + + $not1 (z < x) -> (z < x -> calA(z)) := tauto$ + + $z < x -> calA(z) := $ MP + + $forall z (z < x -> calA(z)) := GEN$ + + $calA(x) := $ MP + ] + 再证明: + $ + forall y (forall z (z < y -> calA(z)) -> calA(y)) tack calA(x) + $ + #let basic = "基本结论" + 令 $calB(x') := forall z, z <= x' -> calA(z)$,有: + #deduction[ + + $calB(0) -> (forall x_1 (calB(x_1) -> calB(s(x_1)))) -> calB(x) := $ N7 + + $calA(0) :=$ 前已证 + + $z <= 0 -> z = 0 := basic$ + + $z = 0 -> calA(0) := $ E8 + + $calB(0) := $ transitivity + + $(forall z (z < s(x_1) -> calA(z))) -> calA(s(x_1)) := forall$ elim + + $(forall z (z < s(x_1) -> calA(z))) <-> calB(x_1) := basic, $ 可证等价替换性 + + $calB(x_1) -> calA(s(x_1)) := $ 可证等价替换性 + + $calB(s(x_1)) <-> (forall z (z <= x_1 or z = s(x_1)) -> calA(z)) := $ 可证等价替换性 + + $forall z (z <= x_1 or z = s(x_1)) -> calA(z) <-> forall z (z <= x_1 -> calA(z)) and (z = s(x_1) -> calA(z)) := tauto, $替换 + + $forall z (z <= x_1 -> calA(z)) and (z = s(x_1) -> calA(z)) <-> forall z (z <= x_1 -> calA(z)) and forall z(z = s(x_1) -> calA(z)) := forall and$ + + $calB(s(x_1)) <-> calB(x_1) and forall z(z = s(x_1) -> calA(z)) := $ transitivity + + $forall z(z = s(x_1) -> calA(z)) <-> forall z(z = s(x_1) -> calA(s(x_1))) := $ 可证等价替换性 + + $forall z(z = s(x_1) -> calA(s(x_1))) <-> (exists z (z = s(x_1))) -> calA(s(x_1))$ + + $(exists z (z = s(x_1))) := exists$ intro $s(x_1)$ + + 
$forall z(z = s(x_1) -> calA(s(x_1))) <-> calA(s(x_1)) := $ 可证等价替换性 + + $calB(s(x_1)) <-> calB(x_1) and calA(s(x_1))$ + + $(calB(x_1) -> calA(s(x_1))) -> (calB(x_1) -> (calB(x_1) and calA(s(x_1)))) := tauto$ + + $calB(x_1) -> (calB(x_1) and calA(s(x_1))) := MPb(8, 18)$ + + $calB(x_1) -> calB(s(x_1)) := $ 可证等价替换性 + + $forall x_1 (calB(x_1) -> calB(s(x_1))) := GEN$ + + $calB(x) := $ MP 5, 21, 1 + + $x <= x -> calA(x) := forall$ elim x + + $x <= x := basic$ + + $calA(x) := $ MP + // + $(forall x_1 (calB(x_1) -> calB(s(x_1)))) -> calB(x) := $ MP + // + $(forall z' (z' < s(x_1) -> calA(s(x_1))) -> calA(s(x_1))) := forall$ elim + // + $x_1 = x -> not1 s(x_1) < x := basic$ + // + $not1 s(x_1) < x -> calB(x_1) := fA$ + // + $s(x_1) = x -> calB(s(x_1)) := $ transitivity + // + $s(x_1) > x -> not1 s(x_1) < x := basic$ + // + $s(x_1) > x -> calB(s(x_1)) := $ transitivity + // + $s(x_1) < x -> x_1 < x := basic$ + // + $x_1 < x -> calB(x_1) -> calA(x_1) := tauto$ + // + $s(x_1) < x -> calB(x_1) -> calA(x_1) := $ transitivity + // + $s(x_1) < x -> calB(x_1) -> (s(x_1) < x -> calA(s(x_1)))$ + // + $0 < y + 1 := $ 基本结论 + // + $0 < x := $ E8, MP + // + $$ + ] + 由于左侧都是闭式,演绎定理立刻给出原结论 += #empty + 目前还没有发现 ZF 中存在矛盾,因此 ZF 中没有目前已知的矛盾。ZFC 公理系统提供了足够强的构造能力,绝大多数数学系统(例如算术系统)都可以构造于 ZFC 系统中。换言之,只要 ZFC 系统是一致的/有模型,则这些具体的数学系统也是一致的/有模型的,这就将不同数学分支所研究的数学系统的一致性问题全部转化到了 ZFC 系统的一致性问题上。 + diff --git "a/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/.gitignore" "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/.gitignore" new file mode 100644 index 0000000..d34d673 --- /dev/null +++ "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/.gitignore" @@ -0,0 +1,163 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ 
+eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
+#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm-project.org/#use-with-ide +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ +*.ckpt + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ diff --git "a/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/README.md" "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/README.md" new file mode 100644 index 0000000..9d9a24c --- /dev/null +++ "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/README.md" @@ -0,0 +1 @@ +# code6 diff --git "a/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/pdm.lock" "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/pdm.lock" new file mode 100644 index 0000000..76aa52f --- /dev/null +++ "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/pdm.lock" @@ -0,0 +1,402 @@ +# This file is @generated by PDM. +# It is not intended for manual editing. + +[metadata] +groups = ["default"] +strategy = ["inherit_metadata"] +lock_version = "4.5.0" +content_hash = "sha256:0f3090956348c6f577b18f5d4c57f0de852c41f6f09948cc1b3802f5f5c49132" + +[[metadata.targets]] +requires_python = "==3.12.*" + +[[package]] +name = "filelock" +version = "3.16.1" +requires_python = ">=3.8" +summary = "A platform independent file lock." 
+groups = ["default"] +files = [ + {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, + {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, +] + +[[package]] +name = "fsspec" +version = "2024.10.0" +requires_python = ">=3.8" +summary = "File-system specification" +groups = ["default"] +files = [ + {file = "fsspec-2024.10.0-py3-none-any.whl", hash = "sha256:03b9a6785766a4de40368b88906366755e2819e758b83705c88cd7cb5fe81871"}, + {file = "fsspec-2024.10.0.tar.gz", hash = "sha256:eda2d8a4116d4f2429db8550f2457da57279247dd930bb12f821b58391359493"}, +] + +[[package]] +name = "jinja2" +version = "3.1.4" +requires_python = ">=3.7" +summary = "A very fast and expressive template engine." +groups = ["default"] +dependencies = [ + "MarkupSafe>=2.0", +] +files = [ + {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, + {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, +] + +[[package]] +name = "markupsafe" +version = "3.0.2" +requires_python = ">=3.9" +summary = "Safely add untrusted strings to HTML/XML markup." 
+groups = ["default"] +files = [ + {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87"}, + {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, +] + +[[package]] +name = "mpmath" +version = "1.3.0" +summary = "Python library for arbitrary-precision floating-point arithmetic" +groups = ["default"] +marker = "python_version >= \"3.9\"" +files = [ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = 
"sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] + +[[package]] +name = "networkx" +version = "3.4.2" +requires_python = ">=3.10" +summary = "Python package for creating and manipulating graphs and networks" +groups = ["default"] +files = [ + {file = "networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f"}, + {file = "networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1"}, +] + +[[package]] +name = "numpy" +version = "2.1.3" +requires_python = ">=3.10" +summary = "Fundamental package for array computing in Python" +groups = ["default"] +files = [ + {file = "numpy-2.1.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f55ba01150f52b1027829b50d70ef1dafd9821ea82905b63936668403c3b471e"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13138eadd4f4da03074851a698ffa7e405f41a0845a6b1ad135b81596e4e9958"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a6b46587b14b888e95e4a24d7b13ae91fa22386c199ee7b418f449032b2fa3b8"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:0fa14563cc46422e99daef53d725d0c326e99e468a9320a240affffe87852564"}, + {file = "numpy-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8637dcd2caa676e475503d1f8fdb327bc495554e10838019651b76d17b98e512"}, + {file = "numpy-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2312b2aa89e1f43ecea6da6ea9a810d06aae08321609d8dc0d0eda6d946a541b"}, + {file = "numpy-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a38c19106902bb19351b83802531fea19dee18e5b37b36454f27f11ff956f7fc"}, + {file = "numpy-2.1.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:02135ade8b8a84011cbb67dc44e07c58f28575cf9ecf8ab304e51c05528c19f0"}, + {file = "numpy-2.1.3-cp312-cp312-win32.whl", hash = "sha256:e6988e90fcf617da2b5c78902fe8e668361b43b4fe26dbf2d7b0f8034d4cafb9"}, + {file = "numpy-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:0d30c543f02e84e92c4b1f415b7c6b5326cbe45ee7882b6b77db7195fb971e3a"}, + {file = "numpy-2.1.3.tar.gz", hash = "sha256:aa08e04e08aaf974d4458def539dece0d28146d866a39da5639596f4921fd761"}, +] + +[[package]] +name = "nvidia-cublas-cu12" +version = "12.4.5.8" +requires_python = ">=3" +summary = "CUBLAS native runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3"}, + {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b"}, + {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-win_amd64.whl", hash = "sha256:5a796786da89203a0657eda402bcdcec6180254a8ac22d72213abc42069522dc"}, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.4.127" +requires_python = ">=3" +summary = "CUDA profiling tools runtime libs." 
+groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a"}, + {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb"}, + {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:5688d203301ab051449a2b1cb6690fbe90d2b372f411521c86018b950f3d7922"}, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.4.127" +requires_python = ">=3" +summary = "NVRTC native runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0eedf14185e04b76aa05b1fea04133e59f465b6f960c0cbf4e37c3cb6b0ea198"}, + {file = "nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338"}, + {file = "nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:a961b2f1d5f17b14867c619ceb99ef6fcec12e46612711bcec78eb05068a60ec"}, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.4.127" +requires_python = ">=3" +summary = "CUDA Runtime native Libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3"}, + {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5"}, + {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-win_amd64.whl", hash = 
"sha256:09c2e35f48359752dfa822c09918211844a3d93c100a715d79b59591130c5e1e"}, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.1.0.70" +requires_python = ">=3" +summary = "cuDNN runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +dependencies = [ + "nvidia-cublas-cu12", +] +files = [ + {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f"}, + {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-win_amd64.whl", hash = "sha256:6278562929433d68365a07a4a1546c237ba2849852c0d4b2262a486e805b977a"}, +] + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.2.1.3" +requires_python = ">=3" +summary = "CUFFT native runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +dependencies = [ + "nvidia-nvjitlink-cu12", +] +files = [ + {file = "nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5dad8008fc7f92f5ddfa2101430917ce2ffacd86824914c82e28990ad7f00399"}, + {file = "nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9"}, + {file = "nvidia_cufft_cu12-11.2.1.3-py3-none-win_amd64.whl", hash = "sha256:d802f4954291101186078ccbe22fc285a902136f974d369540fd4a5333d1440b"}, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.5.147" +requires_python = ">=3" +summary = "CURAND native runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1f173f09e3e3c76ab084aba0de819c49e56614feae5c12f69883f4ae9bb5fad9"}, + {file = "nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b"}, + {file = 
"nvidia_curand_cu12-10.3.5.147-py3-none-win_amd64.whl", hash = "sha256:f307cc191f96efe9e8f05a87096abc20d08845a841889ef78cb06924437f6771"}, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.6.1.9" +requires_python = ">=3" +summary = "CUDA solver native runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +dependencies = [ + "nvidia-cublas-cu12", + "nvidia-cusparse-cu12", + "nvidia-nvjitlink-cu12", +] +files = [ + {file = "nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d338f155f174f90724bbde3758b7ac375a70ce8e706d70b018dd3375545fc84e"}, + {file = "nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260"}, + {file = "nvidia_cusolver_cu12-11.6.1.9-py3-none-win_amd64.whl", hash = "sha256:e77314c9d7b694fcebc84f58989f3aa4fb4cb442f12ca1a9bde50f5e8f6d1b9c"}, +] + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.3.1.170" +requires_python = ">=3" +summary = "CUSPARSE native runtime libraries" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +dependencies = [ + "nvidia-nvjitlink-cu12", +] +files = [ + {file = "nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9d32f62896231ebe0480efd8a7f702e143c98cfaa0e8a76df3386c1ba2b54df3"}, + {file = "nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1"}, + {file = "nvidia_cusparse_cu12-12.3.1.170-py3-none-win_amd64.whl", hash = "sha256:9bc90fb087bc7b4c15641521f31c0371e9a612fc2ba12c338d3ae032e6b6797f"}, +] + +[[package]] +name = "nvidia-nccl-cu12" +version = "2.21.5" +requires_python = ">=3" +summary = "NVIDIA Collective Communication Library (NCCL) Runtime" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" 
+files = [ + {file = "nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:8579076d30a8c24988834445f8d633c697d42397e92ffc3f63fa26766d25e0a0"}, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.4.127" +requires_python = ">=3" +summary = "Nvidia JIT LTO Library" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83"}, + {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57"}, + {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:fd9020c501d27d135f983c6d3e244b197a7ccad769e34df53a42e276b0e25fa1"}, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.4.127" +requires_python = ">=3" +summary = "NVIDIA Tools Extension" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" +files = [ + {file = "nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7959ad635db13edf4fc65c06a6e9f9e55fc2f92596db928d169c0bb031e88ef3"}, + {file = "nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a"}, + {file = "nvidia_nvtx_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:641dccaaa1139f3ffb0d3164b4b84f9d253397e38246a4f2f36728b48566d485"}, +] + +[[package]] +name = "pillow" +version = "11.0.0" +requires_python = ">=3.9" +summary = "Python Imaging Library (Fork)" +groups = ["default"] +files = [ + {file = "pillow-11.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d2c0a187a92a1cb5ef2c8ed5412dd8d4334272617f532d4ad4de31e0495bd923"}, + {file = "pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:084a07ef0821cfe4858fe86652fffac8e187b6ae677e9906e192aafcc1b69903"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8069c5179902dcdce0be9bfc8235347fdbac249d23bd90514b7a47a72d9fecf4"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f02541ef64077f22bf4924f225c0fd1248c168f86e4b7abdedd87d6ebaceab0f"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fcb4621042ac4b7865c179bb972ed0da0218a076dc1820ffc48b1d74c1e37fe9"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:00177a63030d612148e659b55ba99527803288cea7c75fb05766ab7981a8c1b7"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8853a3bf12afddfdf15f57c4b02d7ded92c7a75a5d7331d19f4f9572a89c17e6"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3107c66e43bda25359d5ef446f59c497de2b5ed4c7fdba0894f8d6cf3822dafc"}, + {file = "pillow-11.0.0-cp312-cp312-win32.whl", hash = "sha256:86510e3f5eca0ab87429dd77fafc04693195eec7fd6a137c389c3eeb4cfb77c6"}, + {file = "pillow-11.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:8ec4a89295cd6cd4d1058a5e6aec6bf51e0eaaf9714774e1bfac7cfc9051db47"}, + {file = "pillow-11.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:27a7860107500d813fcd203b4ea19b04babe79448268403172782754870dac25"}, + {file = "pillow-11.0.0.tar.gz", hash = "sha256:72bacbaf24ac003fea9bff9837d1eedb6088758d41e100c1552930151f677739"}, +] + +[[package]] +name = "setuptools" +version = "75.6.0" +requires_python = ">=3.9" +summary = "Easily download, build, install, upgrade, and uninstall Python packages" +groups = ["default"] +marker = "python_version >= \"3.12\"" +files = [ + {file = "setuptools-75.6.0-py3-none-any.whl", hash = "sha256:ce74b49e8f7110f9bf04883b730f4765b774ef3ef28f722cce7c273d253aaf7d"}, + {file = "setuptools-75.6.0.tar.gz", hash = 
"sha256:8199222558df7c86216af4f84c30e9b34a61d8ba19366cc914424cdbd28252f6"}, +] + +[[package]] +name = "sympy" +version = "1.13.1" +requires_python = ">=3.8" +summary = "Computer algebra system (CAS) in Python" +groups = ["default"] +marker = "python_version >= \"3.9\"" +dependencies = [ + "mpmath<1.4,>=1.1.0", +] +files = [ + {file = "sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8"}, + {file = "sympy-1.13.1.tar.gz", hash = "sha256:9cebf7e04ff162015ce31c9c6c9144daa34a93bd082f54fd8f12deca4f47515f"}, +] + +[[package]] +name = "torch" +version = "2.5.1" +requires_python = ">=3.8.0" +summary = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +groups = ["default"] +dependencies = [ + "filelock", + "fsspec", + "jinja2", + "networkx", + "nvidia-cublas-cu12==12.4.5.8; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cuda-cupti-cu12==12.4.127; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cuda-runtime-cu12==12.4.127; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cudnn-cu12==9.1.0.70; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cufft-cu12==11.2.1.3; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-curand-cu12==10.3.5.147; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cusolver-cu12==11.6.1.9; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-cusparse-cu12==12.3.1.170; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-nccl-cu12==2.21.5; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-nvjitlink-cu12==12.4.127; platform_system == \"Linux\" and platform_machine == \"x86_64\"", + "nvidia-nvtx-cu12==12.4.127; platform_system == \"Linux\" 
and platform_machine == \"x86_64\"", + "setuptools; python_version >= \"3.12\"", + "sympy==1.12.1; python_version == \"3.8\"", + "sympy==1.13.1; python_version >= \"3.9\"", + "triton==3.1.0; platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\"", + "typing-extensions>=4.8.0", +] +files = [ + {file = "torch-2.5.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:ed231a4b3a5952177fafb661213d690a72caaad97d5824dd4fc17ab9e15cec03"}, + {file = "torch-2.5.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:3f4b7f10a247e0dcd7ea97dc2d3bfbfc90302ed36d7f3952b0008d0df264e697"}, + {file = "torch-2.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:73e58e78f7d220917c5dbfad1a40e09df9929d3b95d25e57d9f8558f84c9a11c"}, + {file = "torch-2.5.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:8c712df61101964eb11910a846514011f0b6f5920c55dbf567bff8a34163d5b1"}, +] + +[[package]] +name = "torchvision" +version = "0.20.1" +requires_python = ">=3.8" +summary = "image and video datasets and models for torch deep learning" +groups = ["default"] +dependencies = [ + "numpy", + "pillow!=8.3.*,>=5.3.0", + "torch==2.5.1", +] +files = [ + {file = "torchvision-0.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1a31256ff945d64f006bb306813a7c95a531fe16bfb2535c837dd4c104533d7a"}, + {file = "torchvision-0.20.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:17cd78adddf81dac57d7dccc9277a4d686425b1c55715f308769770cb26cad5c"}, + {file = "torchvision-0.20.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:9f853ba4497ac4691815ad41b523ee23cf5ba4f87b1ce869d704052e233ca8b7"}, + {file = "torchvision-0.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:4a330422c36dbfc946d3a6c1caec3489db07ecdf3675d83369adb2e5a0ca17c4"}, +] + +[[package]] +name = "triton" +version = "3.1.0" +summary = "A language and compiler for custom Deep Learning operations" +groups = ["default"] +marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version 
< \"3.13\"" +dependencies = [ + "filelock", +] +files = [ + {file = "triton-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8182f42fd8080a7d39d666814fa36c5e30cc00ea7eeeb1a2983dbb4c99a0fdc"}, +] + +[[package]] +name = "typing-extensions" +version = "4.12.2" +requires_python = ">=3.8" +summary = "Backported and Experimental Type Hints for Python 3.8+" +groups = ["default"] +files = [ + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, +] diff --git "a/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/pyproject.toml" "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/pyproject.toml" new file mode 100644 index 0000000..69d7e0d --- /dev/null +++ "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/pyproject.toml" @@ -0,0 +1,18 @@ +[project] +name = "code6" +version = "0.1.0" +description = "Default template for PDM package" +authors = [ + {name = "yhtq", email = "1414672068@qq.com"}, +] +dependencies = ["torch>=2.5.1", "torchvision>=0.20.1"] +requires-python = "==3.12.*" +readme = "README.md" +license = {text = "MIT"} + + +[tool.pdm] +distribution = false + +[tool.pdm.scripts] +start = "src/code6/main.py" diff --git "a/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/src/code6/main.py" "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/src/code6/main.py" new file mode 100644 index 0000000..466a8a4 --- /dev/null +++ "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/src/code6/main.py" @@ -0,0 +1,205 @@ +from 
typing import Type +import torch +import torch.nn as nn +import torchvision +import torchvision.transforms as transforms + + +# Device configuration +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +if torch.cuda.is_available(): + print('GPU is available') +else: + print('GPU is not available') +# Hyper-parameters +input_size = 784 +hidden_size = 500 +hidden_size2 = 300 +num_classes = 10 +num_epochs = 5 +batch_size = 100 +learning_rate = 0.001 + +# MNIST dataset +train_dataset = torchvision.datasets.MNIST(root='../../data', + train=True, + transform=transforms.ToTensor(), + download=True) + +test_dataset = torchvision.datasets.MNIST(root='../../data', + train=False, + transform=transforms.ToTensor()) + +# Data loader +train_loader = torch.utils.data.DataLoader(dataset=train_dataset, + batch_size=batch_size, + shuffle=True) + +test_loader = torch.utils.data.DataLoader(dataset=test_dataset, + batch_size=batch_size, + shuffle=False) + +# Fully connected neural network with one hidden layer +class NeuralNet(nn.Module): + def __init__(self, input_size, hidden_size, num_classes): + super(NeuralNet, self).__init__() + self.fc1 = nn.Linear(input_size, hidden_size) + self.relu = nn.ReLU() + self.fc2 = nn.Linear(hidden_size, hidden_size2) + self.relu2 = nn.ReLU() + self.fc3 = nn.Linear(hidden_size2, num_classes) + + def forward(self, x): + out = self.fc1(x) + out = self.relu(out) + out = self.fc2(out) + out = self.relu2(out) + out = self.fc3(out) + return out + +class MySGD(torch.optim.Optimizer): + def __init__(self, params, lr=0.01): + defaults = dict(lr=lr) + super(MySGD, self).__init__(params, defaults) + + def step(self, closure=None): + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + d_p = p.grad + p.data.add_(-group['lr'], d_p) + + def print_info(self) -> str: + return f"SGD with lr={self.param_groups[0]['lr']}" + +class MySGD_WithHeavyBall(torch.optim.Optimizer): + def __init__(self, params, lr=0.01, 
momentum_factor=0.9): + defaults = dict(lr=lr, momentum_factor=momentum_factor) + super(MySGD_WithHeavyBall, self).__init__(params, defaults) + for group in self.param_groups: + for p in group['params']: + self.state[p]['v'] = torch.zeros_like(p.data) + + def step(self, closure=None): + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + d_p = p.grad + # v_(t + 1) = beta v_t - grad + # x_(t + 1) = x_t + lr v_(t + 1) + v = self.state[p]['v'] + v.mul_(group['momentum_factor']).sub_(d_p) + p.data.add_(group['lr'], v) + + def print_info(self) -> str: + return f"SGD with Heavy Ball with lr={self.param_groups[0]['lr']}, momentum_factor={self.param_groups[0]['momentum_factor']}" + +class MySGD_WithNesterov(torch.optim.Optimizer): + def __init__(self, params, lr=0.01, momentum_factor=0.9): + defaults = dict(lr=lr, momentum_factor=momentum_factor) + super(MySGD_WithNesterov, self).__init__(params, defaults) + self.t = 0 + for group in self.param_groups: + for p in group['params']: + self.state[p]['x'] = torch.zeros_like(p.data) + + def step(self, closure=None): + beta_t: float = (self.t - 1) / (self.t + 2) + self.t += 1 + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + d_p = p.grad + # x_(t + 1) = p_t - eta grad + # p_(t + 1) = x_(t + 1) + beta_t (x_(t + 1) - x_t) + x_t = self.state[p]['x'] + x_t1 = p.data.sub(d_p, alpha = group['lr']) + p.data = x_t1.add(x_t1.sub(x_t), alpha = beta_t) + x_t.copy_(x_t1) + + def print_info(self) -> str: + return f"SGD with Nesterov with lr={self.param_groups[0]['lr']}, momentum_factor={self.param_groups[0]['momentum_factor']}" + +class MyAdam(torch.optim.Optimizer): + def __init__(self, params, lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-8): + defaults = dict(lr=lr, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon) + super(MyAdam, self).__init__(params, defaults) + self.t = 0 + for group in self.param_groups: + for p in group['params']: + 
self.state[p]['m'] = torch.zeros_like(p.data) + self.state[p]['v'] = torch.zeros_like(p.data) + + def step(self, closure=None): + self.t += 1 + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + d_p = p.grad + m_t = self.state[p]['m'] + v_t = self.state[p]['v'] + m_t.mul_(group['beta_1']).add_(d_p, alpha = 1 - group['beta_1']) + v_t.mul_(group['beta_2']).addcmul_(d_p, d_p, value = 1 - group['beta_2']) + m_hat = m_t / (1 - group['beta_1'] ** self.t) + v_hat = v_t / (1 - group['beta_2'] ** self.t) + + p.data.sub_(m_hat / (v_hat.sqrt() + group['epsilon']), alpha = group['lr']) + + def print_info(self) -> str: + return f"Adam with lr={self.param_groups[0]['lr']}, beta_1={self.param_groups[0]['beta_1']}, beta_2={self.param_groups[0]['beta_2']}, epsilon={self.param_groups[0]['epsilon']}" + +def train_opt(opt: Type[torch.optim.Optimizer], lr: float) -> None: + model = NeuralNet(input_size, hidden_size, num_classes).to(device) + # Loss and optimizer + criterion = nn.CrossEntropyLoss() + # optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) + optimizer = opt(model.parameters(), lr=lr) + print(optimizer.print_info()) + + # Train the model + total_step = len(train_loader) + for epoch in range(num_epochs): + for i, (images, labels) in enumerate(train_loader): + # Move tensors to the configured device + images = images.reshape(-1, 28*28).to(device) + labels = labels.to(device) + + # Forward pass + outputs = model(images) + loss = criterion(outputs, labels) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + optimizer.step() + + if (i+1) % 100 == 0: + print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' + .format(epoch+1, num_epochs, i+1, total_step, loss.item())) + + # Test the model + # In test phase, we don't need to compute gradients (for memory efficiency) + with torch.no_grad(): + correct = 0 + total = 0 + for images, labels in test_loader: + images = images.reshape(-1, 28*28).to(device) + labels = 
labels.to(device) + outputs = model(images) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total)) + + # Save the model checkpoint + torch.save(model.state_dict(), f'{optimizer.print_info()}.ckpt') + +train_opt(MySGD, 0.05) +train_opt(MySGD_WithHeavyBall, 0.03) +train_opt(MySGD_WithNesterov, 0.01) +train_opt(MyAdam, 0.001) \ No newline at end of file diff --git "a/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/train_output" "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/train_output" new file mode 100644 index 0000000..f6c4282 --- /dev/null +++ "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/code6/train_output" @@ -0,0 +1,129 @@ +GPU is available +SGD with lr=0.05 +Epoch [1/5], Step [100/600], Loss: 1.4742 +Epoch [1/5], Step [200/600], Loss: 0.6716 +Epoch [1/5], Step [300/600], Loss: 0.3644 +Epoch [1/5], Step [400/600], Loss: 0.4390 +Epoch [1/5], Step [500/600], Loss: 0.3300 +Epoch [1/5], Step [600/600], Loss: 0.3280 +Epoch [2/5], Step [100/600], Loss: 0.5176 +Epoch [2/5], Step [200/600], Loss: 0.3191 +Epoch [2/5], Step [300/600], Loss: 0.3006 +Epoch [2/5], Step [400/600], Loss: 0.3043 +Epoch [2/5], Step [500/600], Loss: 0.2298 +Epoch [2/5], Step [600/600], Loss: 0.2984 +Epoch [3/5], Step [100/600], Loss: 0.0901 +Epoch [3/5], Step [200/600], Loss: 0.3639 +Epoch [3/5], Step [300/600], Loss: 0.2157 +Epoch [3/5], Step [400/600], Loss: 0.3086 +Epoch [3/5], Step [500/600], Loss: 0.1639 +Epoch [3/5], Step [600/600], Loss: 0.1208 +Epoch [4/5], Step [100/600], Loss: 0.2894 +Epoch [4/5], Step [200/600], Loss: 0.1108 +Epoch [4/5], Step [300/600], Loss: 0.1534 +Epoch [4/5], Step [400/600], Loss: 0.2088 +Epoch [4/5], Step [500/600], Loss: 0.2160 +Epoch 
[4/5], Step [600/600], Loss: 0.1488 +Epoch [5/5], Step [100/600], Loss: 0.1651 +Epoch [5/5], Step [200/600], Loss: 0.1749 +Epoch [5/5], Step [300/600], Loss: 0.1752 +Epoch [5/5], Step [400/600], Loss: 0.1696 +Epoch [5/5], Step [500/600], Loss: 0.1583 +Epoch [5/5], Step [600/600], Loss: 0.1383 +Accuracy of the network on the 10000 test images: 95.46 % +SGD with Heavy Ball with lr=0.03, momentum_factor=0.9 +Epoch [1/5], Step [100/600], Loss: 0.4187 +Epoch [1/5], Step [200/600], Loss: 0.2630 +Epoch [1/5], Step [300/600], Loss: 0.3553 +Epoch [1/5], Step [400/600], Loss: 0.1421 +Epoch [1/5], Step [500/600], Loss: 0.2827 +Epoch [1/5], Step [600/600], Loss: 0.3259 +Epoch [2/5], Step [100/600], Loss: 0.1240 +Epoch [2/5], Step [200/600], Loss: 0.1328 +Epoch [2/5], Step [300/600], Loss: 0.1717 +Epoch [2/5], Step [400/600], Loss: 0.1230 +Epoch [2/5], Step [500/600], Loss: 0.1293 +Epoch [2/5], Step [600/600], Loss: 0.1465 +Epoch [3/5], Step [100/600], Loss: 0.1492 +Epoch [3/5], Step [200/600], Loss: 0.0796 +Epoch [3/5], Step [300/600], Loss: 0.1309 +Epoch [3/5], Step [400/600], Loss: 0.1431 +Epoch [3/5], Step [500/600], Loss: 0.0291 +Epoch [3/5], Step [600/600], Loss: 0.0153 +Epoch [4/5], Step [100/600], Loss: 0.0797 +Epoch [4/5], Step [200/600], Loss: 0.0354 +Epoch [4/5], Step [300/600], Loss: 0.0437 +Epoch [4/5], Step [400/600], Loss: 0.0369 +Epoch [4/5], Step [500/600], Loss: 0.0259 +Epoch [4/5], Step [600/600], Loss: 0.0417 +Epoch [5/5], Step [100/600], Loss: 0.0575 +Epoch [5/5], Step [200/600], Loss: 0.0389 +Epoch [5/5], Step [300/600], Loss: 0.0445 +Epoch [5/5], Step [400/600], Loss: 0.0387 +Epoch [5/5], Step [500/600], Loss: 0.0969 +Epoch [5/5], Step [600/600], Loss: 0.0322 +Accuracy of the network on the 10000 test images: 97.89 % +SGD with Nesterov with lr=0.01, momentum_factor=0.9 +Epoch [1/5], Step [100/600], Loss: 2.1211 +Epoch [1/5], Step [200/600], Loss: 0.5482 +Epoch [1/5], Step [300/600], Loss: 0.5049 +Epoch [1/5], Step [400/600], Loss: 0.2170 +Epoch [1/5], 
Step [500/600], Loss: 0.3352 +Epoch [1/5], Step [600/600], Loss: 0.2422 +Epoch [2/5], Step [100/600], Loss: 0.1738 +Epoch [2/5], Step [200/600], Loss: 0.2192 +Epoch [2/5], Step [300/600], Loss: 0.1178 +Epoch [2/5], Step [400/600], Loss: 0.2663 +Epoch [2/5], Step [500/600], Loss: 0.1225 +Epoch [2/5], Step [600/600], Loss: 0.0401 +Epoch [3/5], Step [100/600], Loss: 0.0568 +Epoch [3/5], Step [200/600], Loss: 0.1573 +Epoch [3/5], Step [300/600], Loss: 0.0705 +Epoch [3/5], Step [400/600], Loss: 0.2491 +Epoch [3/5], Step [500/600], Loss: 0.0589 +Epoch [3/5], Step [600/600], Loss: 0.4658 +Epoch [4/5], Step [100/600], Loss: 0.0818 +Epoch [4/5], Step [200/600], Loss: 0.1055 +Epoch [4/5], Step [300/600], Loss: 0.0510 +Epoch [4/5], Step [400/600], Loss: 0.1572 +Epoch [4/5], Step [500/600], Loss: 0.1439 +Epoch [4/5], Step [600/600], Loss: 0.7126 +Epoch [5/5], Step [100/600], Loss: 0.3807 +Epoch [5/5], Step [200/600], Loss: 0.2038 +Epoch [5/5], Step [300/600], Loss: 0.4482 +Epoch [5/5], Step [400/600], Loss: 0.2462 +Epoch [5/5], Step [500/600], Loss: 0.3246 +Epoch [5/5], Step [600/600], Loss: 0.1234 +Accuracy of the network on the 10000 test images: 92.0 % +Adam with lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08 +Epoch [1/5], Step [100/600], Loss: 0.2376 +Epoch [1/5], Step [200/600], Loss: 0.3365 +Epoch [1/5], Step [300/600], Loss: 0.2056 +Epoch [1/5], Step [400/600], Loss: 0.1639 +Epoch [1/5], Step [500/600], Loss: 0.1574 +Epoch [1/5], Step [600/600], Loss: 0.1961 +Epoch [2/5], Step [100/600], Loss: 0.0914 +Epoch [2/5], Step [200/600], Loss: 0.1725 +Epoch [2/5], Step [300/600], Loss: 0.1098 +Epoch [2/5], Step [400/600], Loss: 0.0807 +Epoch [2/5], Step [500/600], Loss: 0.1363 +Epoch [2/5], Step [600/600], Loss: 0.0472 +Epoch [3/5], Step [100/600], Loss: 0.0556 +Epoch [3/5], Step [200/600], Loss: 0.0572 +Epoch [3/5], Step [300/600], Loss: 0.0629 +Epoch [3/5], Step [400/600], Loss: 0.0777 +Epoch [3/5], Step [500/600], Loss: 0.0436 +Epoch [3/5], Step [600/600], Loss: 0.0775 
+Epoch [4/5], Step [100/600], Loss: 0.1248 +Epoch [4/5], Step [200/600], Loss: 0.1043 +Epoch [4/5], Step [300/600], Loss: 0.0586 +Epoch [4/5], Step [400/600], Loss: 0.1358 +Epoch [4/5], Step [500/600], Loss: 0.0816 +Epoch [4/5], Step [600/600], Loss: 0.0265 +Epoch [5/5], Step [100/600], Loss: 0.0430 +Epoch [5/5], Step [200/600], Loss: 0.0089 +Epoch [5/5], Step [300/600], Loss: 0.0333 +Epoch [5/5], Step [400/600], Loss: 0.0156 +Epoch [5/5], Step [500/600], Loss: 0.0098 +Epoch [5/5], Step [600/600], Loss: 0.0460 +Accuracy of the network on the 10000 test images: 97.94 % diff --git "a/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/main.typ" "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/main.typ" index 93c2da6..4358cc5 100644 --- "a/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/main.typ" +++ "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/main.typ" @@ -686,4 +686,71 @@ === 卷积神经网络 深度学习的发展受到计算机视觉研究的推动,卷积神经网络是其中的代表,它受到了对人类视觉神经元研究的启发。 + 卷积的想法是,我们将数据的某个局部看作相互联系的整体,该局部应该以某种方式共享参数。这样在实现上,也可以极大地减少需要存储的参数数量。实践上,我们用某个待学习的卷积核对数据的某个局部进行卷积,从而得到新的数据。 + + 同时,卷积的形式具有很好的平移不变性,这也在很多时候体现了数据的特征。 + + 卷积网络中还有一个技巧称为 pooling 或者 downsampling,它的目的是减少数据的维度,同时保留数据的主要特征。它相当于对于每个 Channel 的分块做一个集中操作,常见的包括 max pooling (取最大值)和 average pooling(取平均值)。 + + 经典的卷积神经网络包括 AlexNet,采用了 ReLU 激活函数和 GPU 加速,在 ImageNet 上取得了巨大的成功。VGGNet 采用了更深的网络结构,ResNet 采用了残差连接,将上一层信号的一部分直接传递到下一层,要学习的目标变成了信号的变化量。理论上,这个 trick 并不改变网络的表达能力,但极大地加速了训练过程,使得更深的网络变得可能。 + === 递归神经网络 + 对于序列化的数据,例如文本,我们往往希望能够保留数据的顺序信息。递归神经网络是一种很好的选择。抽象来说,给定输入 $x_i$ 输出 $y_i$,希望训练一个模型 $H_t$ 使得: + $ + y_t = H_t (x_1, x_2, ..., x_t) + $ + 大致思路是: + - 设定一个额外的隐藏状态 $h_t$,使得 $h_t = f(x_t, h_(t - 1))$,这个 $f$ 是 $t-$无关的。 + - $y_t = H (x_t, h_(t - 1))$ + 但这会导致对早期信号的记忆很差。一个重要的改进是 LSTM(Long Short Term Memory),它认为之前的 $h$ 是短期记忆,同时引入了一个额外的长期记忆单元,使得网络能够更好地处理长期依赖问题。同时,引入了 
Gate 机制,让网络决定是否更新长期记忆。 + == 神经网络的训练 + 抽象来说,神经网络的训练是一个优化问题: + $ + min_theta 1/n sum_i l(f(x_i, theta), y_i) + lambda norm(theta) + $ + 然而,$f$ 往往具有以下特点: + - 非常复杂,连梯度都并不容易计算 + - 非凸 + - 维度很高 + 因此训练神经网络时,往往会使用 GPU 进行并行化计算。然而,深度学习中,宽度往往是完美并行的,然而深度却产生了数据依赖,无法简单的并行化。这是制约网络变深的一个重要原因。 + + 同时,我们往往会遇到其他重要问题,包括: + - 不同层的梯度规模差距很大,导致作为优化问题看待时,条件数很大。经验上,靠近输出层的梯度很大,靠近输入层的梯度很小,很容易造成梯度爆炸或者梯度消失。这也是为什么 sigmoid 等饱和的激活函数不受欢迎,因为它们会导致梯度消失。 + === 学习率选择 + 为了解决梯度消失问题,另一种思路是,为梯度一直较小的模型设计一个较大的学习率,而为梯度较大的模型设计一个较小的学习率。这就是 Adagrad 的思路,它的更新规则是考虑该参数的累计梯度 $G_t = sum_t g_t^2$,并且在下一步中采用学习率: + $ + eta/(sqrt(G_t) + epsilon) + $ + 其中 $epsilon$ 是防止除零的常数。 + + 然而,经验上学习率一直降低并不是一个好的策略。因此已经被彻底抛弃了。 + + 另一种思路是 rProp,它的更新规则是直接抛弃梯度的大小信息,直接根据梯度的符号来更新参数。然而,当 batch 比较小并采用随机梯度下降时,这种方法往往完全无法使用。 + + 它的一个改进是 RMSProp,它的更新规则是: + $ + v_(t + 1) = beta v_t + (1 - beta) g_t^2\ + theta_(t + 1) = theta_t - eta g_t/(sqrt(v_(t + 1) + epsilon)) + $ + + 最常用的算法是 Adam,它是以上几种方法的改进。具体而言,它的更新规则是: + - 计算梯度的一阶 momentum 和 二阶 momentum: + $ + m_(t + 1) = beta_1 m_t + (1 - beta_1) g_t\ + v_(t + 1) = beta_2 v_t + (1 - beta_2) g_t^2 + $ + - 做 bias correction: + $ + m_(t + 1) = m_(t + 1)/(1 - beta_1^t)\ + v_(t + 1) = v_(t + 1)/(1 - beta_2^t) + $ + 这是为了解决 $0-$初始化产生的问题。 + - 更新参数: + $ + theta_(t + 1) = theta_t - eta m_(t + 1)/(sqrt(v_(t + 1) + epsilon)) + $ + 它已经成为了训练较大规模神经网络的标准方法。 + === Normalization + 另一个重要的问题是梯度消失。一种解决方法是 Batch Normalization,它的思路是对每个 batch 的数据进行归一化,使得每个维度的数据均值为 $0$,方差为 $1$。尽管原因未知,实践表明这样可以极大地增强泛化能力。 + + 另一种常见方法是 drop out,也就是随机选择一些神经元不参与训练。通过一些计算可以证明,这样的效果相当于添加一个正则化项。 = 理论基础 diff --git "a/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/\344\275\234\344\270\232/hw6.typ" "b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/\344\275\234\344\270\232/hw6.typ" new file mode 100644 index 0000000..debd025 --- /dev/null +++ 
"b/\346\234\272\345\231\250\345\255\246\344\271\240\346\225\260\345\255\246\345\257\274\345\274\225/\344\275\234\344\270\232/hw6.typ" @@ -0,0 +1,141 @@ +#import "../../template.typ": * +#import "../main.typ": * +#show: note.with( + title: "作业6", + author: "YHTQ", + date: datetime.today().display(), + logo: none, + withOutlined : false, + withTitle : false, + withHeadingNumbering: true +) += #empty + == #empty + 注意到: + $ + t(x) = + cases( + 1 + x quad -1 < x <= 0, + 1 - x quad 0 < x < 1, + 0 quad "otherwise" + ) + $ + 因此: + $ + t(x) = (sigma(1 - x) - sigma(-x)) + (sigma(1 + x) - sigma(x)) - 1 + $ + 进而 $P_M f$ 可以被两层 RELU 网络表示 + == #empty + 考虑: + $ + abs(f(x) - P_M f(x)) = abs(f(x) - sum_k f(x_k) t((x - x_k) / h))\ + $ + 假设 $x in [x_(k - 1), x_k]$,则上式等于: + $ + abs(f(x) - f(x_(k - 1)) t((x - x_(k - 1)) / h) - f(x_k) t((x - x_k) / h))\ + = abs(f(x) - f(x_(k - 1)) ((x_k - x) / h) - f(x_k) ((x - x_(k - 1)) / h))\ + = abs(((x_k - x) / h) (f(x) - f(x_(k - 1))) + ((x - x_(k - 1)) / h) (f(x) - f(x_k)))\ + <= abs(((x_k - x) / h) (f(x) - f(x_(k - 1)))) + abs(((x - x_(k - 1)) / h) (f(x) - f(x_k)))\ + <= abs( (f(x) - f(x_(k - 1)))) + abs((f(x) - f(x_k)))\ + $ + 因此,对于任何 $epsilon > 0$,由连续函数的一致连续性,取 $delta$ 使得 $abs(x - y) < delta => abs(f(x) - f(y)) < epsilon$,令 $M$ 充分大使得 $h < delta$,立刻有: + $ + abs(f(x) - P_M f(x)) < 2 epsilon + $ + == #empty + 对于 $f(x) = x^2$,做类似估计有: + $ + abs(f(x) - P_M f(x)) <= abs(((x_k - x) / h) (x^2 - x_(k - 1)^2) + ((x - x_(k - 1)) / h) (x^2 - x_k^2))\ + = 1/h (x - x_(k - 1)) (x_k - x) h\ + <= h^2/4 + $ + 因此只需: + $ + 1/(4 M^2) < epsilon\ + M > 1/(2 sqrt(epsilon)) + $ += #empty + == #empty + #let fs = $f^*$ + 上一题已经证明: + $ + abs(P_(2^l) fs (x) - fs(x)) < (1/2^l)^2/4 = 1/4 2^(-2l) + $ + == #empty + 用归纳法,假设: + $ + x in [m/(2^l), (m + 1)/(2^l)]\ + $ + 简略起见,设 $m = 4k$(其他情形类似),则: + $ + x in [k/(2^(l - 2)), (k + 1)/(2^(l - 2))]\ + $ + 归纳假设给出: + $ + P_(2^(l-2)) fs(x) - P_(2^(l - 1)) fs(x) = (g_(l-1) (x))/(2^(2(l-1))) + $ + 同时: + $ + P_(2^(l-2)) fs(x) - P_(2^(l - 1)) 
fs(x) \ + = (k/(2^(l - 2)))^2 ((k + 1) - 2^(l - 2) x) + ((k + 1)/(2^(l - 2)))^2 (2^(l - 2) x - k) - (k/(2^(l - 2)))^2 ((2k + 1) - 2^(l - 1) x) - ((2k + 1)/(2^(l - 1)))^2 (2^(l - 1) x - 2 k)\ + $ + 换言之: + $ + g_(l - 1) (x) =(2 k)^2 ((k + 1) - 2^(l - 2) x) + (2 k + 2)^2 (2^(l-2) x - k) - (2 k)^2 ((2k + 1) - 2^(l-1) x) - (2k + 1)^2 (2^(l-1) x - 2 k)\ + = (2 k)^2 (2^(l-2)x - k) + (2^(l-2) x - k) ((2 k + 2)^2 - 2 (2 k + 1)^2)\ + = 2 (2^(l-2)x - k)\ + <= 2 (2^(l-2) (4 k + 1)/(2^l) - k) = 1/2 + $ + 因此: + $ + g_l (x) = max(0, 1 - abs(2 g_(l - 1) (x) - 1))\ + = max(0, 2 g_(l - 1) (x))\ + = 2 (2^(l-1)x - 2 k) + $ + 由类似的推导,它就是: + $ + P_(2^(l-1)) fs(x) - P_(2^(l)) fs(x) + $ + == #empty + 显然 $g(x) = t(2 x - 1)$ 是两层 RELU 网络,因此 $g_l (x)$ 可以表示为 $2 l$ 层 RELU 网络 += #empty + == #empty + #let hR = $hat(cal(R))$ + #let hRd = $hR_"drop"$ + #let mask = $dot.circle$ + $ + hRd = E (1/(2n) sum_(i = 1)^n ( (beta mask xi)^T x_i - y_i)^2)\ + = 1/(2n) sum_(i = 1)^n E ( (beta mask xi)^T x_i - y_i)^2\ + = 1/(2n) sum_(i = 1)^n E ( (beta mask xi)^T x_i - p beta^T x + p beta^T x - y_i)^2\ + = 1/(2n) sum_(i = 1)^n E ( (beta mask xi)^T x_i - p beta^T x)^2 + (p beta^T x - y_i)^2\ + = hR(hat(beta)) + 1/(2n) sum_(i = 1)^n D ( (beta mask xi)^T x_i) \ + = hR(hat(beta)) + 1/(2n) sum_(i = 1)^n D (sum_d xi_d beta_d x_(i, d)) \ + = hR(hat(beta)) + 1/(2n) sum_(i = 1)^n sum_d D(xi_d beta_d x_(i, d)) \ + = hR(hat(beta)) + 1/(2n) sum_(i = 1)^n sum_k (1-p)/p (p beta_k)^2 (x_(i, k))^2 \ + = hR(hat(beta)) + 1/(2) sum_k (1-p)/p (p beta_k)^2 1/n sum_(i = 1)^n (x_(i, k))^2 \ + = hR(hat(beta)) + (1 - p)/(2 p) sum_k omega_k tilde(beta)_k^2 \ + // = 1/(2n) sum_(i = 1)^n E ( (beta mask xi)^T x_i - p beta^T x)^2 + (p beta^T x - y_i)^2\ + // = 1/(2n) sum_(i = 1)^n D ( (beta mask xi)^T x_i) + (p beta^T x - y_i)^2\ + // = 1/(2n) sum_(i = 1)^n D ( sum_k xi_k beta_k x_(i, k) ) + (p beta^T x - y_i)^2\ + // = 1/(2n) sum_(i = 1)^n sum_k D (xi_k) (beta_k x_(i, k))^2 + (p beta^T x - y_i)^2\ + // = 1/(2n) sum_(i = 1)^n sum_k p (1-p) 
(beta_k x_(i, k))^2 + (p beta^T x - y_i)^2\ + // = 1/(2n) sum_(i = 1)^n sum_k p (1-p) (beta_k x_(i, k))^2 + (p beta^T x - beta^T x + beta^T x - y_i)^2\ + // = 1/(2n) sum_(i = 1)^n sum_k p (1-p) (beta_k x_(i, k))^2 + (p-1)^2 (beta^T x)^2 + 2 (p - 1) beta^T x (beta^T x - y_i) + (beta^T x - y_i)^2\ + // = hR(beta) + 1/(2n) sum_(i = 1)^n sum_k p (1-p) (beta_k x_(i, k))^2 + (p-1)^2 (beta^T x)^2 + 2 (p - 1) beta^T x (beta^T x - y_i)\ + + $ + // 对于某个 $x$,假设 $x in [u, u']$,其中 $u$ 是 $M = 2^(l - 1)$ 时划分点。 + // - 假设 $x in [u, v], v = (u + u') / 2$ 是 $M = 2^l$ 时划分点,有: + // $ + // P_(2^(l - 1)) fs(x) - P_(2^l) fs(x) = u^2 t((x - u) / h) + u'^2 t((x - u') / h) - u^2 t((x - u) / (2 h)) - v^2 t((x - v) / (2 h))\ + // = u^2 ((u' - x) / h) + u'^2 ((x - u) / h) - u^2 ((v - x) / (2 h)) - v^2 ((x - u) / (2 h))\ + // $ + // 归纳假设给出: + // $ + // P_(2^(l - 2)) fs(x) - P_(2^(l - 1)) fs(x) = u^2 ((u' - x) / h) + u'^2 ((x - u) / h) - u^2 ((v - x) / (2 h)) - v^2 ((x - u) / (2 h))\ + // $ += #empty + 代码为: + #raw(read("../code6/src/code6/main.py"), lang: "python", block: true) + 输出为: + #raw(read("../code6/train_output"), lang: "text", block: true) \ No newline at end of file