From 3a85db63674adfdd06915bcbeb8198191674ab3d Mon Sep 17 00:00:00 2001 From: Ida Iyes Date: Sun, 28 Apr 2024 22:19:53 +0300 Subject: [PATCH] Phoneme codec for cit names --- bin/mw_datatool/src/cmd/gen_map.rs | 7 +- bin/mw_datatool/src/cmd/info.rs | 9 + bin/mw_datatool/src/cmd/map_ascii.rs | 2 +- bin/mw_datatool/src/cmd/reencode.rs | 5 +- doc/src/tech/dataformat-player.md | 18 +- lib/mw_common/src/lib.rs | 1 + lib/mw_common/src/phoneme.rs | 439 +++++++++++++++++++++++++++ lib/mw_dataformat/src/header.rs | 35 ++- lib/mw_dataformat/src/read.rs | 55 +++- lib/mw_dataformat/src/write.rs | 46 ++- 10 files changed, 581 insertions(+), 36 deletions(-) create mode 100644 lib/mw_common/src/phoneme.rs diff --git a/bin/mw_datatool/src/cmd/gen_map.rs b/bin/mw_datatool/src/cmd/gen_map.rs index 7b200e2..5cd6c3b 100644 --- a/bin/mw_datatool/src/cmd/gen_map.rs +++ b/bin/mw_datatool/src/cmd/gen_map.rs @@ -1,6 +1,6 @@ use std::io::BufWriter; -use mw_common::{game::{MapGenTileData, TileKind}, grid::*}; +use mw_common::{game::{MapGenTileData, TileKind}, grid::*, phoneme::Ph}; use crate::prelude::*; use crate::{CommonArgs, GenMapArgs}; @@ -28,7 +28,10 @@ pub fn main(common: &CommonArgs, args: &GenMapArgs) -> AnyResult<()> { .start_is()?; let is = b_is .with_map_lz4compressed(&map, true, &mut scratch)? - .with_cits([Pos(12, 17), Pos(7, 3)])? + .with_cits([ + (Pos(12, 17), [Ph::A, Ph::B, Ph::E, Ph::Z].as_slice()), + (Pos(7, 3), [Ph::I, Ph::D, Ph::A].as_slice()), + ])? .with_named_players(["iyes", "georgie", "gr.NET"])? 
.finish()?; let b_file = b_file.with_is(is)?; diff --git a/bin/mw_datatool/src/cmd/info.rs b/bin/mw_datatool/src/cmd/info.rs index 2975af8..8668d35 100644 --- a/bin/mw_datatool/src/cmd/info.rs +++ b/bin/mw_datatool/src/cmd/info.rs @@ -1,3 +1,4 @@ +use mw_common::phoneme::{lang, render_str}; use mw_dataformat::read::MwFileReader; use crate::prelude::*; @@ -55,5 +56,13 @@ pub fn main(common: &CommonArgs, args: &InfoArgs) -> AnyResult<()> { eprintln!("{}: {:?}", i, name); } + eprintln!(); + eprintln!("Cits:"); + let cit_pos = isr.read_cits_pos()?.to_owned(); + let iter_cit_names = isr.read_cits_names()?; + for (i, (pos, name)) in cit_pos.iter().cloned().zip(iter_cit_names).enumerate() { + eprintln!("{}: Y:{},X:{} {:?}", i, pos.y(), pos.x(), render_str::(name)); + } + Ok(()) } diff --git a/bin/mw_datatool/src/cmd/map_ascii.rs b/bin/mw_datatool/src/cmd/map_ascii.rs index 747d64f..5cfcc0c 100644 --- a/bin/mw_datatool/src/cmd/map_ascii.rs +++ b/bin/mw_datatool/src/cmd/map_ascii.rs @@ -22,7 +22,7 @@ pub fn main(common: &CommonArgs, _args: &MapAsciiArgs) -> AnyResult<()> { let (_, mut isr) = mfr.read_is()?; let map: MapDataTopo = isr.read_map_dyntopo(Some(&mut scratch), false)?; - let cits = isr.read_cits()?; + let cits = isr.read_cits_pos()?; fn f_tile_ascii(cits: &[Pos], pos: Pos, kind: TileKind) -> u8 { if cits.iter().position(|p| *p == pos).is_some() { diff --git a/bin/mw_datatool/src/cmd/reencode.rs b/bin/mw_datatool/src/cmd/reencode.rs index 8fe3567..5b8a68c 100644 --- a/bin/mw_datatool/src/cmd/reencode.rs +++ b/bin/mw_datatool/src/cmd/reencode.rs @@ -89,8 +89,9 @@ fn reencode(reader: R, writer: W, args: &Reenco } }; - let cits = isr.read_cits()?; - let b_is = b_is.with_cits(cits.iter().cloned())?; + let cit_pos = isr.read_cits_pos()?.to_owned(); + let iter_cit_names = isr.read_cits_names()?; + let b_is = b_is.with_cits(cit_pos.iter().cloned().zip(iter_cit_names))?; let b_is = if args.anonymize || isr.is_anonymized() { b_is.with_anonymous_players(isr.n_players())? 
diff --git a/doc/src/tech/dataformat-player.md b/doc/src/tech/dataformat-player.md index 8ed2569..48e12f0 100644 --- a/doc/src/tech/dataformat-player.md +++ b/doc/src/tech/dataformat-player.md @@ -46,9 +46,11 @@ It begins with a header: - `u8`: map size (radius) - `u8`: number of players - `u8`: number of cities/regions - - `u16`: length of the whole Initialization Sequence / offset at which Messages will start - - `u16`: length of player names data (0 for an anonymized stream) - `u32`: length of compressed map data in bytes + - `u16`: length of the Rules data + - `u16`: length of the Cits names data + - `u16`: length of the player names data (0 for an anonymized stream) + - `u16`: (reserved) The `flags` field is encoded as follows: @@ -108,9 +110,17 @@ Hex example: After the map data, regions are encoded the same way: one byte per tile, in concentric ring order. The byte is the city/region ID for that tile. -### City Locations +### City Info -Then follows the list of city coordinates. +First, locations for each city on the map: + - `(u8, u8)`: (y, x) location + +Then, names for each city on the map: + - `u8`: length in bytes + - …: phonemes + +The name uses a special Phoneme encoding (undocumented, see source code), +which can be rendered/localized based on client language. ### Player Names diff --git a/lib/mw_common/src/lib.rs b/lib/mw_common/src/lib.rs index c66d899..145512d 100644 --- a/lib/mw_common/src/lib.rs +++ b/lib/mw_common/src/lib.rs @@ -52,4 +52,5 @@ pub mod algo; pub mod driver; pub mod grid; pub mod plid; +pub mod phoneme; pub mod game; diff --git a/lib/mw_common/src/phoneme.rs b/lib/mw_common/src/phoneme.rs new file mode 100644 index 0000000..17f378c --- /dev/null +++ b/lib/mw_common/src/phoneme.rs @@ -0,0 +1,439 @@ +//! 
// Phoneme Encoding for Place Names
//
// A place name is a sequence of `Ph` values. Each value is one byte, and a
// name is turned into text for a particular language with `render_str`.

/// One phoneme of a place name.
///
/// The enum is `#[repr(u8)]` and has no explicit discriminants, so the
/// variants are numbered consecutively in declaration order, from
/// `Space = 0` up to `Zz = 47`. Reordering or inserting variants therefore
/// changes the numeric value of every variant that follows.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum Ph {
    Space,
    // Simple vowels.
    A,
    E,
    I,
    O,
    U,
    // Iotated ("y"-prefixed) vowels.
    Ya,
    Ye,
    Yi,
    Yo,
    Yu,
    // Single consonants.
    B,
    Ch,
    D,
    F,
    G,
    H,
    K,
    Kh,
    L,
    M,
    N,
    P,
    R,
    S,
    Sh,
    T,
    V,
    Z,
    Ts,
    Zh,
    Dj,
    // Doubled consonants.
    Bb,
    Cch,
    Dd,
    Ff,
    Gg,
    Kk,
    Ll,
    Mm,
    Nn,
    Pp,
    Rr,
    Ss,
    Ssh,
    Tt,
    Vv,
    Zz,
}

/// Rendering of a phoneme into the orthography of language `L`.
///
/// `L` is a marker type (see [`lang`]); it lets the same phoneme type carry
/// one implementation per language.
pub trait RenderPhoneme<L> {
    /// Appends the text for `self` to `out`.
    ///
    /// `prev` and `next` are the neighbouring phonemes, so that
    /// context-dependent spellings can be chosen. `render_str` passes
    /// `Ph::Space` for a missing neighbour at either end of the name.
    fn render(self, prev: Self, next: Self, out: &mut String);
}

/// Renders a whole phoneme sequence into a `String` for language `L`.
///
/// An empty slice yields an empty string.
pub fn render_str<L>(phs: &[Ph]) -> String
where
    Ph: RenderPhoneme<L>,
{
    // Most phonemes render to one or two bytes of UTF-8 per letter; this is
    // only a starting capacity, the string grows as needed.
    let mut out = String::with_capacity(phs.len() * 2);
    let mut prev = Ph::Space;
    for (i, &cur) in phs.iter().enumerate() {
        let next = phs.get(i + 1).copied().unwrap_or(Ph::Space);
        // Name the trait impl explicitly so the language `L` is never ambiguous.
        <Ph as RenderPhoneme<L>>::render(cur, prev, next, &mut out);
        prev = cur;
    }
    out
}

impl From<Ph> for u8 {
    /// Returns the phoneme's discriminant.
    fn from(value: Ph) -> Self {
        value as u8
    }
}

impl Ph {
    /// `true` for any vowel, simple or iotated.
    pub fn is_vowel(self) -> bool {
        self.is_vowel_simple() || self.is_vowel_iotated()
    }

    /// `true` for `A`, `E`, `I`, `O`, `U`.
    pub fn is_vowel_simple(self) -> bool {
        matches!(self, Ph::A | Ph::E | Ph::I | Ph::O | Ph::U)
    }

    /// `true` for `Ya`, `Ye`, `Yi`, `Yo`, `Yu`.
    pub fn is_vowel_iotated(self) -> bool {
        matches!(self, Ph::Ya | Ph::Ye | Ph::Yi | Ph::Yo | Ph::Yu)
    }

    /// `true` for the non-doubled consonants (`B` through `Dj`).
    pub fn is_consonant_single(self) -> bool {
        matches!(
            self,
            Ph::B
                | Ph::Ch
                | Ph::D
                | Ph::F
                | Ph::G
                | Ph::H
                | Ph::K
                | Ph::Kh
                | Ph::L
                | Ph::M
                | Ph::N
                | Ph::P
                | Ph::R
                | Ph::S
                | Ph::Sh
                | Ph::T
                | Ph::V
                | Ph::Z
                | Ph::Ts
                | Ph::Zh
                | Ph::Dj
        )
    }

    /// `true` for the doubled consonants (`Bb` through `Zz`).
    pub fn is_consonant_double(self) -> bool {
        matches!(
            self,
            Ph::Bb
                | Ph::Cch
                | Ph::Dd
                | Ph::Ff
                | Ph::Gg
                | Ph::Kk
                | Ph::Ll
                | Ph::Mm
                | Ph::Nn
                | Ph::Pp
                | Ph::Rr
                | Ph::Ss
                | Ph::Ssh
                | Ph::Tt
                | Ph::Vv
                | Ph::Zz
        )
    }
}

/// Language marker types and their [`RenderPhoneme`] implementations.
///
/// In every `match` below the tuple is `(prev, self, next)` and arms are
/// tried top to bottom, so context-specific arms must precede the generic
/// `(_, X, _)` arm for the same phoneme.
pub mod lang {
    use super::*;

    /// English
    pub struct EN;
    /// Bulgarian
    pub struct BG;
    /// Russian
    pub struct RU;
    /// Ukrainian
    pub struct UK;
    /// Serbian
    pub struct SR;

    impl RenderPhoneme<EN> for Ph {
        fn render(self, prev: Self, next: Self, out: &mut String) {
            out.push_str(match (prev, self, next) {
                (_, Ph::Space, _) => " ",
                (_, Ph::A, _) => "a",
                (_, Ph::E, _) => "e",
                (_, Ph::O, _) => "o",
                (_, Ph::U, _) => "u",
                (_, Ph::I, _) => "i",
                (_, Ph::Ya, _) => "ya",
                (_, Ph::Ye, _) => "ye",
                (_, Ph::Yo, _) => "yo",
                (_, Ph::Yu, _) => "yu",
                (_, Ph::Yi, _) => "yi",
                (_, Ph::B, _) => "b",
                (_, Ph::Bb, _) => "bb",
                (_, Ph::Ch, _) => "ch",
                (_, Ph::Cch, _) => "cch",
                (_, Ph::D, _) => "d",
                (_, Ph::Dd, _) => "dd",
                (_, Ph::F, _) => "f",
                (_, Ph::Ff, _) => "ff",
                (_, Ph::G, _) => "g",
                (_, Ph::Gg, _) => "gg",
                (_, Ph::H, _) => "h",
                (_, Ph::K, _) => "k",
                (_, Ph::Kk, _) => "kk",
                (_, Ph::Kh, _) => "kh",
                (_, Ph::L, _) => "l",
                (_, Ph::Ll, _) => "ll",
                (_, Ph::M, _) => "m",
                (_, Ph::Mm, _) => "mm",
                (_, Ph::N, _) => "n",
                (_, Ph::Nn, _) => "nn",
                (_, Ph::P, _) => "p",
                (_, Ph::Pp, _) => "pp",
                (_, Ph::R, _) => "r",
                (_, Ph::Rr, _) => "rr",
                (_, Ph::S, _) => "s",
                (_, Ph::Ss, _) => "ss",
                (_, Ph::Sh, _) => "sh",
                (_, Ph::Ssh, _) => "ssh",
                (_, Ph::T, _) => "t",
                (_, Ph::Tt, _) => "tt",
                (_, Ph::V, _) => "v",
                (_, Ph::Vv, _) => "vv",
                (_, Ph::Z, _) => "z",
                (_, Ph::Zz, _) => "zz",
                (_, Ph::Ts, _) => "ts",
                (_, Ph::Zh, _) => "zh",
                // Word-final spelling.
                (_, Ph::Dj, Ph::Space) => "dge",
                (_, Ph::Dj, _) => "j",
            });
        }
    }

    impl RenderPhoneme<BG> for Ph {
        fn render(self, prev: Self, next: Self, out: &mut String) {
            out.push_str(match (prev, self, next) {
                (_, Ph::Space, _) => " ",
                (_, Ph::A, _) => "а",
                (_, Ph::E, _) => "е",
                (_, Ph::O, _) => "о",
                (_, Ph::U, _) => "у",
                (_, Ph::I, _) => "и",
                (_, Ph::Ya, _) => "я",
                // Ye/Yo/Yi: "й…" at word start or after a vowel, "ь…" otherwise.
                (Ph::Space, Ph::Ye, _) => "йе",
                (p, Ph::Ye, _) if p.is_vowel() => "йе",
                (_, Ph::Ye, _) => "ье",
                (Ph::Space, Ph::Yo, _) => "йо",
                (p, Ph::Yo, _) if p.is_vowel() => "йо",
                (_, Ph::Yo, _) => "ьо",
                (_, Ph::Yu, _) => "ю",
                (Ph::Space, Ph::Yi, _) => "йи",
                (p, Ph::Yi, _) if p.is_vowel() => "йи",
                (_, Ph::Yi, _) => "ьи",
                (_, Ph::B, _) => "б",
                (_, Ph::Bb, _) => "бб",
                (_, Ph::Ch, _) => "ч",
                (_, Ph::Cch, _) => "тч",
                (_, Ph::D, _) => "д",
                (_, Ph::Dd, _) => "дд",
                (_, Ph::F, _) => "ф",
                (_, Ph::Ff, _) => "фф",
                (_, Ph::G, _) => "г",
                (_, Ph::Gg, _) => "гг",
                (_, Ph::H, _) => "х",
                (_, Ph::K, _) => "к",
                (_, Ph::Kk, _) => "кк",
                (_, Ph::Kh, _) => "кх",
                (_, Ph::L, _) => "л",
                (_, Ph::Ll, _) => "лл",
                (_, Ph::M, _) => "м",
                (_, Ph::Mm, _) => "мм",
                (_, Ph::N, _) => "н",
                (_, Ph::Nn, _) => "нн",
                (_, Ph::P, _) => "п",
                (_, Ph::Pp, _) => "пп",
                (_, Ph::R, _) => "р",
                (_, Ph::Rr, _) => "рр",
                (_, Ph::S, _) => "с",
                (_, Ph::Ss, _) => "сс",
                // Sh+T is written as the single letter "щ": the Sh emits it ...
                (_, Ph::Sh, Ph::T) => "щ",
                (_, Ph::Sh, _) => "ш",
                (_, Ph::Ssh, _) => "шш",
                // ... and the following T emits nothing.
                (Ph::Sh, Ph::T, _) => "",
                (_, Ph::T, _) => "т",
                (_, Ph::Tt, _) => "тт",
                (_, Ph::V, _) => "в",
                (_, Ph::Vv, _) => "вв",
                (_, Ph::Z, _) => "з",
                (_, Ph::Zz, _) => "зз",
                (_, Ph::Ts, _) => "ц",
                (_, Ph::Zh, _) => "ж",
                (_, Ph::Dj, _) => "дж",
            });
        }
    }

    impl RenderPhoneme<RU> for Ph {
        fn render(self, prev: Self, next: Self, out: &mut String) {
            out.push_str(match (prev, self, next) {
                (_, Ph::Space, _) => " ",
                (_, Ph::A, _) => "а",
                // After these consonants E is spelled "е", elsewhere "э".
                (Ph::Zh, Ph::E, _) => "е",
                (Ph::Ts, Ph::E, _) => "е",
                (Ph::Sh, Ph::E, _) => "е",
                (Ph::Ssh, Ph::E, _) => "е",
                (Ph::Ch, Ph::E, _) => "е",
                (Ph::Cch, Ph::E, _) => "е",
                (_, Ph::E, _) => "э",
                (_, Ph::O, _) => "о",
                (_, Ph::U, _) => "у",
                (_, Ph::I, _) => "и",
                (_, Ph::Ya, _) => "я",
                (_, Ph::Ye, _) => "е",
                (_, Ph::Yo, _) => "ё",
                (_, Ph::Yu, _) => "ю",
                (Ph::Space, Ph::Yi, _) => "йи",
                (p, Ph::Yi, _) if p.is_vowel() => "йи",
                (_, Ph::Yi, _) => "ьи",
                (_, Ph::B, _) => "б",
                (_, Ph::Bb, _) => "бб",
                // Sh+Ch is the single letter "щ", already emitted by the
                // preceding Sh (see the `(_, Sh, Ch)` arm below). Emitting it
                // here as well would produce "щщ", so the Ch contributes
                // nothing. This arm must come before the word-final Ch arm.
                (Ph::Sh, Ph::Ch, _) => "",
                (_, Ph::Ch, Ph::Space) => "чь",
                (_, Ph::Ch, _) => "ч",
                (_, Ph::Cch, Ph::Space) => "тчь",
                (_, Ph::Cch, _) => "тч",
                (_, Ph::D, _) => "д",
                (_, Ph::Dd, _) => "дд",
                (_, Ph::F, _) => "ф",
                (_, Ph::Ff, _) => "фф",
                (_, Ph::G, _) => "г",
                (_, Ph::Gg, _) => "гг",
                (_, Ph::H, _) => "г",
                (_, Ph::K, _) => "к",
                (_, Ph::Kk, _) => "кк",
                (_, Ph::Kh, _) => "х",
                (_, Ph::L, Ph::Space) => "лл",
                (_, Ph::L, _) => "л",
                (_, Ph::Ll, _) => "лл",
                (_, Ph::M, _) => "м",
                (_, Ph::Mm, _) => "мм",
                (_, Ph::N, _) => "н",
                (_, Ph::Nn, _) => "нн",
                (_, Ph::P, _) => "п",
                (_, Ph::Pp, _) => "пп",
                (_, Ph::R, _) => "р",
                (_, Ph::Rr, _) => "рр",
                (_, Ph::S, _) => "с",
                (_, Ph::Ss, _) => "сс",
                (_, Ph::Sh, Ph::Ch) => "щ",
                (_, Ph::Sh, _) => "ш",
                (_, Ph::Ssh, _) => "шш",
                (_, Ph::T, _) => "т",
                (_, Ph::Tt, _) => "тт",
                (_, Ph::V, _) => "в",
                (_, Ph::Vv, _) => "вв",
                (_, Ph::Z, _) => "з",
                (_, Ph::Zz, _) => "зз",
                (_, Ph::Ts, _) => "ц",
                (_, Ph::Zh, _) => "ж",
                (_, Ph::Dj, _) => "дж",
            });
        }
    }

    impl RenderPhoneme<UK> for Ph {
        fn render(self, prev: Self, next: Self, out: &mut String) {
            out.push_str(match (prev, self, next) {
                (_, Ph::Space, _) => " ",
                (_, Ph::A, _) => "а",
                (_, Ph::E, _) => "е",
                (_, Ph::O, _) => "о",
                (_, Ph::U, _) => "у",
                (_, Ph::I, _) => "і",
                (_, Ph::Ya, _) => "я",
                (_, Ph::Ye, _) => "є",
                (Ph::Space, Ph::Yo, _) => "йо",
                (p, Ph::Yo, _) if p.is_vowel() => "йо",
                (_, Ph::Yo, _) => "ьо",
                (_, Ph::Yu, _) => "ю",
                (_, Ph::Yi, _) => "ї",
                (_, Ph::B, _) => "б",
                (_, Ph::Bb, _) => "бб",
                // Sh+Ch is the single letter "щ", already emitted by the
                // preceding Sh; the Ch itself contributes nothing (emitting
                // "щ" here too would double the letter).
                (Ph::Sh, Ph::Ch, _) => "",
                (_, Ph::Ch, _) => "ч",
                (_, Ph::Cch, _) => "тч",
                (_, Ph::D, _) => "д",
                (_, Ph::Dd, _) => "дд",
                (_, Ph::F, _) => "ф",
                (_, Ph::Ff, _) => "фф",
                (_, Ph::G, _) => "ґ",
                (_, Ph::Gg, _) => "ґґ",
                (_, Ph::H, _) => "г",
                (_, Ph::K, _) => "к",
                (_, Ph::Kk, _) => "кк",
                (_, Ph::Kh, _) => "х",
                (_, Ph::L, _) => "л",
                (_, Ph::Ll, _) => "лл",
                (_, Ph::M, _) => "м",
                (_, Ph::Mm, _) => "мм",
                (_, Ph::N, _) => "н",
                (_, Ph::Nn, _) => "нн",
                (_, Ph::P, _) => "п",
                (_, Ph::Pp, _) => "пп",
                (_, Ph::R, _) => "р",
                (_, Ph::Rr, _) => "рр",
                (_, Ph::S, _) => "с",
                (_, Ph::Ss, _) => "сс",
                (_, Ph::Sh, Ph::Ch) => "щ",
                (_, Ph::Sh, _) => "ш",
                (_, Ph::Ssh, _) => "шш",
                (_, Ph::T, _) => "т",
                (_, Ph::Tt, _) => "тт",
                (_, Ph::V, _) => "в",
                (_, Ph::Vv, _) => "вв",
                (_, Ph::Z, _) => "з",
                (_, Ph::Zz, _) => "зз",
                (_, Ph::Ts, _) => "ц",
                (_, Ph::Zh, _) => "ж",
                (_, Ph::Dj, _) => "дж",
            });
        }
    }

    impl RenderPhoneme<SR> for Ph {
        fn render(self, prev: Self, next: Self, out: &mut String) {
            out.push_str(match (prev, self, next) {
                (_, Ph::Space, _) => " ",
                (_, Ph::A, _) => "а",
                (_, Ph::E, _) => "е",
                (_, Ph::O, _) => "о",
                (_, Ph::U, _) => "у",
                (_, Ph::I, _) => "и",
                // After N/L (single or doubled) the iotation is carried by the
                // consonant, which is written "њ"/"љ" (see the N/L arms
                // below). The vowel itself must still be written, as its
                // plain form; emitting nothing here would drop the vowel.
                (Ph::N | Ph::Nn | Ph::L | Ph::Ll, Ph::Ya, _) => "а",
                (Ph::N | Ph::Nn | Ph::L | Ph::Ll, Ph::Ye, _) => "е",
                (Ph::N | Ph::Nn | Ph::L | Ph::Ll, Ph::Yo, _) => "о",
                (Ph::N | Ph::Nn | Ph::L | Ph::Ll, Ph::Yu, _) => "у",
                (Ph::N | Ph::Nn | Ph::L | Ph::Ll, Ph::Yi, _) => "и",
                (_, Ph::Ya, _) => "ја",
                (_, Ph::Ye, _) => "је",
                (_, Ph::Yo, _) => "јо",
                (_, Ph::Yu, _) => "ју",
                (_, Ph::Yi, _) => "ји",
                (_, Ph::B, _) => "б",
                (_, Ph::Bb, _) => "бб",
                (_, Ph::Ch, _) => "ћ",
                (_, Ph::Cch, _) => "ч",
                (_, Ph::D, _) => "д",
                (_, Ph::Dd, _) => "дд",
                (_, Ph::F, _) => "ф",
                (_, Ph::Ff, _) => "фф",
                (_, Ph::G, _) => "г",
                (_, Ph::Gg, _) => "гг",
                (_, Ph::H, _) => "х",
                (_, Ph::K, _) => "к",
                (_, Ph::Kk, _) => "кк",
                (_, Ph::Kh, _) => "кх",
                (_, Ph::L, v) if v.is_vowel_iotated() => "љ",
                (_, Ph::L, _) => "л",
                (_, Ph::Ll, v) if v.is_vowel_iotated() => "лљ",
                (_, Ph::Ll, _) => "лл",
                (_, Ph::M, _) => "м",
                (_, Ph::Mm, _) => "мм",
                (_, Ph::N, v) if v.is_vowel_iotated() => "њ",
                (_, Ph::N, _) => "н",
                (_, Ph::Nn, v) if v.is_vowel_iotated() => "нњ",
                (_, Ph::Nn, _) => "нн",
                (_, Ph::P, _) => "п",
                (_, Ph::Pp, _) => "пп",
                (_, Ph::R, _) => "р",
                (_, Ph::Rr, _) => "рр",
                (_, Ph::S, _) => "с",
                (_, Ph::Ss, _) => "сс",
                (_, Ph::Sh, _) => "ш",
                (_, Ph::Ssh, _) => "шш",
                (_, Ph::T, _) => "т",
                (_, Ph::Tt, _) => "тт",
                (_, Ph::V, _) => "в",
                (_, Ph::Vv, _) => "вв",
                (_, Ph::Z, _) => "з",
                (_, Ph::Zz, _) => "зз",
                (_, Ph::Ts, _) => "ц",
                (_, Ph::Zh, _) => "ж",
                (_, Ph::Dj, _) => "ђ",
            });
        }
    }
}
a/lib/mw_dataformat/src/read.rs b/lib/mw_dataformat/src/read.rs index 684f1e9..86d24bf 100644 --- a/lib/mw_dataformat/src/read.rs +++ b/lib/mw_dataformat/src/read.rs @@ -1,6 +1,6 @@ //! Reading/Decoding MineWars Data Streams or Files -use mw_common::grid::*; +use mw_common::{grid::*, phoneme::Ph}; use thiserror::Error; use std::{io::{Cursor, Read, Seek, SeekFrom}, iter::FusedIterator}; @@ -299,12 +299,22 @@ impl<'b, R: Read + Seek> MwISReader<'b, R> { } } } - pub fn read_cits(&mut self) -> Result<&[Pos], MwReaderError> { - self.buf.resize(self.is_header.len_citdata(), 0); - self.reader.seek(SeekFrom::Start(self.off_data as u64 + self.is_header.offset_citdata() as u64))?; + pub fn read_cits_pos(&mut self) -> Result<&[Pos], MwReaderError> { + self.buf.resize(self.is_header.len_citdata_pos(), 0); + self.reader.seek(SeekFrom::Start(self.off_data as u64 + self.is_header.offset_citdata_pos() as u64))?; self.reader.read_exact(self.buf)?; Ok(bytemuck::cast_slice(&self.buf)) } + pub fn read_cits_names(&mut self) -> Result, MwReaderError> { + self.buf.resize(self.is_header.len_citdata_names(), 0); + self.reader.seek(SeekFrom::Start(self.off_data as u64 + self.is_header.offset_citdata_names() as u64))?; + self.reader.read_exact(self.buf)?; + Ok(CitNamesIter { + current_cit: 0, + total_cits: self.n_regions(), + buf: self.buf + }) + } pub fn read_players(&mut self) -> Result, MwReaderError> { self.buf.resize(self.is_header.len_playerdata(), 0); self.reader.seek(SeekFrom::Start(self.off_data as u64 + self.is_header.offset_playerdata() as u64))?; @@ -351,3 +361,40 @@ impl<'b> ExactSizeIterator for PlayerNamesIter<'b> { } impl<'b> FusedIterator for PlayerNamesIter<'b> {} + +pub struct CitNamesIter<'b> { + current_cit: u8, + total_cits: u8, + buf: &'b [u8], +} + +impl<'b> Iterator for CitNamesIter<'b> { + type Item = &'b [Ph]; + fn next(&mut self) -> Option { + if self.current_cit >= self.total_cits { + return None; + } + self.current_cit += 1; + if self.buf.is_empty() { + return 
Some(&[]); + } + let strlen = (self.buf[0] as usize).min(self.buf.len() - 1); + self.buf = &self.buf[1..]; + let (out, rem) = self.buf.split_at(strlen); + self.buf = rem; + unsafe { + Some(std::mem::transmute(out)) + } + } + fn size_hint(&self) -> (usize, Option) { + (self.len(), Some(self.len())) + } +} + +impl<'b> ExactSizeIterator for CitNamesIter<'b> { + fn len(&self) -> usize { + (self.total_cits - self.current_cit) as usize + } +} + +impl<'b> FusedIterator for CitNamesIter<'b> {} diff --git a/lib/mw_dataformat/src/write.rs b/lib/mw_dataformat/src/write.rs index 30e3e8b..d8b48b1 100644 --- a/lib/mw_dataformat/src/write.rs +++ b/lib/mw_dataformat/src/write.rs @@ -3,7 +3,7 @@ use seahash::SeaHasher; use thiserror::Error; use std::{hash::Hasher, io::{Cursor, Seek, SeekFrom, Write}}; -use mw_common::grid::*; +use mw_common::{grid::*, phoneme::Ph}; use crate::{header::{ISHeader, MwFileHeader}, map::MapTileDataOut}; @@ -410,19 +410,39 @@ impl<'b, W: Write + Seek> MwISBuilderWithMap<'b, W> { hash: self.hasher.map(|h| h.finish()), }) } - pub fn with_cits(mut self, cit_locations: impl IntoIterator) -> Result, MwWriterError> { + pub fn with_cits<'a>(mut self, cit_locations: impl IntoIterator) -> Result, MwWriterError> { self.buf.clear(); + self.buf.resize(2 * 256, 0); let mut count = 0; - for cit in cit_locations { - self.buf.extend_from_slice(&cit.y().to_be_bytes()); - self.buf.extend_from_slice(&cit.x().to_be_bytes()); + for (pos, name) in cit_locations { + if count == 255 { + break; + } + self.buf[count * 2 + 0] = pos.y() as u8; + self.buf[count * 2 + 1] = pos.x() as u8; + if name.len() >= 256 { + self.buf.push(0); + } else { + let name_bytes: &[u8] = unsafe { + std::mem::transmute(name) + }; + self.buf.push(name_bytes.len() as u8); + self.buf.extend_from_slice(name_bytes); + } count += 1; } - self.header.n_regions = count; - if let Some(ref mut h) = &mut self.hasher { - h.write(self.buf); + self.header.n_regions = count as u8; + { + let b_pos = &self.buf[..(count * 
2)]; + let b_names = &self.buf[(2 * 256)..]; + self.header.len_citdata_names = b_names.len() as u16; + if let Some(ref mut h) = &mut self.hasher { + h.write(b_pos); + h.write(b_names); + } + self.writer.write_all(b_pos)?; + self.writer.write_all(b_names)?; } - self.writer.write_all(self.buf)?; Ok(MwISBuilderWithCits { buf: self.buf, off_header: self.off_header, @@ -462,8 +482,12 @@ impl<'b, W: Write + Seek> MwISBuilderWithCits<'b, W> { self.buf.clear(); let mut count = 0; for name in names { - self.buf.push(name.len() as u8); // TODO: handle overflow - self.buf.extend_from_slice(name.as_bytes()); + if name.len() >= 255 { + self.buf.push(0); + } else { + self.buf.push(name.len() as u8); + self.buf.extend_from_slice(name.as_bytes()); + } count += 1; } self.header.n_players = count;