From e631d6868f5d29b3a9e449b2999dbad2c1ebdfd3 Mon Sep 17 00:00:00 2001
From: nick black <dankamongmen@gmail.com>
Date: Wed, 5 Jun 2024 09:47:59 -0400
Subject: [PATCH] use encoding_is_utf8() for ncdirect

---
 src/lib/direct.c    |  3 ++-
 src/lib/internal.h  | 27 +++++++++++++++++++++++++++
 src/lib/metric.c    | 25 +++++++++++++------------
 src/lib/notcurses.c | 27 +++------------------------
 4 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/src/lib/direct.c b/src/lib/direct.c
index c77fd0d44f..1667acd245 100644
--- a/src/lib/direct.c
+++ b/src/lib/direct.c
@@ -893,8 +893,9 @@ ncdirect* ncdirect_core_init(const char* termtype, FILE* outfp, uint64_t flags){
   }
   const char* encoding = nl_langinfo(CODESET);
   bool utf8 = false;
-  if(encoding && strcmp(encoding, "UTF-8") == 0){
+  if(encoding && encoding_is_utf8(encoding)){
     utf8 = true;
+    ncmetric_use_utf8();
   }
   if(setup_signals(ret, (flags & NCDIRECT_OPTION_NO_QUIT_SIGHANDLERS),
                    true, ncdirect_stop_minimal)){
diff --git a/src/lib/internal.h b/src/lib/internal.h
index 168d3f1a9a..49a08bbbaf 100644
--- a/src/lib/internal.h
+++ b/src/lib/internal.h
@@ -1889,6 +1889,33 @@ int putenv_term(const char* termname) __attribute__ ((nonnull (1)));
 int set_loglevel_from_env(ncloglevel_e* loglevel)
   __attribute__ ((nonnull (1)));
 
+// true iff enc (non-NULL, NUL-terminated) names UTF-8. like glibc's
+// _nl_normalize_charset(), compare lowercased and ignore everything but
+// alnums. furthermore, "cs" is a valid prefix meaning "character set".
+static inline bool
+encoding_is_utf8(const char *enc){
+  // ctype functions are undefined on negative values, so go through unsigned char
+  if(tolower((unsigned char)enc[0]) == 'c' && tolower((unsigned char)enc[1]) == 's'){
+    enc += 2; // skip initial "cs" if present (strncasecmp() isn't ansi/iso).
+  }
+  const char utfstr[] = "utf8";
+  const char* match = utfstr;
+  while(*enc){
+    const unsigned char c = (unsigned char)*enc;
+    if(isalnum(c)){ // we only care about alnums
+      if(tolower(c) != *match){ // utfstr is lowercase; its NUL never equals an alnum
+        return false;
+      }
+      ++match;
+    }
+    ++enc;
+  }
+  return *match == '\0'; // false if enc ended before all of "utf8" was seen
+}
+
+// tell ncmetric that utf8 is available. should be per-context, but isn't.
+void ncmetric_use_utf8(void);
+
 #undef API
 #undef ALLOC
 
diff --git a/src/lib/metric.c b/src/lib/metric.c
index ab78ed7511..26bb9d6f9b 100644
--- a/src/lib/metric.c
+++ b/src/lib/metric.c
@@ -8,26 +8,27 @@
 
 static const wchar_t UTF8_SUBPREFIX[] = L"mµnpfazy"; // 10^24-1
 static const wchar_t ASCII_SUBPREFIX[] = L"munpfazy"; // 10^24-1
+
+// we want to use UTF8_SUBPREFIX if we have utf8 available to us. we could
+// pull this out of const struct notcurses*, except that ncnmetric() doesn't
+// take one, and we don't want to break the API. instead, the notcurses and
+// ncdirect init paths call this when they create a utf8 context. a gross hack =\.
+static pthread_once_t utf8_verdict = PTHREAD_ONCE_INIT;
 static const wchar_t* SUBPREFIXES = ASCII_SUBPREFIX;
-static pthread_once_t utf8_detector = PTHREAD_ONCE_INIT;
 
-// sure hope we've called setlocale() by the time we hit this!
 static void
-detect_utf8(void){
-  const char* encoding = nl_langinfo(CODESET);
-  if(encoding){
-    if(strcmp(encoding, "UTF-8") == 0){
-      SUBPREFIXES = UTF8_SUBPREFIX;
-    }
-  }
+ncmetric_use_utf8_internal(void){ // pthread_once() callback
+  SUBPREFIXES = UTF8_SUBPREFIX; // use the prefix table that has a real µ
+}
+
+void ncmetric_use_utf8(void){ // may be called repeatedly; the switch runs only once
+  pthread_once(&utf8_verdict, ncmetric_use_utf8_internal);
 }
 
 const char* ncnmetric(uintmax_t val, size_t s, uintmax_t decimal,
                       char* buf, int omitdec, uintmax_t mult,
                       int uprefix){
-  // FIXME this is global to the process...ick :/
-  fesetround(FE_TONEAREST);
-  pthread_once(&utf8_detector, detect_utf8);
+  fesetround(FE_TONEAREST); // FIXME global to the process...ick :/
   // these two must have the same number of elements
   const wchar_t* subprefixes = SUBPREFIXES;
   const wchar_t prefixes[] = L"KMGTPEZY"; // 10^21-1 encompasses 2^64-1
diff --git a/src/lib/notcurses.c b/src/lib/notcurses.c
index c5b96c7cbd..95ad3a6938 100644
--- a/src/lib/notcurses.c
+++ b/src/lib/notcurses.c
@@ -1095,30 +1095,6 @@ int ncplane_destroy_family(ncplane *ncp){
   return ret;
 }
 
-// glibc's _nl_normalize_charset() converts to lowercase, removing everything
-// but alnums. furthermore, "cs" is a valid prefix meaning "character set".
-static bool
-encoding_is_utf8(const char *enc){
-  if(tolower(enc[0]) == 'c' && tolower(enc[1]) == 's'){ // strncasecmp() isn't ansi/iso
-    enc += 2; // skip initial "cs" if present.
-  }
-  const char utfstr[] = "utf8";
-  const char* match = utfstr;
-  while(*enc){
-    if(isalnum(*enc)){ // we only care about alnums
-      if(tolower(*enc) != tolower(*match)){
-        return false;
-      }
-      ++match;
-    }
-    ++enc;
-  }
-  if(*match){
-    return false;
-  }
-  return true;
-}
-
 // it's critical that we're using UTF-8 encoding if at all possible. since the
 // client might not have called setlocale(2) (if they weren't reading the
 // directions...), go ahead and try calling setlocale(LC_ALL, "") and then
@@ -1271,6 +1247,9 @@ notcurses_early_init(const struct notcurses_options* opts, FILE* fp, unsigned* u
     free(ret);
     return NULL;
   }
+  if(utf8){
+    ncmetric_use_utf8();
+  }
   return ret;
 }