Skip to content

Commit

Permalink
use encoding_is_utf8() for ncdirect
Browse files Browse the repository at this point in the history
  • Loading branch information
dankamongmen committed Jun 5, 2024
1 parent bfc0502 commit e631d68
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 37 deletions.
3 changes: 2 additions & 1 deletion src/lib/direct.c
Original file line number Diff line number Diff line change
Expand Up @@ -893,8 +893,9 @@ ncdirect* ncdirect_core_init(const char* termtype, FILE* outfp, uint64_t flags){
}
const char* encoding = nl_langinfo(CODESET);
bool utf8 = false;
if(encoding && strcmp(encoding, "UTF-8") == 0){
if(encoding && encoding_is_utf8(encoding)){
utf8 = true;
ncmetric_use_utf8();
}
if(setup_signals(ret, (flags & NCDIRECT_OPTION_NO_QUIT_SIGHANDLERS),
true, ncdirect_stop_minimal)){
Expand Down
27 changes: 27 additions & 0 deletions src/lib/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1889,6 +1889,33 @@ int putenv_term(const char* termname) __attribute__ ((nonnull (1)));
int set_loglevel_from_env(ncloglevel_e* loglevel)
__attribute__ ((nonnull (1)));

// Report whether the codeset name |enc| (e.g. from nl_langinfo(CODESET))
// designates UTF-8. |enc| must be a non-NULL, NUL-terminated string.
//
// glibc's _nl_normalize_charset() converts to lowercase, removing everything
// but alnums. furthermore, "cs" is a valid prefix meaning "character set".
// we mirror that here: an optional leading "cs" is skipped, non-alphanumeric
// bytes are ignored, and the remaining alnums must spell exactly "utf8"
// (case-insensitively). thus "UTF-8", "utf8", and "csUTF8" all match, while
// "UTF-16", "utf8x", and "" do not.
static inline bool
encoding_is_utf8(const char *enc){
  // the <ctype.h> functions are only defined for EOF and values representable
  // as unsigned char; a plain (possibly signed) char with the high bit set
  // would be undefined behavior, so walk the string as unsigned bytes.
  const unsigned char* cur = (const unsigned char*)enc;
  if(tolower(cur[0]) == 'c' && tolower(cur[1]) == 's'){ // strncasecmp() isn't ansi/iso
    cur += 2; // skip initial "cs" if present.
  }
  const char utfstr[] = "utf8"; // already lowercase
  const char* match = utfstr;
  for( ; *cur ; ++cur){
    if(!isalnum(*cur)){ // we only care about alnums
      continue;
    }
    // once |match| reaches the terminating NUL, any further alnum mismatches
    // here, so |match| never advances beyond the end of utfstr.
    if(tolower(*cur) != (unsigned char)*match){
      return false;
    }
    ++match;
  }
  // success only if all of "utf8" was consumed
  return *match == '\0';
}

// tell ncmetric that utf8 is available. should be per-context, but isn't.
void ncmetric_use_utf8(void);

#undef API
#undef ALLOC

Expand Down
25 changes: 13 additions & 12 deletions src/lib/metric.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,27 @@

static const wchar_t UTF8_SUBPREFIX[] = L"mµnpfazy"; // 10^24-1
static const wchar_t ASCII_SUBPREFIX[] = L"munpfazy"; // 10^24-1

// we want to use UTF8_SUBPREFIX if we have utf8 available to us. we could
// pull this out of const struct notcurses*, except that ncnmetric() doesn't
// take one, and we don't want to break the API. instead, ncmetric_use_utf8()
// is called during notcurses/ncdirect initialization when we create a utf8
// context. a gross hack =\.
static pthread_once_t utf8_verdict = PTHREAD_ONCE_INIT;
static const wchar_t* SUBPREFIXES = ASCII_SUBPREFIX;
static pthread_once_t utf8_detector = PTHREAD_ONCE_INIT;

// sure hope we've called setlocale() by the time we hit this!
static void
detect_utf8(void){
const char* encoding = nl_langinfo(CODESET);
if(encoding){
if(strcmp(encoding, "UTF-8") == 0){
SUBPREFIXES = UTF8_SUBPREFIX;
}
}
ncmetric_use_utf8_internal(void){
SUBPREFIXES = UTF8_SUBPREFIX;
}

// Switch ncmetric over to the UTF-8 subprefix table. The actual work is done
// by ncmetric_use_utf8_internal(), which is dispatched through pthread_once()
// on utf8_verdict, so the initializer runs at most once no matter how many
// times (or from how many threads) this is called. The pthread_once() result
// is deliberately discarded.
void ncmetric_use_utf8(void){
  (void)pthread_once(&utf8_verdict, ncmetric_use_utf8_internal);
}

const char* ncnmetric(uintmax_t val, size_t s, uintmax_t decimal,
char* buf, int omitdec, uintmax_t mult,
int uprefix){
// FIXME this is global to the process...ick :/
fesetround(FE_TONEAREST);
pthread_once(&utf8_detector, detect_utf8);
fesetround(FE_TONEAREST); // FIXME global to the process...ick :/
// these two must have the same number of elements
const wchar_t* subprefixes = SUBPREFIXES;
const wchar_t prefixes[] = L"KMGTPEZY"; // 10^21-1 encompasses 2^64-1
Expand Down
27 changes: 3 additions & 24 deletions src/lib/notcurses.c
Original file line number Diff line number Diff line change
Expand Up @@ -1095,30 +1095,6 @@ int ncplane_destroy_family(ncplane *ncp){
return ret;
}

// Decide whether the codeset name |enc| refers to UTF-8. |enc| must be a
// non-NULL, NUL-terminated string.
//
// glibc's _nl_normalize_charset() converts to lowercase, removing everything
// but alnums. furthermore, "cs" is a valid prefix meaning "character set".
// accordingly, we drop an optional leading "cs", ignore every byte that is
// not alphanumeric, and require the remaining alnums to spell "utf8" exactly,
// compared case-insensitively.
static bool
encoding_is_utf8(const char *enc){
  // <ctype.h> classification/conversion is undefined for negative values
  // other than EOF, so each byte is converted to unsigned char before use.
  if(tolower((unsigned char)enc[0]) == 'c' && tolower((unsigned char)enc[1]) == 's'){ // strncasecmp() isn't ansi/iso
    enc += 2; // skip initial "cs" if present.
  }
  const char utfstr[] = "utf8"; // already lowercase
  const char* match = utfstr;
  while(*enc){
    const unsigned char c = (unsigned char)*enc;
    if(isalnum(c)){ // we only care about alnums
      // if |match| already sits on the terminating NUL this comparison
      // fails, so we never step past the end of utfstr.
      if(tolower(c) != (unsigned char)*match){
        return false;
      }
      ++match;
    }
    ++enc;
  }
  if(*match){ // ran out of input before matching all of "utf8"
    return false;
  }
  return true;
}

// it's critical that we're using UTF-8 encoding if at all possible. since the
// client might not have called setlocale(2) (if they weren't reading the
// directions...), go ahead and try calling setlocale(LC_ALL, "") and then
Expand Down Expand Up @@ -1271,6 +1247,9 @@ notcurses_early_init(const struct notcurses_options* opts, FILE* fp, unsigned* u
free(ret);
return NULL;
}
if(utf8){
ncmetric_use_utf8();
}
return ret;
}

Expand Down

0 comments on commit e631d68

Please sign in to comment.