Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Locale-sensitive operations using U16String and common locale enumeration #2787

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions lib/core/locale.nit
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# This file is part of NIT ( http://www.nitlanguage.org ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Enumeration of common locale names as well as default locale name
module locale is pkgconfig ("icu-io", "icu-i18n", "icu-uc")

`{
#include <string.h>
#include <unicode/uloc.h>
`}

# Length of ICU's default locale name, optionally copying the name into `locale`
#
# The name comes from `uloc_getDefault`. When `locale` is a NULL pointer
# nothing is copied and only the length (terminating NUL excluded) is
# returned, so a caller can measure first and allocate afterwards.
# No bound is checked on `locale`: the caller must provide at least
# `length + 1` bytes.
private fun get_default_locale(locale: CString): Int `{
	const char *name = uloc_getDefault();
	size_t name_length = strlen(name);

	/* Copy the name together with its terminating NUL. */
	if (locale != NULL) memcpy(locale, name, name_length + 1);

	return name_length;
`}

redef class Sys
# Name of ICU's default locale, as reported through `get_default_locale`
#
# The name is fetched in two passes: a first call measures its length,
# a second call copies it into a buffer sized from that length.
fun default_locale: String do
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this should be in an object itself added to Sys as to not clutter it more than it already is?

class Locales
	fun chinese_locale: String do return "zh"
	fun english_locale: String do return "en"
	# ...
end

redef class Sys
	fun locales do return new Locales
end

@privat WDYT?

# Measuring pass: with a null buffer `get_default_locale` copies nothing
var required_length = get_default_locale(new CString.nul)
# One extra byte for the terminating NUL written by the copying pass
var locale = new CString(required_length + 1)
get_default_locale(locale)
return locale.to_s
end
end
114 changes: 114 additions & 0 deletions lib/core/text/case_modification.nit
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
# This file is part of NIT ( http://www.nitlanguage.org ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Locale-sensitive case modification (lowercasing, uppercasing and titlecasing)
module case_modification is pkgconfig ("icu-io", "icu-i18n", "icu-uc")

intrude import u16_string
import locale

`{
#include <unicode/utypes.h>
#include <unicode/ustring.h>
`}

redef class U16String
	# Uppercased copy of `self` following the case rules of `locale`
	#
	# Works in two passes: the conversion is first run against a null
	# destination of capacity 0 to learn the required length, then run
	# again into a `U16String` built from that length plus one.
	fun uni_to_upper(locale: CString): U16String
	do
		var no_dest = new UCharString.nul
		var needed = uchar_string.uni_to_upper(no_dest, 0, locale, code_units)
		var upper = new U16String(needed + 1, needed + 1)
		uchar_string.uni_to_upper(upper.uchar_string, upper.capacity, locale, code_units)
		return upper
	end

	# Lowercased copy of `self` following the case rules of `locale`
	#
	# Uses the same measure-then-convert scheme as `uni_to_upper`.
	fun uni_to_lower(locale: CString): U16String
	do
		var no_dest = new UCharString.nul
		var needed = uchar_string.uni_to_lower(no_dest, 0, locale, code_units)
		var lower = new U16String(needed + 1, needed + 1)
		uchar_string.uni_to_lower(lower.uchar_string, lower.capacity, locale, code_units)
		return lower
	end

	# Titlecased copy of `self` following the case rules of `locale`
	#
	# Uses the same measure-then-convert scheme as `uni_to_upper`.
	fun uni_to_title(locale: CString): U16String
	do
		var no_dest = new UCharString.nul
		var needed = uchar_string.uni_to_title(no_dest, 0, locale, code_units)
		var title = new U16String(needed + 1, needed + 1)
		uchar_string.uni_to_title(title.uchar_string, title.capacity, locale, code_units)
		return title
	end
end

redef class UCharString
	# Uppercases the first `src_length` code units of `self` into `dest`
	#
	# `dest_cap` is the capacity of `dest` and `locale` selects the case
	# rules. Returns the value of `u_strToUpper`, which callers in this
	# module use as the number of code units the result requires.
	# The ICU status code is discarded.
	fun uni_to_upper(dest: UCharString, dest_cap: Int, locale: CString, src_length: Int): Int `{
		UErrorCode status = U_ZERO_ERROR;
		return u_strToUpper(dest, dest_cap, self, src_length, locale, &status);
	`}

	# Lowercases the first `src_length` code units of `self` into `dest`
	#
	# `dest_cap` is the capacity of `dest` and `locale` selects the case
	# rules. Returns the value of `u_strToLower`, which callers in this
	# module use as the number of code units the result requires.
	# The ICU status code is discarded.
	fun uni_to_lower(dest: UCharString, dest_cap: Int, locale: CString, src_length: Int): Int `{
		UErrorCode status = U_ZERO_ERROR;
		return u_strToLower(dest, dest_cap, self, src_length, locale, &status);
	`}

	# Titlecases the first `src_length` code units of `self` into `dest`
	#
	# `dest_cap` is the capacity of `dest` and `locale` selects the case
	# rules. No break iterator is supplied (`NULL` is passed to
	# `u_strToTitle`). Returns the value of `u_strToTitle`, which callers
	# in this module use as the number of code units the result requires.
	# The ICU status code is discarded.
	fun uni_to_title(dest: UCharString, dest_cap: Int, locale: CString, src_length: Int): Int `{
		UErrorCode status = U_ZERO_ERROR;
		return u_strToTitle(dest, dest_cap, self, src_length, NULL, locale, &status);
	`}
end

redef class String
	# Uppercased copy of `self` using the case rules of `locale`
	#
	# When `locale` is `null`, `default_locale` is used instead.
	#
	# ~~~raw
	# assert "kedi".uni_to_upper("tr_TR") == "KEDİ"
	# assert "kedi".uni_to_upper("en_US") != "KEDİ"
	# ~~~
	fun uni_to_upper(locale: nullable String): String
	do
		var locale_name = locale or else default_locale
		var converted = to_u16string.uni_to_upper(locale_name.to_cstring)
		return converted.to_s
	end

	# Lowercased copy of `self` using the case rules of `locale`
	#
	# When `locale` is `null`, `default_locale` is used instead.
	#
	# ~~~raw
	# assert "YAZIM".uni_to_lower("tr_TR") == "yazım"
	# assert "YAZIM".uni_to_lower("en_US") != "yazım"
	# ~~~
	fun uni_to_lower(locale: nullable String): String
	do
		var locale_name = locale or else default_locale
		var converted = to_u16string.uni_to_lower(locale_name.to_cstring)
		return converted.to_s
	end

	# Titlecased copy of `self` using the case rules of `locale`
	#
	# When `locale` is `null`, `default_locale` is used instead.
	#
	# ~~~raw
	# assert "istanbul".uni_to_title("tr_TR") == "İstanbul"
	# assert "istanbul".uni_to_title("en_US") != "İstanbul"
	# ~~~
	fun uni_to_title(locale: nullable String): String
	do
		var locale_name = locale or else default_locale
		var converted = to_u16string.uni_to_title(locale_name.to_cstring)
		return converted.to_s
	end
end
181 changes: 181 additions & 0 deletions lib/core/text/formatting.nit
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# This file is part of NIT ( http://www.nitlanguage.org ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Locale-sensitive number and date formatting module
module formatting is pkgconfig ("icu-io", "icu-i18n", "icu-uc")

intrude import u16_string
import date
import locale

`{
#include <unicode/utypes.h>
#include <unicode/ustring.h>
#include <unicode/unum.h>
#include <unicode/udat.h>
#include <time.h>
`}

# Wrapper for ICU's `UNumberFormatStyle` enum.
#
# Six style names are recognized: decimal, percent, scientific, ordinal,
# currency and spellout.
extern class NumberFormatStyle `{ UNumberFormatStyle `}
	# Maps the style name `style` to the matching ICU constant
	#
	# The comparison is exact (`strcmp`); any name that is not recognized
	# yields `UNUM_DEFAULT`.
	new (style: CString) `{
		if (strcmp(style, "decimal") == 0) return UNUM_DECIMAL;
		if (strcmp(style, "percent") == 0) return UNUM_PERCENT;
		if (strcmp(style, "scientific") == 0) return UNUM_SCIENTIFIC;
		if (strcmp(style, "ordinal") == 0) return UNUM_ORDINAL;
		if (strcmp(style, "currency") == 0) return UNUM_CURRENCY;
		if (strcmp(style, "spellout") == 0) return UNUM_SPELLOUT;

		/* Unknown name: fall back to ICU's default style. */
		return UNUM_DEFAULT;
	`}
end

# Wrapper for ICU's `UDateFormatStyle` enum.
#
# Four style names are recognized: full, long, medium and short.
extern class DateFormatStyle `{ UDateFormatStyle `}
	# Maps the style name `style` to the matching ICU constant
	#
	# The comparison is exact (`strcmp`); any name that is not recognized
	# yields `UDAT_DEFAULT`.
	new (style: CString) `{
		if (strcmp(style, "full") == 0) return UDAT_FULL;
		if (strcmp(style, "long") == 0) return UDAT_LONG;
		if (strcmp(style, "medium") == 0) return UDAT_MEDIUM;
		if (strcmp(style, "short") == 0) return UDAT_SHORT;

		/* Unknown name: fall back to ICU's default style. */
		return UDAT_DEFAULT;
	`}
end

# Wrapper for ICU's `UNumberFormat` structure
extern class NumberFormatter `{ UNumberFormat * `}
	# Opens a number formatter for `locale_name` using `style`
	#
	# No custom pattern and no parse-error receiver are given to
	# `unum_open`. The ICU status code is not inspected, so a failure to
	# open is not reported here.
	# The formatter must be released with `free` once no longer needed.
	new (locale_name: CString, style: NumberFormatStyle) `{
		UErrorCode error = U_ZERO_ERROR;
		UNumberFormat* numberFormatter = unum_open(style, NULL, -1, locale_name, NULL, &error);
		return numberFormatter;
	`}

	# Formats `number` into `dest` and returns the length of the formatted text
	#
	# `dest_length` is the capacity of `dest` in code units. The returned
	# value is the result of `unum_formatDouble`; callers in this module
	# first call with a null `dest` and a capacity of 0 to learn the
	# required length, then call again with a large enough buffer.
	# The ICU status code is discarded.
	fun format (number : Float, dest : UCharString, dest_length : Int) : Int `{
		UErrorCode error = U_ZERO_ERROR;
		/* No field position is requested: passing NULL avoids handing ICU an
		 * uninitialized `UFieldPosition` whose `field` member it would read. */
		int32_t required_length = unum_formatDouble(self, number, dest, dest_length, NULL, &error);
		return required_length;
	`}

	# Closes the underlying ICU formatter
	redef fun free `{
		unum_close(self);
	`}
end

# Wrapper for ICU's `UDateFormat` structure
extern class TimeFormatter `{ UDateFormat *`}
	# Opens a time-only formatter for `locale_name`
	#
	# `style` is given to `udat_open` as its first (time) style and
	# `UDAT_NONE` as its second (date) style; no time-zone id and no
	# pattern are supplied. The ICU status code is not inspected.
	# The formatter must be released with `free` once no longer needed.
	new (locale_name: CString, style: DateFormatStyle) `{
		UErrorCode status = U_ZERO_ERROR;
		return udat_open(style, UDAT_NONE, locale_name, NULL, -1, NULL, -1, &status);
	`}

	# Formats `time` (in milliseconds) into `dest` and returns the length of the formatted text
	#
	# `dest_length` is the capacity of `dest` in code units. Before
	# formatting, `time` is shifted by the C library's `timezone` value
	# (refreshed with `tzset` and scaled by 1000 to milliseconds).
	# The ICU status code is discarded.
	fun format(time: Int, dest: UCharString, dest_length: Int): Int `{
		extern long timezone;
		UErrorCode status = U_ZERO_ERROR;
		UDate shifted;
		uint32_t needed;

		tzset();
		shifted = time + timezone * 1000;
		needed = udat_format(self, shifted, dest, dest_length, NULL, &status);
		return needed;
	`}

	# Closes the underlying ICU formatter
	redef fun free `{
		udat_close(self);
	`}
end

redef class Float
	# Locale-aware textual form of `self` in the given `style`
	#
	# `style` is one of: decimal, percent, scientific, ordinal, currency
	# or spellout. When `locale_name` is `null`, `default_locale` is used.
	#
	# ~~~raw
	# assert 12.7.format("spellout", "en_US") == "twelve point seven"
	# ~~~
	fun format(style: String, locale_name: nullable String): String
	do
		var target_locale = locale_name or else default_locale
		var number_style = new NumberFormatStyle(style.to_cstring)
		var formatter = new NumberFormatter(target_locale.to_cstring, number_style)

		# Measuring pass: null destination, capacity 0
		var needed = formatter.format(self, new UCharString.nul, 0)

		# Formatting pass into a buffer built from the measured length
		var buffer = new U16String(needed + 1, needed + 1)
		formatter.format(self, buffer.uchar_string, buffer.capacity)

		formatter.free
		return buffer.to_s
	end
end

redef class Int
	# Locale-aware textual form of `self` in the given `style`
	#
	# `self` is converted with `to_f` and formatted by `Float::format`,
	# so the same `style` names and `locale_name` handling apply:
	# decimal, percent, scientific, ordinal, currency or spellout.
	#
	# ~~~raw
	# assert 12.format("spellout", "en_US") == "twelve"
	# ~~~
	fun format(style: String, locale_name: nullable String): String
	do
		var as_float = to_f
		return as_float.format(style, locale_name)
	end
end

redef class Time
	# Textual form of `self`: `format` with an empty style name and `default_locale`
	redef fun to_s: String do return format("", default_locale)

	# Number of milliseconds represented by `hour`, `minute` and `second`
	#
	# ~~~raw
	# var time = new Time(5,5,5)
	# assert time.to_ms == 18305000
	# ~~~
	fun to_ms: Int
	do
		var total_seconds = (hour * 60 + minute) * 60 + second
		return total_seconds * 1000
	end

	# Locale-aware textual form of `self` in the given `style`
	#
	# `style` is one of: full, long, medium or short.
	# When `locale_name` is `null`, `default_locale` is used.
	#
	# ~~~raw
	# var time = new Time(5,5,5)
	# assert time.format("full", "en_US") == "5:05:05 AM"
	# ~~~
	#
	# NOTE(review): ICU's `full` time style may also append a time-zone
	# name; confirm the expected output of the example above.
	fun format(style: String, locale_name: nullable String): String
	do
		var target_locale = locale_name or else default_locale
		var time_style = new DateFormatStyle(style.to_cstring)
		var formatter = new TimeFormatter(target_locale.to_cstring, time_style)
		var millis = to_ms

		# Measuring pass: null destination, capacity 0
		var needed = formatter.format(millis, new UCharString.nul, 0)

		# Formatting pass into a buffer built from the measured length
		var buffer = new U16String(needed + 1, needed + 1)
		formatter.format(millis, buffer.uchar_string, needed + 1)

		formatter.free
		return buffer.to_s
	end
end
Loading