diff --git a/Makefile b/Makefile index 2b5e477..f1bdf76 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ SHELL ?= /bin/bash endif #JAR_VERSION := $(shell mvn -q -Dexec.executable="echo" -Dexec.args='$${project.version}' --non-recursive exec:exec -DforceStdout) -JAR_VERSION := 2.05 +JAR_VERSION := 2.06 JAR_FILE := mn2pdf-$(JAR_VERSION).jar all: target/$(JAR_FILE) diff --git a/README.adoc b/README.adoc index 6bff1ee..d6348fb 100644 --- a/README.adoc +++ b/README.adoc @@ -17,14 +17,14 @@ You will need the Java Development Kit (JDK) version 8, Update 241 (8u241) or hi [source,sh] ---- -java -Xss5m -Xmx2048m -jar target/mn2pdf-2.05.jar --xml-file --xsl-file --pdf-file [--syntax-highlight] +java -Xss5m -Xmx2048m -jar target/mn2pdf-2.06.jar --xml-file --xsl-file --pdf-file [--syntax-highlight] ---- e.g. [source,sh] ---- -java -Xss5m -Xmx2048m -jar target/mn2pdf-2.05.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf +java -Xss5m -Xmx2048m -jar target/mn2pdf-2.06.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf ---- === PDF encryption features @@ -100,7 +100,7 @@ Update version in `pom.xml`, e.g.: ---- org.metanorma.fop mn2pdf -2.05 +2.06 Metanorma XML to PDF converter ---- @@ -111,8 +111,8 @@ Tag the same version in Git: [source,xml] ---- -git tag v2.05 -git push origin v2.05 +git tag v2.06 +git push origin v2.06 ---- Then the corresponding GitHub release will be automatically created at: diff --git a/pom.xml b/pom.xml index 2b74eb7..e3a981d 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ 4.0.0 org.metanorma.fop mn2pdf - 2.05 + 2.06 Metanorma XML to PDF converter jar https://www.metanorma.org diff --git a/src/main/java/org/apache/fop/complexscripts/util/NumberConverter.java b/src/main/java/org/apache/fop/complexscripts/util/NumberConverter.java new file mode 100644 index 0000000..0a602ff --- /dev/null +++ 
b/src/main/java/org/apache/fop/complexscripts/util/NumberConverter.java @@ -0,0 +1,1621 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.complexscripts.util; + +import org.metanorma.fop.utils.JapaneseToNumbers; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +// CSOFF: LineLengthCheck + +/** + *

Implementation of Number to String Conversion algorithm specified by + * XSL Transformations (XSLT) Version 2.0, W3C Recommendation, 23 January 2007.

+ * + *

This algorithm differs from that specified in XSLT 1.0 in the following + * ways:

+ *
    + *
  • input numbers are greater than or equal to zero rather than greater than zero;
  • + *
  • introduces format tokens { w, W, Ww };
  • + *
  • introduces ordinal parameter to generate ordinal numbers;
  • + *
+ * + *

Implementation Defaults and Limitations

+ *
    + *
  • If language parameter is unspecified (null or empty string), then the value + * of DEFAULT_LANGUAGE is used, which is defined below as "eng" (English).
  • + *
  • Only English, French, and Spanish word numerals are supported, and only if less than one trillion (1,000,000,000,000).
  • + *
  • Ordinal word numerals are supported for French and Spanish only when less than or equal to ten (10).
  • + *
+ * + *

Implementation Notes

+ *
    + *
  • In order to handle format tokens outside the Unicode BMP, all processing is + * done in Unicode Scalar Values represented with Integer and Integer[] + * types. Without affecting behavior, this may be subsequently optimized to + * use int and int[] types.
  • + *
  • In order to communicate various sub-parameters, including ordinalization, a features string + * is employed, which consists of comma-separated name and optional value tokens, where name and value + * are separated by an equals '=' sign.
  • + *
  • Ordinal numbers are selected by specifying a word-based format token in combination with an 'ordinal' feature with no value, in which case + * the features 'male' and 'female' may be used to specify gender for gender-sensitive languages. For example, the feature string "ordinal,female" + * selects female ordinals.
  • + *
+ * + *

This work was originally authored by Glenn Adams (gadams@apache.org).

+ */ +public class NumberConverter { + + /** alphabetical */ + public static final int LETTER_VALUE_ALPHABETIC = 1; + /** traditional */ + public static final int LETTER_VALUE_TRADITIONAL = 2; + + /** no token type */ + private static final int TOKEN_NONE = 0; + /** alhphanumeric token type */ + private static final int TOKEN_ALPHANUMERIC = 1; + /** nonalphanumeric token type */ + private static final int TOKEN_NONALPHANUMERIC = 2; + /** default token */ + private static final Integer[] DEFAULT_TOKEN = new Integer[] { (int) '1' }; + /** default separator */ + private static final Integer[] DEFAULT_SEPARATOR = new Integer[] { (int) '.' }; + /** default language */ + private static final String DEFAULT_LANGUAGE = "eng"; + + /** prefix token */ + private Integer[] prefix; + /** suffix token */ + private Integer[] suffix; + /** sequence of tokens, as parsed from format */ + private Integer[][] tokens; + /** sequence of separators, as parsed from format */ + private Integer[][] separators; + /** grouping separator */ + private int groupingSeparator; + /** grouping size */ + private int groupingSize; + /** letter value */ + private int letterValue; + /** letter value system */ + private String features; + /** language */ + private String language; + /** country */ + private String country; + + /** + * Construct parameterized number converter. 
+ * @param format format for the page number (may be null or empty, which is treated as null) + * @param groupingSeparator grouping separator (if zero, then no grouping separator applies) + * @param groupingSize grouping size (if zero or negative, then no grouping size applies) + * @param letterValue letter value (must be one of the above letter value enumeration values) + * @param features features (feature sub-parameters) + * @param language (may be null or empty, which is treated as null) + * @param country (may be null or empty, which is treated as null) + * @throws IllegalArgumentException if format is not a valid UTF-16 string (e.g., has unpaired surrogate) + */ + public NumberConverter(String format, int groupingSeparator, int groupingSize, int letterValue, String features, String language, String country) + throws IllegalArgumentException { + this.groupingSeparator = groupingSeparator; + this.groupingSize = groupingSize; + this.letterValue = letterValue; + this.features = features; + this.language = (language != null) ? language.toLowerCase() : null; + this.country = (country != null) ? country.toLowerCase() : null; + parseFormatTokens(format); + } + + /** + * Convert a number to string according to conversion parameters. + * @param number number to conver + * @return string representing converted number + */ + public String convert(long number) { + List numbers = new ArrayList(); + numbers.add(number); + return convert(numbers); + } + + /** + * Convert list of numbers to string according to conversion parameters. 
+ * @param numbers list of numbers to convert + * @return string representing converted list of numbers + */ + public String convert(List numbers) { + List scalars = new ArrayList(); + if (prefix != null) { + appendScalars(scalars, prefix); + } + convertNumbers(scalars, numbers); + if (suffix != null) { + appendScalars(scalars, suffix); + } + return scalarsToString(scalars); + } + + private void parseFormatTokens(String format) throws IllegalArgumentException { + List tokens = new ArrayList(); + List separators = new ArrayList(); + if ((format == null) || (format.length() == 0)) { + format = "1"; + } + int tokenType = TOKEN_NONE; + List token = new ArrayList(); + Integer[] ca = UTF32.toUTF32(format, 0, true); + for (Integer c : ca) { + int tokenTypeNew = isAlphaNumeric(c) ? TOKEN_ALPHANUMERIC : TOKEN_NONALPHANUMERIC; + if (tokenTypeNew != tokenType) { + if (token.size() > 0) { + if (tokenType == TOKEN_ALPHANUMERIC) { + tokens.add(token.toArray(new Integer[token.size()])); + } else { + separators.add(token.toArray(new Integer[token.size()])); + } + token.clear(); + } + tokenType = tokenTypeNew; + } + token.add(c); + } + if (token.size() > 0) { + if (tokenType == TOKEN_ALPHANUMERIC) { + tokens.add(token.toArray(new Integer [ token.size() ])); + } else { + separators.add(token.toArray(new Integer [ token.size() ])); + } + } + if (!separators.isEmpty()) { + this.prefix = separators.remove(0); + } + if (!separators.isEmpty()) { + this.suffix = separators.remove(separators.size() - 1); + } + this.separators = separators.toArray(new Integer [ separators.size() ] []); + this.tokens = tokens.toArray(new Integer [ tokens.size() ] []); + } + + private static boolean isAlphaNumeric(int c) { + switch (Character.getType(c)) { + case Character.DECIMAL_DIGIT_NUMBER: // Nd + case Character.LETTER_NUMBER: // Nl + case Character.OTHER_NUMBER: // No + case Character.UPPERCASE_LETTER: // Lu + case Character.LOWERCASE_LETTER: // Ll + case Character.TITLECASE_LETTER: // Lt + case 
Character.MODIFIER_LETTER: // Lm + case Character.OTHER_LETTER: // Lo + return true; + default: + return false; + } + } + + private void convertNumbers(List scalars, List numbers) { + Integer[] tknLast = DEFAULT_TOKEN; + int tknIndex = 0; + int tknCount = tokens.length; + int sepIndex = 0; + int sepCount = separators.length; + int numIndex = 0; + for (Long number : numbers) { + Integer[] sep = null; + Integer[] tkn; + if (tknIndex < tknCount) { + if (numIndex > 0) { + if (sepIndex < sepCount) { + sep = separators [ sepIndex++ ]; + } else { + sep = DEFAULT_SEPARATOR; + } + } + tkn = tokens [ tknIndex++ ]; + } else { + tkn = tknLast; + } + appendScalars(scalars, convertNumber(number, sep, tkn)); + tknLast = tkn; + numIndex++; + } + } + + private Integer[] convertNumber(long number, Integer[] separator, Integer[] token) { + List sl = new ArrayList(); + if (separator != null) { + appendScalars(sl, separator); + } + if (token != null) { + appendScalars(sl, formatNumber(number, token)); + } + return sl.toArray(new Integer [ sl.size() ]); + } + + private Integer[] formatNumber(long number, Integer[] token) { + Integer[] fn = null; + assert token.length > 0; + if (number < 0) { + throw new IllegalArgumentException("number must be non-negative"); + } else if (token.length == 1) { + int s = token[0]; + switch (s) { + case (int) '1': + fn = formatNumberAsDecimal(number, (int) '1', 1); + break; + case (int) 'W': + case (int) 'w': + fn = formatNumberAsWord(number, (s == (int) 'W') ? 
Character.UPPERCASE_LETTER : Character.LOWERCASE_LETTER); + break; + case (int) 'A': // handled as numeric sequence + case (int) 'a': // handled as numeric sequence + case (int) 'I': // handled as numeric special + case (int) 'i': // handled as numeric special + case 20108: // handled as numeric special - Japanese Numerals, first is 二 = 20108 decimal; + default: + if (isStartOfDecimalSequence(s)) { + fn = formatNumberAsDecimal(number, s, 1); + } else if (isStartOfAlphabeticSequence(s)) { + fn = formatNumberAsSequence(number, s, getSequenceBase(s), null); + } else if (isStartOfNumericSpecial(s)) { + fn = formatNumberAsSpecial(number, s); + } else { + fn = null; + } + break; + } + } else if ((token.length == 2) && (token[0] == (int) 'W') && (token[1] == (int) 'w')) { + fn = formatNumberAsWord(number, Character.TITLECASE_LETTER); + } else if (isPaddedOne(token)) { + int s = token[token.length - 1]; + fn = formatNumberAsDecimal(number, s, token.length); + } else { + throw new IllegalArgumentException("invalid format token: \"" + UTF32.fromUTF32(token) + "\""); + } + if (fn == null) { + fn = formatNumber(number, DEFAULT_TOKEN); + } + assert fn != null; + return fn; + } + + /** + * Format NUMBER as decimal using characters denoting digits that start at ONE, + * adding one or more (zero) padding characters as needed to fill out field WIDTH. 
+ * @param number to be formatted + * @param one unicode scalar value denoting numeric value 1 + * @param width non-negative integer denoting field width of number, possible including padding + * @return formatted number as array of unicode scalars + */ + private Integer[] formatNumberAsDecimal(long number, int one, int width) { + assert Character.getNumericValue(one) == 1; + assert Character.getNumericValue(one - 1) == 0; + assert Character.getNumericValue(one + 8) == 9; + List sl = new ArrayList(); + int zero = one - 1; + while (number > 0) { + long digit = number % 10; + sl.add(0, zero + (int) digit); + number = number / 10; + } + while (width > sl.size()) { + sl.add(0, zero); + } + if ((groupingSize != 0) && (groupingSeparator != 0)) { + sl = performGrouping(sl, groupingSize, groupingSeparator); + } + return sl.toArray(new Integer [ sl.size() ]); + } + + private static List performGrouping(List sl, int groupingSize, int groupingSeparator) { + assert groupingSize > 0; + assert groupingSeparator != 0; + if (sl.size() > groupingSize) { + List gl = new ArrayList(); + for (int i = 0, n = sl.size(), g = 0; i < n; i++) { + int k = n - i - 1; + if (g == groupingSize) { + gl.add(0, groupingSeparator); + g = 1; + } else { + g++; + } + gl.add(0, sl.get(k)); + } + return gl; + } else { + return sl; + } + } + + + /** + * Format NUMBER as using sequence of characters that start at ONE, and + * having BASE radix. 
+ * @param number to be formatted + * @param one unicode scalar value denoting start of sequence (numeric value 1) + * @param base number of elements in sequence + * @param map if non-null, then maps sequences indices to unicode scalars + * @return formatted number as array of unicode scalars + */ + private Integer[] formatNumberAsSequence(long number, int one, int base, int[] map) { + assert base > 1; + assert (map == null) || (map.length >= base); + List sl = new ArrayList(); + if (number == 0) { + return null; + } else { + long n = number; + while (n > 0) { + int d = (int) ((n - 1) % (long) base); + int s = (map != null) ? map [ d ] : (one + d); + sl.add(0, s); + n = (n - 1) / base; + } + return sl.toArray(new Integer [ sl.size() ]); + } + } + + /** + * Format NUMBER as using special system that starts at ONE. + * @param number to be formatted + * @param one unicode scalar value denoting start of system (numeric value 1) + * @return formatted number as array of unicode scalars + */ + private Integer[] formatNumberAsSpecial(long number, int one) { + SpecialNumberFormatter f = getSpecialFormatter(one, letterValue, features, language, country); + if (f != null) { + return f.format(number, one, letterValue, features, language, country); + } else { + return null; + } + } + + /** + * Format NUMBER as word according to TYPE, which must be either + * Character.UPPERCASE_LETTER, Character.LOWERCASE_LETTER, or + * Character.TITLECASE_LETTER. Makes use of this.language to + * determine language of word. 
+ * @param number to be formatted + * @param caseType unicode character type for case conversion + * @return formatted number as array of unicode scalars + */ + private Integer[] formatNumberAsWord(long number, int caseType) { + SpecialNumberFormatter f = null; + if (isLanguage("eng")) { + f = new EnglishNumberAsWordFormatter(caseType); + } else if (isLanguage("spa")) { + f = new SpanishNumberAsWordFormatter(caseType); + } else if (isLanguage("fra")) { + f = new FrenchNumberAsWordFormatter(caseType); + } else { + f = new EnglishNumberAsWordFormatter(caseType); + } + return f.format(number, 0, letterValue, features, language, country); + } + + private boolean isLanguage(String iso3Code) { + if (language == null) { + return false; + } else if (language.equals(iso3Code)) { + return true; + } else { + return isSameLanguage(iso3Code, language); + } + } + + private static String[][] equivalentLanguages = { + { "eng", "en" }, + { "fra", "fre", "fr" }, + { "spa", "es" }, + }; + + private static boolean isSameLanguage(String i3c, String lc) { + for (String[] el : equivalentLanguages) { + assert el.length >= 2; + if (el[0].equals(i3c)) { + for (String anEl : el) { + if (anEl.equals(lc)) { + return true; + } + } + return false; + } + } + return false; + } + + private static boolean hasFeature(String features, String feature) { + if (features != null) { + assert feature != null; + assert feature.length() != 0; + String[] fa = features.split(","); + for (String f : fa) { + String[] fp = f.split("="); + assert fp.length > 0; + String fn = fp[0]; + String fv = (fp.length > 1) ? 
fp[1] : ""; + if (fn.equals(feature)) { + return true; + } + } + } + return false; + } + + /* not yet used + private static String getFeatureValue ( String features, String feature ) { + if ( features != null ) { + assert feature != null; + assert feature.length() != 0; + String[] fa = features.split(","); + for ( String f : fa ) { + String[] fp = f.split("="); + assert fp.length > 0; + String fn = fp[0]; + String fv = ( fp.length > 1 ) ? fp[1] : ""; + if ( fn.equals ( feature ) ) { + return fv; + } + } + } + return ""; + } + */ + + private static void appendScalars(List scalars, Integer[] sa) { + Collections.addAll(scalars, sa); + } + + private static String scalarsToString(List scalars) { + Integer[] sa = scalars.toArray(new Integer [ scalars.size() ]); + return UTF32.fromUTF32(sa); + } + + private static boolean isPaddedOne(Integer[] token) { + if (getDecimalValue(token [ token.length - 1 ]) != 1) { + return false; + } else { + for (int i = 0, n = token.length - 1; i < n; i++) { + if (getDecimalValue(token [ i ]) != 0) { + return false; + } + } + return true; + } + } + + private static int getDecimalValue(Integer scalar) { + int s = scalar; + if (Character.getType(s) == Character.DECIMAL_DIGIT_NUMBER) { + return Character.getNumericValue(s); + } else { + return -1; + } + } + + private static boolean isStartOfDecimalSequence(int s) { + return (Character.getNumericValue(s) == 1) + && (Character.getNumericValue(s - 1) == 0) + && (Character.getNumericValue(s + 8) == 9); + } + + private static int[][] supportedAlphabeticSequences = { + { 'A', 26 }, // A...Z + { 'a', 26 }, // a...z + }; + + private static boolean isStartOfAlphabeticSequence(int s) { + for (int[] ss : supportedAlphabeticSequences) { + assert ss.length >= 2; + if (ss[0] == s) { + return true; + } + } + return false; + } + + private static int getSequenceBase(int s) { + for (int[] ss : supportedAlphabeticSequences) { + assert ss.length >= 2; + if (ss[0] == s) { + return ss[1]; + } + } + return 0; + } + + 
private static int[][] supportedSpecials = { + { 'I' }, // latin - uppercase roman numerals + { 'i' }, // latin - lowercase roman numerals + { '\u0391' }, // greek - uppercase isopsephy numerals + { '\u03B1' }, // greek - lowercase isopsephy numerals + { '\u05D0' }, // hebrew - gematria numerals + { '\u0623' }, // arabic - abjadi numerals + { '\u0627' }, // arabic - either abjadi or hijai alphabetic sequence + { '\u0E01' }, // thai - default alphabetic sequence + { '\u3042' }, // kana - hiragana (gojuon) - default alphabetic sequence + { '\u3044' }, // kana - hiragana (iroha) + { '\u30A2' }, // kana - katakana (gojuon) - default alphabetic sequence + { '\u30A4' }, // kana - katakana (iroha) + { '\u4E8C' }, // Japanese numbers + }; + + private static boolean isStartOfNumericSpecial(int s) { + for (int[] ss : supportedSpecials) { + assert ss.length >= 1; + if (ss[0] == s) { + return true; + } + } + return false; + } + + private SpecialNumberFormatter getSpecialFormatter(int one, int letterValue, String features, String language, String country) { + if (one == (int) 'I') { + return new RomanNumeralsFormatter(); + } else if (one == (int) 'i') { + return new RomanNumeralsFormatter(); + } else if (one == (int) '\u0391') { + return new IsopsephryNumeralsFormatter(); + } else if (one == (int) '\u03B1') { + return new IsopsephryNumeralsFormatter(); + } else if (one == (int) '\u05D0') { + return new GematriaNumeralsFormatter(); + } else if (one == (int) '\u0623') { + return new ArabicNumeralsFormatter(); + } else if (one == (int) '\u0627') { + return new ArabicNumeralsFormatter(); + } else if (one == (int) '\u0E01') { + return new ThaiNumeralsFormatter(); + } else if (one == (int) '\u3042') { + return new KanaNumeralsFormatter(); + } else if (one == (int) '\u3044') { + return new KanaNumeralsFormatter(); + } else if (one == (int) '\u30A2') { + return new KanaNumeralsFormatter(); + } else if (one == (int) '\u30A4') { + return new KanaNumeralsFormatter(); + } else if (one
== (int) '\u4E8C') { + return new JapaneseNumeralsFormatter(); + } else { + return null; + } + } + + private static Integer[] toUpperCase(Integer[] sa) { + assert sa != null; + for (int i = 0, n = sa.length; i < n; i++) { + Integer s = sa [ i ]; + sa [ i ] = Character.toUpperCase(s); + } + return sa; + } + + private static Integer[] toLowerCase(Integer[] sa) { + assert sa != null; + for (int i = 0, n = sa.length; i < n; i++) { + Integer s = sa [ i ]; + sa [ i ] = Character.toLowerCase(s); + } + return sa; + } + + /* not yet used + private static Integer[] toTitleCase ( Integer[] sa ) { + assert sa != null; + if ( sa.length > 0 ) { + sa [ 0 ] = Character.toTitleCase ( sa [ 0 ] ); + } + return sa; + } + */ + + private static List convertWordCase(List words, int caseType) { + List wl = new ArrayList(); + for (String w : words) { + wl.add(convertWordCase(w, caseType)); + } + return wl; + } + + private static String convertWordCase(String word, int caseType) { + if (caseType == Character.UPPERCASE_LETTER) { + return word.toUpperCase(); + } else if (caseType == Character.LOWERCASE_LETTER) { + return word.toLowerCase(); + } else if (caseType == Character.TITLECASE_LETTER) { + StringBuffer sb = new StringBuffer(); + for (int i = 0, n = word.length(); i < n; i++) { + String s = word.substring(i, i + 1); + if (i == 0) { + sb.append(s.toUpperCase()); + } else { + sb.append(s.toLowerCase()); + } + } + return sb.toString(); + } else { + return word; + } + } + + private static String joinWords(List words, String separator) { + StringBuffer sb = new StringBuffer(); + for (String w : words) { + if (sb.length() > 0) { + sb.append(separator); + } + sb.append(w); + } + return sb.toString(); + } + + /** + * Special number formatter. + */ + interface SpecialNumberFormatter { + /** + * Format number with special numeral system. 
+ * @param number to be formatted + * @param one unicode scalar value denoting numeric value 1 + * @param letterValue letter value (must be one of the above letter value enumeration values) + * @param features features (feature sub-parameters) + * @param language denotes applicable language + * @param country denotes applicable country + * @return formatted number as array of unicode scalars + */ + Integer[] format(long number, int one, int letterValue, String features, String language, String country); + } + + /** + * English Word Numerals + */ + private static String[] englishWordOnes = { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" }; + private static String[] englishWordTeens = { "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen" }; + private static String[] englishWordTens = { "", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety" }; + private static String[] englishWordOthers = { "hundred", "thousand", "million", "billion" }; + private static String[] englishWordOnesOrd = { "none", "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth" }; + private static String[] englishWordTeensOrd = { "tenth", "eleventh", "twelfth", "thirteenth", "fourteenth", "fifteenth", "sixteenth", "seventeenth", "eighteenth", "nineteenth" }; + private static String[] englishWordTensOrd = { "", "tenth", "twentieth", "thirtieth", "fortieth", "fiftieth", "sixtieth", "seventieth", "eightieth", "ninetieth" }; + private static String[] englishWordOthersOrd = { "hundredth", "thousandth", "millionth", "billionth" }; + private static class EnglishNumberAsWordFormatter implements SpecialNumberFormatter { + private int caseType = Character.UPPERCASE_LETTER; + EnglishNumberAsWordFormatter(int caseType) { + this.caseType = caseType; + } + public Integer[] format(long number, int one, int letterValue, String features, String language, String
country) { + List wl = new ArrayList(); + if (number >= 1000000000000L) { + return null; + } else { + boolean ordinal = hasFeature(features, "ordinal"); + if (number == 0) { + wl.add(englishWordOnes [ 0 ]); + } else if (ordinal && (number < 10)) { + wl.add(englishWordOnesOrd [ (int) number ]); + } else { + int ones = (int) (number % 1000); + int thousands = (int) ((number / 1000) % 1000); + int millions = (int) ((number / 1000000) % 1000); + int billions = (int) ((number / 1000000000) % 1000); + if (billions > 0) { + wl = formatOnesInThousand(wl, billions); + if (ordinal && ((number % 1000000000) == 0)) { + wl.add(englishWordOthersOrd[3]); + } else { + wl.add(englishWordOthers[3]); + } + } + if (millions > 0) { + wl = formatOnesInThousand(wl, millions); + if (ordinal && ((number % 1000000) == 0)) { + wl.add(englishWordOthersOrd[2]); + } else { + wl.add(englishWordOthers[2]); + } + } + if (thousands > 0) { + wl = formatOnesInThousand(wl, thousands); + if (ordinal && ((number % 1000) == 0)) { + wl.add(englishWordOthersOrd[1]); + } else { + wl.add(englishWordOthers[1]); + } + } + if (ones > 0) { + wl = formatOnesInThousand(wl, ones, ordinal); + } + } + wl = convertWordCase(wl, caseType); + return UTF32.toUTF32(joinWords(wl, " "), 0, true); + } + } + private List formatOnesInThousand(List wl, int number) { + return formatOnesInThousand(wl, number, false); + } + private List formatOnesInThousand(List wl, int number, boolean ordinal) { + assert number < 1000; + int ones = number % 10; + int tens = (number / 10) % 10; + int hundreds = (number / 100) % 10; + if (hundreds > 0) { + wl.add(englishWordOnes [ hundreds ]); + if (ordinal && ((number % 100) == 0)) { + wl.add(englishWordOthersOrd[0]); + } else { + wl.add(englishWordOthers[0]); + } + } + if (tens > 0) { + if (tens == 1) { + if (ordinal) { + wl.add(englishWordTeensOrd [ ones ]); + } else { + wl.add(englishWordTeens [ ones ]); + } + } else { + if (ordinal && (ones == 0)) { + wl.add(englishWordTensOrd [ tens ]); + } 
else { + wl.add(englishWordTens [ tens ]); + } + if (ones > 0) { + if (ordinal) { + wl.add(englishWordOnesOrd [ ones ]); + } else { + wl.add(englishWordOnes [ ones ]); + } + } + } + } else if (ones > 0) { + if (ordinal) { + wl.add(englishWordOnesOrd [ ones ]); + } else { + wl.add(englishWordOnes [ ones ]); + } + } + return wl; + } + } + + /** + * French Word Numerals + */ + private static String[] frenchWordOnes = { "z\u00e9ro", "un", "deux", "trois", "quatre", "cinq", "six", "sept", "huit", "neuf" }; + private static String[] frenchWordTeens = { "dix", "onze", "douze", "treize", "quatorze", "quinze", "seize", "dix-sept", "dix-huit", "dix-neuf" }; + private static String[] frenchWordTens = { "", "dix", "vingt", "trente", "quarante", "cinquante", "soixante", "soixante-dix", "quatre-vingt", "quatre-vingt-dix" }; + private static String[] frenchWordOthers = { "cent", "cents", "mille", "million", "millions", "milliard", "milliards" }; + private static String[] frenchWordOnesOrdMale = { "premier", "deuxi\u00e8me", "troisi\u00e8me", "quatri\u00e8me", "cinqui\u00e8me", "sixi\u00e8me", "septi\u00e8me", "huiti\u00e8me", "neuvi\u00e8me", "dixi\u00e8me" }; + private static String[] frenchWordOnesOrdFemale = { "premi\u00e8re", "deuxi\u00e8me", "troisi\u00e8me", "quatri\u00e8me", "cinqui\u00e8me", "sixi\u00e8me", "septi\u00e8me", "huiti\u00e8me", "neuvi\u00e8me", "dixi\u00e8me" }; + private static class FrenchNumberAsWordFormatter implements SpecialNumberFormatter { + private int caseType = Character.UPPERCASE_LETTER; + FrenchNumberAsWordFormatter(int caseType) { + this.caseType = caseType; + } + public Integer[] format(long number, int one, int letterValue, String features, String language, String country) { + List wl = new ArrayList(); + if (number >= 1000000000000L) { + return null; + } else { + boolean ordinal = hasFeature(features, "ordinal"); + if (number == 0) { + wl.add(frenchWordOnes [ 0 ]); + } else if (ordinal && (number <= 10)) { + boolean female = 
hasFeature(features, "female"); + if (female) { + wl.add(frenchWordOnesOrdFemale [ (int) number - 1 ]); + } else { + wl.add(frenchWordOnesOrdMale [ (int) number - 1 ]); + } + } else { + int ones = (int) (number % 1000); + int thousands = (int) ((number / 1000) % 1000); + int millions = (int) ((number / 1000000) % 1000); + int billions = (int) ((number / 1000000000) % 1000); + if (billions > 0) { + wl = formatOnesInThousand(wl, billions); + if (billions == 1) { + wl.add(frenchWordOthers[5]); + } else { + wl.add(frenchWordOthers[6]); + } + } + if (millions > 0) { + wl = formatOnesInThousand(wl, millions); + if (millions == 1) { + wl.add(frenchWordOthers[3]); + } else { + wl.add(frenchWordOthers[4]); + } + } + if (thousands > 0) { + if (thousands > 1) { + wl = formatOnesInThousand(wl, thousands); + } + wl.add(frenchWordOthers[2]); + } + if (ones > 0) { + wl = formatOnesInThousand(wl, ones); + } + } + wl = convertWordCase(wl, caseType); + return UTF32.toUTF32(joinWords(wl, " "), 0, true); + } + } + private List formatOnesInThousand(List wl, int number) { + assert number < 1000; + int ones = number % 10; + int tens = (number / 10) % 10; + int hundreds = (number / 100) % 10; + if (hundreds > 0) { + if (hundreds > 1) { + wl.add(frenchWordOnes [ hundreds ]); + } + if ((hundreds > 1) && (tens == 0) && (ones == 0)) { + wl.add(frenchWordOthers[1]); + } else { + wl.add(frenchWordOthers[0]); + } + } + if (tens > 0) { + if (tens == 1) { + wl.add(frenchWordTeens [ ones ]); + } else if (tens < 7) { + if (ones == 1) { + wl.add(frenchWordTens [ tens ]); + wl.add("et"); + wl.add(frenchWordOnes [ ones ]); + } else { + StringBuffer sb = new StringBuffer(); + sb.append(frenchWordTens [ tens ]); + if (ones > 0) { + sb.append('-'); + sb.append(frenchWordOnes [ ones ]); + } + wl.add(sb.toString()); + } + } else if (tens == 7) { + if (ones == 1) { + wl.add(frenchWordTens [ 6 ]); + wl.add("et"); + wl.add(frenchWordTeens [ ones ]); + } else { + StringBuffer sb = new StringBuffer(); +
sb.append(frenchWordTens [ 6 ]); + sb.append('-'); + sb.append(frenchWordTeens [ ones ]); + wl.add(sb.toString()); + } + } else if (tens == 8) { + StringBuffer sb = new StringBuffer(); + sb.append(frenchWordTens [ tens ]); + if (ones > 0) { + sb.append('-'); + sb.append(frenchWordOnes [ ones ]); + } else { + sb.append('s'); + } + wl.add(sb.toString()); + } else if (tens == 9) { + StringBuffer sb = new StringBuffer(); + sb.append(frenchWordTens [ 8 ]); + sb.append('-'); + sb.append(frenchWordTeens [ ones ]); + wl.add(sb.toString()); + } + } else if (ones > 0) { + wl.add(frenchWordOnes [ ones ]); + } + return wl; + } + } + + /** + * Spanish Word Numerals + */ + private static String[] spanishWordOnes = { "cero", "uno", "dos", "tres", "cuatro", "cinco", "seis", "siete", "ocho", "nueve" }; + private static String[] spanishWordTeens = { "diez", "once", "doce", "trece", "catorce", "quince", "diecis\u00e9is", "diecisiete", "dieciocho", "diecinueve" }; + private static String[] spanishWordTweens = { "veinte", "veintiuno", "veintid\u00f3s", "veintitr\u00e9s", "veinticuatro", "veinticinco", "veintis\u00e9is", "veintisiete", "veintiocho", "veintinueve" }; + private static String[] spanishWordTens = { "", "diez", "veinte", "treinta", "cuarenta", "cincuenta", "sesenta", "setenta", "ochenta", "noventa" }; + private static String[] spanishWordHundreds = { "", "ciento", "doscientos", "trescientos", "cuatrocientos", "quinientos", "seiscientos", "setecientos", "ochocientos", "novecientos" }; + private static String[] spanishWordOthers = { "un", "cien", "mil", "mill\u00f3n", "millones" }; + private static String[] spanishWordOnesOrdMale = { "ninguno", "primero", "segundo", "tercero", "cuarto", "quinto", "sexto", "s\u00e9ptimo", "octavo", "noveno", "d\u00e9cimo" }; + private static String[] spanishWordOnesOrdFemale = { "ninguna", "primera", "segunda", "tercera", "cuarta", "quinta", "sexta", "s\u00e9ptima", "octava", "novena", "d\u00e9cima" }; + private static class
SpanishNumberAsWordFormatter implements SpecialNumberFormatter { + private int caseType = Character.UPPERCASE_LETTER; + SpanishNumberAsWordFormatter(int caseType) { + this.caseType = caseType; + } + public Integer[] format(long number, int one, int letterValue, String features, String language, String country) { + List wl = new ArrayList(); + if (number >= 1000000000000L) { + return null; + } else { + boolean ordinal = hasFeature(features, "ordinal"); + if (number == 0) { + wl.add(spanishWordOnes [ 0 ]); + } else if (ordinal && (number <= 10)) { + boolean female = hasFeature(features, "female"); + if (female) { + wl.add(spanishWordOnesOrdFemale [ (int) number ]); + } else { + wl.add(spanishWordOnesOrdMale [ (int) number ]); + } + } else { + int ones = (int) (number % 1000); + int thousands = (int) ((number / 1000) % 1000); + int millions = (int) ((number / 1000000) % 1000); + int billions = (int) ((number / 1000000000) % 1000); + if (billions > 0) { + if (billions > 1) { + wl = formatOnesInThousand(wl, billions); + } + wl.add(spanishWordOthers[2]); + wl.add(spanishWordOthers[4]); + } + if (millions > 0) { + if (millions == 1) { + wl.add(spanishWordOthers[0]); + } else { + wl = formatOnesInThousand(wl, millions); + } + if (millions > 1) { + wl.add(spanishWordOthers[4]); + } else { + wl.add(spanishWordOthers[3]); + } + } + if (thousands > 0) { + if (thousands > 1) { + wl = formatOnesInThousand(wl, thousands); + } + wl.add(spanishWordOthers[2]); + } + if (ones > 0) { + wl = formatOnesInThousand(wl, ones); + } + } + wl = convertWordCase(wl, caseType); + return UTF32.toUTF32(joinWords(wl, " "), 0, true); + } + } + private List formatOnesInThousand(List wl, int number) { + assert number < 1000; + int ones = number % 10; + int tens = (number / 10) % 10; + int hundreds = (number / 100) % 10; + if (hundreds > 0) { + if ((hundreds == 1) && (tens == 0) && (ones == 0)) { + wl.add(spanishWordOthers[1]); + } else { + wl.add(spanishWordHundreds [ hundreds ]); + } + } + if (tens > 
0) { + if (tens == 1) { + wl.add(spanishWordTeens [ ones ]); + } else if (tens == 2) { + wl.add(spanishWordTweens [ ones ]); + } else { + wl.add(spanishWordTens [ tens ]); + if (ones > 0) { + wl.add("y"); + wl.add(spanishWordOnes [ ones ]); + } + } + } else if (ones > 0) { + wl.add(spanishWordOnes [ ones ]); + } + return wl; + } + } + + /** + * Roman (Latin) Numerals + */ + private static int[] romanMapping = { + 100000, + 90000, + 50000, + 40000, + 10000, + 9000, + 5000, + 4000, + 1000, + 900, + 500, + 400, + 100, + 90, + 50, + 40, + 10, + 9, + 8, + 7, + 6, + 5, + 4, + 3, + 2, + 1 + }; + private static String[] romanStandardForms = { + null, + null, + null, + null, + null, + null, + null, + null, + "m", + "cm", + "d", + "cd", + "c", + "xc", + "l", + "xl", + "x", + "ix", + null, + null, + null, + "v", + "iv", + null, + null, + "i" + }; + private static String[] romanLargeForms = { + "\u2188", + "\u2182\u2188", + "\u2187", + "\u2182\u2187", + "\u2182", + "\u2180\u2182", + "\u2181", + "\u2180\u2181", + "m", + "cm", + "d", + "cd", + "c", + "xc", + "l", + "xl", + "x", + "ix", + null, + null, + null, + "v", + "iv", + null, + null, + "i" + }; + private static String[] romanNumberForms = { + "\u2188", + "\u2182\u2188", + "\u2187", + "\u2182\u2187", + "\u2182", + "\u2180\u2182", + "\u2181", + "\u2180\u2181", + "\u216F", + "\u216D\u216F", + "\u216E", + "\u216D\u216E", + "\u216D", + "\u2169\u216D", + "\u216C", + "\u2169\u216C", + "\u2169", + "\u2168", + "\u2167", + "\u2166", + "\u2165", + "\u2164", + "\u2163", + "\u2162", + "\u2161", + "\u2160" + }; + private static class RomanNumeralsFormatter implements SpecialNumberFormatter { + public Integer[] format(long number, int one, int letterValue, String features, String language, String country) { + List sl = new ArrayList(); + if (number == 0) { + return null; + } else { + String[] forms; + int maxNumber; + if (hasFeature(features, "unicode-number-forms")) { + forms = romanNumberForms; + maxNumber = 199999; + } else if 
(hasFeature(features, "large")) { + forms = romanLargeForms; + maxNumber = 199999; + } else { + forms = romanStandardForms; + maxNumber = 4999; + } + if (number > maxNumber) { + return null; + } else { + while (number > 0) { + for (int i = 0, n = romanMapping.length; i < n; i++) { + int d = romanMapping [ i ]; + if ((number >= d) && (forms [ i ] != null)) { + appendScalars(sl, UTF32.toUTF32(forms [ i ], 0, true)); + number = number - d; + break; + } + } + } + if (one == (int) 'I') { + return toUpperCase(sl.toArray(new Integer [ sl.size() ])); + } else if (one == (int) 'i') { + return toLowerCase(sl.toArray(new Integer [ sl.size() ])); + } else { + return null; + } + } + } + } + } + + /** + * Isopsephry (Greek) Numerals + */ + private static class IsopsephryNumeralsFormatter implements SpecialNumberFormatter { + public Integer[] format(long number, int one, int letterValue, String features, String language, String country) { + return null; + } + } + + /** + * Gematria (Hebrew) Numerals + */ + private static int[] hebrewGematriaAlphabeticMap = { + // ones + 0x05D0, // ALEF + 0x05D1, // BET + 0x05D2, // GIMEL + 0x05D3, // DALET + 0x05D4, // HE + 0x05D5, // VAV + 0x05D6, // ZAYIN + 0x05D7, // HET + 0x05D8, // TET + // tens + 0x05D9, // YOD + 0x05DB, // KAF + 0x05DC, // LAMED + 0x05DE, // MEM + 0x05E0, // NUN + 0x05E1, // SAMEKH + 0x05E2, // AYIN + 0x05E4, // PE + 0x05E6, // TSADHI + // hundreds + 0x05E7, // QOF + 0x05E8, // RESH + 0x05E9, // SHIN + 0x05EA, // TAV + 0x05DA, // FINAL KAF + 0x05DD, // FINAL MEM + 0x05DF, // FINAL NUN + 0x05E3, // FINAL PE + 0x05E5, // FINAL TSADHI + }; + private class GematriaNumeralsFormatter implements SpecialNumberFormatter { + public Integer[] format(long number, int one, int letterValue, String features, String language, String country) { + if (one == 0x05D0) { + if (letterValue == LETTER_VALUE_ALPHABETIC) { + return formatNumberAsSequence(number, one, hebrewGematriaAlphabeticMap.length, hebrewGematriaAlphabeticMap); + } else if 
(letterValue == LETTER_VALUE_TRADITIONAL) { + if ((number == 0) || (number > 1999)) { + return null; + } else { + return formatAsGematriaNumber(number, features, language, country); + } + } else { + return null; + } + } else { + return null; + } + } + private Integer[] formatAsGematriaNumber(long number, String features, String language, String country) { + List sl = new ArrayList(); + assert hebrewGematriaAlphabeticMap.length == 27; + assert hebrewGematriaAlphabeticMap[0] == 0x05D0; // ALEF + assert hebrewGematriaAlphabeticMap[21] == 0x05EA; // TAV + assert number != 0; + assert number < 2000; + int[] map = hebrewGematriaAlphabeticMap; + int thousands = (int) ((number / 1000) % 10); + int hundreds = (int) ((number / 100) % 10); + int tens = (int) ((number / 10) % 10); + int ones = (int) ((number / 1) % 10); + if (thousands > 0) { + sl.add(map [ 0 + (thousands - 1) ]); + sl.add(0x05F3); + } + if (hundreds > 0) { + if (hundreds < 5) { + sl.add(map [ 18 + (hundreds - 1) ]); + } else if (hundreds < 9) { + sl.add(map [ 18 + (4 - 1) ]); + sl.add(0x05F4); + sl.add(map [ 18 + (hundreds - 5) ]); + } else if (hundreds == 9) { + sl.add(map [ 18 + (4 - 1) ]); + sl.add(map [ 18 + (4 - 1) ]); + sl.add(0x05F4); + sl.add(map [ 18 + (hundreds - 9) ]); + } + assert hundreds < 10; + } + if (number == 15) { + sl.add(map [ 9 - 1]); + sl.add(0x05F4); + sl.add(map [ 6 - 1]); + } else if (number == 16) { + sl.add(map [ 9 - 1 ]); + sl.add(0x05F4); + sl.add(map [ 7 - 1 ]); + } else { + if (tens > 0) { + assert tens < 10; + sl.add(map [ 9 + (tens - 1) ]); + } + if (ones > 0) { + assert ones < 10; + sl.add(map [ 0 + (ones - 1) ]); + } + } + return sl.toArray(new Integer [ sl.size() ]); + } + } + + /** + * Arabic Numerals + */ + private static int[] arabicAbjadiAlphabeticMap = { + // ones + 0x0623, // ALEF WITH HAMZA ABOVE + 0x0628, // BEH + 0x062C, // JEEM + 0x062F, // DAL + 0x0647, // HEH + 0x0648, // WAW + 0x0632, // ZAIN + 0x062D, // HAH + 0x0637, // TAH + // tens + 0x0649, // ALEF 
MAQSURA + 0x0643, // KAF + 0x0644, // LAM + 0x0645, // MEEM + 0x0646, // NOON + 0x0633, // SEEN + 0x0639, // AIN + 0x0641, // FEH + 0x0635, // SAD + // hundreds + 0x0642, // QAF + 0x0631, // REH + 0x0634, // SHEEN + 0x062A, // TEH + 0x062B, // THEH + 0x062E, // KHAH + 0x0630, // THAL + 0x0636, // DAD + 0x0638, // ZAH + // thousands + 0x063A, // GHAIN + }; + private static int[] arabicHijaiAlphabeticMap = { + 0x0623, // ALEF WITH HAMZA ABOVE + 0x0628, // BEH + 0x062A, // TEH + 0x062B, // THEH + 0x062C, // JEEM + 0x062D, // HAH + 0x062E, // KHAH + 0x062F, // DAL + 0x0630, // THAL + 0x0631, // REH + 0x0632, // ZAIN + 0x0633, // SEEN + 0x0634, // SHEEN + 0x0635, // SAD + 0x0636, // DAD + 0x0637, // TAH + 0x0638, // ZAH + 0x0639, // AIN + 0x063A, // GHAIN + 0x0641, // FEH + 0x0642, // QAF + 0x0643, // KAF + 0x0644, // LAM + 0x0645, // MEEM + 0x0646, // NOON + 0x0647, // HEH + 0x0648, // WAW + 0x0649, // ALEF MAQSURA + }; + private class ArabicNumeralsFormatter implements SpecialNumberFormatter { + public Integer[] format(long number, int one, int letterValue, String features, String language, String country) { + if (one == 0x0627) { + int[] map; + if (letterValue == LETTER_VALUE_TRADITIONAL) { + map = arabicAbjadiAlphabeticMap; + } else if (letterValue == LETTER_VALUE_ALPHABETIC) { + map = arabicHijaiAlphabeticMap; + } else { + map = arabicAbjadiAlphabeticMap; + } + return formatNumberAsSequence(number, one, map.length, map); + } else if (one == 0x0623) { + if ((number == 0) || (number > 1999)) { + return null; + } else { + return formatAsAbjadiNumber(number, features, language, country); + } + } else { + return null; + } + } + private Integer[] formatAsAbjadiNumber(long number, String features, String language, String country) { + List sl = new ArrayList(); + assert arabicAbjadiAlphabeticMap.length == 28; + assert arabicAbjadiAlphabeticMap[0] == 0x0623; // ALEF WITH HAMZA ABOVE + assert arabicAbjadiAlphabeticMap[27] == 0x063A; // GHAIN + assert number != 0; + assert 
number < 2000; + int[] map = arabicAbjadiAlphabeticMap; + int thousands = (int) ((number / 1000) % 10); + int hundreds = (int) ((number / 100) % 10); + int tens = (int) ((number / 10) % 10); + int ones = (int) ((number / 1) % 10); + if (thousands > 0) { + assert thousands < 2; + sl.add(map [ 27 + (thousands - 1) ]); + } + if (hundreds > 0) { + assert hundreds < 10; + sl.add(map [ 18 + (hundreds - 1) ]); + } + if (tens > 0) { + assert tens < 10; + sl.add(map [ 9 + (tens - 1) ]); + } + if (ones > 0) { + assert ones < 10; + sl.add(map [ 0 + (ones - 1) ]); + } + return sl.toArray(new Integer [ sl.size() ]); + } + } + + /** + * Kana (Japanese) Numerals + */ + private static int[] hiraganaGojuonAlphabeticMap = { + 0x3042, // A + 0x3044, // I + 0x3046, // U + 0x3048, // E + 0x304A, // O + 0x304B, // KA + 0x304D, // KI + 0x304F, // KU + 0x3051, // KE + 0x3053, // KO + 0x3055, // SA + 0x3057, // SI + 0x3059, // SU + 0x305B, // SE + 0x305D, // SO + 0x305F, // TA + 0x3061, // TI + 0x3064, // TU + 0x3066, // TE + 0x3068, // TO + 0x306A, // NA + 0x306B, // NI + 0x306C, // NU + 0x306D, // NE + 0x306E, // NO + 0x306F, // HA + 0x3072, // HI + 0x3075, // HU + 0x3078, // HE + 0x307B, // HO + 0x307E, // MA + 0x307F, // MI + 0x3080, // MU + 0x3081, // ME + 0x3082, // MO + 0x3084, // YA + 0x3086, // YU + 0x3088, // YO + 0x3089, // RA + 0x308A, // RI + 0x308B, // RU + 0x308C, // RE + 0x308D, // RO + 0x308F, // WA + 0x3090, // WI + 0x3091, // WE + 0x3092, // WO + 0x3093, // N + }; + private static int[] katakanaGojuonAlphabeticMap = { + 0x30A2, // A + 0x30A4, // I + 0x30A6, // U + 0x30A8, // E + 0x30AA, // O + 0x30AB, // KA + 0x30AD, // KI + 0x30AF, // KU + 0x30B1, // KE + 0x30B3, // KO + 0x30B5, // SA + 0x30B7, // SI + 0x30B9, // SU + 0x30BB, // SE + 0x30BD, // SO + 0x30BF, // TA + 0x30C1, // TI + 0x30C4, // TU + 0x30C6, // TE + 0x30C8, // TO + 0x30CA, // NA + 0x30CB, // NI + 0x30CC, // NU + 0x30CD, // NE + 0x30CE, // NO + 0x30CF, // HA + 0x30D2, // HI + 0x30D5, // HU + 0x30D8, // HE +
0x30DB, // HO + 0x30DE, // MA + 0x30DF, // MI + 0x30E0, // MU + 0x30E1, // ME + 0x30E2, // MO + 0x30E4, // YA + 0x30E6, // YU + 0x30E8, // YO + 0x30E9, // RA + 0x30EA, // RI + 0x30EB, // RU + 0x30EC, // RE + 0x30ED, // RO + 0x30EF, // WA + 0x30F0, // WI + 0x30F1, // WE + 0x30F2, // WO + 0x30F3, // N + }; + private class KanaNumeralsFormatter implements SpecialNumberFormatter { + public Integer[] format(long number, int one, int letterValue, String features, String language, String country) { + if ((one == 0x3042) && (letterValue == LETTER_VALUE_ALPHABETIC)) { + return formatNumberAsSequence(number, one, hiraganaGojuonAlphabeticMap.length, hiraganaGojuonAlphabeticMap); + } else if ((one == 0x30A2) && (letterValue == LETTER_VALUE_ALPHABETIC)) { + return formatNumberAsSequence(number, one, katakanaGojuonAlphabeticMap.length, katakanaGojuonAlphabeticMap); + } else { + return null; + } + } + } + + private class JapaneseNumeralsFormatter implements SpecialNumberFormatter { + public Integer[] format(long number, int one, int letterValue, String features, String language, String country) { + if (one == 0x4E8C) { + List items = new ArrayList<>(); + + String numStr = JapaneseToNumbers.numToWord(Long.toString(number)); // add + 10 for two characters testing + for (char ch: numStr.toCharArray()) { + items.add((int)ch); + } + + return items.toArray(new Integer [ items.size() ]); + } else { + return null; + } + } + } + + + /** + * Thai Numerals + */ + private static int[] thaiAlphabeticMap = { + 0x0E01, + 0x0E02, + 0x0E03, + 0x0E04, + 0x0E05, + 0x0E06, + 0x0E07, + 0x0E08, + 0x0E09, + 0x0E0A, + 0x0E0B, + 0x0E0C, + 0x0E0D, + 0x0E0E, + 0x0E0F, + 0x0E10, + 0x0E11, + 0x0E12, + 0x0E13, + 0x0E14, + 0x0E15, + 0x0E16, + 0x0E17, + 0x0E18, + 0x0E19, + 0x0E1A, + 0x0E1B, + 0x0E1C, + 0x0E1D, + 0x0E1E, + 0x0E1F, + 0x0E20, + 0x0E21, + 0x0E22, + 0x0E23, + // 0x0E24, // RU - not used in modern sequence + 0x0E25, + // 0x0E26, // LU - not used in modern sequence + 0x0E27, + 0x0E28, + 0x0E29, + 0x0E2A, +
0x0E2B, + 0x0E2C, + 0x0E2D, + 0x0E2E, + }; + private class ThaiNumeralsFormatter implements SpecialNumberFormatter { + public Integer[] format(long number, int one, int letterValue, String features, String language, String country) { + if ((one == 0x0E01) && (letterValue == LETTER_VALUE_ALPHABETIC)) { + return formatNumberAsSequence(number, one, thaiAlphabeticMap.length, thaiAlphabeticMap); + } else { + return null; + } + } + } + +} diff --git a/src/main/java/org/metanorma/fop/utils/JapaneseToNumbers.java b/src/main/java/org/metanorma/fop/utils/JapaneseToNumbers.java new file mode 100644 index 0000000..0059e4b --- /dev/null +++ b/src/main/java/org/metanorma/fop/utils/JapaneseToNumbers.java @@ -0,0 +1,156 @@ +package org.metanorma.fop.utils; + +// From https://github.com/joumorisu/SuuKotoba/blob/master/src/SuuKotoba.java + +import java.math.BigInteger; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * Class containing static utility functions that allow for the conversion of (Arabic) numerals to Japanese kanji form + * Note that {@link BigInteger} is used to model the number - as the function handles numbers (0, 9.999 * 10^15]. + * + * @author Joseph Morris + * @version 1.0 + */ + +public class JapaneseToNumbers { + + final private static String[] NUMERALS_KANJI = new String[] {"","一","二","三","四","五","六","七","八","九","十"}; + + private static Map placeValues; + static { + // LinkedHashMap used to preserve order + placeValues = new LinkedHashMap(); + placeValues.put(new BigInteger("1000000000000"), "兆"); + placeValues.put(new BigInteger("100000000"), "億"); + placeValues.put(new BigInteger("10000"), "万"); + placeValues.put(new BigInteger("1000"), "千"); + placeValues.put(new BigInteger("100"), "百"); + placeValues.put(new BigInteger("10"), "十"); + }; + + /** + * Constructor. 
+ * + * @param num String form of the numeral to be converted + */ + public static String numToWord(String num) { + return numToWord(num, false); + } + + /** + * Overloaded Constructor. + * + * @param num Integer form of the numeral to be converted + */ + public static String numToWord(Integer num) { + String numStr = num.toString(); + return numToWord(numStr, false); + } + + /** + * Overloaded Constructor. + * + * @param num BigInteger form of the numeral to be converted + */ + public static String numToWord(BigInteger num) { + String numStr = num.toString(); + return numToWord(numStr, false); + } + + /** + * Performs the conversion of a numeral (from String) to Japanese kanji form + * Keeping track of recursive calls is necessary to allow the function to properly + * determine the unit value (1-9999) of the place-value before moving to the next place-value. + * + * I.E. 486900000000 --> 4869 * 10^8 (oku) --> We can just find the written form of 4869 then add oku after it + * 4869 --> our recursive call --> 四千八百六十九 + * 10^8 --> 億 + * 四千八百六十九 * 億 === 四千八百六十九億 + * + * @param num the BigInteger number to be converted + * @param isRecursive whether or not this call is recursive + * @return numStr the final written representation in Japanese kanji + */ + public static String numToWord(String num, Boolean isRecursive) { + String numStr = ""; + + // Counter will be used to keep track of the remainder as each larger unit is subtracted + // E.G., 2486954371891 --> minus 2 chou (10^12) --> 486954371891 --> . . . + + BigInteger counter = new BigInteger(num); + + // Walk through the place values from largest to smallest + for (Map.Entry entry : placeValues.entrySet()) { + String pvKanji = entry.getValue(); + Object[] results = getUnitStr(entry.getKey(), pvKanji, counter); + + // There are some irregularities with the 10's, 100's, and 1000's place in terms of written representation + // Namely, "一十" and "一百" are invalid. 
In addition, "sen" is used with numbers 1000-1999, + // and issen with all higher numbers containing 1 in the 1000's place of the unit for that place-value. + // E.G., 10000000 --> 1000 * 10000 (man) --> 一千万 (issenman) + + String strVal = (String) results[0]; + + if (pvKanji.equals("十") || pvKanji.equals("百")) { + if (strVal.startsWith("一")) { + strVal = pvKanji; + } + } + + if (pvKanji.equals("千") && !isRecursive) { + if (strVal.startsWith("一千")) { + + // If numStr is empty at this point, it means that num <= 9999 + // and therefore 一 shouldn't be placed before 千 + if (numStr.isEmpty()) { + strVal = strVal.substring(1); + } + + } + } + + numStr += strVal; + counter = (BigInteger) results[1]; + } + + // Tack on the one's place value + numStr += NUMERALS_KANJI[counter.intValue()]; + + return numStr; + } + + /** + * This recursive function is the core of the library. It determines the number of units within a place-value. + * It uses recursive calls to break numbers down into chunks of 4 digits (the place-value system of Japanese + * changes in groups of 4 (units), not 3 like the US) in order to determine the written form for that place-value. + * The counter must be updated (subtracting the current place-value) to allow for smaller place values + * to be determined.
+ * + * @param placeValue The place-value that we are determining + * @param pvKanji The kanji representation of this place-value + * @param counter The current number that we are converting + * @return results Array with two elements: [0] = String result of conversion, [1] = updated counter + */ + private static Object[] getUnitStr(BigInteger placeValue, String pvKanji, BigInteger counter) { + String unitStr; + BigInteger numUnit = counter.divide(placeValue); + Object[] results; + + if (numUnit.intValue() > 9) { + // If numUnit > 10 then that means we can use recursion to get the written form for this unit + unitStr = numToWord(numUnit.toString(), true) + pvKanji; + } else { + // Otherwise, we can simply use the 1-9 kanji representations before the place-value + unitStr = NUMERALS_KANJI[numUnit.intValue()] + pvKanji; + } + + counter = counter.subtract(numUnit.multiply(placeValue)); + + // Don't return anything if there are no units of that place-value present + results = new Object[] {(numUnit.intValue() == 0) ? 
"" : unitStr, counter}; + + return results; + } +} diff --git a/src/test/java/org/metanorma/fop/mn2pdfTests.java b/src/test/java/org/metanorma/fop/mn2pdfTests.java index 8ee06b6..4155252 100644 --- a/src/test/java/org/metanorma/fop/mn2pdfTests.java +++ b/src/test/java/org/metanorma/fop/mn2pdfTests.java @@ -43,6 +43,7 @@ import org.junit.contrib.java.lang.system.SystemErrRule; import org.metanorma.Constants; import org.metanorma.fop.annotations.Annotation; +import org.metanorma.fop.utils.JapaneseToNumbers; import org.metanorma.utils.LoggerHelper; import org.w3c.dom.Node; @@ -459,6 +460,16 @@ public void checkAttachments() throws ParseException { } + @Test + public void checkJapaneseNumbering() throws ParseException { + System.out.println(name.getMethodName()); + String j1 = JapaneseToNumbers.numToWord(1); + String j11 = JapaneseToNumbers.numToWord(11); + String j23 = JapaneseToNumbers.numToWord(23); + assertTrue(j1.equals("一")); + assertTrue(j11.equals("十一")); + assertTrue(j23.equals("二十三")); + } @Test public void testDates() throws IOException {