diff --git a/hledger-lib/Hledger/Read/RulesReader.hs b/hledger-lib/Hledger/Read/RulesReader.hs
index cc34e947ea9..bc0727e1cfe 100644
--- a/hledger-lib/Hledger/Read/RulesReader.hs
+++ b/hledger-lib/Hledger/Read/RulesReader.hs
@@ -44,6 +44,7 @@ where
 --- ** imports
 import Prelude hiding (Applicative(..))
 import Control.Applicative (Applicative(..))
+import qualified Control.Exception as C
 import Control.Monad (unless, when, void)
 import Control.Monad.Except (ExceptT(..), liftEither, throwError)
 import qualified Control.Monad.Fail as Fail
@@ -65,6 +66,8 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.Text.Encoding as T
 import qualified Data.Text.IO as T
+import Data.Text.ICU.Error (ICUError)
+import qualified Data.Text.ICU.Convert as UCNV
 import Data.Time ( Day, TimeZone, UTCTime, LocalTime, ZonedTime(ZonedTime),
   defaultTimeLocale, getCurrentTimeZone, localDay, parseTimeM, utcToLocalTime, localTimeToUTC, zonedTimeToUTC)
 import Safe (atMay, headMay, lastMay, readMay)
@@ -132,6 +135,14 @@ parse iopts f _ = do
       Nothing -> return [maybe err (dbg4 "inferred source") $ dataFileFor f] -- shouldn't fail, f has .rules extension
         where err = error' $ "could not infer a data file for " <> f
     return $ dbg4 "data file" $ headMay fs
+  -- If the rules have an "encoding" directive, open an ICU converter for it.
+  -- An encoding ICU can't open becomes an ordinary error naming that encoding.
+  mconverter <- do
+    case T.unpack <$> getDirective "encoding" rules of
+      Just enc -> Just <$> do
+        let ioconverter = UCNV.open (dbg4 "encoding" enc) (Just False)
+        ExceptT $ (Right . dbg4 "converter" <$> ioconverter) `C.catch` (\(e::ICUError) -> return $ Left $ "could not open ICU converter for encoding " <> show enc <> ": " <> show e)
+      Nothing -> return Nothing
   case mdatafile of
     Nothing -> return nulljournal -- data file specified by source rule was not found
     Just dat -> do
@@ -139,7 +150,7 @@ parse iopts f _ = do
       if not (dat=="-" || exists)
       then return nulljournal -- data file inferred from rules file name was not found
       else do
-        t <- liftIO $ readFileOrStdinPortably dat
+        t <- liftIO $ readFileOrStdinPortably' mconverter dat
         readJournalFromCsv (Just $ Left rules) dat t Nothing
         -- apply any command line account aliases. Can fail with a bad replacement pattern.
         >>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
@@ -500,6 +511,7 @@ directivep = (do
 directives :: [Text]
 directives =
   ["source"
+  ,"encoding"
   ,"date-format"
   ,"decimal-mark"
   ,"separator"
diff --git a/hledger-lib/Hledger/Utils/IO.hs b/hledger-lib/Hledger/Utils/IO.hs
index da163e0cbfa..bff80f9c13d 100644
--- a/hledger-lib/Hledger/Utils/IO.hs
+++ b/hledger-lib/Hledger/Utils/IO.hs
@@ -5,6 +5,7 @@ terminals, pager output, ANSI colour/styles, etc.
 -}
 
 {-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE PackageImports #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 
@@ -32,9 +33,11 @@ module Hledger.Utils.IO (
   expandGlob,
   sortByModTime,
   readFileOrStdinPortably,
+  readFileOrStdinPortably',
   readFileStrictly,
   readFilePortably,
   readHandlePortably,
+  readHandlePortably',
   -- hereFileRelative,
 
   -- * Command line parsing
@@ -107,6 +110,7 @@ where
 import Control.Concurrent (forkIO)
 import Control.Exception (catch, evaluate, throwIO)
 import Control.Monad (when, forM, guard, void)
+import qualified Data.ByteString as B
 import Data.Char (toLower)
 import Data.Colour.RGBSpace (RGB(RGB))
 import Data.Colour.RGBSpace.HSL (lightness)
@@ -118,6 +122,7 @@ import Data.Maybe (isJust, catMaybes)
 import Data.Ord (comparing, Down (Down))
 import qualified Data.Text as T
 import qualified Data.Text.IO as T
+import qualified Data.Text.ICU.Convert as UCNV
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.Builder as TB
 import Data.Time.Clock (getCurrentTime)
@@ -280,19 +285,39 @@ readFilePortably f = openFile f ReadMode >>= readHandlePortably
 
 -- | Like readFilePortably, but read from standard input if the path is "-".
 readFileOrStdinPortably :: String -> IO T.Text
-readFileOrStdinPortably f = openFileOrStdin f ReadMode >>= readHandlePortably
+readFileOrStdinPortably = readFileOrStdinPortably' Nothing
+
+-- | Like readFileOrStdinPortably, but take an optional converter.
+readFileOrStdinPortably' :: Maybe UCNV.Converter -> String -> IO T.Text
+readFileOrStdinPortably' c f = openFileOrStdin f >>= readHandlePortably' c
   where
-    openFileOrStdin :: String -> IOMode -> IO Handle
-    openFileOrStdin "-" _ = return stdin
-    openFileOrStdin f' m = openFile f' m
+    openFileOrStdin :: String -> IO Handle
+    openFileOrStdin "-" = return stdin
+    openFileOrStdin f' = openFile f' ReadMode
 
+-- | Read all text from a handle, in universal newline mode, switching a UTF-8 handle to utf8_bom first.
 readHandlePortably :: Handle -> IO T.Text
-readHandlePortably h = do
+readHandlePortably = readHandlePortably' Nothing
+
+-- | Like readHandlePortably, but take an optional converter.
+-- With a converter, the handle's bytes are decoded by it instead of by
+-- the handle's own encoding.
+readHandlePortably' :: Maybe UCNV.Converter -> Handle -> IO T.Text
+readHandlePortably' Nothing h = do
   hSetNewlineMode h universalNewlineMode
   menc <- hGetEncoding h
   when (fmap show menc == Just "UTF-8") $ -- XXX no Eq instance, rely on Show
     hSetEncoding h utf8_bom
   T.hGetContents h
+readHandlePortably' (Just c) h =
+  -- The handle is read as raw bytes, so its newline mode and encoding are
+  -- never consulted. Convert CRLF to LF by hand, and drop a leading BOM
+  -- (U+FEFF) in case the converter passes it through into the text.
+  dropBom . T.replace "\r\n" "\n" . UCNV.toUnicode c <$> B.hGetContents h
+  where
+    dropBom t = case T.uncons t of
+      Just ('\xFEFF', rest) -> rest
+      _ -> t
 
 -- | Like embedFile, but takes a path relative to the package directory.
 embedFileRelative :: FilePath -> Q Exp
diff --git a/hledger-lib/hledger-lib.cabal b/hledger-lib/hledger-lib.cabal
index 356c6bb9f94..3695a63c741 100644
--- a/hledger-lib/hledger-lib.cabal
+++ b/hledger-lib/hledger-lib.cabal
@@ -160,6 +160,7 @@ library
     , template-haskell
     , terminal-size >=0.3.3
     , text >=1.2.4.1
+    , text-icu >=0.8.0.5
     , time >=1.5
     , timeit
     , transformers >=0.2
diff --git a/hledger-lib/package.yaml b/hledger-lib/package.yaml
index c9a2538b9d3..f506f326c82 100644
--- a/hledger-lib/package.yaml
+++ b/hledger-lib/package.yaml
@@ -78,6 +78,7 @@ dependencies:
 - template-haskell
 - terminal-size >=0.3.3
 - text >=1.2.4.1
+- text-icu >=0.8.0.5
 - time >=1.5
 - timeit
 - transformers >=0.2