From fa7c5c8ca6d8be1db452c651d4dd21ae513d38ec Mon Sep 17 00:00:00 2001 From: Mike Pilgrem Date: Sun, 10 Dec 2023 01:02:48 +0000 Subject: [PATCH] Fix #9507 Describe accurately acceptable package names Also extinguishes false positive in typo detection. Also aims to make corresponding changes to buildinfo-reference-generator, including the `Cabal-described` package. Adds `alphanumNotDigit` and now specifies `alphanum` by inserting digits into `alphanumNotDigit`. The description of `alpha-num-not-digit` is added to the introduction to the section on 'Non-terminals' to accommodate the limitation of buildinfo-reference-generator that only a single 'formula' can follow a single paragraph of narrative for each non-terminal production. --- .typos.toml | 5 +++ Cabal-described/src/Distribution/Described.hs | 7 ++-- .../src/Distribution/Utils/CharSet.hs | 13 ++++++-- .../src/Distribution/Utils/GrammarRegex.hs | 7 ++-- buildinfo-reference-generator/src/Main.hs | 4 +-- buildinfo-reference-generator/template.zinza | 11 ++++--- doc/buildinfo-fields-reference.rst | 15 +++++---- doc/cabal-package-description-file.rst | 33 ++++++++++++++----- doc/package-concepts.rst | 2 +- 9 files changed, 67 insertions(+), 30 deletions(-) diff --git a/.typos.toml b/.typos.toml index 9e633fb004d..d22851a7a6a 100644 --- a/.typos.toml +++ b/.typos.toml @@ -1,2 +1,7 @@ [default] extend-ignore-re = ["(?s)(#|//)\\s*spellchecker:off.*?\\n\\s*(#|//)\\s*spellchecker:on"] + +[default.extend-words] +# Extinguish false positive in cabal-package-description-file.rst. 'Nd' is a +# Unicode category, not a misspelling of 'And'. +nd = "nd" diff --git a/Cabal-described/src/Distribution/Described.hs b/Cabal-described/src/Distribution/Described.hs index 717fd6a5c7a..2b3b4aad21e 100644 --- a/Cabal-described/src/Distribution/Described.hs +++ b/Cabal-described/src/Distribution/Described.hs @@ -168,8 +168,8 @@ reUnqualComponent = RENamed "unqual-name" $ -- currently the parser accepts "csAlphaNum `difference` "0123456789" -- which is larger set than CS.alpha -- - -- Hackage rejects non ANSI names, so it's not so relevant. - <> RECharSet CS.alpha + -- Hackage, however, rejects non ANSI names. + <> RECharSet csAlphaNumNotDigit <> REMunch reEps (RECharSet csAlphaNum) reDot :: GrammarRegex a @@ -194,6 +194,9 @@ csAlpha = CS.alpha csAlphaNum :: CS.CharSet csAlphaNum = CS.alphanum +csAlphaNumNotDigit :: CS.CharSet +csAlphaNumNotDigit = CS.alphanumNotDigit + csUpper :: CS.CharSet csUpper = CS.upper diff --git a/Cabal-described/src/Distribution/Utils/CharSet.hs b/Cabal-described/src/Distribution/Utils/CharSet.hs index 9243615c7fa..3fb6abe723e 100644 --- a/Cabal-described/src/Distribution/Utils/CharSet.hs +++ b/Cabal-described/src/Distribution/Utils/CharSet.hs @@ -27,16 +27,17 @@ module Distribution.Utils.CharSet ( -- * Special lists alpha, alphanum, + alphanumNotDigit, upper, ) where -import Data.Char (chr, isAlpha, isAlphaNum, isUpper, ord) +import Data.Char (chr, isAlpha, isAlphaNum, isDigit, isUpper, ord) import Data.List (foldl', sortBy) import Data.Monoid (Monoid (..)) import Data.String (IsString (..)) import Distribution.Compat.Semigroup (Semigroup (..)) import Prelude - (Bool (..), Bounded (..), Char, Enum (..), Eq (..), Int, Maybe (..), Num (..), Ord (..), Show (..), String, concatMap, flip, fst, otherwise, showParen, + (Bool (..), Bounded (..), Char, Enum (..), Eq (..), Int, Maybe (..), Num (..), Ord (..), Show (..), String, (&&), concatMap, flip, fst, not, otherwise, showParen, showString, uncurry, ($), (.)) #if MIN_VERSION_containers(0,5,0) @@ -229,10 +230,16 @@ alpha :: CharSet alpha = foldl' (flip insert) empty [ c | c <- [ minBound .. maxBound ], isAlpha c ] {-# NOINLINE alpha #-} +-- | Note: this set varies depending on @base@ version. +-- +alphanumNotDigit :: CharSet +alphanumNotDigit = foldl' (flip insert) empty [ c | c <- [ minBound .. maxBound ], isAlphaNum c && not (isDigit c) ] +{-# NOINLINE alphanumNotDigit #-} + -- | Note: this set varies depending on @base@ version. -- alphanum :: CharSet -alphanum = foldl' (flip insert) empty [ c | c <- [ minBound .. maxBound ], isAlphaNum c ] +alphanum = foldl' (flip insert) alphanumNotDigit ['0' .. '9' ] {-# NOINLINE alphanum #-} -- | Note: this set varies depending on @base@ version. diff --git a/Cabal-described/src/Distribution/Utils/GrammarRegex.hs b/Cabal-described/src/Distribution/Utils/GrammarRegex.hs index 471ee25d44f..712865860f9 100644 --- a/Cabal-described/src/Distribution/Utils/GrammarRegex.hs +++ b/Cabal-described/src/Distribution/Utils/GrammarRegex.hs @@ -194,9 +194,10 @@ mathtt d = "\\mathtt{" <<>> d <<>> "}" charsetDoc :: CS.CharSet -> PP.Doc charsetDoc acs - | acs == CS.alpha = terminalDoc "alpha" - | acs == CS.alphanum = terminalDoc "alpha-num" - | acs == CS.upper = terminalDoc "upper" + | acs == CS.alpha = terminalDoc "alpha" + | acs == CS.alphanum = terminalDoc "alpha-num" + | acs == CS.alphanumNotDigit = terminalDoc "alpha-num-not-digit" + | acs == CS.upper = terminalDoc "upper" charsetDoc acs = case CS.toIntervalList acs of [] -> "\\emptyset" [(x,y)] | x == y -> inquotes $ mathtt $ charDoc x diff --git a/buildinfo-reference-generator/src/Main.hs b/buildinfo-reference-generator/src/Main.hs index 309d711b55e..9ef0cc988b5 100644 --- a/buildinfo-reference-generator/src/Main.hs +++ b/buildinfo-reference-generator/src/Main.hs @@ -52,8 +52,8 @@ main = do "String as in Haskell; it's recommended to avoid using Haskell-specific escapes." , zproduction "unqual-name" reUnqualComponent $ unwords [ "Unqualified component names are used for package names, component names etc. but not flag names." - , "Unqualified component name consist of components separated by dash, each component is non-empty alphanumeric string, with at least one alphabetic character." - , "In other words, component may not look like a number." + , "An unqualified component name consists of components separated by a hyphen, each component is a non-empty alphanumeric string, with at least one character that is not the digits ``0`` to ``9``." + , "In other words, a component may not look like a number." ] , zproduction "module-name" (describe (Proxy :: Proxy ModuleName)) diff --git a/buildinfo-reference-generator/template.zinza b/buildinfo-reference-generator/template.zinza index 8f05a416a8f..224266816dd 100644 --- a/buildinfo-reference-generator/template.zinza +++ b/buildinfo-reference-generator/template.zinza @@ -12,7 +12,7 @@ Field syntax is described as they are in the latest cabal file format version. .. math:: - \mathord{"}\mathtt{example}\mathord{"} + \mathord{``}\mathtt{example}\mathord{"} * non-terminals are type set in italic: @@ -25,13 +25,13 @@ Field syntax is described as they are in the latest cabal file format version. .. math:: - [ \mathord{"}\mathtt{1}\mathord{"} \cdots \mathord{"}\mathtt{9}\mathord{"} ] + [ \mathord{``}\mathtt{1}\mathord{"} \cdots \mathord{``}\mathtt{9}\mathord{"} ] Character set complements have :math:`c` superscript: .. math:: - [ \mathord{"}\mathtt{1}\mathord{"} \cdots \mathord{"}\mathtt{9}\mathord{"} ]^c + [ \mathord{``}\mathtt{1}\mathord{"} \cdots \mathord{``}\mathtt{9}\mathord{"} ]^c * repetition is type set using regular expression inspired notation. Superscripts tell how many time to repeat: @@ -125,7 +125,10 @@ Optional comma separated Non-terminals ------------- -In the syntax definitions below the following non-terminal symbols are used: +In the syntax definitions below the following non-terminal symbols are used. In addition: + + .. math:: + {\mathop{\mathit{alpha\text{-}num\text{-}not\text{-}digit}}} = {\mathop{\mathit{alpha\text{-}num}}}\cap{[ \mathord{``}\mathtt{0}\mathord{"} \cdots \mathord{``}\mathtt{9}\mathord{"} ]^c} {% for production in productions %} {{ production.name }} diff --git a/doc/buildinfo-fields-reference.rst b/doc/buildinfo-fields-reference.rst index a289292945a..b1bb35d6d6c 100644 --- a/doc/buildinfo-fields-reference.rst +++ b/doc/buildinfo-fields-reference.rst @@ -12,7 +12,7 @@ Field syntax is described as they are in the latest cabal file format version. .. math:: - \mathord{"}\mathtt{example}\mathord{"} + \mathord{``}\mathtt{example}\mathord{"} * non-terminals are type set in italic: @@ -25,13 +25,13 @@ Field syntax is described as they are in the latest cabal file format version. .. math:: - [ \mathord{"}\mathtt{1}\mathord{"} \cdots \mathord{"}\mathtt{9}\mathord{"} ] + [ \mathord{``}\mathtt{1}\mathord{"} \cdots \mathord{``}\mathtt{9}\mathord{"} ] Character set complements have :math:`c` superscript: .. math:: - [ \mathord{"}\mathtt{1}\mathord{"} \cdots \mathord{"}\mathtt{9}\mathord{"} ]^c + [ \mathord{``}\mathtt{1}\mathord{"} \cdots \mathord{``}\mathtt{9}\mathord{"} ]^c * repetition is type set using regular expression inspired notation. Superscripts tell how many time to repeat: @@ -125,7 +125,10 @@ Optional comma separated Non-terminals ------------- -In the syntax definitions below the following non-terminal symbols are used: +In the syntax definitions below the following non-terminal symbols are used. In addition: + + .. math:: + {\mathop{\mathit{alpha\text{-}num\text{-}not\text{-}digit}}} = {\mathop{\mathit{alpha\text{-}num}}}\cap{[ \mathord{``}\mathtt{0}\mathord{"} \cdots \mathord{``}\mathtt{9}\mathord{"} ]^c} hs-string String as in Haskell; it's recommended to avoid using Haskell-specific escapes. @@ -134,10 +137,10 @@ hs-string \mathop{\mathord{``}\mathtt{\text{"}}\mathord{"}}{\left\{ {[\mathop{\mathord{``}\mathtt{\text{"}}\mathord{"}}\mathop{\mathord{``}\mathtt{\text{\\}}\mathord{"}}]^c}\mid\left\{ \begin{gathered}\mathop{\mathord{``}\mathtt{\text{\\}\text{&}}\mathord{"}}\\\mathop{\mathord{``}\mathtt{\text{\\}\text{\\}}\mathord{"}}\\\left\{ \mathop{\mathord{``}\mathtt{\text{\\}n}\mathord{"}}\mid\mathop{\mathit{escapes}} \right\}\\\mathop{\mathord{``}\mathtt{\text{\\}}\mathord{"}}[\mathop{\mathord{``}\mathtt{0}\mathord{"}}\cdots\mathop{\mathord{``}\mathtt{9}\mathord{"}}]\\\mathop{\mathord{``}\mathtt{\text{\\}o}\mathord{"}}[\mathop{\mathord{``}\mathtt{0}\mathord{"}}\cdots\mathop{\mathord{``}\mathtt{7}\mathord{"}}]\\\mathop{\mathord{``}\mathtt{\text{\\}x}\mathord{"}}[\mathop{\mathord{``}\mathtt{0}\mathord{"}}\cdots\mathop{\mathord{``}\mathtt{9}\mathord{"}}\mathop{\mathord{``}\mathtt{A}\mathord{"}}\cdots\mathop{\mathord{``}\mathtt{F}\mathord{"}}\mathop{\mathord{``}\mathtt{a}\mathord{"}}\cdots\mathop{\mathord{``}\mathtt{f}\mathord{"}}]\\\left\{ \mathop{\mathord{``}\mathtt{\text{\\}\text{^}\text{@}}\mathord{"}}\mid\mathop{\mathit{control}} \right\}\\\left\{ \mathop{\mathord{``}\mathtt{\text{\\}NUL}\mathord{"}}\mid\mathop{\mathit{ascii}} \right\}\end{gathered} \right\} \right\}}^\ast_{}\mathop{\mathord{``}\mathtt{\text{"}}\mathord{"}} unqual-name - Unqualified component names are used for package names, component names etc. but not flag names. Unqualified component name consist of components separated by dash, each component is non-empty alphanumeric string, with at least one alphabetic character. In other words, component may not look like a number. + Unqualified component names are used for package names, component names etc. but not flag names. An unqualified component name consists of components separated by a hyphen, each component is a non-empty alphanumeric string, with at least one character that is not the digits ``0`` to ``9``. In other words, a component may not look like a number. .. math:: - {\left({\mathop{\mathit{alpha\text{-}num}}}^\ast_{}\mathop{\mathit{alpha}}{\mathop{\mathit{alpha\text{-}num}}}^\ast_{}\right)}^+_{\mathop{\mathord{``}\mathtt{\text{-}}\mathord{"}}} + {\left({\mathop{\mathit{alpha\text{-}num}}}^\ast_{}\mathop{\mathit{alpha\text{-}num\text{-}not\text{-}digit}}{\mathop{\mathit{alpha\text{-}num}}}^\ast_{}\right)}^+_{\mathop{\mathord{``}\mathtt{\text{-}}\mathord{"}}} module-name Haskell module name as recognized by Cabal parser. diff --git a/doc/cabal-package-description-file.rst b/doc/cabal-package-description-file.rst index 9eb2aef2705..86aab056c0d 100644 --- a/doc/cabal-package-description-file.rst +++ b/doc/cabal-package-description-file.rst @@ -313,24 +313,39 @@ describe the package as a whole: tools require the package-name specified for this field to match the package description's file-name :file:`{package-name}.cabal`. - Package names are case-sensitive and must match the regular expression - (i.e. alphanumeric "words" separated by dashes; each alphanumeric - word must contain at least one letter): - ``[[:digit:]]*[[:alpha:]][[:alnum:]]*(-[[:digit:]]*[[:alpha:]][[:alnum:]]*)*``. + A valid package name comprises an alphanumeric 'word'; or two or more + such words separated by a hyphen character (``-``). A word cannot be + comprised only of the digits ``0`` to ``9``. - Or, expressed in ABNF_: + An alphanumeric character belongs to one of the Unicode Letter categories + (Lu (uppercase), Ll (lowercase), Lt (titlecase), Lm (modifier), or + Lo (other)) or Number categories (Nd (decimal), Nl (letter), or No (other)). + + Package names are case-sensitive. + + Expressed as a regular expression: + + ``[0-9]*[\p{L}\p{N}-[0-9]][\p{L}\p{N}]*(-[0-9]*[\p{L}\p{N}-[0-9]][\p{L}\p{N}]*)*`` + + Expressed in ABNF_: .. code-block:: abnf package-name = package-name-part *("-" package-name-part) - package-name-part = *DIGIT UALPHA *UALNUM + package-name-part = *DIGIT UALPHANUM-NOT-DIGIT *UALNUM + + DIGIT = %x30-39 ; 0-9 - UALNUM = UALPHA / DIGIT - UALPHA = ... ; set of alphabetic Unicode code-points + UALNUM = UALPHANUM-NOT-DIGIT / DIGIT + UALPHANUM-NOT-DIGIT = ... ; set of Unicode code-points in Letter or + ; Number categories, other than the DIGIT + ; code-points .. note:: - Hackage restricts package names to the ASCII subset. + Hackage will not accept package names that use alphanumeric characters + other than ``A`` to ``Z``, ``a`` to ``z``, and ``0`` to ``9`` + (the ASCII subset). .. pkg-field:: version: numbers (required) diff --git a/doc/package-concepts.rst b/doc/package-concepts.rst index 25cfeb13fba..d0586b8ec93 100644 --- a/doc/package-concepts.rst +++ b/doc/package-concepts.rst @@ -43,7 +43,7 @@ Package names and versions All packages have a name, e.g. "HUnit". Package names are assumed to be unique. Cabal package names may contain letters, numbers and hyphens, but not spaces and may also not contain a hyphened section consisting of -only numbers. The namespace for Cabal packages is flat, not +only of the digits ``0`` to ``9``. The namespace for Cabal packages is flat, not hierarchical. Packages also have a version, e.g "1.1". This matches the typical way in