diff --git a/adsingestp/parsers/jats.py b/adsingestp/parsers/jats.py
index 593033a..6aa398e 100644
--- a/adsingestp/parsers/jats.py
+++ b/adsingestp/parsers/jats.py
@@ -16,6 +16,7 @@
 
 class JATSAffils(object):
     regex_email = re.compile(r"^[a-zA-Z0-9+_.-]+@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)+")
+    regex_auth_xid = re.compile(r"^A[0-9]+$")
 
     def __init__(self):
         self.contrib_dict = {}
@@ -94,6 +95,7 @@ def _fix_affil(self, affstring):
             # check for empty strings with commas
             check_a = a.replace(",", "")
             if check_a:
+                a = re.sub("\\(e-*mail:\\s*,+\\s*\\)", "", a)
                 a = a.replace("\\n", ",")
                 a = a.replace(" —", "—")
                 a = a.replace(" , ", ", ")
@@ -103,6 +105,7 @@ def _fix_affil(self, affstring):
                 a = re.sub("^(\\s*,+\\s*)+", "", a)
                 a = re.sub("(\\s*,\\s+)+", ", ", a)
                 a = re.sub("(,\\s*)+$", "", a)
+                a = re.sub("\\s+$", "", a)
                 if self.regex_email.match(a):
                     emails.append(a)
                 else:
@@ -567,15 +570,18 @@ def parse(self, article_metadata):
                 for aff in contrib_aff:
                     # check and see if the publisher defined an email tag inside an affil (like IOP does)
                     nested_email_list = aff.find_all("ext-link")
+                    key = aff.get("id", default_key)
                     for e in nested_email_list:
                         if e.get("ext-link-type", None) == "email":
-                            key = e["id"]
+                            if e.get("id", None):
+                                ekey = e["id"]
+                            else:
+                                ekey = key
                             value = e.text
                             # build the cross-reference dictionary to be used later
-                            self.email_xref[key] = value
+                            self.email_xref[ekey] = value
                             e.decompose()
 
-                    key = aff.get("id", default_key)
                     # special case: get rid of <sup>...
                     aff = self._decompose(soup=aff, tag="sup")
                     aff, aff_extids_tmp = self._get_inst_identifiers(aff)
@@ -586,11 +592,25 @@ def parse(self, article_metadata):
 
                     affstr = aff.get_text(separator=", ").strip()
                     (affstr, email_list) = self._fix_affil(affstr)
-                    if email_list:
-                        self.email_xref[key] = email_list
+                    if not self.email_xref.get(key, None):
+                        if email_list:
+                            self.email_xref[key] = email_list
+                        else:
+                            self.email_xref[key] = ""
                     self.xref_dict[key] = affstr
                     self.xref_xid_dict[key] = aff_extids_tmp
 
+        # special case: publisher defined aff/email xrefs, but the xids aren't
+        # assigned to authors; xid is typically of the form "A\d+"
+        # publisher example: Geol. Soc. London (gsl)
+        count_auth = len(authors_out)
+        count_xref = len(self.xref_dict.keys())
+        if count_auth == count_xref:
+            for auth, xref in zip(authors_out, self.xref_dict.keys()):
+                if self.regex_auth_xid.match(xref):
+                    if not auth.get("aff", []) and not auth.get("xaff", []):
+                        auth["xaff"] = [xref]
+
         self.contrib_dict = {"authors": authors_out, "contributors": contribs_out}
 
         # now get the xref keys outside of contrib-group:
@@ -1027,6 +1047,10 @@ def _parse_ids(self):
                 self.base_metadata["ids"]["pub-id"].append(
                     {"attribute": "manuscript", "Identifier": self._detag(d, [])}
                 )
+            elif id_type == "url":
+                self.base_metadata["ids"]["pub-id"].append(
+                    {"attribute": "url", "Identifier": self._detag(d, [])}
+                )
             elif id_type == "other":
                 self.base_metadata["ids"]["pub-id"].append(
                     {"attribute": "other", "Identifier": self._detag(d, [])}
@@ -1232,7 +1256,11 @@ def parse(self, text, bsparser="lxml-xml"):
             raise XmlLoadException(err)
 
         document = d.article
-        front_meta = document.front
+        # NOTE(review): document.front may be None rather than raise -- confirm this guard fires
+        try:
+            front_meta = document.front
+        except Exception as err:
+            raise XmlLoadException("No front matter found, stopping: %s" % err)
         self.back_meta = document.back
 
         self.article_meta = front_meta.find("article-meta")
diff --git a/tests/stubdata/input/jats_gsl_unkeyed_xref.xml b/tests/stubdata/input/jats_gsl_unkeyed_xref.xml
new file mode 100644
index 0000000..8eaada2
--- /dev/null
+++ b/tests/stubdata/input/jats_gsl_unkeyed_xref.xml
@@ -0,0 +1,110 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.2 20190208//EN" "JATS-1.2/JATS-archivearticle1.dtd">
+<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="en" article-type="research-article" dtd-version="1.2">
+<front><journal-meta><journal-id journal-id-type="publisher-id">geea</journal-id><journal-id journal-id-type="hwp">geochem</journal-id><journal-id journal-id-type="nlm-ta">Geochemistry: Exploration, Environment, Analysis</journal-id><journal-title-group><journal-title>Geochemistry: Exploration, Environment, Analysis</journal-title><abbrev-journal-title abbrev-type="publisher">Geochemistry: Exploration, Environment, Analysis</abbrev-journal-title></journal-title-group><issn pub-type="ppub">1467-7873</issn><issn pub-type="epub">2041-4943</issn><publisher><publisher-name>The Geological Society of London</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.1144/geochem.1.2.119</article-id><article-id pub-id-type="publisher-id">119</article-id><article-categories><subj-group subj-group-type="heading"><subject>Regular Article</subject></subj-group></article-categories><title-group><article-title>A comparison of unsupervised neural networks and k-means clustering in the analysis of multi-element stream sediment data</article-title></title-group><contrib-group>
+<contrib contrib-type="author"><string-name name-style="western"><given-names>A. P.</given-names> <surname>Clare</surname></string-name>
+
+
+
+   </contrib><x xml:space="preserve"> and </x><contrib contrib-type="author"><string-name name-style="western"><given-names>D. R.</given-names> <surname>Cohen</surname></string-name>
+
+
+
+
+
+  </contrib><aff id="A1"><label><sup>1</sup></label>Neural Mining Solutions, 1 Alfred Street, Sydney, NSW 2000, Australia</aff><aff id="A2"><label><sup>2</sup></label>School of Geology, University of New South Wales, Sydney, NSW 2052, Australia (e-mail: <ext-link xlink:href="d.cohen@unsw.edu.au" ext-link-type="email">d.cohen@unsw.edu.au</ext-link>)</aff></contrib-group><pub-date publication-format="print" date-type="pub" pub-type="ppub"><month>5</month><year>2001</year></pub-date><volume>1</volume><issue>2</issue><fpage>119</fpage><lpage>134</lpage><permissions><copyright-statement>© 2001 AAG/The Geological Society of London</copyright-statement><copyright-year>2001</copyright-year></permissions><self-uri content-type="pdf" xlink:href="geochem.1.2.119.pdf" /><abstract xml:lang="en">
+   <p>Isolation of complex patterns of correlation between variables, association among samples and anomaly identification, through conventional parametric multivariate statistical procedures, may be obscured by the presence of multivariate outliers and non-normal variable distributions. Procedures such as k-means clustering generally require substantial data pre-processing. Unsupervised neural networks (UNN) have the capacity to cluster multivariate data, using a modified form of the standard unsupervised Kohonen self-organizing map that is non-linear, non-parametric, rapid and robust. The number of clusters into which samples are allocated is determined by the unsupervised neural network and is directly dependent upon the original input data.</p>
+   <p>UNN and k-means clustering was performed on stream sediment geochemical data from 1670 sub-catchments in the northeast region of New South Wales. Both methods produced clusters for the feldspar-associated elements that were closely related to sub-catchment geology and topography. UNN clustering revealed more subtle variations within the major lithological groups. UNN clustering of Cu&ndash;Pb&ndash;Zn produced ten main clusters and identified 26 anomalies, that were mainly from sub-catchments, containing significant base metal mineralization occurrences. K-means clustering of transformed Cu&ndash;Pb&ndash;Zn yielded five major clusters and only 19 anomalies. Progressive increase in <italic toggle="yes">k</italic> from eight to 20 did not substantially alter the k-means classification of samples between common groups and anomalies. Some catchments identified only as anomalous by UNN clustering contain known base metal mineralization.</p>
+  </abstract><kwd-group kwd-group-type="KWD" xml:lang="en">
+   <kwd>exploration geochemistry</kwd>
+   <x xml:space="preserve">; </x><kwd>neural network</kwd>
+   <x xml:space="preserve">; </x><kwd>clustering</kwd>
+   <x xml:space="preserve">; </x><kwd>anomaly</kwd>
+   <x xml:space="preserve">; </x><kwd>New England</kwd>
+  </kwd-group><custom-meta-group><custom-meta><meta-name>hwp-legacy-fpage</meta-name><meta-value>119</meta-value></custom-meta><custom-meta><meta-name>hwp-legacy-dochead</meta-name><meta-value>Regular Article</meta-value></custom-meta></custom-meta-group></article-meta></front><body><sec><title>INTRODUCTION</title><p>A variety of techniques exist to quantify complex patterns of correlation between variables in large, multi-element geochemical datasets, and to establish association among samples. Most of these techniques require a reduction in the dimensionality of the dataset. Defining common groups (or clusters) that relate to regional scale geological processes and isolating anomalous samples is a fundamental task of geochemical analysis (<xref ref-type="bibr" rid="B5">Cheng <italic toggle="yes">et al.</italic> 1996</xref>). Anomaly detection and pattern recognition using conventional parametric statistical procedures, such as factor analysis and parametric clustering techniques, may be obscured by the presence of multivariate outliers and non-normal variable distributions (<xref ref-type="bibr" rid="B7">Chork 1990</xref>; <xref ref-type="bibr" rid="B6">Chork &amp; Rousseeuw 1992</xref>). Multivariate anomalies may be identified using techniques such as modelling of Mahalanobis D<sup>2</sup> distances using &chi;<sup>2</sup> probability plots (<xref ref-type="bibr" rid="B13">Garrett 1989</xref>) prior to clustering; however, this approach can be time consuming.</p><p>K-means clustering is one method for reducing data to groups of similar objects. It is of the partitional clustering type (<xref ref-type="bibr" rid="B21">MacQueen 1967</xref>) as it attempts to directly decompose data into a set of disjoint clusters. 
One problem with k-means clustering is that the clustering algorithms prefer certain cluster shapes (multivariate distributions), and the algorithms tend to assign data to clusters of such shapes even if no such clusters actually exist in the data (<xref ref-type="bibr" rid="B17">Kaski 1997</xref>). Another potential problem involves the choice of the number of clusters. Good initialization of the cluster centroids may also be crucial as real data clusters may be left empty if their centroids are distant from the initial estimates at the commencement of clustering (<xref ref-type="bibr" rid="B17">Kaski 1997</xref>).</p><p>Neural networks are an alternate approach to revealing complex relationships within multivariate datasets and identifying anomalies, and are less dependent on the characteristics of the input data than conventional methods (<xref ref-type="bibr" rid="B2">Benediktsson <italic toggle="yes">et al.</italic> 1990</xref>; <xref ref-type="bibr" rid="B12">Foody 1997</xref>). Unlike conventional statistical or rule-based systems, neural networks are not programmed to perform particular tasks according to strict rules, but to isolate patterns in data by training on historical data using a learning algorithm. Unsupervised neural networks do not use training data but look for patterns and relationships within the entire input dataset, and organize the network into a set of unique classes or groups (<xref ref-type="bibr" rid="B11">Dayhoff 1990</xref>). One potential benefit resulting from the design of unsupervised neural networks is that outlying observations can be included in the dataset without significantly affecting the main patterns and relationships identified. 
The neural network takes the spread of data into account and those samples not falling into a common cluster or pattern usually remain in smaller groups of anomalous or uncommon patterns.</p><p>The self-organizing map (SOM), developed by <xref ref-type="bibr" rid="B20">Kohonen (1995)</xref>, is a neural network algorithm that has been used for a wide variety of applications, ranging from optimization of communications networks to predictive modelling in the finance industry. Comparison of clustering methods by <xref ref-type="bibr" rid="B17">Kaski (1997)</xref> indicates that k-means clustering is very closely related to SOM-based methods. An SOM is, however, both a method of clustering (which achieves dimension reduction) and a technique for non-linear projecting of data onto a lower-dimensional display (<xref ref-type="bibr" rid="B17">Kaski 1997</xref>).</p><p>This study compares the capacity of unsupervised neural network (UNN) clustering on non-preprocessed regional stream sediment data, to reduce dimensionality, define multivariate geochemical patterns and identify multivariate anomalies, with that of conventional k-means clustering.</p><sec><title>K-means clustering</title><p>K-means clustering seeks to assign multivariate observations to a predetermined number of groups (<italic toggle="yes">k</italic>) each containing <italic toggle="yes">n</italic><sub>k</sub> observations and a group centroid (<italic toggle="yes">x</italic><sub>k</sub>) (<xref ref-type="bibr" rid="B15">Howarth 1983</xref>). Clustering commences with the random allocation of k original observations as the group centroids. Each observation is assigned to the group to whose centroid they are nearest. Once all observations are allocated to groups, the new group centroids are then recalculated and the original observations reallocated to the new groups. 
The process is repeated until the overall within-group scatter (<italic toggle="yes">W</italic>) is minimized (or the change in <italic toggle="yes">W</italic> becomes insignificant).<disp-formula id="EQ1"><mml:math id="UDM405" overflow="scroll"><mml:mi>W</mml:mi><mml:mo>=</mml:mo><mml:munderover><mml:mrow><mml:mo movablelimits="false">∑</mml:mo></mml:mrow><mml:mrow /><mml:mi>k</mml:mi></mml:munderover><mml:mo>⁡</mml:mo><mml:munderover><mml:mrow><mml:mo movablelimits="false">∑</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:munderover><mml:mo>⁡</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>−</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mn>2</mml:mn></mml:msup></mml:math>
+</disp-formula>&emsp;As well as allocating observations to groups, the posterior probability of an observation being a member of a group may be determined (R. G. Garrett, pers. comm.). From a geochemical perspective, anomalies may be defined as clusters with only a few observations or as observations displaying a low probability of membership of any of the main clusters.</p><p>K-means clustering is generally affected by the geometry of the multivariate data distributions. Whereas distortions can be reduced by rescaling variables (normalizing individual variables against the covariance matrix) or by transforming the original variables to normal distributions (<xref ref-type="bibr" rid="B22">Mancey &amp; Howarth 1980</xref>), it is difficult to remove the bias induced by the inclusion of marginally anomalous observations within the various groups. As such, k-means clustering is generally used to partition data into major groups rather than to detect anomalies.</p></sec><sec><title>Neural networks</title><p>The unsupervised neural network applied in this study is a modified form of the standard unsupervised SOM, in which patterns in the <italic toggle="yes">n</italic>-dimensional input data are transformed into lower dimensional space which is topologically ordered (<xref ref-type="bibr" rid="B18">Kohonen 1989</xref>, <xref ref-type="bibr" rid="B19">1990</xref>, <xref ref-type="bibr" rid="B20">1995</xref>; <xref ref-type="bibr" rid="B11">Dayhoff 1990</xref>). The SOM comprises the input layers (consisting of <italic toggle="yes">m</italic> samples and <italic toggle="yes">n</italic> layers or variables) and a competitive layer organized as a grid of units (<xref ref-type="fig" rid="F1">Fig. 1</xref>). For each sample, a vector (X) representing the <italic toggle="yes">n</italic> variables is compared with the vectors representing the weight vectors (W) of each unit in the network&rsquo;s competitive layer. 
The software used in this study (Prospect Explorer&trade;) commenced with a single vector, arbitrarily weighted according to the first observation encountered in the dataset. A matching value is computed for each unit in the competitive layer matrix. The unit with the greatest similarity to the input vector is then selected. The neighbourhood of units around the selected unit is then defined and the weights for all units in that neighbourhood adjusted, such that:<disp-formula id="EQ2"><mml:math id="UDM406" overflow="scroll"><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi><mml:mrow><mml:mspace width="thickmathspace" /></mml:mrow><mml:mspace width="thinmathspace" /><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>w</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi><mml:mrow><mml:mspace width="thickmathspace" /></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>o</mml:mi><mml:mi>l</mml:mi><mml:mi>d</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi mathvariant="normal">Δ</mml:mi><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math>
+</disp-formula>where<disp-formula id="EQ3"><mml:math id="UDM407" overflow="scroll"><mml:mrow><mml:mi mathvariant="normal">where</mml:mi></mml:mrow><mml:mspace width="1em" /><mml:mspace width="1em" /><mml:mi mathvariant="normal">Δ</mml:mi><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mi>η</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>X</mml:mi><mml:mo>−</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:math>
+</disp-formula>&eta; is the learning rate (a small positive number that decreases as training continues).</p><p>Hence, the selected unit and its neighbourhood becomes more like the input pattern vector X and is more likely to be selected should the same or a similar input pattern be presented to the network. This procedure is then repeated so that eventually all patterns have been presented to the network numerous times over and similar patterns have been grouped (<xref ref-type="bibr" rid="B16">Kamgar-Parsi <italic toggle="yes">et al.</italic> 1990</xref>). The neural network ceases to group the data once the rate of change in the clustering falls below an arbitrary minimum threshold.</p><p>In most applications, observations are not strictly allocated to a specific cluster, but exhibit varying degrees of association (similarity) with the weighted vectors representing the derived clusters. The similarity of individual observations (as represented by their vectors) to the vectors representing each of the neural clusters established may be determined, or observations can be allocated to the cluster to which it displays the greatest similarity (<xref ref-type="fig" rid="F2">Fig. 2</xref>). An anomalous cluster may, therefore, be defined as one where very few observations display a significant degree of similarity to the weighted vector representing that cluster. Anomalous observations are then defined as those displaying a high degree of similarity with those anomalous cluster vectors.</p></sec></sec><sec><title>STUDY AREA</title><p>The study area, located in the northeastern corner of New South Wales, covers <italic toggle="yes">c.</italic>&thinsp;30&thinsp;000&thinsp;km<sup>2</sup> and contains the catchments of the Clarence, Richmond and Tweed Rivers (<xref ref-type="fig" rid="F3">Fig. 3</xref>). 
The western, southern and northeastern regions contain Silurian to Carboniferous metasedimentary rocks and associated volcanics, along with Permian volcanics and the Permo-Triassic plutonic suites of the New England Batholith. The metasedimentary rocks and volcanics form an accretionary complex. East of the Clarence River lies the Mesozoic Clarence-Moreton Basin, which contains sediments derived from the surrounding Palaeozoic basement. The basin is bounded on its western margin by a major meridonal structural zone containing laterally extensive serpentinites. In the northeast and central south of the study area, extensive Tertiary basalt flows overlie much of the surrounding Palaeozoic basement and parts of the Clarence-Moreton Basin.</p><p>Various styles of mineralization are present in the area, although most occurrences are low grade and sub-economic (<xref ref-type="bibr" rid="B14">Gilligan &amp; Barnes 1990</xref>). Mineralization is predominantly of the quartz vein-hosted, skarn, greisen or epithermal styles, with the main commodities being Au, As, Sb, W, Sn, Mo, Cu and Zn. The principal Au and base metal deposits are located around Drake, Timbarra, Tooloom, Baryulgil, Jackadgery, Dalmoreton and Glen Innes. A comprehensive mineral occurrence database has been assembled by the NSW Department of Mineral Resources (<xref ref-type="bibr" rid="B1">Barnes <italic toggle="yes">et al.</italic> 1996</xref>).</p><p>The topography is influenced by the underlying geology. The western margin of the study area is characterized by high (up to 1200&thinsp;m altitude), gently undulating plateau country associated with the New England Batholith. The Clarence-Moreton Basin displays subdued topographic relief and forms extensive lowland floodplains for the major river systems. 
The older Palaeozoic rocks and the Cenozoic basalts form more dissected terrains.</p></sec><sec><title>DATASET</title><p>The region was divided into 1670 sub-catchments, ranging in area from 8 to 50&thinsp;km<sup>2</sup>. The choice of sampling density related to the scale of spatial variation in rock types, the distribution of known mineral deposits and the size of drainage sub-catchments (<xref ref-type="bibr" rid="B9">Cohen <italic toggle="yes">et al.</italic> 1995</xref>, <xref ref-type="bibr" rid="B10">1999</xref>). Specific sites were selected on major creeks or rivers at the outlet point of discrete, closed drainage basins. At these locations, 5&thinsp;kg of active channel stream sediment were collected. The stream sediments were sieved to &lt;250&thinsp;&mu;m prior to analysis by X-ray fluorescence spectrometry and instrumental neutron activation analysis. This fraction is coarser than that typically used in exploration geochemical surveys and was intended to enhance the primary lithological signature of the stream sediment data, yet preserve secondary (hydromorphic) geochemical dispersion patterns.</p></sec><sec><title>DATA PROCESSING</title><sec><title>K-means clustering</title><p>Normal probability plotting of the raw data indicated both the existence of multiple populations and a positive skewness to the main body of data. Preliminary testing of k-means clustering on the raw data resulted in isolation of very few anomalies and poor separation of the major lithologies between clusters. Each variable was, therefore, cleaned and normalized by removing gross outliers plus a further 5% of observations from the upper end of the distribution, and determining the Box-Cox transformation [x&prime;=(x<sup>&lambda;</sup>&minus;1)/&lambda;] that reduced skewness to 0 (<xref ref-type="bibr" rid="B4">Box &amp; Cox 1964</xref>). The excluded values were then returned to the dataset and the transformation applied. 
Other methods of robust estimation could also have been used.</p><p>Selection of the optimum number of clusters was determined after testing selected groups of variables for cluster numbers ranging from six to 20. In the case of Cu&ndash;Pb&ndash;Zn, extending the number of clusters from six to eight had no significant effect on either the main groupings of observations (the first three main clusters) or on the identification of anomalies (<xref ref-type="fig" rid="F4">Fig. 4</xref>). Extending from eight to ten clusters resulted in a slight reduction in the number of anomalies (from 22 to 18) and sub-division of the main clusters into overlapping groups (<xref ref-type="fig" rid="F5">Fig. 5</xref>). Further increase in the number of clusters served only to subdivide the small anomalous clusters into even smaller clusters or to isolate anomalous values, rather than to split the main clusters into sub-clusters.</p></sec><sec><title>UNN clustering</title><p>In order to analyse the stream sediment geochemical results from each catchment, the polygon (catchment boundary) attributed data were converted to a grid. This was a requirement of the software used rather than an intrinsic requirement of UNN methods. The smallest catchment in the study area was <italic toggle="yes">c.</italic>&thinsp;2500&thinsp;m<sup>2</sup> and a grid cell of 400&thinsp;m<sup>2</sup> was deemed to be sufficient to accurately represent the geochemical signatures of each catchment. To prevent creation of artificial values between sparsely distributed sample points, the catchment polygons were rasterized so that each grid cell within a given catchment was given the same geochemical value (a procedure discussed by <xref ref-type="bibr" rid="B3">Bonham-Carter 1994</xref>). The resulting input for the neural network involved 300&thinsp;000 cells by 33 elements. The UNN clustering was run initially on all 33 elements from the stream sediment dataset to establish the main associations. 
No information on sub-catchment geology or the distribution of known mineralization occurrences was provided to the UNN.</p><p>For the complete set of variables, there were 14 common clusters and a number of clusters for which very few observations displayed a high index of similarity (i.e. UNN anomalies). The common clusters were closely related to the distribution of the major lithological groups in the area and the main element associations, as defined by the normalized weights (loadings) for each cluster, were:</p><p>Ba&ndash;Sr&ndash;Rb&ndash;Na&ndash;K&ndash;Pb&ndash;Eu&plusmn;Ga Feldspar association</p><p>Cu&ndash;Zn&plusmn;Pb Base metal mineralization association</p><p>Fe&ndash;Co&ndash;Cu&ndash;Ni&ndash;Cr&plusmn;Zn Mafic/ultramafic and Fe-oxide adsorption association</p><p>REE&ndash;Nb&ndash;Ta&ndash;Hf&ndash;Zr&ndash;Th Heavy mineral association</p><p>These associations closely agreed with those established using factor analysis on the transformed dataset (<xref ref-type="bibr" rid="B9">Cohen <italic toggle="yes">et al.</italic> 1995</xref>). Both the UNN and k-means clustering were rerun on these four associations to evaluate both cluster patterns and the distribution of anomalies (<xref ref-type="bibr" rid="B8">Clare &amp; Cohen 1999</xref>). To demonstrate the relative performance of the k-means clustering and the UNN clustering, the distribution of patterns for the feldspar association and the anomaly detection results for the base metal association will be compared.</p></sec></sec><sec><title>RESULTS</title><sec><title>The feldspar association</title><p>The structure of the k-means and UNN clusters are presented in <xref ref-type="fig" rid="F6">Fig. 6</xref> in terms of the mean and standard deviation for each (transformed) element of a k-means cluster and the normalized value (or vector) representing a UNN cluster. 
The distribution of the allocated k-means clusters and an index of the similarity between selected UNN cluster vectors and individual catchment (grid cell) data are presented in <xref ref-type="fig" rid="F7">Figs 7</xref> to <xref ref-type="fig" rid="F9">9</xref>. The correspondence between k-means and UNN clusters and their relationship to the regional lithologies is summarized in <xref ref-type="table" rid="T1">Table 1</xref> and the distribution of k-means clusters relative to catchment lithological group is indicated in <xref ref-type="table" rid="T2">Table 2</xref>.</p><p>Both the k-means and UNN clustering display a very strong lithological or geomorphic association. The clustering defines four main spatial associations: the granites, the accretionary complex, the Tertiary basalts and the Clarence-Moreton Basin sedimentary rocks.</p><sec><title>Granites</title><p>The distribution of catchments allocated to k-means cluster 3 shows a high degree of similarity to those displaying a strong similarity index to UNN cluster 2 (<xref ref-type="fig" rid="F7">Figs 7</xref> and <xref ref-type="fig" rid="F8">8</xref>) and are characterized by low Ba, Eu and Sr values and high K, Na and Pb (<xref ref-type="fig" rid="F6">Fig. 6</xref>). These clusters are restricted mainly to the granites along the western boundary of the area, with 58% of k-means cluster 3 being contained within granite-dominated catchments and 36% in areas of alluvium. K-means cluster 4 is more evenly distributed amongst granites, metasedimentary rocks and alluvium. It covers some of the less differentiated, plagioclase-rich granites and has higher Ba and Eu loadings but lower K contents than cluster 3.</p></sec><sec><title>Accretionary complex</title><p>The accretionary complex differs from the granitic clusters in its higher Ba and Eu values and lower K, Pb and Rb values. 
Whereas this region is covered by a single k-means cluster (cluster 5 for which 68% of the catchments are in the accretionary complex), there are three UNN clusters associated with this region. The UNN clusters display a subtle variation in geochemical signature and spatial distribution, and form three contiguous sub-regions in the accretionary complex. UNN cluster 1 (<xref ref-type="fig" rid="F8">Fig. 8</xref>), for example, is characterized by more elevated Ba values than the other two clusters and covers the southwestern part of the complex. UNN cluster 3 is characterized by lower Eu, K, Na, Rb and Sr values than UNN cluster 1 and is most strongly reflected in the southeastern and eastern sections of the complex, including the Gundahl Complex (a tectonic melange of metasediments and greenstones of altered mafic volcanics) and the basal conglomerates of the Clarence-Moreton Basin.</p></sec><sec><title>Tertiary basalts</title><p>K-means cluster 2 and UNN clusters 5 and 8 delineate the main Tertiary basalt flows in the northeast, northwest and the south (<xref ref-type="fig" rid="F7">Figs 7</xref> and <xref ref-type="fig" rid="F8">8</xref>). There also appears to be an association between these clusters and the intermediate to basic volcanics and volcanoclastics southeast of Tenterfield and the Gordonbrook Serpentinite near Baryugil. The clusters are characterized by higher Ba, Eu and Sr and lower K and Pb values than the granites, reflecting the predominance of calcic plagioclase over potassic and sodic feldspars.</p></sec><sec><title>Clarence-Moreton Basin</title><p>K-means clusters 1 and 6 and UNN clusters 7, 9 and 10 are mainly restricted to the Clarence-Moreton Basin. All clusters are generally characterized by low loadings (or cluster means) for all six elements relative to the other lithological units. 
Catchments allocated to k-means cluster 1 are more dominant in the centre of the basin and the element loadings indicate the highly weathered and transported nature of the stream sediments. The UNN clusters display subtle variations in the composition of sediments from the edge to the centre of the basin with a significant decrease in Ba, Rb, Sr and Na towards the basin centre. UNN cluster 7 is most strongly related to marine clastic units and coal measures along the western and southern edges of the basin. The mid to upper sequences of the Clarence-Moreton Basin, defined by UNN clusters 9 and 10, are typified by very low Ba, Na, Pb and Sr and a slight K enrichment. UNN cluster 9 also shows a spatial association with the northeastern part of the accretionary complex (<xref ref-type="fig" rid="F8">Fig. 8</xref>).</p></sec></sec><sec><title>The base metal mineralization suite</title><p>The k-means and UNN cluster structures, and normalized values for the UNN anomalies, are presented in <xref ref-type="fig" rid="F9">Fig. 9</xref> and spatial patterns in <xref ref-type="fig" rid="F10">Figs 10</xref> and <xref ref-type="fig" rid="F11">11</xref>.</p><sec><title>Spatial distributions</title><p>UNN cluster 1 is generally confined to the Gordonbrook Serpentinite (near Baryugil) and clusters 2 and 5 to the Tertiary basalts and the Clarence River estuary (which acts as both a mechanical and chemical &lsquo;sink&rsquo; for most trace metals). These correspond with k-means cluster 6 and have high Cu means and low Pb means. A number of isolated catchments defined by UNN cluster 1 occur on a N&ndash;NW trend and included the Gordonbrook Serpentinite and the Gundahl Complex.</p><p>UNN clusters 6 to 10 and k-means clusters 2 and 5 represent the accretionary complex, with over 40% of allocated catchments located within metasediments and metavolcanics. This group of clusters exhibit similar loadings for Cu, Pb and Zn. 
The k-means clusters 2 and 5 are interspersed and would appear to relate to variations in Pb concentrations. Variations in the similarity between the sediment concentrations for Cu, Pb and Zn and the UNN clusters represent both subtle changes in lithological compositions within the complex and proximity to the main belt of base metal mineralization that extends from Jackadgery to Drake. The group of clusters covers virtually all the accretionary wedge units and some of the rhyodacitic units near Drake. The exception to this main grouping of clusters is cluster 8, which appears to delineate some of the intrusive units either marginal to or intruding the accretionary complex.</p><p>UNN cluster 3 and k-means cluster 3 have very low Cu and Zn values, moderate to low Pb values, and define the granites along the western edge of the region and the western side of the Clarence-Moreton Basin. UNN cluster 4 and k-means cluster 1 are characterized by low Pb, and very low Zn and Cu loadings or means. They are difficult to assign to any specific lithological group and their distribution appears to be structurally or topographically controlled. The most obvious association is with the main drainage of the upper Clarence River and coincidence with the western margin of the Clarence-Moreton Basin (57% of k-means cluster 1 catchments are located in areas of alluvium).</p></sec><sec><title>Anomaly detection</title><p>The UNN detected ten common clusters and one composite group of 26 anomalous catchments. The distribution of the anomalous catchments, and the medium and small sized mineral deposits recorded in the area are shown in <xref ref-type="fig" rid="F12">Fig. 12</xref>. The relative performance of (i) k-means clustering on log-transformed data (ii) k-means clustering on Box-Cox transformed data and (iii) UNN clustering on raw data is summarized in <xref ref-type="fig" rid="F13">Fig. 
13</xref>.</p><p>For log-transformed data, the k-means clustering identified nine anomalies (clusters with &lt;15 observations out of a total pool of 1670 observations), whereas 19 anomalies were detected for the transformed data. By contrast, UNN detected 26 anomalies of which 17 were also identified by the k-means clustering. In nearly all cases, the observations deemed anomalous by both methods were also detectable on the basis of their individual Cu, Pb or Zn values. The nine anomalies that were only identified by UNN include a series of six observations with low Cu, Zn and Pb values, together with a further three contained within the main k-means clusters. Conversely, two samples were determined to be anomalous under k-means clustering but not UNN. Most of the observations within three units of the joint Cu&ndash;Zn mean were only identified as being anomalous on the basis of their combined Cu&ndash;Pb&ndash;Zn signature.</p><p>The UNN anomalies fall into three distinct groups (<xref ref-type="fig" rid="F10">Fig. 10</xref>); (1) high-value multivariate anomalies, (2) low-value multivariate anomalies, and (3) anomalies associated with individual variables. In group 1, anomalies U/K<sub>4</sub>, U/K<sub>5a&ndash;g</sub> and U/K<sub>17</sub> are characterized by extremely high values for all three elements. U/K<sub>4</sub> and U/K<sub>5</sub> incorporate catchments containing known base metal and Au mineral deposits. U/K<sub>5</sub> incorporates a series of seven catchments that rim the southern side of a plateau, which contains the Timbarra Gold Mine and a number of adjacent base metal mineralization zones. Anomalies U/K<sub>6,7,9,11,12</sub> contain at least two variables with highly elevated values. U/K<sub>7</sub> includes catchments surrounding the major copper mineralization in the area at the Cangai Copper Mine and other smaller workings. 
Anomalies U/K<sub>9&ndash;12</sub> and U/K<sub>17</sub> contain no known mineralization but are found within the vicinity of Tertiary basalts, south of the axis of accretionary complex base metal deposits that extend from Dorrigo to Glencoe.</p><p>The low-value multivariate anomalies, U/K<sub>2</sub> and U/K<sub>18</sub>, coincide with the basal units of the Clarence-Moreton Basin but differ from the main cluster delineating these units by the absence of Cu and the strongly elevated Pb values. Anomalies U<sub>14&ndash;16</sub> all contain anomalously low metal contents and were from areas of low topographic relief in the Clarence-Moreton Basin. They are not identified as anomalous by the k-means clustering.</p><p>The remaining anomalies are &lsquo;non-outliers&rsquo;. Anomaly U<sub>1</sub> is characterized by intermediate Cu and Zn values and low Pb. The underlying lithology for the catchment is wholly Tertiary basalts. The main difference between the Tertiary Basalt cluster and this anomalous catchment is the strong Pb depletion. U<sub>3</sub> is characterized by moderate to low Cu and Zn values and low Pb. This catchment is located between the Timbarra and Drake mineral fields, which contain a number of base metal occurrences. U/K<sub>6</sub> is within a catchment containing the small Snapes Lode deposit (an Ag&ndash;Au&ndash;Pb&ndash;Zn occurrence with no recorded production history) and U<sub>8</sub> coincides with a leucocratic granitic stock and associated Bi&ndash;Mo mineralization. The anomalous character of the catchment (although not due to high values) may be due to the association with the known mineralization.</p><p>Anomaly U/K<sub>9</sub> is characterized by moderate to high Pb and Cu and moderate Zn values. Two small known Cu mines existed in the vicinity of the catchment. 
U<sub>10</sub> characterized by moderate to high values for Cu, Pb and Zn is not found to be associated with any recorded mineralization, the catchment being contained wholly within Tertiary basalts. U<sub>13</sub> and K<sub>21</sub> are characterized by moderate Pb and Zn values and low Cu, with U<sub>13</sub> in the main catchment for the town of Grafton. The elevated Pb and Zn values, compared to the surrounding catchments, are possibly due to the urban and localized industrial development associated with the town at the head of the estuarine portion of the Clarence River. Anomalies U<sub>8,10,13,19</sub> are unique to the UNN clustering, and anomalies K<sub>20</sub> and K<sub>21</sub> to the k-means clustering.</p></sec></sec></sec><sec><title>DISCUSSION</title><p>Two key aspects of processing of regional geochemical datasets are (i) establishing common patterns (between variables or samples) that may be related to variations in geological characteristics or geochemical processes operating in a region; and (ii) identifying anomalies. In most cases, such objectives require a reduction in the dimensionality of the dataset.</p><p>Traditional methods of dimension reduction, such as factor analysis, or multivariate sample clustering and anomaly detection, such as k-means clustering, generally require both pre-processing of datasets and some degree of <italic toggle="yes">a priori</italic> knowledge of the structure of the data (i.e. the number of factors or clusters to extract). For k-means clustering this may require some degree of trial-and-error. Whereas part of the function of UNN clustering is to establish objectively the number of clusters present in the dataset, there still remains the question as to whether the number of clusters formed is &lsquo;optimum&rsquo;. 
In this study, the question of performance has been resolved simply in terms of the number of anomalies detected by the two methods and the extent to which the common clusters relate to the geology and topography of the region.</p><p>For the transformed Cu&ndash;Pb&ndash;Zn data, varying the number of clusters and observing the subsequent grouping of samples indicated a maximum of five main clusters (each with &gt;200 observations) dominated by the main lithological groups and a set of 17 observations in small clusters (anomalies). Beyond a certain value for <italic toggle="yes">k</italic>, an increase in clusters served only to subdivide the anomalies into smaller clusters and progressively subdivide the larger (common) clusters without suitable criteria to establish a limit on the number of clusters. For the feldspar-associated elements, variation in the value for <italic toggle="yes">k</italic> had a similar effect. Under UNN clustering, transformation of the data (such as log-transformation) or exclusion of anomalous observations, prior to modelling, had no significant effect on the number of common clusters or the number of anomalies.</p><p>As noted by <xref ref-type="bibr" rid="B17">Kaski (1997)</xref>, although the k-means clustering algorithm and SOM are very closely related, the number of k-means clusters initially chosen should accord with the number of (real) clusters present in the dataset, whereas under SOM procedures this is not necessary (under supervised neural networks the number of reference vectors can be chosen to be much larger than the number of actual clusters in the data). The direct result of UNN clustering and self-organizing of data is to assign all clusters with few observations to a multicharacter anomalous group. 
It is the lack of commonality with the common patterns that defines observations as being anomalous.</p><p>Comparing the performance of the two methods, both k-means clustering (on transformed data) and UNN self-organizing of the data (without removal of outliers or data transformation), were clearly able to define the common patterns within the whole population. These common patterns were consistent with the geology of the catchments. Both methods also identified a number of anomalous catchments, using Cu&ndash;Pb&ndash;Zn, that contained known base metal mineralization.</p><p>UNN was able to show greater subtlety in geochemical patterns than the classical k-means technique. This is not just a function of the different approach to the linking of observations to clusters (degree of similarity for UNN; absolute grouping for k-means clustering), but may relate to restriction of the shape of k-means clusters to hyperellipsoids. A greater equivalence between the output from the UNN and k-means clustering could be achieved by determining the probability of a sample being allocated to a given k-means cluster by relating the distance of an observation to the k-means cluster centroid using a measurement such as the Mahalanobis D<sup>2</sup>.</p><p>Whereas both methods could identify outlier anomalies (observations with one or more elements displaying high values), only the UNN could identify non-outlier anomalies (observations within the common spread of values for each element, but where there is a natural hiatus in data continuity in the original n-dimensional space). The UNN anomalies could be more easily subdivided into groups displaying some degree of similarity.</p><p>In terms of utility, the UNN technique tested was superior to the k-means clustering. The actual analytical time taken to define clusters and anomalous catchments from presentation of the raw data to the UNN was in the order of minutes. 
The k-means clustering required some hours of data manipulation and pre-processing prior to running the models.</p></sec><sec><title>CONCLUSION</title><p>The application of the unsupervised neural network described in this study demonstrates the effectiveness of a non-linear, non-parametric approach to the analysis of geochemical data. Unlike conventional clustering methods, the patterns in the data established using UNN appear to be relatively unaffected by the presence of outlying values and non-normal populations, so commonly exhibited by geochemical datasets such as the one examined in this study. The k-means clustering produced similar results to the UNN clustering; however, it proved less efficient at identifying anomalous geochemical signatures in the stream sediment data.</p><p>The UNN has proved capable of extracting both common geochemical patterns and the subset of anomalous patterns by reducing the complex higher dimensionality raw data layers into lower dimensionality ordered groups. Defining complex relationships within the data, using UNN, was efficiently performed without any <italic toggle="yes">a priori</italic> knowledge of variable characteristics, relationships between variables, or spatial correlations between samples.</p><p>The study demonstrates unsupervised neural networks to be a viable alternative to conventional statistical approaches in the modelling of multivariate geochemical data.</p></sec></body><back><ack>
+   <p>The authors wish to acknowledge R. Barnes, K. McDonald and the NSW Department of Mineral Resources for the supply of the digital geology and mineral deposit datasets. Surtec Pty. Ltd, J. Yong, E. Fellenberg and I. Wainwright provided valuable assistance in the project. Critique of the neural methods was provided by T. Gedeon. The reviewers are also thanked for their comments.</p>
+  </ack><ref-list>
+   <ref id="B1">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">BARNES</named-content>, <named-content content-type="arthw-fnm">R. G.</named-content></named-content>, BROWNLOW, J, ALDER, D. &amp; 6 others <year>1996</year>. Mineral resources. <italic toggle="yes">In</italic>: <italic toggle="yes">Regional Report of Upper North East New South Wales, Volume 5; Socio-economic Values</italic>. Resource and Conservation Assessment Council, Sydney, 73&ndash;154.</mixed-citation>
+   </ref>
+   <ref id="B2">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">BENEDIKTSSON</named-content>, <named-content content-type="arthw-fnm">J. A.</named-content></named-content>, SWAIN, P. H. &amp; ERSOY, O. K. <year>1990</year>. Neural network approaches versus statistical methods in classification of multisource remote sensing data. <source><italic toggle="yes">IEEE Transactions on Geoscience and Remote Sensing</italic></source> <volume><italic toggle="yes">4</italic></volume>, <fpage>540</fpage>-552.</mixed-citation>
+   </ref>
+   <ref id="B3">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">BONHAM-CARTER</named-content>, <named-content content-type="arthw-fnm">G. F.</named-content></named-content> <year>1994</year>. <italic toggle="yes">Geographic Information Systems for Geoscientists: Modelling with GIS</italic>. Pergamon Publishing, Oxford.</mixed-citation>
+   </ref>
+   <ref id="B4">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">BOX</named-content>, <named-content content-type="arthw-fnm">G. E. P.</named-content></named-content> &amp; COX, D. R. <year>1964</year>. An analysis of transformations. <source><italic toggle="yes">Journal of the Royal Statistical Society, Series B</italic></source> <volume><italic toggle="yes">26</italic></volume>, <fpage>211</fpage>-243.</mixed-citation>
+   </ref>
+   <ref id="B5">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">CHENG</named-content>, <named-content content-type="arthw-fnm">Q.</named-content></named-content>, AGTERBERG, F. P. &amp; BONHAM-CARTER, G. F. <year>1996</year>. A spatial analysis method for geochemical anomaly separation. <source><italic toggle="yes">Journal of Geochemical Exploration</italic></source> <volume><italic toggle="yes">56</italic></volume>, <fpage>183</fpage>-195.</mixed-citation>
+   </ref>
+   <ref id="B6">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">CHORK</named-content>, <named-content content-type="arthw-fnm">C. Y.</named-content></named-content> &amp; ROUSSEEUW, P. J. <year>1992</year>. Integrating a high-breakdown option into discriminant analysis in exploration geochemistry. <source><italic toggle="yes">Journal of Geochemical Exploration</italic></source> <volume><italic toggle="yes">43</italic></volume>, <fpage>191</fpage>-203.</mixed-citation>
+   </ref>
+   <ref id="B7">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">CHORK</named-content>, <named-content content-type="arthw-fnm">C. Y.</named-content></named-content> <year>1990</year>. Unmasking multivariate anomalous observations in exploration geochemical data from sheeted vein tin mineralization near Emmaville, N.S.W, Australia. <source><italic toggle="yes">Journal of Geochemical Exploration</italic></source> <volume><italic toggle="yes">37</italic></volume>, <fpage>205</fpage>-223.</mixed-citation>
+   </ref>
+   <ref id="B8">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">CLARE</named-content>, <named-content content-type="arthw-fnm">A. P.</named-content></named-content> &amp; COHEN, D. R. <year>1999</year>. An unsupervised neural network approach to the analysis of multi-element stream sediment data, northeastern NSW, Australia. <italic toggle="yes">In</italic>: <italic toggle="yes">Proceedings of the 19th International Geochemical Exploration Symposium</italic>, Vancouver, 12&ndash;16 April, 1999.</mixed-citation>
+   </ref>
+   <ref id="B9">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">COHEN</named-content>, <named-content content-type="arthw-fnm">D. R.</named-content></named-content>, RUTHERFORD, N. F. &amp; GARNETT, D. L. <year>1995</year>. <italic toggle="yes">A Geochemical Survey of the Upper Northeast Region, New South Wales</italic>. NSW Department of Mineral Resources, Sydney.</mixed-citation>
+   </ref>
+   <ref id="B10">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">COHEN</named-content>, <named-content content-type="arthw-fnm">D. R.</named-content></named-content>, SILVA-SANTISTEBAN, C. M, RUTHERFORD, N. F, GARNETT, D. L. &amp; WALDRON, H. M. <year>1999</year>. Comparison of vegetation and stream sediment geochemical patterns in the north eastern region of New South Wales. <source><italic toggle="yes">Journal of Geochemical Exploration</italic></source> <volume><italic toggle="yes">66</italic></volume>, <fpage>469</fpage>-489.</mixed-citation>
+   </ref>
+   <ref id="B11">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">DAYHOFF</named-content>, <named-content content-type="arthw-fnm">J. E.</named-content></named-content> <year>1990</year>. <italic toggle="yes">Neural Network Architectures: An Introduction</italic>. Van Nostrand, New York.</mixed-citation>
+   </ref>
+   <ref id="B12">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">FOODY</named-content>, <named-content content-type="arthw-fnm">G. M.</named-content></named-content> <year>1997</year>. Fully fuzzy supervised classification of land cover from remotely sensed imagery with an artificial neural network. <source><italic toggle="yes">Neural Computing and Applications</italic></source> <volume><italic toggle="yes">5</italic></volume>, <fpage>238</fpage>-247.</mixed-citation>
+   </ref>
+   <ref id="B13">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">GARRETT</named-content>, <named-content content-type="arthw-fnm">R. G.</named-content></named-content> <year>1989</year>. The chi-square plot: a tool for multivariate outlier recognition. <source><italic toggle="yes">Journal of Geochemical Exploration</italic></source> <volume><italic toggle="yes">32</italic></volume>, <fpage>319</fpage>-341.</mixed-citation>
+   </ref>
+   <ref id="B14">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">GILLIGAN</named-content>, <named-content content-type="arthw-fnm">L. B.</named-content></named-content> &amp; BARNES, R. G. <year>1990</year>. New England Fold Belt, New South Wales &ndash; Regional Geology and Mineralisation. <italic toggle="yes">In</italic>: Hughes, F.E. (ed). <italic toggle="yes">Geology of the Mineral Deposits of Australia and Papua New Guinea</italic>. Australian Institute of Mining and Metallurgy. Monograph, <bold>14</bold>, <fpage>1417</fpage>-1423.</mixed-citation>
+   </ref>
+   <ref id="B15">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">HOWARTH</named-content>, <named-content content-type="arthw-fnm">R. J.</named-content></named-content> <year>1983</year>. Statistics and Data Analysis in Geochemical Prospecting. <italic toggle="yes">In</italic>: Govett, G.J.S. (ed). <italic toggle="yes">Handbook of Exploration Geochemistry, Volume 2</italic>. Elsevier, Amsterdam.</mixed-citation>
+   </ref>
+   <ref id="B16">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">KAMGAR-PARSI</named-content>, <named-content content-type="arthw-fnm">B.</named-content></named-content>, GUALTIERI, J. A. &amp; DEVANEY, J. E. <year>1990</year>. Clustering with neural networks. <source><italic toggle="yes">Biological Cybernetics</italic></source> <volume><italic toggle="yes">63</italic></volume>, <fpage>201</fpage>-208.</mixed-citation>
+   </ref>
+   <ref id="B17">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">KASKI</named-content>, <named-content content-type="arthw-fnm">S.</named-content></named-content> <year>1997</year>. <italic toggle="yes">Data exploration using self-organizing maps</italic>. Acta Polytechnica Scandinavica, Mathematics, Computing and Management in Engineering Series, <bold>82</bold>.</mixed-citation>
+   </ref>
+   <ref id="B18">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">KOHONEN</named-content>, <named-content content-type="arthw-fnm">T.</named-content></named-content> <year>1989</year>. <italic toggle="yes">Self-organisation and Associative Memory</italic>. Springer-Verlag, Berlin.</mixed-citation>
+   </ref>
+   <ref id="B19">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">KOHONEN</named-content>, <named-content content-type="arthw-fnm">T.</named-content></named-content> <year>1990</year>. The self-organizing map. <source><italic toggle="yes">Proceedings of the IEEE</italic></source> <volume><italic toggle="yes">78</italic></volume>, <fpage>1464</fpage>-1480.</mixed-citation>
+   </ref>
+   <ref id="B20">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">KOHONEN</named-content>, <named-content content-type="arthw-fnm">T.</named-content></named-content> <year>1995</year>. <italic toggle="yes">Self-Organizing Maps</italic>. Springer-Verlag, Berlin.</mixed-citation>
+   </ref>
+   <ref id="B21">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">MACQUEEN</named-content>, <named-content content-type="arthw-fnm">J.</named-content></named-content> <year>1967</year>. Some methods for classification and analysis of multivariate observations. <italic toggle="yes">In</italic>: Le Cam, L.M. &amp; Neyman, J. (eds). <italic toggle="yes">Proceedings of the Fifth Berkeley Symposium on Mathematical Statistics and Probability, Volume 1</italic>. University of California Press, Berkeley, <fpage>281</fpage>-297.</mixed-citation>
+   </ref>
+   <ref id="B22">
+    <mixed-citation><named-content content-type="arthw-firstauthor"><named-content content-type="arthw-snm">MANCEY</named-content>, <named-content content-type="arthw-fnm">S. J.</named-content></named-content> &amp; HOWARTH, R. J. <year>1980</year>. Power-transform removal of skewness from large data sets. <source><italic toggle="yes">Institute for Mining and Metallurgy Transactions, Section B</italic></source> <volume><italic toggle="yes">89</italic></volume>, <fpage>92</fpage>-97.</mixed-citation>
+   </ref>
+  </ref-list></back><floats-group><fig id="F1" position="float" orientation="portrait"><label>Fig. 1.</label><caption><p>Combination of vector <italic toggle="yes">x</italic><sub>1</sub> .&thinsp;.&thinsp;. <italic toggle="yes">x</italic><sub><italic toggle="yes">n</italic></sub> representing the multivariate signature of a cell from the original dataset, with the weight vector <italic toggle="yes">w</italic><sub><italic toggle="yes">i</italic>1</sub> .&thinsp;.&thinsp;. <italic toggle="yes">w</italic><sub><italic toggle="yes">in</italic></sub> for the <italic toggle="yes">i</italic><sup>th</sup> unit of the competitive layer.</p></caption><graphic xlink:href="119f1.jpg" position="float" orientation="portrait" /></fig><fig id="F2" position="float" orientation="portrait"><label>Fig. 2.</label><caption><p>UNN clustering of objects in <italic toggle="yes">n</italic>-dimensional space. Anomalies may be defined as clusters with very few observations.</p></caption><graphic xlink:href="119f2.jpg" position="float" orientation="portrait" /></fig><fig id="F3" position="float" orientation="portrait"><label>Fig. 3.</label><caption><p>Simplified geological map of the study area (after <xref ref-type="bibr" rid="B1">Barnes <italic toggle="yes">et al.</italic> 1996</xref>).</p></caption><graphic xlink:href="119f3.jpg" position="float" orientation="portrait" /></fig><fig id="F4" position="float" orientation="portrait"><label>Fig. 4.</label><caption><p>Comparison of cumulative percentages of observations clustered by k-means clustering of Cu&ndash;Pb&ndash;Zn under different values of <italic toggle="yes">k</italic>. Variables have been Box-Cox transformed.</p></caption><graphic xlink:href="119f4.jpg" position="float" orientation="portrait" /></fig><fig id="F5" position="float" orientation="portrait"><label>Fig. 5.</label><caption><p>Projection of k-means clusters onto Cu&ndash;Zn space for the base metal suite for six-, eight- and ten-cluster models. 
Ellipses represent the cluster &mu;&plusmn;1&sigma; for the transformed Cu and Zn variables.</p></caption><graphic xlink:href="119f5.jpg" position="float" orientation="portrait" /></fig><fig id="F6" position="float" orientation="portrait"><label>Fig. 6.</label><caption><p>Spread of values (&mu;&plusmn;1&sigma;) for the feldspar association under an eight-group k-means clustering and the normalized values for the ten common UNN clusters.</p></caption><graphic xlink:href="119f6.jpg" position="float" orientation="portrait" /></fig><fig id="F7" position="float" orientation="portrait"><label>Fig. 7.</label><caption><p>Spatial distribution of catchment group allocation under an eight-group k-means clustering of the feldspar-associated elements (Ba, Na, K, Sr, Eu, Pb).</p></caption><graphic xlink:href="119f7.jpg" position="float" orientation="portrait" /></fig><fig id="F8" position="float" orientation="portrait"><label>Fig. 8.</label><caption><p>Similarity index for UNN clusters 1, 2, 8 and 9 for the feldspar-associated elements (Ba, Na, K, Sr, Eu, Pb).</p></caption><graphic xlink:href="119f8.jpg" position="float" orientation="portrait" /></fig><fig id="F9" position="float" orientation="portrait"><label>Fig. 9.</label><caption><p>(<bold>a</bold>) Spread of values (&mu;&plusmn;1&sigma;) for elements within a 10-group k-means clustering of Cu&ndash;Pb&ndash;Zn; (<bold>b</bold>) Normalized values for the ten common UNN clusters of Cu&ndash;Pb&ndash;Zn and normalized values for the 16 UNN anomalies. 
Group A is anomalies in catchments with known mineralization, group B in catchments with no known significant mineralization and group C is a small set of observations with very low Cu+Zn&plusmn;Pb values.</p></caption><graphic xlink:href="119f9.jpg" position="float" orientation="portrait" /></fig><table-wrap id="T1" position="float" orientation="portrait"><label>Table 1.</label><caption><p>Allocation of k-means and UNN clusters to lithological associations for the feldspar (Ba&ndash;Eu&ndash;K&ndash;Na&ndash;Sr&ndash;Pb) and Cu&ndash;Pb&ndash;Zn associations</p></caption><table><tbody><tr><td colspan="1" rowspan="1" align="left" valign="top">&ensp;<hr /></td><td colspan="2" rowspan="1" align="center" valign="top">Cluster<hr /></td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top">Association<hr /></td><td colspan="1" rowspan="1" align="center" valign="top"><italic toggle="yes">k-means</italic><hr /></td><td colspan="1" rowspan="1" align="center" valign="top"><italic toggle="yes">UNN</italic><hr /></td><td colspan="1" rowspan="1" align="center" valign="top">Lithological association<hr /></td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top">Feldspar</td><td colspan="1" rowspan="1" align="center" valign="top">1, 6</td><td colspan="1" rowspan="1" align="center" valign="top">7, 9, 10</td><td colspan="1" rowspan="1" align="left" valign="top">Clarence-Moreton Basin</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">2</td><td colspan="1" rowspan="1" align="center" valign="top">5, 8</td><td colspan="1" rowspan="1" align="left" valign="top">Tertiary basalts and ultramafics</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">3, 4</td><td colspan="1" rowspan="1" align="center" valign="top">2</td><td colspan="1" rowspan="1" align="left" valign="top">Granites</td></tr><tr><td colspan="1" rowspan="1" 
align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">5</td><td colspan="1" rowspan="1" align="center" valign="top">1, 3, 4</td><td colspan="1" rowspan="1" align="left" valign="top">Metasedimentary and metavolcanic rocks (accretionary complex)</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">7, 8</td><td colspan="1" rowspan="1" align="center" valign="top">Remainder</td><td colspan="1" rowspan="1" align="left" valign="top">Anomalies</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="char" char="." valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">6</td><td colspan="1" rowspan="1" align="left" valign="top">None</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top">Cu&ndash;Pb&ndash;Zn</td><td colspan="1" rowspan="1" align="center" valign="top">3</td><td colspan="1" rowspan="1" align="center" valign="top">3</td><td colspan="1" rowspan="1" align="left" valign="top">Granites and basal Clarence-Moreton Basin</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">6</td><td colspan="1" rowspan="1" align="center" valign="top">2, 5</td><td colspan="1" rowspan="1" align="left" valign="top">Tertiary basalts and Clarence River Estuary</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">6</td><td colspan="1" rowspan="1" align="center" valign="top">1</td><td colspan="1" rowspan="1" align="left" valign="top">Serpentinite and meta-basic volcanics</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">5&plusmn;2</td><td colspan="1" rowspan="1" align="center" valign="top">6 to 10</td><td colspan="1" rowspan="1" align="left" valign="top">Accretionary complex</td></tr><tr><td colspan="1" 
rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">1</td><td colspan="1" rowspan="1" align="center" valign="top">4</td><td colspan="1" rowspan="1" align="left" valign="top">Western Clarence River System</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">4, 7 to 10</td><td colspan="1" rowspan="1" align="center" valign="top">Remainder</td><td colspan="1" rowspan="1" align="left" valign="top">Anomalies</td></tr></tbody></table></table-wrap><table-wrap id="T2" position="float" orientation="portrait"><label>Table 2.</label><caption><p>Cross tabulation of the percent distribution for each k-means cluster across the five main catchment geological groups for (<bold>a</bold>) feldspar association (Ba&ndash;Eu&ndash;K&ndash;Na&ndash;Pb&ndash;Sr) under an eight-cluster model and (<bold>b</bold>) Cu&ndash;Pb&ndash;Zn suite under a ten-cluster model. The anomalies represent the summation of clusters containing &lt;2% of the total number of catchments</p></caption><table><tbody><tr><td colspan="1" rowspan="1" align="left" valign="top">&ensp;<hr /></td><td colspan="1" rowspan="1" align="char" char="." valign="top">&ensp;</td><td colspan="2" rowspan="1" align="center" valign="top">Geological Group<hr /></td></tr><tr><td colspan="1" rowspan="1" align="char" char="." valign="top">&ensp;</td><td colspan="1" rowspan="1" align="center" valign="top">Mafics and ultramafics<hr /></td><td colspan="1" rowspan="1" align="center" valign="top">Granites and volcanics<hr /></td><td colspan="1" rowspan="1" align="center" valign="top">Metasediments<hr /></td><td colspan="1" rowspan="1" align="center" valign="top">Alluvium<hr /></td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top">&ensp;<hr /></td><td colspan="1" rowspan="1" align="center" valign="top">Total catchments<hr /></td><td colspan="1" rowspan="1" align="char" char="." 
valign="top">8.7<hr /></td><td colspan="1" rowspan="1" align="char" char="." valign="top">19.1<hr /></td><td colspan="1" rowspan="1" align="char" char="." valign="top">34.1<hr /></td><td colspan="1" rowspan="1" align="char" char="." valign="top">38.1<hr /></td></tr><tr><td colspan="6" rowspan="1" align="left" valign="top">(<bold>a</bold>) Feldspar association</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top">&emsp;Cluster</td><td colspan="1" rowspan="1" align="center" valign="top">1</td><td colspan="1" rowspan="1" align="char" char="." valign="top">4.3</td><td colspan="1" rowspan="1" align="char" char="." valign="top">1.1</td><td colspan="1" rowspan="1" align="char" char="." valign="top">28.0</td><td colspan="1" rowspan="1" align="char" char="." valign="top">66.7</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">2</td><td colspan="1" rowspan="1" align="char" char="." valign="top">30.4</td><td colspan="1" rowspan="1" align="char" char="." valign="top">14.6</td><td colspan="1" rowspan="1" align="char" char="." valign="top">19.6</td><td colspan="1" rowspan="1" align="char" char="." valign="top">35.4</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">3</td><td colspan="1" rowspan="1" align="char" char="." valign="top">1.0</td><td colspan="1" rowspan="1" align="char" char="." valign="top">58.1</td><td colspan="1" rowspan="1" align="char" char="." valign="top">4.8</td><td colspan="1" rowspan="1" align="char" char="." valign="top">36.2</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">4</td><td colspan="1" rowspan="1" align="char" char="." valign="top">2.6</td><td colspan="1" rowspan="1" align="char" char="." valign="top">37.6</td><td colspan="1" rowspan="1" align="char" char="." 
valign="top">30.7</td><td colspan="1" rowspan="1" align="char" char="." valign="top">29.1</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">5</td><td colspan="1" rowspan="1" align="char" char="." valign="top">0.4</td><td colspan="1" rowspan="1" align="char" char="." valign="top">8.8</td><td colspan="1" rowspan="1" align="char" char="." valign="top">63.8</td><td colspan="1" rowspan="1" align="char" char="." valign="top">27.1</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">6</td><td colspan="1" rowspan="1" align="char" char="." valign="top">13.6</td><td colspan="1" rowspan="1" align="char" char="." valign="top">5.8</td><td colspan="1" rowspan="1" align="char" char="." valign="top">30.4</td><td colspan="1" rowspan="1" align="char" char="." valign="top">50.3</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top">&emsp;Anomalies</td><td colspan="1" rowspan="1" align="char" char="." valign="top" /><td colspan="1" rowspan="1" align="char" char="." valign="top">8.7</td><td colspan="1" rowspan="1" align="char" char="." valign="top">17.4</td><td colspan="1" rowspan="1" align="char" char="." valign="top">43.5</td><td colspan="1" rowspan="1" align="char" char="." valign="top">30.4</td></tr><tr><td colspan="6" rowspan="1" align="left" valign="top">(<bold>b</bold>) Cu&ndash;Pb&ndash;Zn association</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top">&emsp;Cluster</td><td colspan="1" rowspan="1" align="center" valign="top">1</td><td colspan="1" rowspan="1" align="char" char="." valign="top">6.1</td><td colspan="1" rowspan="1" align="char" char="." valign="top">8.2</td><td colspan="1" rowspan="1" align="char" char="." valign="top">28.6</td><td colspan="1" rowspan="1" align="char" char="." 
valign="top">57.1</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">2</td><td colspan="1" rowspan="1" align="char" char="." valign="top">5.9</td><td colspan="1" rowspan="1" align="char" char="." valign="top">15.6</td><td colspan="1" rowspan="1" align="char" char="." valign="top">42.4</td><td colspan="1" rowspan="1" align="char" char="." valign="top">36.2</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">3</td><td colspan="1" rowspan="1" align="char" char="." valign="top">0.6</td><td colspan="1" rowspan="1" align="char" char="." valign="top">53.9</td><td colspan="1" rowspan="1" align="char" char="." valign="top">10.7</td><td colspan="1" rowspan="1" align="char" char="." valign="top">34.8</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">5</td><td colspan="1" rowspan="1" align="char" char="." valign="top">4.5</td><td colspan="1" rowspan="1" align="char" char="." valign="top">10.7</td><td colspan="1" rowspan="1" align="char" char="." valign="top">51.7</td><td colspan="1" rowspan="1" align="char" char="." valign="top">33.1</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top" /><td colspan="1" rowspan="1" align="center" valign="top">6</td><td colspan="1" rowspan="1" align="char" char="." valign="top">32.0</td><td colspan="1" rowspan="1" align="char" char="." valign="top">7.5</td><td colspan="1" rowspan="1" align="char" char="." valign="top">26.5</td><td colspan="1" rowspan="1" align="char" char="." valign="top">34.0</td></tr><tr><td colspan="1" rowspan="1" align="left" valign="top">&emsp;Anomalies</td><td colspan="1" rowspan="1" align="char" char="." valign="top" /><td colspan="1" rowspan="1" align="char" char="." valign="top">18.2</td><td colspan="1" rowspan="1" align="char" char="." 
valign="top">9.1</td><td colspan="1" rowspan="1" align="char" char="." valign="top">45.5</td><td colspan="1" rowspan="1" align="char" char="." valign="top">27.3</td></tr></tbody></table></table-wrap><fig id="F10" position="float" orientation="portrait"><label>Fig. 10.</label><caption><p>Spatial distribution of catchment group allocation under a ten-group k-means clustering of Cu&ndash;Pb&ndash;Zn.</p></caption><graphic xlink:href="119f10.jpg" position="float" orientation="portrait" /></fig><fig id="F11" position="float" orientation="portrait"><label>Fig. 11.</label><caption><p>Similarity index for UNN clusters 1, 2, 3 and 6, of Cu&ndash;Pb&ndash;Zn.</p></caption><graphic xlink:href="119f11.jpg" position="float" orientation="portrait" /></fig><fig id="F12" position="float" orientation="portrait"><label>Fig. 12.</label><caption><p>Distribution of anomalous catchments derived from k-means clustering (K<sub>n</sub>) and UNN clustering (U<sub>n</sub>) of Cu&ndash;Pb&ndash;Zn, as well as the location of major occurrences of base metal mineralization.</p></caption><graphic xlink:href="119f12.jpg" position="float" orientation="portrait" /></fig><fig id="F13" position="float" orientation="portrait"><label>Fig. 13.</label><caption><p>(<bold>a</bold>) Plot of anomalous Cu&ndash;Pb&ndash;Zn clusters, determined using UNN clustering on raw data and k-means clustering of both log-transformed and Box-Cox transformed data. (<bold>b</bold>) Cross tabulations of background and anomalous samples identified by k-means and UNN clustering.</p></caption><graphic xlink:href="119f13.jpg" position="float" orientation="portrait" /></fig></floats-group>
+
+
+
+
+
+
+
+
+
+
+
+
+
+</article>
diff --git a/tests/stubdata/input/jats_indersci_url_ident.xml b/tests/stubdata/input/jats_indersci_url_ident.xml
new file mode 100644
index 0000000..d12095e
--- /dev/null
+++ b/tests/stubdata/input/jats_indersci_url_ident.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
+<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" xml:lang="en" dtd-version="1.0">
+<front>
+<journal-meta>
+<journal-id journal-id-type="publisher-id">ijogct</journal-id>
+<journal-title-group>
+<journal-title>International Journal of Oil, Gas and Coal Technology</journal-title>
+</journal-title-group>
+<issn pub-type="ppub">1753-3309</issn>
+<issn pub-type="epub">1753-3317</issn>
+<publisher>
+<publisher-name>Inderscience Publishers (IEL)</publisher-name>
+</publisher>
+</journal-meta>
+<article-meta>
+<article-id pub-id-type="url">https://www.inderscienceonline.com/doi/10.1504/IJOGCT.2024.139531</article-id>
+<title-group>
+<article-title>Simulation study on frictional resistance and influencing factors of flexible screen pipe tripping into horizontal wellbore</article-title>
+</title-group>
+<contrib-group>
+<contrib contrib-type="author"><string-name name-style="western"><given-names>Zhongzhi</given-names> <surname>Hu</surname></string-name><xref ref-type="aff" rid="A1"><sup>1</sup></xref><x xml:space="preserve">, </x></contrib>
+<contrib contrib-type="author"><string-name name-style="western"><given-names>Junliang</given-names> <surname>Li</surname></string-name><xref ref-type="aff" rid="A2"><sup>2</sup></xref><x xml:space="preserve">, </x></contrib>
+<contrib contrib-type="author"><string-name name-style="western"><given-names>Li</given-names> <surname>Wang</surname></string-name><xref ref-type="aff" rid="A3"><sup>3</sup></xref><x xml:space="preserve">, </x></contrib>
+<contrib contrib-type="author"><string-name name-style="western"><given-names>Jinbo</given-names> <surname>Wang</surname></string-name><xref ref-type="aff" rid="A4"><sup>4</sup></xref><x xml:space="preserve">, </x></contrib>
+<contrib contrib-type="author"><string-name name-style="western"><given-names>Xinyang</given-names> <surname>Liu</surname></string-name><xref ref-type="aff" rid="A5"><sup>5</sup></xref><x xml:space="preserve">, </x></contrib>
+<contrib contrib-type="author"><string-name name-style="western"><given-names>Yang</given-names> <surname>Kong</surname></string-name><xref ref-type="aff" rid="A6"><sup>6</sup></xref></contrib>
+<aff id="A1"><label><sup>1</sup></label>School of Mechanical Engineering, Sichuan University of Science &amp; Engineering, No.1 Baita Road, Yibin, Sichuan, 644002, China</aff>
+<aff id="A2"><label><sup>2</sup></label>Production Engineering Research Institute, Daqing Oilfield, CNPC, No. 9 Xibin Road, Daqing, Heilongjiang, 163712, China</aff>
+<aff id="A3"><label><sup>3</sup></label>Production Engineering Research Institute, Daqing Oilfield, CNPC, No. 9 Xibin Road, Daqing, Heilongjiang, 163712, China</aff>
+<aff id="A4"><label><sup>4</sup></label>Dongying Ruifeng Petroleum Technical Development Co., Ltd., No. 5 Juzhou Road, Dongying, Shandong, 257299, China</aff>
+<aff id="A5"><label><sup>5</sup></label>Bulk Material Transportation Equipment Business Unit, Chengdu Gongbei Intelligent Technology Co., Ltd., No. 7 Guanghua Street, Chengdu, Sichuan, 610011, China</aff>
+<aff id="A6"><label><sup>6</sup></label>Dongying Ruifeng Petroleum Technical Development Co., Ltd., No. 5 Juzhou Road, Dongying, Shandong, 257299, China</aff>
+</contrib-group>
+<pub-date pub-type="ppub">
+<year>2024</year>
+</pub-date>
+<pub-date pub-type="epub">
+<day>03</day>
+<month>7</month>
+<year>2024</year>
+</pub-date>
+<volume>36</volume>
+<issue>1</issue>
+<fpage>55</fpage>
+<lpage>76</lpage>
+<permissions>
+<copyright-statement>Copyright © 2024 Inderscience Enterprises Ltd.</copyright-statement>
+<copyright-year>2024</copyright-year>
+<license><license-p></license-p></license>
+</permissions>
+<self-uri content-type="pdf" xlink:href="ijogct.2024.139531.pdf"></self-uri>
+<abstract xml:lang="en">
+<p>Flexible sand control screens play a crucial role in the operation and economic benefits of ultra-short radius wells. We established a calculation model for the motion friction resistance of flexible screens and a criterion for contact between screen units and wellbore walls. Case analysis clarified the correlation between several key factors (e.g., maximum allowed swing angle, friction coefficient, insertion speed, and total length of screen units) and the frictional resistance of flexible screens. We utilised the overall distribution of contact point positions and the compressed distance between adjacent screen unit centroids to analyse the conversion mechanism of screen friction resistance from linear to nonlinear rapid accumulation. We also explored measures to improve the extension capacity of flexible screens. Our findings provide a basis for the optimisation design of flexible screen structures and the assessment of their maximum extension capacity. [Received: June 2, 2023; Accepted: February 13, 2024]</p>
+</abstract>
+<kwd-group xml:lang="en">
+<kwd>flexible screen</kwd><x xml:space="preserve">, </x>
+<kwd>sand control</kwd><x xml:space="preserve">, </x>
+<kwd>multi-body dynamics simulation</kwd><x xml:space="preserve">, </x>
+<kwd>frictional resistance</kwd>
+</kwd-group>
+<counts>
+<page-count count="21"></page-count>
+</counts>
+</article-meta>
+</front>
+</article>
diff --git a/tests/stubdata/output/jats_gsl_unkeyed_xref.json b/tests/stubdata/output/jats_gsl_unkeyed_xref.json
new file mode 100644
index 0000000..5d910f4
--- /dev/null
+++ b/tests/stubdata/output/jats_gsl_unkeyed_xref.json
@@ -0,0 +1,129 @@
+{
+  "abstract": {
+    "textEnglish": "Isolation of complex patterns of correlation between variables, association among samples and anomaly identification, through conventional parametric multivariate statistical procedures, may be obscured by the presence of multivariate outliers and non-normal variable distributions. Procedures such as k-means clustering generally require substantial data pre-processing. Unsupervised neural networks (UNN) have the capacity to cluster multivariate data, using a modified form of the standard unsupervised Kohonen self-organizing map that is non-linear, non-parametric, rapid and robust. The number of clusters into which samples are allocated is determined by the unsupervised neural network and is directly dependent upon the original input data.\nUNN and k-means clustering was performed on stream sediment geochemical data from 1670 sub-catchments in the northeast region of New South Wales. Both methods produced clusters for the feldspar-associated elements that were closely related to sub-catchment geology and topography. UNN clustering revealed more subtle variations within the major lithological groups. UNN clustering of CuPbZn produced ten main clusters and identified 26 anomalies, that were mainly from sub-catchments, containing significant base metal mineralization occurrences. K-means clustering of transformed CuPbZn yielded five major clusters and only 19 anomalies. Progressive increase in k from eight to 20 did not substantially alter the k-means classification of samples between common groups and anomalies. Some catchments identified only as anomalous by UNN clustering contain known base metal mineralization."
+  },
+  "authors": [
+    {
+      "affiliation": [
+        {
+          "affPubRaw": "Neural Mining Solutions, 1 Alfred Street, Sydney, NSW 2000, Australia"
+        }
+      ],
+      "name": {
+        "given_name": "A. P.",
+        "surname": "Clare"
+      }
+    },
+    {
+      "affiliation": [
+        {
+          "affPubRaw": "School of Geology, University of New South Wales, Sydney, NSW 2052, Australia"
+        }
+      ],
+      "attrib": {
+        "email": "d.cohen@unsw.edu.au"
+      },
+      "name": {
+        "given_name": "D. R.",
+        "surname": "Cohen"
+      }
+    }
+  ],
+  "copyright": {
+    "statement": "\u00a9 2001 AAG/The Geological Society of London",
+    "status": true
+  },
+  "keywords": [
+    {
+      "keyString": "exploration geochemistry",
+      "keySystem": "misc"
+    },
+    {
+      "keyString": "neural network",
+      "keySystem": "misc"
+    },
+    {
+      "keyString": "clustering",
+      "keySystem": "misc"
+    },
+    {
+      "keyString": "anomaly",
+      "keySystem": "misc"
+    },
+    {
+      "keyString": "New England",
+      "keySystem": "misc"
+    }
+  ],
+  "pagination": {
+    "firstPage": "119",
+    "lastPage": "134",
+    "pageRange": "119-134"
+  },
+  "persistentIDs": [
+    {
+      "DOI": "10.1144/geochem.1.2.119"
+    }
+  ],
+  "pubDate": {
+    "printDate": "2001-05-00"
+  },
+  "publication": {
+    "ISSN": [
+      {
+        "issnString": "1467-7873",
+        "pubtype": "ppub"
+      },
+      {
+        "issnString": "2041-4943",
+        "pubtype": "epub"
+      }
+    ],
+    "issueNum": "2",
+    "pubName": "Geochemistry: Exploration, Environment, Analysis",
+    "pubYear": "2001",
+    "publisher": "The Geological Society of London",
+    "volumeNum": "1"
+  },
+  "publisherIDs": [
+    {
+      "Identifier": "119",
+      "attribute": "publisher-id"
+    }
+  ],
+  "recordData": {
+    "createdTime": "",
+    "loadFormat": "JATS",
+    "loadLocation": "",
+    "loadType": "fromFile",
+    "parsedTime": "",
+    "recordOrigin": ""
+  },
+  "references": [
+    "<ref id=\"B1\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">BARNES</named-content>, <named-content content-type=\"arthw-fnm\">R. G.</named-content></named-content>, BROWNLOW, J, ALDER, D. &amp; 6 others <year>1996</year>. Mineral resources. <italic toggle=\"yes\">In</italic>: <italic toggle=\"yes\">Regional Report of Upper North East New South Wales, Volume 5; Socio-economic Values</italic>. Resource and Conservation Assessment Council, Sydney, 73154.</mixed-citation> </ref>",
+    "<ref id=\"B2\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">BENEDIKTSSON</named-content>, <named-content content-type=\"arthw-fnm\">J. A.</named-content></named-content>, SWAIN, P. H. &amp; ERSOY, O. K. <year>1990</year>. Neural network approaches versus statistical methods in classification of multisource remote sensing data. <source><italic toggle=\"yes\">IEEE Transactions on Geoscience and Remote Sensing</italic></source> <volume><italic toggle=\"yes\">4</italic></volume>, <fpage>540</fpage>-552.</mixed-citation> </ref>",
+    "<ref id=\"B3\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">BONHAM-CARTER</named-content>, <named-content content-type=\"arthw-fnm\">G. F.</named-content></named-content> <year>1994</year>. <italic toggle=\"yes\">Geographic Information Systems for Geoscientists: Modelling with GIS</italic>. Pergamon Publishing, Oxford.</mixed-citation> </ref>",
+    "<ref id=\"B4\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">BOX</named-content>, <named-content content-type=\"arthw-fnm\">G. E. P.</named-content></named-content> &amp; COX, D. R. <year>1964</year>. An analysis of transformations. <source><italic toggle=\"yes\">Journal of the Royal Statistical Society, Series B</italic></source> <volume><italic toggle=\"yes\">26</italic></volume>, <fpage>211</fpage>-243.</mixed-citation> </ref>",
+    "<ref id=\"B5\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">CHENG</named-content>, <named-content content-type=\"arthw-fnm\">Q.</named-content></named-content>, AGTERBERG, F. P. &amp; BONHAM-CARTER, G. F. <year>1996</year>. A spatial analysis method for geochemical anomaly separation. <source><italic toggle=\"yes\">Journal of Geochemical Exploration</italic></source> <volume><italic toggle=\"yes\">56</italic></volume>, <fpage>183</fpage>-195.</mixed-citation> </ref>",
+    "<ref id=\"B6\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">CHORK</named-content>, <named-content content-type=\"arthw-fnm\">C. Y.</named-content></named-content> &amp; ROUSSEEUW, P. J. <year>1992</year>. Integrating a high-breakdown option into discriminant analysis in exploration geochemistry. <source><italic toggle=\"yes\">Journal of Geochemical Exploration</italic></source> <volume><italic toggle=\"yes\">43</italic></volume>, <fpage>191</fpage>-203.</mixed-citation> </ref>",
+    "<ref id=\"B7\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">CHORK</named-content>, <named-content content-type=\"arthw-fnm\">C. Y.</named-content></named-content> <year>1990</year>. Unmasking multivariate anomalous observations in exploration geochemical data from sheeted vein tin mineralization near Emmaville, N.S.W, Australia. <source><italic toggle=\"yes\">Journal of Geochemical Exploration</italic></source> <volume><italic toggle=\"yes\">37</italic></volume>, <fpage>205</fpage>-223.</mixed-citation> </ref>",
+    "<ref id=\"B8\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">CLARE</named-content>, <named-content content-type=\"arthw-fnm\">A. P.</named-content></named-content> &amp; COHEN, D. R. <year>1999</year>. An unsupervised neural network approach to the analysis of multi-element stream sediment data, northeastern NSW, Australia. <italic toggle=\"yes\">In</italic>: <italic toggle=\"yes\">Proceedings of the 19th International Geochemical Exploration Symposium</italic>, Vancouver, 1216 April, 1999.</mixed-citation> </ref>",
+    "<ref id=\"B9\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">COHEN</named-content>, <named-content content-type=\"arthw-fnm\">D. R.</named-content></named-content>, RUTHERFORD, N. F. &amp; GARNETT, D. L. <year>1995</year>. <italic toggle=\"yes\">A Geochemical Survey of the Upper Northeast Region, New South Wales</italic>. NSW Department of Mineral Resources, Sydney.</mixed-citation> </ref>",
+    "<ref id=\"B10\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">COHEN</named-content>, <named-content content-type=\"arthw-fnm\">D. R.</named-content></named-content>, SILVA-SANTISTEBAN, C. M, RUTHERFORD, N. F, GARNETT, D. L. &amp; WALDRON, H. M. <year>1999</year>. Comparison of vegetation and stream sediment geochemical patterns in the north eastern region of New South Wales. <source><italic toggle=\"yes\">Journal of Geochemical Exploration</italic></source> <volume><italic toggle=\"yes\">66</italic></volume>, <fpage>469</fpage>-489.</mixed-citation> </ref>",
+    "<ref id=\"B11\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">DAYHOFF</named-content>, <named-content content-type=\"arthw-fnm\">J. E.</named-content></named-content> <year>1990</year>. <italic toggle=\"yes\">Neural Network Architectures: An Introduction</italic>. Van Nostrand, New York.</mixed-citation> </ref>",
+    "<ref id=\"B12\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">FOODY</named-content>, <named-content content-type=\"arthw-fnm\">G. M.</named-content></named-content> <year>1997</year>. Fully fuzzy supervised classification of land cover from remotely sensed imagery with an artificial neural network. <source><italic toggle=\"yes\">Neural Computing and Applications</italic></source> <volume><italic toggle=\"yes\">5</italic></volume>, <fpage>238</fpage>-247.</mixed-citation> </ref>",
+    "<ref id=\"B13\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">GARRETT</named-content>, <named-content content-type=\"arthw-fnm\">R. G.</named-content></named-content> <year>1989</year>. The chi-square plot: a tool for multivariate outlier recognition. <source><italic toggle=\"yes\">Journal of Geochemical Exploration</italic></source> <volume><italic toggle=\"yes\">32</italic></volume>, <fpage>319</fpage>-341.</mixed-citation> </ref>",
+    "<ref id=\"B14\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">GILLIGAN</named-content>, <named-content content-type=\"arthw-fnm\">L. B.</named-content></named-content> &amp; BARNES, R. G. <year>1990</year>. New England Fold Belt, New South Wales  Regional Geology and Mineralisation. <italic toggle=\"yes\">In</italic>: Hughes, F.E. (ed) . <italic toggle=\"yes\">Geology of the Mineral Deposits of Australia and Papua New Guinea</italic>. Australian Institute of Mining and Metallurgy. Monograph, <bold>14</bold>, <fpage>1417</fpage>-1423.</mixed-citation> </ref>",
+    "<ref id=\"B15\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">HOWARTH</named-content>, <named-content content-type=\"arthw-fnm\">R. J.</named-content></named-content> <year>1983</year>. Statistics and Data Analysis in Geochemical Prospecting. <italic toggle=\"yes\">In</italic>: Govett, G.J.S. (ed) . <italic toggle=\"yes\">Handbook of Exploration Geochemistry, Volume 2</italic>. Elsevier, Amsterdam.</mixed-citation> </ref>",
+    "<ref id=\"B16\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">KAMGAR-PARSI</named-content>, <named-content content-type=\"arthw-fnm\">B.</named-content></named-content>, GUALTIERI, J. A. &amp; DEVANEY, J. E. <year>1990</year>. Clustering with neural networks. <source><italic toggle=\"yes\">Biological Cybernetics</italic></source> <volume><italic toggle=\"yes\">63</italic></volume>, <fpage>210</fpage>-208.</mixed-citation> </ref>",
+    "<ref id=\"B17\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">KASKI</named-content>, <named-content content-type=\"arthw-fnm\">S.</named-content></named-content> <year>1997</year>. <italic toggle=\"yes\">Data exploration using self-organizing maps</italic>. Acta Polytechnica Scandinavica, Mathematics, Computing and Management in Engineering Series, <bold>82</bold>.</mixed-citation> </ref>",
+    "<ref id=\"B18\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">KOHONEN</named-content>, <named-content content-type=\"arthw-fnm\">T.</named-content></named-content> <year>1989</year>. <italic toggle=\"yes\">Self-organisation and Associative Memory</italic>. Springer-Verlag, Berlin.</mixed-citation> </ref>",
+    "<ref id=\"B19\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">KOHONEN</named-content>, <named-content content-type=\"arthw-fnm\">T.</named-content></named-content> <year>1990</year>. The self-organizing map. <source><italic toggle=\"yes\">Proceedings of the IEEE</italic></source> <volume><italic toggle=\"yes\">78</italic></volume>, <fpage>1464</fpage>-1480.</mixed-citation> </ref>",
+    "<ref id=\"B20\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">KOHONEN</named-content>, <named-content content-type=\"arthw-fnm\">T.</named-content></named-content> <year>1995</year>. <italic toggle=\"yes\">Self-Organizing Maps</italic>. Springer-Verlag, Berlin.</mixed-citation> </ref>",
+    "<ref id=\"B21\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">MACQUEEN</named-content>, <named-content content-type=\"arthw-fnm\">J.</named-content></named-content> <year>1967</year>. Some methods for classification and analysis of multivariate observations. <italic toggle=\"yes\">In</italic>: Le Cam, L.M, Neyman, &amp; J. (eds) . <italic toggle=\"yes\">Proceedings of the Fifth Berkeley Symposium on Mathematical Statistics and Probability, Volume 1</italic>. University of California Press, Berkeley<fpage>281</fpage>-297.</mixed-citation> </ref>",
+    "<ref id=\"B22\"> <mixed-citation><named-content content-type=\"arthw-firstauthor\"><named-content content-type=\"arthw-snm\">MANCEY</named-content>, <named-content content-type=\"arthw-fnm\">S. J.</named-content></named-content> &amp; HOWARTH, R. J. <year>1980</year>. Power-transform removal of skewness from large data sets. <source><italic toggle=\"yes\">Institute for Mining and Metallurgy Transactions, Section B</italic></source> <volume><italic toggle=\"yes\">89</italic></volume>, <fpage>92</fpage>-97.</mixed-citation> </ref>"
+  ],
+  "title": {
+    "textEnglish": "A comparison of unsupervised neural networks and k-means clustering in the analysis of multi-element stream sediment data"
+  }
+}
diff --git a/tests/stubdata/output/jats_indersci_url_ident.json b/tests/stubdata/output/jats_indersci_url_ident.json
new file mode 100644
index 0000000..42bc0d1
--- /dev/null
+++ b/tests/stubdata/output/jats_indersci_url_ident.json
@@ -0,0 +1,139 @@
+{
+  "abstract": {
+    "textEnglish": "Flexible sand control screens play a crucial role in the operation and economic benefits of ultra-short radius wells. We established a calculation model for the motion friction resistance of flexible screens and a criterion for contact between screen units and wellbore walls. Case analysis clarified the correlation between several key factors (e.g., maximum allowed swing angle, friction coefficient, insertion speed, and total length of screen units) and the frictional resistance of flexible screens. We utilised the overall distribution of contact point positions and the compressed distance between adjacent screen unit centroids to analyse the conversion mechanism of screen friction resistance from linear to nonlinear rapid accumulation. We also explored measures to improve the extension capacity of flexible screens. Our findings provide a basis for the optimisation design of flexible screen structures and the assessment of their maximum extension capacity. [Received: June 2, 2023; Accepted: February 13, 2024]"
+  },
+  "authors": [
+    {
+      "affiliation": [
+        {
+          "affPubRaw": "School of Mechanical Engineering, Sichuan University of Science & Engineering, No.1 Baita Road, Yibin, Sichuan, 644002, China"
+        }
+      ],
+      "name": {
+        "given_name": "Zhongzhi",
+        "surname": "Hu"
+      }
+    },
+    {
+      "affiliation": [
+        {
+          "affPubRaw": "Production Engineering Research Institute, Daqing Oilfield, CNPC, No. 9 Xibin Road, Daqing, Heilongjiang, 163712, China"
+        }
+      ],
+      "name": {
+        "given_name": "Junliang",
+        "surname": "Li"
+      }
+    },
+    {
+      "affiliation": [
+        {
+          "affPubRaw": "Production Engineering Research Institute, Daqing Oilfield, CNPC, No. 9 Xibin Road, Daqing, Heilongjiang, 163712, China"
+        }
+      ],
+      "name": {
+        "given_name": "Li",
+        "surname": "Wang"
+      }
+    },
+    {
+      "affiliation": [
+        {
+          "affPubRaw": "Dongying Ruifeng Petroleum Technical Development Co., Ltd., No. 5 Juzhou Road, Dongying, Shandong, 257299, China"
+        }
+      ],
+      "name": {
+        "given_name": "Jinbo",
+        "surname": "Wang"
+      }
+    },
+    {
+      "affiliation": [
+        {
+          "affPubRaw": "Bulk Material Transportation Equipment Business Unit, Chengdu Gongbei Intelligent Technology Co., Ltd., No. 7 Guanghua Street, Chengdu, Sichuan, 610011, China"
+        }
+      ],
+      "name": {
+        "given_name": "Xinyang",
+        "surname": "Liu"
+      }
+    },
+    {
+      "affiliation": [
+        {
+          "affPubRaw": "Dongying Ruifeng Petroleum Technical Development Co., Ltd., No. 5 Juzhou Road, Dongying, Shandong, 257299, China"
+        }
+      ],
+      "name": {
+        "given_name": "Yang",
+        "surname": "Kong"
+      }
+    }
+  ],
+  "copyright": {
+    "statement": "Copyright \u00a9 2024 Inderscience Enterprises Ltd.",
+    "status": true
+  },
+  "keywords": [
+    {
+      "keyString": "flexible screen",
+      "keySystem": "misc"
+    },
+    {
+      "keyString": "sand control",
+      "keySystem": "misc"
+    },
+    {
+      "keyString": "multi-body dynamics simulation",
+      "keySystem": "misc"
+    },
+    {
+      "keyString": "frictional resistance",
+      "keySystem": "misc"
+    }
+  ],
+  "pagination": {
+    "firstPage": "55",
+    "lastPage": "76",
+    "pageCount": "21",
+    "pageRange": "55-76"
+  },
+  "pubDate": {
+    "electrDate": "2024-07-03",
+    "printDate": "2024-00-00"
+  },
+  "publication": {
+    "ISSN": [
+      {
+        "issnString": "1753-3309",
+        "pubtype": "ppub"
+      },
+      {
+        "issnString": "1753-3317",
+        "pubtype": "epub"
+      }
+    ],
+    "issueNum": "1",
+    "pubName": "International Journal of Oil, Gas and Coal Technology",
+    "pubYear": "2024",
+    "publisher": "Inderscience Publishers (IEL)",
+    "volumeNum": "36"
+  },
+  "publisherIDs": [
+    {
+      "Identifier": "https://www.inderscienceonline.com/doi/10.1504/IJOGCT.2024.139531",
+      "attribute": "url"
+    }
+  ],
+  "recordData": {
+    "createdTime": "",
+    "loadFormat": "JATS",
+    "loadLocation": "",
+    "loadType": "fromFile",
+    "parsedTime": "",
+    "recordOrigin": ""
+  },
+  "title": {
+    "textEnglish": "Simulation study on frictional resistance and influencing factors of flexible screen pipe tripping into horizontal wellbore"
+  }
+}
diff --git a/tests/test_jats.py b/tests/test_jats.py
index fde4c01..955f073 100644
--- a/tests/test_jats.py
+++ b/tests/test_jats.py
@@ -102,6 +102,8 @@ def test_jats(self):
             "jats_liebert_atypon",
             "jats_aip_native_strip",
             "jats_a+a_nested_collab",
+            "jats_indersci_url_ident",
+            "jats_gsl_unkeyed_xref",
         ]
 
         for f in filenames:

	Cluster
Association	k-means	UNN	Lithological association
Feldspar	1, 6	7, 9, 10	Clarence-Moreton Basin
	2	5, 8	Tertiary basalts and ultramafics
	3, 4	2	Granites
	5	1, 3, 4	Metasedimentary and metavolcanic rocks (accretionary complex)
	7, 8	Remainder	Anomalies
		6	None
Cu–Pb–Zn	3	3	Granites and basal Clarence-Moreton Basin
	6	2, 5	Tertiary basalts and Clarence River Estuary
	6	1	Serpentinite and meta-basic volcanics
	5±2	6 to 10	Accretionary complex
	1	4	Western Clarence River System
	4, 7 to 10	Remainder	Anomalies
		Geological Group
	Mafics and ultramafics	Granites and volcanics	Metasediments	Alluvium
	Total catchments	8.7	19.1	34.1	38.1
(a) Feldspar association
Cluster	1	4.3	1.1	28.0	66.7
	2	30.4	14.6	19.6	35.4
	3	1.0	58.1	4.8	36.2
	4	2.6	37.6	30.7	29.1
	5	0.4	8.8	63.8	27.1
	6	13.6	5.8	30.4	50.3
Anomalies		8.7	17.4	43.5	30.4
(b) Cu–Pb–Zn association
Cluster	1	6.1	8.2	28.6	57.1
	2	5.9	15.6	42.4	36.2
	3	0.6	53.9	10.7	34.8
	5	4.5	10.7	51.7	33.1
	6	32.0	7.5	26.5	34.0
Anomalies		18.2	9.1	45.5	27.3