Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Indersci urlid.20241127 #152

Merged
merged 5 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions adsingestp/parsers/jats.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

class JATSAffils(object):
regex_email = re.compile(r"^[a-zA-Z0-9+_.-]+@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)+")
regex_auth_xid = re.compile(r"^A[0-9]+$")

def __init__(self):
self.contrib_dict = {}
Expand Down Expand Up @@ -94,6 +95,7 @@ def _fix_affil(self, affstring):
# skip strings that are empty or consist only of commas
check_a = a.replace(",", "")
if check_a:
a = re.sub("\\(e-*mail:\\s*,+\\s*\\)", "", a)
a = a.replace("\\n", ",")
a = a.replace(" —", "—")
a = a.replace(" , ", ", ")
Expand All @@ -103,6 +105,7 @@ def _fix_affil(self, affstring):
a = re.sub("^(\\s*,+\\s*)+", "", a)
a = re.sub("(\\s*,\\s+)+", ", ", a)
a = re.sub("(,\\s*)+$", "", a)
a = re.sub("\\s+$", "", a)
if self.regex_email.match(a):
emails.append(a)
else:
Expand Down Expand Up @@ -567,15 +570,18 @@ def parse(self, article_metadata):
for aff in contrib_aff:
# check and see if the publisher defined an email tag inside an affil (like IOP does)
nested_email_list = aff.find_all("ext-link")
key = aff.get("id", default_key)
for e in nested_email_list:
if e.get("ext-link-type", None) == "email":
key = e["id"]
if e.get("id", None):
ekey = e["id"]
else:
ekey = key
value = e.text
# build the cross-reference dictionary to be used later
self.email_xref[key] = value
self.email_xref[ekey] = value
e.decompose()

key = aff.get("id", default_key)
# special case: get rid of <sup>...
aff = self._decompose(soup=aff, tag="sup")
aff, aff_extids_tmp = self._get_inst_identifiers(aff)
Expand All @@ -586,11 +592,25 @@ def parse(self, article_metadata):

affstr = aff.get_text(separator=", ").strip()
(affstr, email_list) = self._fix_affil(affstr)
if email_list:
self.email_xref[key] = email_list
if not self.email_xref.get(key, None):
if email_list:
self.email_xref[key] = email_list
else:
self.email_xref[key] = ""
self.xref_dict[key] = affstr
self.xref_xid_dict[key] = aff_extids_tmp

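# special case: the publisher defined aff/email xrefs, but the xids aren't
# assigned to any author; such an xid is typically of the form "A\d+".
# If there are exactly as many authors as xrefs, pair them up positionally,
# filling in only authors that have no affiliation assigned yet.
# publisher example: Geol. Soc. London (gsl)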
count_auth = len(authors_out)
count_xref = len(self.xref_dict.keys())
if count_auth == count_xref:
for auth, xref in zip(authors_out, self.xref_dict.keys()):
if self.regex_auth_xid.match(xref):
if not auth.get("aff", []) and not auth.get("xaff", []):
auth["xaff"] = [xref]

self.contrib_dict = {"authors": authors_out, "contributors": contribs_out}

# now get the xref keys outside of contrib-group:
Expand Down Expand Up @@ -1027,6 +1047,10 @@ def _parse_ids(self):
self.base_metadata["ids"]["pub-id"].append(
{"attribute": "manuscript", "Identifier": self._detag(d, [])}
)
elif id_type == "url":
self.base_metadata["ids"]["pub-id"].append(
{"attribute": "url", "Identifier": self._detag(d, [])}
)
elif id_type == "other":
self.base_metadata["ids"]["pub-id"].append(
{"attribute": "other", "Identifier": self._detag(d, [])}
Expand Down Expand Up @@ -1232,7 +1256,11 @@ def parse(self, text, bsparser="lxml-xml"):
raise XmlLoadException(err)

document = d.article
front_meta = document.front
# front_meta = document.front
try:
front_meta = document.front
except Exception as err:
raise XmlLoadException("No front matter found, stopping: %s" % err)
self.back_meta = document.back

self.article_meta = front_meta.find("article-meta")
Expand Down
110 changes: 110 additions & 0 deletions tests/stubdata/input/jats_gsl_unkeyed_xref.xml

Large diffs are not rendered by default.

67 changes: 67 additions & 0 deletions tests/stubdata/input/jats_indersci_url_ident.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" xml:lang="en" dtd-version="1.0">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">ijogct</journal-id>
<journal-title-group>
<journal-title>International Journal of Oil, Gas and Coal Technology</journal-title>
</journal-title-group>
<issn pub-type="ppub">1753-3309</issn>
<issn pub-type="epub">1753-3317</issn>
<publisher>
<publisher-name>Inderscience Publishers (IEL)</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="url">https://www.inderscienceonline.com/doi/10.1504/IJOGCT.2024.139531</article-id>
<title-group>
<article-title>Simulation study on frictional resistance and influencing factors of flexible screen pipe tripping into horizontal wellbore</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author"><string-name name-style="western"><given-names>Zhongzhi</given-names> <surname>Hu</surname></string-name><xref ref-type="aff" rid="A1"><sup>1</sup></xref><x xml:space="preserve">, </x></contrib>
<contrib contrib-type="author"><string-name name-style="western"><given-names>Junliang</given-names> <surname>Li</surname></string-name><xref ref-type="aff" rid="A2"><sup>2</sup></xref><x xml:space="preserve">, </x></contrib>
<contrib contrib-type="author"><string-name name-style="western"><given-names>Li</given-names> <surname>Wang</surname></string-name><xref ref-type="aff" rid="A3"><sup>3</sup></xref><x xml:space="preserve">, </x></contrib>
<contrib contrib-type="author"><string-name name-style="western"><given-names>Jinbo</given-names> <surname>Wang</surname></string-name><xref ref-type="aff" rid="A4"><sup>4</sup></xref><x xml:space="preserve">, </x></contrib>
<contrib contrib-type="author"><string-name name-style="western"><given-names>Xinyang</given-names> <surname>Liu</surname></string-name><xref ref-type="aff" rid="A5"><sup>5</sup></xref><x xml:space="preserve">, </x></contrib>
<contrib contrib-type="author"><string-name name-style="western"><given-names>Yang</given-names> <surname>Kong</surname></string-name><xref ref-type="aff" rid="A6"><sup>6</sup></xref></contrib>
<aff id="A1"><label><sup>1</sup></label>School of Mechanical Engineering, Sichuan University of Science &amp; Engineering, No.1 Baita Road, Yibin, Sichuan, 644002, China</aff>
<aff id="A2"><label><sup>2</sup></label>Production Engineering Research Institute, Daqing Oilfield, CNPC, No. 9 Xibin Road, Daqing, Heilongjiang, 163712, China</aff>
<aff id="A3"><label><sup>3</sup></label>Production Engineering Research Institute, Daqing Oilfield, CNPC, No. 9 Xibin Road, Daqing, Heilongjiang, 163712, China</aff>
<aff id="A4"><label><sup>4</sup></label>Dongying Ruifeng Petroleum Technical Development Co., Ltd., No. 5 Juzhou Road, Dongying, Shandong, 257299, China</aff>
<aff id="A5"><label><sup>5</sup></label>Bulk Material Transportation Equipment Business Unit, Chengdu Gongbei Intelligent Technology Co., Ltd., No. 7 Guanghua Street, Chengdu, Sichuan, 610011, China</aff>
<aff id="A6"><label><sup>6</sup></label>Dongying Ruifeng Petroleum Technical Development Co., Ltd., No. 5 Juzhou Road, Dongying, Shandong, 257299, China</aff>
</contrib-group>
<pub-date pub-type="ppub">
<year>2024</year>
</pub-date>
<pub-date pub-type="epub">
<day>03</day>
<month>7</month>
<year>2024</year>
</pub-date>
<volume>36</volume>
<issue>1</issue>
<fpage>55</fpage>
<lpage>76</lpage>
<permissions>
<copyright-statement>Copyright © 2024 Inderscience Enterprises Ltd.</copyright-statement>
<copyright-year>2024</copyright-year>
<license><license-p></license-p></license>
</permissions>
<self-uri content-type="pdf" xlink:href="ijogct.2024.139531.pdf"></self-uri>
<abstract xml:lang="en">
<p>Flexible sand control screens play a crucial role in the operation and economic benefits of ultra-short radius wells. We established a calculation model for the motion friction resistance of flexible screens and a criterion for contact between screen units and wellbore walls. Case analysis clarified the correlation between several key factors (e.g., maximum allowed swing angle, friction coefficient, insertion speed, and total length of screen units) and the frictional resistance of flexible screens. We utilised the overall distribution of contact point positions and the compressed distance between adjacent screen unit centroids to analyse the conversion mechanism of screen friction resistance from linear to nonlinear rapid accumulation. We also explored measures to improve the extension capacity of flexible screens. Our findings provide a basis for the optimisation design of flexible screen structures and the assessment of their maximum extension capacity. [Received: June 2, 2023; Accepted: February 13, 2024]</p>
</abstract>
<kwd-group xml:lang="en">
<kwd>flexible screen</kwd><x xml:space="preserve">, </x>
<kwd>sand control</kwd><x xml:space="preserve">, </x>
<kwd>multi-body dynamics simulation</kwd><x xml:space="preserve">, </x>
<kwd>frictional resistance</kwd>
</kwd-group>
<counts>
<page-count count="21"></page-count>
</counts>
</article-meta>
</front>
</article>
Loading
Loading