Skip to content

Commit

Permalink
Add f245 title splitting to title+subtitle if some of most common pat…
Browse files Browse the repository at this point in the history
…terns match to title
  • Loading branch information
aatuny committed Nov 4, 2024
1 parent 60da977 commit 70fff3c
Show file tree
Hide file tree
Showing 19 changed files with 151 additions and 19 deletions.
58 changes: 49 additions & 9 deletions src/transform/convert/common/generate2xx.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,22 @@
*/
export function generate245({getFields}) {
const isAddedEntry = generateIsAddedEntry();
const ind1 = isAddedEntry ? '1' : '0';
const ind2 = ' '; // NB: generated in validation phase by marc-record-validators-melinda:IndicatorFixes

const fields = getFields('dc.title');
const title = fields.length > 0 ? fields[0].$.value : null;
if (fields.length === 0) {
return [];
}

const titleText = fields.length > 0 ? fields[0].$.value : null;

const {title, alternativeSubtitle} = getTitle(titleText);

return alternativeSubtitle
? [{tag: '245', ind1, ind2, subfields: [{code: 'a', value: `${title} :`}, {code: 'b', value: `${alternativeSubtitle}.`}]}]
: [{tag: '245', ind1, ind2, subfields: [{code: 'a', value: `${title}.`}]}];

return title ? [
{
tag: '245',
ind1: isAddedEntry ? '1' : '0',
ind2: '0',
subfields: [{code: 'a', value: `${title}.`}]
}
] : [];

function generateIsAddedEntry() {
const fields = getFields(p => [
Expand All @@ -26,6 +30,42 @@ export function generate245({getFields}) {

return fields.length > 0;
}

// Splits title to title+subtitle if title contains any of patterns that require this type of processing.
// Note: based on the getter defined within ONIX-transformer. Here the '!'/'?' pattern additionally
// requires trailing whitespace and treats a mixed run such as '?!' as one separator.
/**
 * Split a raw title into a main title and an optional subtitle.
 *
 * The first pattern (in priority order) that matches anywhere in the text decides
 * where the text is split. Only the first occurrence of that pattern is used.
 *
 * @param {string} titleText - Full title text.
 * @returns {{title: string, alternativeSubtitle: (string|undefined)}} Main title with
 *   trailing whitespace removed, and the trimmed subtitle. alternativeSubtitle is
 *   undefined when no pattern matches or when nothing is left after the separator.
 */
function getTitle(titleText) {
  const pattern = findRegex(titleText);
  const match = pattern ? pattern.regex.exec(titleText) : null;

  if (!match) {
    return {title: titleText.trimEnd(), alternativeSubtitle: undefined};
  }

  const {keepCharactersFromStart, keepCharactersFromEnd, keepResult = false} = pattern;

  // Text before the separator, plus as many matched characters as the pattern says belong to the title
  const head = titleText.slice(0, match.index + keepCharactersFromStart);
  // Text after the separator, including the matched characters that belong to the subtitle
  const tail = titleText.slice(match.index + match[0].length - keepCharactersFromEnd).trim();

  return {
    // match[0] is appended explicitly; coercing the whole match array to a string
    // would break as soon as a pattern gets a capture group
    title: (keepResult ? `${head}${match[0]}` : head).trimEnd(),
    // An empty remainder is reported the same way as "no match"
    alternativeSubtitle: tail === '' ? undefined : tail
  };

  // Returns the first pattern definition whose regex matches the text, or undefined
  function findRegex(text) {
    // Note: order defines priority
    const pluralOfRegex = [
      // split title to mainTitle and subtitle at first ':', do not keep ':'
      {keepCharactersFromStart: 0, keepCharactersFromEnd: 0, regex: /:\s+/u},
      // split title to mainTitle and subtitle at first ' - ', do not keep the separator
      // (the non-digit characters around the dash are matched too, so one character is kept on each side)
      {keepCharactersFromStart: 1, keepCharactersFromEnd: 1, regex: /[^0-9]\s+[\u2013\u2014-]\s+[^0-9]/u},
      // split title to mainTitle and subtitle at '! ' or '? ', keep question and exclamation marks, they are part of the title
      // (a run of mixed marks such as '?!' is kept together; whitespace after the marks is required)
      {keepCharactersFromStart: 0, keepCharactersFromEnd: 0, keepResult: true, regex: /[!?]+\s+/u}
    ];

    return pluralOfRegex.find(({regex}) => regex.test(text));
  }
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"$": {
"schema": "dc",
"element": "title",
"value": "Dublin Core - MARC21 -konversio"
"value": "Dublin Core"
}
}
]
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[
{
"tag": "245", "ind1": "0", "ind2": "0",
"tag": "245", "ind1": "0", "ind2": " ",
"subfields": [
{"code": "a", "value": "Dublin Core - MARC21 -konversio."}
{"code": "a", "value": "Dublin Core."}
]
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"$": {
"schema": "dc",
"element": "title",
"value": "Dublin Core - MARC21 -konversio"
"value": "Dublin Core"
}
}
]
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"description": "Ind1=1 when do.contibutor.author or dc.creator information can be found.",
"description": "Ind1=1 when dc.contributor.author or dc.creator information can be found.",
"only": false
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[
{
"tag": "245", "ind1": "1", "ind2": "0",
"tag": "245", "ind1": "1", "ind2": " ",
"subfields": [
{"code": "a", "value": "Dublin Core - MARC21 -konversio."}
{"code": "a", "value": "Dublin Core."}
]
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[
{
"$": {
"schema": "dc",
"element": "title",
"value": "Dublin Core - MARC21 -conversion"
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"description": "Splits title to title+subtitle from character \"-\"",
"only": false
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[
{
"tag": "245", "ind1": "0", "ind2": " ",
"subfields": [
{"code": "a", "value": "Dublin Core :"},
{"code": "b", "value": "MARC21 -conversion."}
]
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[
{
"$": {
"schema": "dc",
"element": "title",
"value": "Dublin Core: MARC21 -conversion"
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"description": "Splits title to title+subtitle from character \":\"",
"only": false
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[
{
"tag": "245", "ind1": "0", "ind2": " ",
"subfields": [
{"code": "a", "value": "Dublin Core :"},
{"code": "b", "value": "MARC21 -conversion."}
]
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[
{
"$": {
"schema": "dc",
"element": "title",
"value": "Dublin Core! MARC21 -conversion"
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"description": "Splits title to title+subtitle from character \"!\"",
"only": false
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[
{
"tag": "245", "ind1": "0", "ind2": " ",
"subfields": [
{"code": "a", "value": "Dublin Core! :"},
{"code": "b", "value": "MARC21 -conversion."}
]
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[
{
"$": {
"schema": "dc",
"element": "title",
"value": "Dublin Core? MARC21 -conversion"
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"description": "Splits title to title+subtitle from character \"?\"",
"only": false
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[
{
"tag": "245", "ind1": "0", "ind2": " ",
"subfields": [
{"code": "a", "value": "Dublin Core? :"},
{"code": "b", "value": "MARC21 -conversion."}
]
}
]
10 changes: 7 additions & 3 deletions test-fixtures/transform/integration/01/output.json
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,11 @@
"subfields": [
{
"code": "a",
"value": "<\"Cool Stuff\", like 'this': Dublin Core & MARC21 ♥ >."
"value": "<\"Cool Stuff\", like 'this' :"
},
{
"code": "b",
"value": "Dublin Core & MARC21 ♥ >."
}
]
},
Expand Down Expand Up @@ -409,7 +413,7 @@
},
{
"code": "k",
"value": "MELINDA_RECORD_IMPORT_REPO:FOOBAR:3f39198fe332d48b27b8d18cfab4356d286ce43f3afd2799c21a302556bd04a8"
"value": "MELINDA_RECORD_IMPORT_REPO:FOOBAR:00eb8bec1dddd268cf2306e35693e6889d402f30ccb4a882d4978c78cfcaccaf"
},
{
"code": "q",
Expand Down Expand Up @@ -461,7 +465,7 @@
},
{
"description": "Normalizes indicator values",
"state": "valid"
"state": "fixed"
},
{
"description": "Adds URN for record, to 856-field (if not existing). If isLegalDeposit is active, adds legal deposit subfields to the f856s with URN.",
Expand Down

0 comments on commit 70fff3c

Please sign in to comment.