Skip to content

Commit

Permalink
DedupeTool: Use matchType=exact for CDX server
Browse files (browse the repository at this point in the history)
  • Loading branch information
ato committed Sep 23, 2024
1 parent a22e1f9 commit 5a9b5ff
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion src/org/netpreserve/jwarc/tools/DedupeTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ private WarcRevisit deduplicate(WarcRecord record) throws IOException {
}

private CdxRecord findMatchingRecord(WarcCaptureRecord capture, String digest) throws IOException {
- URL queryUrl = new URL(cdxServer + "?sort=reverse&rows=10&url=" + URLEncoder.encode(capture.target(), UTF_8.name()));
+ URL queryUrl = new URL(cdxServer + "?sort=reverse&rows=10&matchType=exact&url=" + URLEncoder.encode(capture.target(), UTF_8.name()));
try (CdxReader response = new CdxReader(queryUrl.openStream())) {
for (CdxRecord record : response) {
if (digest.equalsIgnoreCase(record.digest())) {
Expand Down

0 comments on commit 5a9b5ff

Please sign in to comment.