Skip to content

Commit

Permalink
Remove dependency on fastutil
Browse files Browse the repository at this point in the history
Fastutil is our largest dependency and consumes a third of the overall Heritrix distribution size. If we update to the latest version it will be even larger. But we're only using two tiny classes from it: the trivial RepositionableStream interface and the unsynchronized FastBufferedOutputStream.

Some downstream users (e.g. lockss-core) actually implement RepositionableStream, so to preserve API compatibility this change includes a copy of just that interface while keeping the same package name.

Regarding FastBufferedOutputStream, for WARC writing the outer GZIPOutputStream is synchronized anyway. And RecordingOutputStream will typically be doing moderately large writes copying from the network. So in both usages it seems unlikely that there's much practical benefit in using it here over the standard BufferedOutputStream. The JVM JIT has a lot of optimizations for synchronized code these days too.
  • Loading branch information
ato committed Dec 4, 2024
1 parent 4bb03ba commit 94378b2
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 6 deletions.
42 changes: 42 additions & 0 deletions src/main/java/it/unimi/dsi/fastutil/io/RepositionableStream.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// copied from fastutil, keeping the original package name to avoid breaking
// compatibility with existing user code that implements this interface
package it.unimi.dsi.fastutil.io;

/*
* Copyright (C) 2005-2015 Sebastiano Vigna
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


/**
 * Positioning contract for a byte stream: one method moves the current
 * stream position, the other reports it.
 *
 * <p>The two methods are overloads of the same name, distinguished only by
 * whether a new position is supplied.
 *
 * @author Sebastiano Vigna
 * @since 4.4
 */
public interface RepositionableStream {

    /**
     * Moves the stream to the given position.
     *
     * @param newPosition the position the stream should be set to.
     * @throws java.io.IOException if the implementation fails to reposition the stream.
     */
    void position(long newPosition) throws java.io.IOException;

    /**
     * Reports where the stream is currently positioned.
     *
     * @return the current position of the stream.
     * @throws java.io.IOException if the implementation fails to determine the position.
     */
    long position() throws java.io.IOException;
}
5 changes: 2 additions & 3 deletions src/main/java/org/archive/io/RecordingOutputStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@

package org.archive.io;

import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;

import java.io.BufferedOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
Expand Down Expand Up @@ -207,7 +206,7 @@ public void open(OutputStream wrappedStream) throws IOException {
protected OutputStream ensureDiskStream() throws FileNotFoundException {
if (this.diskStream == null) {
FileOutputStream fis = new FileOutputStream(this.backingFilename);
this.diskStream = new FastBufferedOutputStream(fis);
this.diskStream = new BufferedOutputStream(fis);
}
return this.diskStream;
}
Expand Down
5 changes: 2 additions & 3 deletions src/main/java/org/archive/io/WriterPoolMember.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@

package org.archive.io;

import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
Expand Down Expand Up @@ -200,7 +199,7 @@ protected String createFile(final File file) throws IOException {
close();
this.f = file;
FileOutputStream fos = new FileOutputStream(this.f);
this.countOut = new MiserOutputStream(new FastBufferedOutputStream(fos),settings.getFrequentFlushes());
this.countOut = new MiserOutputStream(new BufferedOutputStream(fos),settings.getFrequentFlushes());
this.out = this.countOut;
logger.fine("Opened " + this.f.getAbsolutePath());
return this.f.getName();
Expand Down

0 comments on commit 94378b2

Please sign in to comment.