Skip to content

Commit

Permalink
Merge branch 'develop' into downloadjarpathinputlocation
Browse files Browse the repository at this point in the history
  • Loading branch information
palaniappan1 authored Sep 26, 2024
2 parents 63e5450 + 2d7ee55 commit bdf9950
Show file tree
Hide file tree
Showing 6 changed files with 321 additions and 165 deletions.
37 changes: 19 additions & 18 deletions .github/download_jars.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,29 @@
import random
import os
import json
import datetime

print('Job Starting')

BASE_URL = "https://search.maven.org/solrsearch/select"
DOWNLOAD_URL_TEMPLATE = "https://repo1.maven.org/maven2/{group}/{artifact}/{version}/{artifact}-{version}.jar"
metadata_path = os.getenv('METADATA_PATH', 'metadata.json')
OUTPUT_DIR = "downloaded_jars"
NUM_JARS = 100
MAX_SIZE_MB = 5 * 1024 * 1024 # 5MB in bytes

# Ensure output directory exists
if not os.path.exists(OUTPUT_DIR):
os.makedirs(OUTPUT_DIR)


def construct_download_url(group, artifact, version):
    """Return the download URL of an artifact's JAR.

    The dotted group id (e.g. ``org.example``) is converted to a
    slash-separated path before being substituted, together with the
    artifact id and version, into ``DOWNLOAD_URL_TEMPLATE``.
    """
    return DOWNLOAD_URL_TEMPLATE.format(
        group="/".join(group.split(".")),
        artifact=artifact,
        version=version,
    )


def can_download_file(url):
    """Report whether the JAR at *url* is small enough to be processed.

    Only the response headers are needed, so the request is streamed and
    the body is never read.

    Args:
        url: Download URL of the JAR.

    Returns:
        True when the advertised ``content-length`` does not exceed
        ``MAX_SIZE_MB`` (a missing header counts as size 0), else False.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status.
    """
    # The context manager releases the pooled connection; with stream=True
    # it would otherwise stay open because the body is never consumed.
    # The timeout keeps an unresponsive server from hanging the job.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
    # NOTE: despite its name, MAX_SIZE_MB holds a byte count.
    return total_size <= MAX_SIZE_MB


Expand All @@ -55,12 +48,19 @@ def get_random_artifact():


def get_metadata():
    """Load the JAR metadata, creating an empty metadata file if needed.

    Returns:
        The parsed JSON content of ``metadata_path`` when that file exists,
        otherwise ``{"jars": []}`` after writing it to a new file.

    Raises:
        IsADirectoryError: if ``metadata_path`` refers to a directory.
    """
    print("Reading metadata")
    if os.path.isfile(metadata_path):
        with open(metadata_path, 'r') as file:
            return json.load(file)
    if os.path.isdir(metadata_path):
        raise IsADirectoryError(f"{metadata_path} is a directory, not a file.")

    print("No metadata file found, so creating new one")
    empty_metadata = {"jars": []}
    # dirname is '' for a bare file name such as the default 'metadata.json',
    # and os.makedirs('') raises FileNotFoundError, so only create a parent
    # directory when the path actually has one.
    parent_dir = os.path.dirname(metadata_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # Create the file with an empty array
    with open(metadata_path, 'w') as file:
        json.dump(empty_metadata, file, indent=4)
    return empty_metadata


def save_metadata(data):
Expand All @@ -69,6 +69,7 @@ def save_metadata(data):


metadata = get_metadata()
print("metadata gathered")
# Download 100 random JARs
while downloaded_count < NUM_JARS:
artifact = get_random_artifact()
Expand All @@ -78,19 +79,19 @@ def save_metadata(data):
artifact_id = artifact['a']
version = artifact['latestVersion']
download_url = construct_download_url(group, artifact_id, version)
output_path = os.path.join(OUTPUT_DIR, f"{artifact_id}-{version}.jar")
artifact_name = f"{artifact_id}--{version}.jar"
artifact_name = f"{artifact_id}-{version}.jar"
try:
if not any(jar['name'] == artifact_name for jar in metadata['jars']):
if download_file(download_url, output_path):
if can_download_file(download_url):
metadata['jars'].append({
'name': artifact_name
'name': artifact_name,
'download_url': download_url,
'date': datetime.date.today().isoformat()
})
save_metadata(metadata)
print(f"Downloaded: {output_path}")
downloaded_count += 1
else:
print(f"Skipped (too large): {output_path}")
print(f"Skipped (too large)")
except requests.RequestException as e:
print(f"Failed to download {download_url}: {e}")
print(f"Downloaded {downloaded_count} JAR files.")
print(f"Downloaded {downloaded_count} JAR files.")
121 changes: 86 additions & 35 deletions .github/workflows/test-jars.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
name: Download Random JARs from Maven

on:
workflow_dispatch:
schedule:
- cron: '0 9 * * 1' # '0' : 0th minute, '9' : Hour (9 AM) '*' : Day of the month '*' : Month '1' : Day of the week (0 is sunday, 1 is monday)
workflow_dispatch: # Manual trigger of this action
jobs:
download-jars:
runs-on: ubuntu-latest
Expand All @@ -19,18 +21,13 @@ jobs:
run:
pip install requests

- name: Download Metadata
id: download-metadata
uses: actions/download-artifact@v3
with:
name: metadata
path: metadata
continue-on-error: true # Allows workflow to continue even if the artifact metadata is not found (obviously it will not be found for the first run)

- name: Create empty metadata file
if: failure() # This runs if the previous step failed
run: |
echo '{"jars":[]}' > metadata/metadata.json
# - name: Download Metadata
# id: download-metadata
# uses: actions/download-artifact@v3
# with:
# name: metadata
# path: metadata
# continue-on-error: true # Allows workflow to continue even if the artifact metadata is not found (obviously it will not be found for the first run)

- name: Download random JARs
id: download
Expand All @@ -39,12 +36,6 @@ jobs:
env:
METADATA_PATH: metadata/metadata.json

- name: Upload JARs
uses: actions/upload-artifact@v3
with:
name: jars
path: downloaded_jars/

- name: Upload Metadata
uses: actions/upload-artifact@v3
with:
Expand All @@ -58,9 +49,6 @@ jobs:
java-package: 'jdk'
java-version: '8'

- name: Checkout repository
uses: actions/checkout@v2

- name: Install Maven
run: |
sudo apt-get update
Expand All @@ -70,21 +58,84 @@ jobs:
run: |
mvn clean install -DskipTests
- name: List directory contents
run: |
ls -l
ls -l ${{ github.workspace }}/downloaded_jars
- name: Run tests on downloaded JARs
run: |
for jar in $(ls ${{ github.workspace }}/downloaded_jars/*.jar); do
echo "Testing $jar"
mvn test -Dtest=sootup.java.bytecode.inputlocation.RandomJarTest -DjarPath="$jar" -pl sootup.java.bytecode
# Get the current date in YYYY-MM-DD format
current_date=$(date +"%Y-%m-%d")
echo "CURRENT_DATE=$(date +%Y%m%d)" >> $GITHUB_ENV
# Loop through each artifact in metadata.json that matches the current date
for row in $(jq -c --arg date "$current_date" '.jars[] | select(.date == $date)' ${{ github.workspace }}/metadata/metadata.json); do
# Extract artifactId and download_url from each object
artifactId=$(echo "$row" | jq -r '.name')
downloadUrl=$(echo "$row" | jq -r '.download_url')
echo "Testing $artifactId from $downloadUrl"
mvn test -Dtest=sootup.java.bytecode.frontend.inputlocation.RandomJarTest#testJar -DjarPath="$downloadUrl" -pl sootup.java.bytecode.frontend
done
- name: Upload the Artifact
uses: actions/upload-artifact@v3
with:
name: jar_test_csv
path: sootup.java.bytecode/jar_test.csv
- name: Check for jar_failure.csv
id: check_file
if: ${{ hashFiles('sootup.java.bytecode.frontend/jar_failure.csv') != '' }}
run: |
echo "jar_failure.csv exists"
# Read all jar_names from the CSV and store them in an environment variable
jar_names=$(awk -F, 'NR>1 {print $1}' sootup.java.bytecode.frontend/jar_failure.csv | paste -sd "," -)
echo "JAR_NAMES=${jar_names}" >> $GITHUB_ENV
- name: Set branch name with timestamp
id: set_branch_name
if: env.JAR_NAMES != ''
run: |
# Get the current date (YYYYMMDD) to use as the branch-name suffix
current_date=$(date +%Y%m%d)
branch_name="failed-jars-branch-${current_date}"
echo "BRANCH_NAME=${branch_name}" >> $GITHUB_ENV
- name: Create a Test File
if: env.JAR_NAMES != ''
run: |
mvn test -Dtest=sootup.java.bytecode.frontend.inputlocation.RandomJarTest#writeFile -pl sootup.java.bytecode.frontend
- name: Configure Git
run: |
git config --global user.name "GitHub Actions"
git config --global user.email "[email protected]"
- name: Create new branch and prepare files
if: env.JAR_NAMES != ''
run: |
# Create and switch to the dated branch (failed-jars-branch-<YYYYMMDD>) stored in BRANCH_NAME
git checkout -b ${{ env.BRANCH_NAME }}
echo 'New Branch Checked Out'
# Add jar_failure.csv to the new directory
git add sootup.java.bytecode.frontend/jar_failure.csv
git add sootup.java.bytecode.frontend/src/test/java/sootup/java/bytecode/frontend/inputlocation/FixJars.java
echo 'CSV file Added to git'
- name: Create Issue
  if: env.JAR_NAMES != ''
  run: |
    echo "Repository: ${{ github.repository }}"
    # The token is deliberately not echoed: secrets must never be written to the job log.
    # Open an issue through the REST API and keep the JSON response to read its number.
    ISSUE_RESPONSE=$(curl -X POST \
      -H "Accept: application/vnd.github.v3+json" \
      -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
      -d '{"title":"Issue Title","body":"This is the issue body"}' \
      https://api.github.com/repos/${{ github.repository }}/issues)
    ISSUE_NUMBER=$(echo "$ISSUE_RESPONSE" | jq '.number')
    # Export the issue number so later steps can read it as env.ISSUE_NUMBER
    echo "ISSUE_NUMBER=$ISSUE_NUMBER" >> $GITHUB_ENV
- name: Move files and commit changes
  if: env.JAR_NAMES != ''
  run: |
    # The token is deliberately not echoed: secrets must never be written to the job log.
    # Move jar_failure.csv into the test resources of the frontend module
    git mv sootup.java.bytecode.frontend/jar_failure.csv sootup.java.bytecode.frontend/src/test/resources/jar_failure.csv
    echo 'jar_failure.csv moved to the branch'
    # Commit and push changes
    git add .
    git commit -m "Linking issue #${{ env.ISSUE_NUMBER }} to the branch"
    git push origin ${{ env.BRANCH_NAME }}
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Original file line number Diff line number Diff line change
Expand Up @@ -447,9 +447,20 @@ void extractWarFile(Path warFilePath, final Path destDirectory) {
Path filepath = destDirectory.resolve(zipEntry.getName());
final File file = filepath.toFile();

String canonicalPathStr = file.getCanonicalPath();
if (!canonicalPathStr.startsWith(destDirectory + File.separator)) {
throw new IllegalArgumentException(
"ZipSlip Attack Mitigated: ZipEntry points outside of the target dir: "
+ file.getName());
}

file.deleteOnExit();
if (zipEntry.isDirectory()) {
file.mkdir();
boolean mkdir = file.mkdir();
if (!mkdir) {
throw new IllegalStateException(
"Could not create Directory: " + file.getAbsolutePath());
}
} else {
byte[] incomingValues = new byte[4096];
int readBytesZip;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package sootup.java.bytecode.frontend.inputlocation;

import java.util.Collections;
import java.util.stream.Collectors;
import sootup.interceptors.BytecodeBodyInterceptors;
import sootup.java.core.JavaSootClass;
import sootup.java.core.JavaSootMethod;
import sootup.java.core.views.JavaView;

/**
 * Shared helpers for tests that load a downloaded JAR into a {@link JavaView} and try to build
 * the bodies of its methods.
 */
public abstract class BaseFixJarsTest {

  // Signature of the method whose body last failed to build in assertJar; empty until then.
  String failedMethodSignature = "";

  /**
   * Creates a view over the JAR referenced by the given download URL, using the default bytecode
   * body interceptors and an empty list as the remaining input-location arguments.
   *
   * @param jarDownloadUrl URL passed to the {@code DownloadJarAnalysisInputLocation}
   * @return a new {@link JavaView} backed by that input location
   */
  public JavaView supplyJavaView(String jarDownloadUrl) {
    return new JavaView(
        new DownloadJarAnalysisInputLocation(
            jarDownloadUrl,
            BytecodeBodyInterceptors.Default.getBodyInterceptors(),
            Collections.emptyList()));
  }

  /**
   * Looks up the method with the given signature in the view and requests its body.
   *
   * <p>Any exception raised along the way is caught and only its stack trace is printed; nothing
   * is rethrown to the caller.
   *
   * @param javaView view to look the method up in
   * @param methodSignature textual method signature, parsed by the view's identifier factory
   */
  public void assertMethodConversion(JavaView javaView, String methodSignature) {
    try {
      javaView
          .getMethod(javaView.getIdentifierFactory().parseMethodSignature(methodSignature))
          .get()
          .getBody();
    } catch (Exception conversionFailure) {
      conversionFailure.printStackTrace();
    }
  }

  /**
   * Requests the body of every method in the view that reports having one.
   *
   * <p>The first failure stores the offending method's signature in {@code
   * failedMethodSignature} and is rethrown, which stops the traversal.
   *
   * @param javaView view whose classes are traversed
   */
  public void assertJar(JavaView javaView) {
    for (JavaSootClass sootClass : javaView.getClasses().collect(Collectors.toList())) {
      for (JavaSootMethod method : sootClass.getMethods()) {
        if (!method.hasBody()) {
          continue;
        }
        buildBodyOrRecordFailure(method);
      }
    }
  }

  // Requests the method's body; on failure records its signature and rethrows the exception.
  private void buildBodyOrRecordFailure(JavaSootMethod method) {
    try {
      method.getBody();
    } catch (Exception failure) {
      failedMethodSignature = method.getSignature().toString();
      throw failure;
    }
  }
}
Loading

0 comments on commit bdf9950

Please sign in to comment.