
Address code style check issues #515
Signed-off-by: Jono Yang <[email protected]>
JonoYang committed Aug 13, 2024
1 parent 790a8a6 commit 4c7cfc0
Showing 35 changed files with 85 additions and 144 deletions.
8 changes: 4 additions & 4 deletions Makefile
@@ -62,15 +62,15 @@ doc8:

valid:
@echo "-> Run Ruff format"
-@${ACTIVATE} ruff format --exclude purldb-toolkit/ --exclude purl2vcs
+@${ACTIVATE} ruff format --exclude etc/scripts/ --exclude purldb-toolkit/ --exclude purl2vcs/
@echo "-> Run Ruff linter"
-@${ACTIVATE} ruff check --fix --exclude purldb-toolkit/ --exclude purl2vcs
+@${ACTIVATE} ruff check --fix --exclude etc/scripts/ --exclude purldb-toolkit/ --exclude purl2vcs/

check:
@echo "-> Run Ruff linter validation (pycodestyle, bandit, isort, and more)"
-@${ACTIVATE} ruff check --exclude purldb-toolkit/ --exclude purl2vcs
+@${ACTIVATE} ruff check --exclude etc/scripts/ --exclude purldb-toolkit/ --exclude purl2vcs/
@echo "-> Run Ruff format validation"
-@${ACTIVATE} ruff format --check --exclude purldb-toolkit/ --exclude purl2vcs
+@${ACTIVATE} ruff format --check --exclude etc/scripts/ --exclude purldb-toolkit/ --exclude purl2vcs/
@$(MAKE) doc8

clean:
28 changes: 14 additions & 14 deletions clearcode/store_scans.py
@@ -33,18 +33,18 @@
from clearcode.models import CDitem

"""
The input is a bunch of scans from ClearlyDefined and
the output is a bunch of git repositories with committed and
pushed scans such that we balance the scans roughly evenly across
different repositories.
The primary reason for multiple repositories is size of a single
repo. There is a size limit of 5 GB at GitHub and it's difficult
to work with repositories with millions of files.
Therefore the approach is to use hashing as a way to name git
repositories and directories. We compute a hash on the purl of the scanned
package and use the first few layers of this hash for the repo and
directory names.
Initial processing steps are:
@@ -54,15 +54,15 @@
- Then we store the scan using the purl hash and purl as path.
- Finally commit and push! : )
Because it's not practical to process many repos at once, we organize the
processing one repo at a time. For this, we iterate over a bunch of records, get or compute
the purl hash and process the records that share the same hash.
We are using a short hash that is three characters long using hexadecimal encoding.
Therefore we can have 16*16*16 = 4096 repositories where each repo would contain about
25k scan files, if we were to store 100 million scans (which is a high mark).
For reference one scan should use less than 100 KB on average when compressed
with gzip or git, based on looking at 15 million scans. Each repo should be
roughly a couple hundred megabytes, based on 15 million scans.
"""
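The hashing scheme this docstring describes can be sketched as follows. This is only an illustration: the excerpt does not show which hash function store_scans.py actually uses, so sha256 is an assumption here, and `purl_repo_prefix` is a hypothetical helper name.

```python
import hashlib


def purl_repo_prefix(purl, length=3):
    # Hash the package URL (purl) and keep the first `length` hex characters.
    # With length=3 there are 16**3 = 4096 possible bucket names, matching
    # the repo-count arithmetic in the docstring above.
    digest = hashlib.sha256(purl.encode("utf-8")).hexdigest()
    return digest[:length]
```

For example, `purl_repo_prefix("pkg:maven/org.apache/[email protected]")` deterministically maps that package to one of 4096 three-character repo/directory names.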

2 changes: 1 addition & 1 deletion clearcode/sync.py
@@ -350,7 +350,7 @@ def is_unchanged_remotely(self, url, session=session):
remote_etag = response.headers.get("etag")
if remote_etag and self.etags_cache.get(url) == remote_etag:
return True
-except:
+except Exception:
return False

def is_fetched(self, checksum, url):
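The `except:` to `except Exception:` fix above is more than style: a bare `except` also traps BaseException subclasses such as KeyboardInterrupt and SystemExit. A minimal illustration with toy functions (not the actual sync code):

```python
def swallow_everything():
    try:
        raise KeyboardInterrupt  # stands in for Ctrl-C during a fetch
    except:  # noqa: E722 -- a bare except catches BaseException too
        return False


def swallow_errors_only():
    try:
        raise KeyboardInterrupt
    except Exception:  # KeyboardInterrupt is not an Exception, so it escapes
        return False
```

The first function quietly returns False even on Ctrl-C; the second lets the interrupt propagate while still suppressing ordinary errors.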
6 changes: 3 additions & 3 deletions matchcode/tests/test_match.py
@@ -34,7 +34,7 @@ class MatchPackagesTestCase(MatchcodeTestCase):
def setUp(self):
# Execute the superclass' setUp method before creating our own
# DB objects
-super(MatchPackagesTestCase, self).setUp()
+super().setUp()

self.test_package1, _ = Package.objects.get_or_create(
filename="abbot-0.12.3.jar",
@@ -158,7 +158,7 @@ class MatchNestedPackagesTestCase(MatchcodeTestCase):
def setUp(self):
# Execute the superclass' setUp method before creating our own
# DB objects
-super(MatchNestedPackagesTestCase, self).setUp()
+super().setUp()

self.test_package1, _ = Package.objects.get_or_create(
filename="plugin-request-2.4.1.tgz",
@@ -219,7 +219,7 @@ class DirectoryMatchingTestCase(MatchcodeTestCase):
maxDiff = None

def setUp(self):
-super(DirectoryMatchingTestCase, self).setUp()
+super().setUp()

self.test_package1, _ = Package.objects.get_or_create(
filename="abbrev-1.0.3.tgz",
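The repeated `super(SomeClass, self).setUp()` to `super().setUp()` rewrites throughout this commit rely on Python 3's zero-argument `super()`, which resolves the class and instance from the enclosing scope. A toy equivalent showing the two spellings behave identically:

```python
class Base:
    def setUp(self):
        return "base ready"


class OldStyle(Base):
    def setUp(self):
        # Python 2 compatible spelling: class and instance passed explicitly
        return super(OldStyle, self).setUp()


class NewStyle(Base):
    def setUp(self):
        # Python 3 zero-argument form; same MRO lookup, less repetition
        return super().setUp()
```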
2 changes: 1 addition & 1 deletion matchcode/tests/test_models.py
@@ -41,7 +41,7 @@ class BaseModelTest(MatchcodeTestCase):
maxDiff = None

def setUp(self):
-super(BaseModelTest, self).setUp()
+super().setUp()

self.test_package1, _ = Package.objects.get_or_create(
filename="abbot-0.12.3.jar",
32 changes: 16 additions & 16 deletions minecode/filter.py
@@ -39,28 +39,28 @@ def sf_net(input_file, output):
writer = csv.writer(fo, quoting=csv.QUOTE_ALL)
with open(input_file) as fi:
reader = csv.reader(fi)
-for i, l in enumerate(reader):
+for i, row in enumerate(reader):
if i == 0:
# add headers on first row
-l.extend(new_headers)
-if not l:
+row.extend(new_headers)
+if not row:
continue
-project_id = l[0]
-name = l[1]
-version_column = l[2]
+project_id = row[0]
+name = row[1]
+version_column = row[2]
sep = ": released on "
if sep not in version_column:
# write as is if we do not have a file release date
# separator
-writer.writerow(l)
+writer.writerow(row)
continue
filename, release_date_ts = version_column.split(sep, 1)
found_version = version.version_hint(filename)
-l.append(found_version or "")
-l.append(release_date_ts or "")
-l.append(download_url_template % locals())
-l.append("") # reviewed
-l.append("") # curated name
+row.append(found_version or "")
+row.append(release_date_ts or "")
+row.append(download_url_template % locals())
+row.append("") # reviewed
+row.append("") # curated name
excluded_reason = ""
if "." in project_id:
excluded_reason = "mirror or special project"
@@ -70,10 +70,10 @@ def sf_net(input_file, output):
excluded_reason = "special chars in name"
elif not good_filename(project_id, filename, name):
excluded_reason = "multi component possible"
-l.append(excluded_reason)
-l.append("") # curated_owner
-l.append("") # owner_type
-writer.writerow(l)
+row.append(excluded_reason)
+row.append("") # curated_owner
+row.append("") # owner_type
+writer.writerow(row)


def good_name(s):
2 changes: 1 addition & 1 deletion minecode/indexing.py
@@ -100,7 +100,7 @@ def index_package(
declared_license_expression = summary_data.get("declared_license_expression")
other_license_expressions = summary_data.get("other_license_expressions", [])
other_license_expressions = [
-l["value"] for l in other_license_expressions if l["value"]
+license_expression["value"] for license_expression in other_license_expressions if license_expression["value"]
]
other_license_expression = combine_expressions(other_license_expressions)

3 changes: 1 addition & 2 deletions minecode/management/commands/check_licenses.py
@@ -99,8 +99,7 @@ def find_ambiguous_packages(
)
license_filter = reduce(operator.or_, filter_expression)

-for package in Package.objects.filter(type__in=types).filter(license_filter):
-    yield package
+yield from Package.objects.filter(type__in=types).filter(license_filter)


def dump(packages, json_location):
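The `yield from` rewrites in this commit delegate to an iterable and produce exactly the same items as the explicit loop they replace. A toy stand-in (plain lists rather than the Django querysets used above):

```python
def with_loop(items):
    # pre-change form: explicit loop re-yielding each item
    for item in items:
        yield item


def with_delegation(items):
    # post-change form: generator delegation, same items in the same order
    yield from items
```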
5 changes: 1 addition & 4 deletions minecode/mappers/bitbucket.py
@@ -33,10 +33,7 @@ def get_packages(self, uri, resource_uri):
"""Yield Package built from resource_uri record for a single package version."""
downloads_data = json.loads(resource_uri.data, object_pairs_hook=OrderedDict)
for download_data in downloads_data.get("values", []):
-for package in build_bitbucket_download_packages(
-    download_data, resource_uri.package_url
-):
-    yield package
+yield from build_bitbucket_download_packages(download_data, resource_uri.package_url)


def build_bitbucket_download_packages(download_data, purl):
6 changes: 3 additions & 3 deletions minecode/mappers/cpan.py
@@ -49,7 +49,7 @@ def build_packages_from_release_json(metadata, uri=None):
continue

extracted_license_statement = [
-l for l in release.get("license", []) if l and l.strip()
+lic for lic in release.get("license", []) if lic and lic.strip()
]

common_data = dict(
@@ -87,7 +87,7 @@ def build_packages_from_release_json(metadata, uri=None):
# like perl_5. The license here under resources section is the
# url of license for example: http://dev.perl.org/licenses/ So
# it's useful to collect both information...
-license_url = [l for l in resources.get("license", []) if l and l.strip()]
+license_url = [lic for lic in resources.get("license", []) if lic and lic.strip()]
if license_url:
common_data["extracted_license_statement"].extend(license_url)

@@ -164,7 +164,7 @@ def build_packages_from_metafile(metadata, uri=None, purl=None):
licenses_content = content.get("license")
extracted_license_statement = []
if licenses_content:
-if isinstance(licenses_content, (list,)):
+if isinstance(licenses_content, list):
for lic in licenses_content:
extracted_license_statement.append(lic)
else:
4 changes: 2 additions & 2 deletions minecode/mappers/cran.py
@@ -95,7 +95,7 @@ def build_packages_from_html(metadata, uri=None, purl=None):
if key == "Version:":
common_data["version"] = value
elif key == "URL:":
-if type(value) == list and len(value) > 0:
+if type(value) is list and len(value) > 0:
homepages = []
for home_page in value:
homepages.append(home_page)
@@ -129,7 +129,7 @@ def build_packages_from_html(metadata, uri=None, purl=None):
)
common_data["parties"].append(party.to_dict())
elif "source" in key or "binaries" in key:
-if type(value) == list:
+if type(value) is list:
for url in value:
download_urls.append(get_download_url(url))
elif key == "Published:":
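Ruff's E721 rule drives the `type(value) == list` to `type(value) is list` change above: type objects are singletons, so identity comparison is the idiomatic exact-type check. Note that either spelling still rejects subclasses, unlike `isinstance`. A sketch with a hypothetical subclass:

```python
class TrackedList(list):
    # Hypothetical subclass, used only to illustrate the difference.
    pass


def is_exact_list(value):
    # The mapper's post-change check: identity comparison on the type
    # object, as E721 recommends over `type(value) == list`.
    return type(value) is list
```

`isinstance` would accept a `TrackedList` where `is_exact_list` does not; the commit keeps the exact-type semantics and only changes the comparison operator.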
2 changes: 1 addition & 1 deletion minecode/mappers/eclipse.py
@@ -63,7 +63,7 @@ def build_packages_with_json(metadata, purl=None, uri=None):

if project_metadata.get("licenses"):
common_data["extracted_license_statement"] = [
-l.get("name") for l in project_metadata.get("licenses", [])
+lic.get("name") for lic in project_metadata.get("licenses", [])
]
common_data["license_detections"] = []

6 changes: 3 additions & 3 deletions minecode/mappers/sourceforge.py
@@ -79,11 +79,11 @@ def build_packages_from_metafile(metadata, purl=None, uri=None):

extracted_license_statement = []
licenses = categories.get("license") or []
-for l in licenses:
-license_name = l.get("fullname")
+for lic in licenses:
+license_name = lic.get("fullname")
# full name takes priority over shortname, since a shortname like gpl does not convey the detailed gpl version etc.
if license_name:
-extracted_license_statement.append(l.get("shortname"))
+extracted_license_statement.append(lic.get("shortname"))
if license_name:
extracted_license_statement.append(license_name)
if extracted_license_statement:
8 changes: 4 additions & 4 deletions minecode/models.py
@@ -492,7 +492,7 @@ def save(self, *args, **kwargs):
self.normalize_fields()
self.has_map_error = True if self.map_error else False
self.has_visit_error = True if self.visit_error else False
-super(ResourceURI, self).save(*args, **kwargs)
+super().save(*args, **kwargs)


class ScannableURIManager(models.Manager):
@@ -790,7 +790,7 @@ def save(self, *args, **kwargs):
if not self.canonical:
self.canonical = get_canonical(self.uri)
self.normalize_fields()
-super(ScannableURI, self).save(*args, **kwargs)
+super().save(*args, **kwargs)

def process_scan_results(
self, scan_results_location, scan_summary_location, project_extra_data
@@ -971,7 +971,7 @@ class Meta:
def save(self, *args, **kwargs):
"""Save, adding defaults for computed fields and validating fields."""
self.normalize_fields()
-super(PriorityResourceURI, self).save(*args, **kwargs)
+super().save(*args, **kwargs)


# TODO: Use the QuerySet.as_manager() for more flexibility and chaining.
@@ -1087,7 +1087,7 @@ class Meta:
def save(self, *args, **kwargs):
"""Save, adding defaults for computed fields and validating fields."""
self.normalize_fields()
-super(ImportableURI, self).save(*args, **kwargs)
+super().save(*args, **kwargs)


class ProcessingError(BaseURI):
2 changes: 1 addition & 1 deletion minecode/saneyaml.py
@@ -121,7 +121,7 @@ def ordered_loader(loader, node):
class SaneDumper(SafeDumper):
def increase_indent(self, flow=False, indentless=False):
"""Ensure that lists items are always indented."""
-return super(SaneDumper, self).increase_indent(flow, indentless=False)
+return super().increase_indent(flow, indentless=False)

def ignore_aliases(self, data):
"""Avoid having aliases created from re-used Python objects."""
2 changes: 1 addition & 1 deletion minecode/seed.py
@@ -45,7 +45,7 @@ def get_active_seeders(seeders=()):
if not seeders:
seeders = get_configured_seeders()
for seeder in seeders:
-if isinstance(seeder, (bytes, unicode)):
+if isinstance(seeder, bytes | unicode):
module_name, _, class_name = seeder.rpartition(".")
module = importlib.import_module(module_name)
yield getattr(module, class_name)()
6 changes: 3 additions & 3 deletions minecode/tests/test_api.py
@@ -30,7 +30,7 @@ class ScannableURIAPITestCase(JsonBasedTesting, TestCase):

def setUp(self):
self.scan_queue_worker_user = User.objects.create_user(
-username="username", email="[email protected]", password="secret"
+username="username", email="[email protected]", password="secret" # NOQA
)
scan_queue_workers_group, _ = Group.objects.get_or_create(
name="scan_queue_workers"
@@ -49,7 +49,7 @@ def setUp(self):
self.staff_user = User.objects.create_user(
username="staff_username",
email="[email protected]",
-password="secret",
+password="secret", # NOQA
is_staff=True,
)
self.staff_auth = f"Token {self.staff_user.auth_token.key}"
@@ -60,7 +60,7 @@ def setUp(self):
self.regular_user = User.objects.create_user(
username="regular_username",
email="[email protected]",
-password="secret",
+password="secret", # NOQA
)
self.regular_auth = f"Token {self.regular_user.auth_token.key}"
self.regular_client = APIClient(enforce_csrf_checks=True)
2 changes: 1 addition & 1 deletion minecode/tests/test_conan.py
@@ -25,7 +25,7 @@ class ConanPriorityQueueTests(JsonBasedTesting, TestCase):
test_data_dir = os.path.join(os.path.dirname(__file__), "testfiles")

def setUp(self):
-super(ConanPriorityQueueTests, self).setUp()
+super().setUp()
self.package_url1 = PackageURL.from_string("pkg:conan/[email protected]")
zlib_conanfile_loc = self.get_test_loc("conan/zlib/manifest/conanfile.py")
zlib_conandata_loc = self.get_test_loc("conan/zlib/manifest/conandata.yml")
2 changes: 1 addition & 1 deletion minecode/tests/test_gnu.py
@@ -22,7 +22,7 @@ class GnuPriorityQueueTests(JsonBasedTesting, TestCase):
test_data_dir = os.path.join(os.path.dirname(__file__), "testfiles")

def setUp(self):
-super(GnuPriorityQueueTests, self).setUp()
+super().setUp()
glibc_data_loc = self.get_test_loc("gnu/glibc/index.html")

with open(glibc_data_loc) as f:
2 changes: 1 addition & 1 deletion minecode/tests/test_maven.py
@@ -903,7 +903,7 @@ class MavenPriorityQueueTests(JsonBasedTesting, DjangoTestCase):
test_data_dir = os.path.join(os.path.dirname(__file__), "testfiles")

def setUp(self):
-super(MavenPriorityQueueTests, self).setUp()
+super().setUp()

self.expected_pom_loc = self.get_test_loc("maven/pom/classworlds-1.1.pom")
with open(self.expected_pom_loc) as f:
