Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Debug #7

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion scan_explorer_service/manifest_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from scan_explorer_service.models import Article, Page, Collection
from typing import Union
from itertools import chain
from flask import current_app

class ManifestFactoryExtended(ManifestFactory):
""" Extended manifest factory.
Expand All @@ -14,16 +15,22 @@ class ManifestFactoryExtended(ManifestFactory):
def create_manifest(self, item: Union[Article, Collection]):
    """Build the manifest for an article or a collection.

    The manifest is identified as ``<item.id>/manifest.json`` and uses the
    item id for both its label and its description. It is given the sequence
    returned by ``create_sequence`` and every range returned by
    ``create_range``.
    """
    manifest = self.manifest(
        ident=f'{item.id}/manifest.json',
        label=item.id,
    )
    manifest.description = item.id

    item_sequence = self.create_sequence(item)
    manifest.add_sequence(item_sequence)

    # Local name chosen so the builtin ``range`` is not shadowed.
    for item_range in self.create_range(item):
        manifest.add_range(item_range)

    current_app.logger.debug(f"Created manifest {manifest}")
    return manifest

def create_sequence(self, item: Union[Article, Collection]):
    """Assemble a sequence holding one canvas per page of ``item``.

    Canvases are obtained through ``get_or_create_canvas`` and added in the
    iteration order of ``item.pages``.
    """
    sequence: Sequence = self.sequence()

    # Lazy generator: each canvas is fetched right before it is added.
    canvases = (self.get_or_create_canvas(page) for page in item.pages)
    for canvas in canvases:
        sequence.add_canvas(canvas)

    current_app.logger.debug(f"Sequence {sequence}")
    return sequence

def create_range(self, item: Union[Article, Collection]):
Expand All @@ -33,11 +40,14 @@ def create_range(self, item: Union[Article, Collection]):
range: Range = self.range(ident=item.bibcode, label=item.bibcode)
for page in item.pages:
range.add_canvas(self.get_or_create_canvas(page))

current_app.logger.debug(f"Range {[range]}")
return [range]

def get_canvas_dict(self) -> Dict[str, Canvas]:
    """Return this factory's canvas dictionary, creating it on first use."""
    try:
        canvas_dict = self.canvas_dict
    except AttributeError:
        # Attribute not set yet on this instance: start with an empty dict.
        canvas_dict = self.canvas_dict = {}

    current_app.logger.debug(f"Canvas dict {self.canvas_dict}")
    return canvas_dict

def get_or_create_canvas(self, page: Page):
Expand All @@ -58,7 +68,7 @@ def get_or_create_canvas(self, page: Page):
annotation.on = canvas.id
canvas.add_annotation(annotation)
canvas_dict[page.id] = canvas

current_app.logger.debug(f"Canvas {canvas}")
return canvas

def create_image_annotation(self, page: Page):
Expand All @@ -72,6 +82,7 @@ def create_image_annotation(self, page: Page):
image.format = page.format
image.height = page.height
image.width = page.width
current_app.logger.debug(f"Annotation {annotation}")
return annotation

def add_search_service(self, manifest: Manifest, search_url: str):
Expand Down
16 changes: 12 additions & 4 deletions scan_explorer_service/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,27 +151,35 @@ def __init__(self, **kwargs):
@property
def image_url(self):
    """URL for this page's image.

    Built by ``url_for_proxy`` for the ``proxy.image_proxy`` endpoint, with
    ``image_path`` passed as the ``path`` argument.
    """
    image_api_url = url_for_proxy(
        'proxy.image_proxy',
        path=self.image_path,
    )
    current_app.logger.debug(f'image api url: {image_api_url}')
    return image_api_url

@property
def image_path(self):
separator = current_app.config.get('IMAGE_API_SLASH_SUB', '%2F')
image_path = separator.join(self.image_path_basic)
image_path = separator.join(self.image_path_basic[0])
current_app.logger.debug(f'color type: {self.color_type}')
if self.color_type != PageColor.BW:
image_path += '.tif'
current_app.logger.debug(f'image path: {image_path}')
return image_path

@property
def image_path_basic(self):
image_format = ''
image_path = [self.collection.type, self.collection.journal, self.collection.volume]
image_path = [item.replace('.', '_') for item in image_path]
image_path = ['bitmaps'] + image_path + ['600', self.name]

return image_path
current_app.logger.debug(f'image path basic: {image_path}')
if self.color_type != PageColor.BW:
image_format = '.tif'
return image_path, image_format

@property
def thumbnail_url(self):
return f'{self.image_url}/square/480,480/0/{self.image_color_quality}.jpg'
url = f'{self.image_url}/square/480,480/0/{self.image_color_quality}.jpg'
current_app.logger.debug('thumbnail url: ' + url)
return url

@property
def image_color_quality(self):
Expand Down
6 changes: 6 additions & 0 deletions scan_explorer_service/open_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def create_query_string_query(query_string: str):
}
}
}
current_app.logger.debug(f"query string: {query}")
return query

def append_aggregate(query: dict, agg_field: EsFields, page: int, size: int, sort: OrderOptions):
Expand Down Expand Up @@ -112,6 +113,7 @@ def set_page_search_fields(query: dict) -> dict:
return query

def page_os_search(qs: str, page, limit, sort):
qs = qs.replace("&", "+")
query = create_query_string_query(qs)
query = set_page_search_fields(query)
from_number = (page - 1) * limit
Expand Down Expand Up @@ -143,7 +145,11 @@ def page_ocr_os_search(collection_id: str, page_number:int):
return es_result

def aggregate_search(qs: str, aggregate_field, page, limit, sort):
    """Run an aggregated search for ``qs`` and return the search result.

    Every ``&`` in the query string is replaced by ``+`` before the query is
    built. The aggregation on ``aggregate_field`` is then appended with the
    given ``page``, ``limit`` and ``sort`` values, and the resulting query is
    handed to ``es_search``.
    """
    query = create_query_string_query(qs.replace("&", "+"))
    current_app.logger.debug(f"query: {query}")

    query = append_aggregate(query, aggregate_field, page, limit, sort)
    current_app.logger.debug(f"query with aggregate: {query}")

    es_result = es_search(query)
    current_app.logger.debug(f"es_result: {es_result}")
    return es_result
6 changes: 6 additions & 0 deletions scan_explorer_service/tests/test_search_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ def test_parse_query(self):
final_query, _ = parse_query_string('PageColor:grAYsCaLe')
self.assertEqual(final_query, 'page_color:Grayscale')

final_query, _ = parse_query_string('PageColor:BW')
self.assertEqual(final_query, 'page_color:BW')

final_query, _ = parse_query_string('PageColor:cOlor')
self.assertEqual(final_query, 'page_color:Color')


if __name__ == '__main__':
unittest.main()
5 changes: 5 additions & 0 deletions scan_explorer_service/utils/db_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from sqlalchemy import or_
from scan_explorer_service.models import Article, Collection, Page
from flask import current_app


def collection_exists(session, journal, volume):
Expand Down Expand Up @@ -79,18 +80,22 @@ def page_overwrite(session, page):
def article_thumbnail(session, id):
    """Return the thumbnail URL of the first page of the article ``id``.

    "First" is the page with the lowest ``volume_running_page_num`` among the
    pages joined to the article.
    """
    article_pages = (
        session.query(Page)
        .join(Article, Page.articles)
        .filter(Article.id == id)
    )
    # NOTE(review): presumably ``first()`` yields None when nothing matches,
    # in which case the attribute access below would fail - confirm.
    page = article_pages.order_by(Page.volume_running_page_num.asc()).first()
    current_app.logger.debug(f'article thumbnail {page}')
    return page.thumbnail_url

def collection_thumbnail(session, id):
    """Return the thumbnail URL of the first page of the collection ``id``.

    "First" is the page with the lowest ``volume_running_page_num`` among the
    pages whose ``collection_id`` equals ``id``.
    """
    collection_pages = session.query(Page).filter(Page.collection_id == id)
    ordered_pages = collection_pages.order_by(Page.volume_running_page_num.asc())
    # NOTE(review): presumably ``first()`` yields None when nothing matches,
    # in which case the attribute access below would fail - confirm.
    page = ordered_pages.first()
    current_app.logger.debug(f'collection thumbnail {page.thumbnail_url}')
    return page.thumbnail_url

def page_thumbnail(session, id):
    """Return the thumbnail URL of the page whose id equals ``id``."""
    matching_pages = session.query(Page).filter(Page.id == id)
    page = matching_pages.one()
    current_app.logger.debug(f'page thumbnail {page.thumbnail_url}')
    return page.thumbnail_url

def item_thumbnail(session, id, type):
current_app.logger.debug(f'Getting item thumbnail: id {id} type {type}')
if type == 'page':
return page_thumbnail(session, id)
elif type == 'article':
Expand Down
10 changes: 7 additions & 3 deletions scan_explorer_service/utils/s3_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,17 @@ def write_object_s3(self, file_bytes, object_name):

def read_object_s3(self, object_name):
try:
current_app.logger.debug(f"Attempting to download object: {object_name}")
with io.BytesIO() as s3_obj:
self.bucket.download_fileobj(object_name, s3_obj)
current_app.logger.debug(f"Object downloaded successfully: {object_name}")
s3_obj.seek(0)
s3_file = s3_obj.read()
current_app.logger.debug(f"Read {len(s3_file)} bytes from object: {object_name}")
current_app.logger.debug(f"First 100 bytes of file content: {s3_file[:100]}")
return s3_file
except (ClientError, ParamValidationError) as e:
current_app.logger.exception(e)
raise e
except Exception as e:
current_app.logger.exception(f"Unexpected error reading object {object_name}: {str(e)}")
raise


29 changes: 24 additions & 5 deletions scan_explorer_service/utils/search_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import enum
import re

from flask import current_app

class SearchOptions(enum.Enum):
"""Available Search Options"""
Bibcode = 'bibcode'
Expand Down Expand Up @@ -53,30 +55,44 @@ class OrderOptions(str, enum.Enum):

def parse_query_args(args):
    """Extract the search query, paging and sorting options from ``args``.

    Args:
        args: Argument mapping whose ``get`` accepts ``(key, default, type)``.

    Returns:
        A tuple ``(qs, qs_dict, page, limit, sort)``: ``qs`` and ``qs_dict``
        are the results of ``parse_query_string``, ``page`` defaults to 1,
        ``limit`` defaults to 10, and ``sort`` is the result of
        ``parse_sorting_option`` applied to the raw ``sort`` argument.
    """
    # Raw string literal: '\s' in a plain literal is an invalid escape
    # sequence. The pattern strips whitespace following each colon.
    qs = re.sub(r':\s*', ':', args.get('q', '', str))
    current_app.logger.debug(f'qs {qs}')

    qs, qs_dict = parse_query_string(qs)

    current_app.logger.debug(f'qs {qs}, qs_dict {qs_dict}')

    page = args.get('page', 1, int)
    limit = args.get('limit', 10, int)
    sort_raw = args.get('sort')
    sort = parse_sorting_option(sort_raw)
    current_app.logger.debug(f'qs {qs}, qs_dict {qs_dict}, sort {sort}')
    return qs, qs_dict, page, limit, sort

def parse_query_string(qs):
qs_to_split = qs.replace('[', '"[').replace(']',']"')
current_app.logger.debug(f'qs to split {qs_to_split}')
qs_arr = [q for q in shlex.split(qs_to_split) if ':' in q]
current_app.logger.debug(f'qs arr {qs_arr}')
qs_dict = {}
qs_only_free = qs
current_app.logger.debug(f'qs only free {qs_only_free}')

for kv in qs_arr:
kv_arr = kv.split(':', maxsplit=1)
current_app.logger.debug(f'kv_arr {kv_arr}')
#Remove all parameters from the original search to be able to handle the free search
qs_only_free = qs_only_free.replace(kv, "")
current_app.logger.debug(f'qs_only_free {qs_only_free}')

if len(kv_arr) == 2:
qs_dict[kv_arr[0].lower()] = kv_arr[1].strip()
#If the option have qutoes we remove them from the free. Previous removal would than have failed
#If the option has quotes we remove them from the free search. The previous removal would then have failed
alt_kv = kv_arr[0] + ':"' + kv_arr[1] + '"'
qs_only_free = qs_only_free.replace(alt_kv, '')
current_app.logger.debug(f'kv_arr == 2. alt_kv {alt_kv}, qs_only_free {qs_only_free}')

current_app.logger.debug(f'qs dict {qs_dict}')
check_query(qs_dict)
#Adds a () around each free search to force OS to look for each individual entry against all default fields
for parameter in re.split('\s+', qs_only_free):
Expand All @@ -86,11 +102,11 @@ def parse_query_string(qs):
for key in qs_dict.keys():
#Translate input on the keys to the dedicated OS columns
insensitive_replace = re.compile(re.escape(key), re.IGNORECASE)
qs = insensitive_replace.sub(query_translations[key.lower()], qs)

insensitive_replace = re.compile(re.escape(qs_dict[key]), re.IGNORECASE)
qs = insensitive_replace.sub(query_translations[key.lower()], qs)
# To ensure only the strings after the colon are replaced and no partial replacements are made
insensitive_replace = re.compile(r'(?<=:)\b' + re.escape(qs_dict[key]) + r'\b', re.IGNORECASE)
qs = insensitive_replace.sub(qs_dict[key], qs)

current_app.logger.debug(f'qs: {qs} and qs dict: {qs_dict}')
return qs, qs_dict

def parse_sorting_option(sort_input: str):
Expand Down Expand Up @@ -130,11 +146,14 @@ def check_page_color(qs_dict: dict):
page_color = qs_dict[SearchOptions.PageColor.value]
valid_types = [p.name for p in PageColor]
if page_color in valid_types:
current_app.logger.debug("Page color {page_color} is valid")
return

# Check lowercased and updated to cased
for p in PageColor:
if page_color.replace('"','').lower() == p.name.lower():
qs_dict[SearchOptions.PageColor.value] = p.name
current_app.logger.debug("Page color {qs_dict[SearchOptions.PageColor.value]} changed to {p.name}")
return
raise Exception("%s is not a valid page color, %s is possible choices"% (page_color, str(valid_types)))

Expand Down
Loading
Loading