Skip to content

Commit

Permalink
bug: citation duplicates and accordion overflow (#51)
Browse files Browse the repository at this point in the history
Co-authored-by: Kevin Boyer <[email protected]>
  • Loading branch information
ccheng26 and KevinJBoyer authored Aug 13, 2024
1 parent 43fc8da commit 1e283f6
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 9 deletions.
2 changes: 1 addition & 1 deletion app/src/chainlit.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def _get_retrieval_metadata(chunks_with_scores: Sequence[ChunkWithScore]) -> dic
"chunks": [
{
"document.name": chunk_with_score.chunk.document.name,
- "chunk.id": chunk_with_score.chunk.id,
+ "chunk.id": str(chunk_with_score.chunk.id),
"score": chunk_with_score.score,
}
for chunk_with_score in chunks_with_scores
Expand Down
6 changes: 4 additions & 2 deletions app/src/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,13 @@ def _format_to_accordion_html(document: Document, score: float) -> str:
def _format_to_accordion_group_html(documents: OrderedDict[Document, list[ChunkWithScore]]) -> str:
global _accordion_id
html = ""
- internal_citation = ""
for document in documents:
+ internal_citation = ""
_accordion_id += 1
for index, chunk in enumerate(documents[document], start=1):
- formatted_chunk = re.sub(r"\n+", "\n", chunk.chunk.content).strip()
+ chunk_lines = chunk.chunk.content.splitlines()
+ formatted_chunk = " ".join(chunk_lines)
+ formatted_chunk = re.sub(r"\t+", "", formatted_chunk).strip()
formatted_chunk = f"<p>{formatted_chunk} </p>" if formatted_chunk else ""
citation = f"<h4>Citation #{index} (score: {str(chunk.score)})</h4>"
similarity_score = f"<p>Similarity Score: {str(chunk.score)}</p>"
Expand Down
6 changes: 3 additions & 3 deletions app/tests/src/test_chainlit.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,17 @@ def test__get_retrieval_metadata(chunks_with_scores):
"chunks": [
{
"document.name": chunks_with_scores[0].chunk.document.name,
- "chunk.id": chunks_with_scores[0].chunk.id,
+ "chunk.id": str(chunks_with_scores[0].chunk.id),
"score": chunks_with_scores[0].score,
},
{
"document.name": chunks_with_scores[1].chunk.document.name,
- "chunk.id": chunks_with_scores[1].chunk.id,
+ "chunk.id": str(chunks_with_scores[1].chunk.id),
"score": chunks_with_scores[1].score,
},
{
"document.name": chunks_with_scores[2].chunk.document.name,
- "chunk.id": chunks_with_scores[2].chunk.id,
+ "chunk.id": str(chunks_with_scores[2].chunk.id),
"score": chunks_with_scores[2].score,
},
]
Expand Down
6 changes: 3 additions & 3 deletions app/tests/src/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ def test_format_bem_documents():
chunks_shown_max_num=2, chunks_shown_min_score=0.91, chunks_with_scores=chunks_with_scores
)

- assert docs[0].content not in html
- assert docs[1].content not in html
- assert docs[3].content in html
+ assert docs[0].content.replace("\n", " ") not in html
+ assert docs[1].content.replace("\n", " ") not in html
+ assert docs[3].content.replace("\n", " ") in html
assert "Citation #2" in html
assert "Citation #3" not in html
assert "<p>Similarity Score: 0.95</p>" in html

0 comments on commit 1e283f6

Please sign in to comment.