Skip to content

Commit

Permalink
feat: Provide direct link to text (#162)
Browse files Browse the repository at this point in the history
  • Loading branch information
ccheng26 authored Jan 2, 2025
1 parent 49771a7 commit d3db181
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 1 deletion.
4 changes: 3 additions & 1 deletion app/src/chat_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from src.db.models.document import Subsection
from src.generate import ChatHistory
from src.healthcheck import HealthCheck, health
from src.util.string_utils import format_highlighted_uri

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router for the chat endpoints: created with the "/api" prefix and the "Chat API" tag.
router = APIRouter(prefix="/api", tags=["Chat API"])
Expand Down Expand Up @@ -202,12 +203,13 @@ class Citation(BaseModel):
@staticmethod
def from_subsection(subsection: Subsection) -> "Citation":
    """Build a Citation from a document subsection.

    The citation's ``uri`` is produced by ``format_highlighted_uri`` from the
    source URL of the chunk's document and the subsection text, so the link
    can point at the cited passage rather than just the page.

    Args:
        subsection: The subsection being cited; its chunk, id, text and
            text_headings are read.

    Returns:
        A Citation populated from the subsection and its parent document.
    """
    chunk = subsection.chunk
    highlighted_text_src = format_highlighted_uri(chunk.document.source, subsection.text)
    return Citation(
        citation_id=f"citation-{subsection.id}",
        source_id=str(chunk.document.id),
        source_name=chunk.document.name,
        page_number=chunk.page_number,
        # ``uri`` must be passed exactly once (a repeated keyword is a
        # SyntaxError); the highlighted link replaces the bare document source.
        uri=highlighted_text_src,
        headings=subsection.text_headings,
        citation_text=subsection.text,
    )
Expand Down
13 changes: 13 additions & 0 deletions app/src/util/string_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,16 @@ def parse_heading_markdown(md: str) -> tuple[int, str]:
def remove_links(markdown: str) -> str:
    """Replace every markdown link with just its link text.

    Example: ``[This is a link](https://example.com/relative/path) and
    [another](https://example.com/absolute/path)`` becomes
    ``This is a link and another``.
    """
    link_pattern = re.compile(r"\[([^\]]+)\]\([^\)]+\)")

    def _keep_label(match):
        # Group 1 is the text between the square brackets.
        return match.group(1)

    return link_pattern.sub(_keep_label, markdown)


def format_highlighted_uri(source_url: str | None, subsection_text: str) -> str | None:
if not source_url:
return None
citation_without_special_chars = re.sub(r"\W+", " ", subsection_text).strip()
citation_arr = citation_without_special_chars.split(" ")
formatted_text_to_highlight = "%20".join(citation_arr[:5]) + "," + "%20".join(citation_arr[-5:])
return (
(source_url[:-1] if source_url.endswith("/") else source_url)
+ "#:~:text="
+ formatted_text_to_highlight
)
12 changes: 12 additions & 0 deletions app/tests/src/util/test_string_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest

from src.util.string_utils import (
format_highlighted_uri,
headings_as_markdown,
parse_heading_markdown,
remove_links,
Expand Down Expand Up @@ -108,3 +109,14 @@ def test_parse_heading_markdown():
def test_remove_links():
    """Links with relative and absolute targets both collapse to their link text."""
    source_markdown = (
        "[This is a link](relative/path) and [another](https://example.com/absolute/path)"
    )
    expected_text = "This is a link and another"
    assert remove_links(source_markdown) == expected_text


def test_citation_formatting():
    """A long markdown citation yields a text fragment built from its first and last five words."""
    page_url = "https://edd.ca.gov/en/disability/options_to_file_for_pfl_benefits/"
    citation_markdown = "* [CalFresh](https://www.getcalfresh.org/?source=edd) (formerly known as Food Stamps) \n CalFresh provides monthly food assistance to people and families with low income, including those who lost their job because of the pandemic. Visit [GetCalFresh.org](https://www.getcalfresh.org/?source=edd) to apply online.\n* [California Association of Food Banks](http://www.cafoodbanks.org/) \n In California, federal, state, and local community organizations coordinate to make sure that groceries are available at local food banks.\n* [Free Summer Lunch Programs](http://www.cde.ca.gov/ds/sh/sn/summersites.asp) \n Free lunches are available to all children under 18, regardless of income.\n* [School Meals](https://www.fns.usda.gov/school-meals/applying-free-and-reduced-price-school-meals) \n Free or reduced-price breakfast and lunch at public schools when in session.\n* [Women, Infants and Children (WIC) Program](https://www.cdph.ca.gov/Programs/CFH/DWICSN/Pages/Program-Landing1.aspx) \n Pregnant women and children under age 5 receive nutrition support at WIC."
    # The trailing slash is dropped from the page URL before the fragment is appended.
    expected_url = "https://edd.ca.gov/en/disability/options_to_file_for_pfl_benefits#:~:text=CalFresh%20https%20www%20getcalfresh%20org,receive%20nutrition%20support%20at%20WIC"

    actual_url = format_highlighted_uri(page_url, citation_markdown)

    assert actual_url == expected_url

0 comments on commit d3db181

Please sign in to comment.