Skip to content

Commit

Permalink
Added new column product, with backward-compatible format (#13)
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinMikita committed May 12, 2018
1 parent 732f40a commit 20175c9
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 28 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Always downloading and indexing the file:
`http://[domain]/search.tsv`


Example: http://*www.kartenportal.ch*/search.tsv or ugly http://*blog.klokantech.com:8080/beta*/search.tsv
Example: http://www.kartenportal.ch/search.tsv or https://blog.klokantech.com/search.tsv


## Input TSV format
Expand All @@ -34,6 +34,7 @@ lang - filter
date - filter, in ISO 8601 format: YYYY-MM-DDTHH:MM:SS+HH:MM, required
tags - filter on a set + fulltext; comma-separated
custom_data - only stored, not indexed, no filter
product - filter on a set + fulltext; comma-separated; optional (the column may be omitted entirely)
```

All in tab separated value. Web must provide correct TSV (**no tabs in the content**).
Expand Down
12 changes: 6 additions & 6 deletions conf/sphinx/sphinx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -175,26 +175,26 @@ for domain in domains:
source src_%(domain_id)s
{
type = tsvpipe
tsvpipe_command = cat /data/%(domain)s/search.tsv | sed -e 's/\\r/ /g' | gawk -F"\\t" -v OFS='\\t' 'NR > 0 && NF == 8 {split($6,D,/[-T:+]/); split($6,X,""); if (X[20]=="+") D[4] -= D[7]; else D[4] += D[7]; $9 = mktime(D[1]" "D[2]" "D[3]" "D[4]" "D[5]" "D[6]); print NR"\\t"$0; }'
tsvpipe_command = cat /data/%(domain)s/search.tsv | sed -e 's/\\r/ /g' | gawk -F"\\t" -v OFS='\\t' 'NR > 0 && (NF == 8 || NF == 9) {split($6,D,/[-T:+]/); split($6,X,""); if (X[20]=="+") D[4] -= D[7]; else D[4] += D[7]; if (NF == 9) {$10 = $9;} else {$10 = "";}; $9 = mktime(D[1]" "D[2]" "D[3]" "D[4]" "D[5]" "D[6]); print NR"\\t"$0; }'
}
"""
#for column in ['title', 'content', 'tags']:
# for column in ['title', 'content', 'tags']:
for column in ['index']:
domain_config_col = """
# /* ------------------------------ */
# /* Source + Index */
source src_%(domain_id)s_%(column)s : src_%(domain_id)s
{
"""
for col in ['url', 'title', 'content', 'type', 'lang', 'date', 'tags', 'custom_data', 'date_filter']:
#if col == column:
for col in ['url', 'title', 'content', 'type', 'lang', 'date', 'tags', 'custom_data', 'date_filter', 'product']:
# if col == column:
if col == 'date_filter':
domain_config_col += " tsvpipe_attr_timestamp = {}\n".format(col)
elif col in ['title', 'content', 'tags']:
elif col in ['title', 'content', 'tags', 'product']:
domain_config_col += " tsvpipe_field_string = {}\n".format(col)
else:
domain_config_col += " tsvpipe_attr_string = {}\n".format(col)
domain_config_col += """
domain_config_col += """
}
index ind_%(domain_id)s_%(column)s : ind_main_charset
Expand Down
52 changes: 31 additions & 21 deletions web/websearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ def process_query(index, query, query_filter, start=0, count=0):
port = int(getenv('WEBSEARCH_SERVER_PORT'))
# pprint([host, port, getenv('WEBSEARCH_SERVER')])

#querylist = query.split(" ")
#query = "|".join(querylist)
# querylist = query.split(" ")
# query = "|".join(querylist)

if count == 0:
count = SEARCH_DEFAULT_COUNT
Expand All @@ -75,9 +75,9 @@ def process_query(index, query, query_filter, start=0, count=0):
while repeat > 0:
try:
cl = SphinxClient()
cl.SetServer (host, port)
cl.SetConnectTimeout(5.0) # float seconds
cl.SetLimits(start, count) #offset, limit, maxmatches=0, cutoff=0
cl.SetServer(host, port)
cl.SetConnectTimeout(5.0) # float seconds
cl.SetLimits(start, count) # offset, limit, maxmatches=0, cutoff=0
# cl.SetSortMode( SPH_SORT_ATTR_DESC, 'date')
# cl.SetMatchMode(SPH_MATCH_EXTENDED2)
cl.SetRankingMode(SPH_RANK_SPH04)
Expand Down Expand Up @@ -138,7 +138,7 @@ def process_query(index, query, query_filter, start=0, count=0):

# Process query under index
pprint(prefix + query)
result = cl.Query ( prefix + query, index )
result = cl.Query(prefix + query, index)

# pprint(result)
repeat = 0
Expand Down Expand Up @@ -201,12 +201,22 @@ def process_query_mysql(index, query, query_filter, start=0, count=0):
argsFilter = []
whereFilter = []

appended_match = ''
# Update match query to use query_filter (tags and product)
for f in ['tags', 'product']:
if query_filter[f] is None:
continue
# construct @<field> (<val1> | <val2>)
appended_match += ' @{} ({})'.format(
f,
' | '.join(query_filter[f]))

# Prepare query
whereFilter.append('MATCH(%s)')
argsFilter.append(query)
argsFilter.append(query + appended_match)

# Prepare filter for query
for f in ['date', 'type', 'lang', 'tags']:
for f in ['date', 'type', 'lang']:
if query_filter[f] is None:
continue
inList = []
Expand Down Expand Up @@ -284,9 +294,9 @@ def process_query_mysql(index, query, query_filter, start=0, count=0):
matches = []
for row in cursor:
match = {
'weight' : 0,
'attrs' : {},
'id' : 0,
'weight': 0,
'attrs': {},
'id': 0,
}
for (name, value) in zip(desc, row):
col = name[0]
Expand All @@ -313,7 +323,6 @@ def process_query_mysql(index, query, query_filter, start=0, count=0):
return status, prepareResultJson(result, query_filter)



# ---------------------------------------------------------
def prepareResultJson(result, query_filter):
count = result['count']
Expand All @@ -333,7 +342,7 @@ def prepareResultJson(result, query_filter):
if isinstance(r[attr], str):
res[attr] = r[attr].decode('utf-8')
else:
res[ attr ] = r[attr]
res[attr] = r[attr]
response['results'].append(res)

# Prepare next and previous index
Expand Down Expand Up @@ -367,11 +376,11 @@ def formatResponse(data, code=200):
data['route'] = '/'
return render_template(tpl, rc=True if code == 200 else False, **data), code

json = dumps( result )
json = dumps(result)
mime = 'application/json'
# Append callback for JavaScript
if request.args.get('callback'):
json = request.args.get('callback') + "("+json+");";
json = request.args.get('callback') + "(" + json + ");"
mime = 'application/javascript'
return Response(json, mimetype=mime), code

Expand Down Expand Up @@ -421,15 +430,17 @@ def search():

q = request.args.get('q').encode('utf-8')

query_filter = {'type': None, 'lang': None, 'date': None,
query_filter = {
'type': None, 'lang': None, 'date': None,
'tags': None, 'datestart': None, 'dateend': None,
'sortBy': None}
'sortBy': None, 'product': None
}
filter = False
for f in query_filter:
if request.args.get(f):
v = None
# Some arguments may be list
if f in ('type', 'lang', 'sortBy', 'tags'):
if f in ('type', 'lang', 'sortBy', 'tags', 'product'):
vl = request.args.getlist(f)
if len(vl) == 1:
v = vl[0].encode('utf-8')
Expand Down Expand Up @@ -487,7 +498,7 @@ def search():
"""
API Update endpoint
"""
@app.route('/update/<path:domain>', methods = ['POST'])
@app.route('/update/<path:domain>', methods=['POST'])
def update(domain):
global domains
data = {'route': '/update', 'template': None}
Expand All @@ -496,7 +507,7 @@ def update(domain):
if domain not in domains:
data['result'] = {'error': 'Domain not allowed!'}
return formatResponse(data, 403)

domain_id = domain.replace('.', '').replace(':', '').replace('/', '').encode('utf-8')
data['domain'] = domain.encode('utf-8')
url = 'http://%(domain)s/search.tsv' % data
Expand Down Expand Up @@ -588,4 +599,3 @@ def nl2br(value):
"""
if __name__ == '__main__':
app.run(threaded=False, host='0.0.0.0', port=8000)

0 comments on commit 20175c9

Please sign in to comment.