-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathchatbot.py
executable file
·1548 lines (1338 loc) · 68.2 KB
/
chatbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# PA6, CS124, Stanford, Winter 2018
# v.1.0.2
# Original Python code by Ignacio Cases (@cases)
#
#TODOS: be snobby if they change
#####################################################################import csv
import math
import re
import csv
import copy
# For time testing
import time
import numpy as np
import heapq
from movielens import ratings
from random import randint
from PorterStemmer import PorterStemmer
# IGNORE THIS STUFF
# Regex fragments that appear to belong to a sentence-splitting helper (see the
# neighbouring comments). They are raw strings so that "\s" reaches the regex
# engine as written, instead of relying on Python passing an unknown escape
# sequence through unchanged (which newer interpreters warn about).
caps = r"([A-Z])"
prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
starters = r"(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = r"[.](com|net|org|io|gov)"
# FOUND IT TO SPLIT SENTENCES
class Chatbot:
"""Simple class to implement the chatbot for PA 6."""
#############################################################################
# `moviebot` is the default chatbot. Change it to your chatbot's name #
#############################################################################
def __init__(self, is_turbo=False):
    """Initialise conversation state and load the bot's data files.

    is_turbo: stored as ``self.is_turbo``; this method does nothing else
        with it.

    Besides setting flags, this calls ``read_data``, ``stemLexicon``,
    ``binarize`` and ``stemPos_Neg_Words`` (defined elsewhere in the class)
    and reads several word lists from the ``deps/`` directory, so it raises
    whatever ``open`` raises if one of those files is missing.
    """
    def read_lines(path):
        # Read a whole file as a list of lines and close the handle right
        # away instead of leaving it open until garbage collection.
        with open(path, "r") as handle:
            return handle.read().splitlines()

    self.NUMBER_TILL_REC = 5
    self.name = 'moviebot'

    # Flags
    self.is_turbo = is_turbo
    self.is_repeat = False
    self.selection = False
    self.quotationFound = False
    self.unknown_movie = False

    # When a movie is talked about more than once
    self.repeatedMovie = False
    self.newSentiment = None
    self.repeatedIndx = -1

    # Flags for referring back to the previously mentioned movie
    self.no_sentiment = False
    self.previous_sentiment = None
    self.previous_movie = None

    # Spell-check state
    self.spellChecking = False
    self.DONOTTOUCHME_TOY_STORY = False
    self.spellCheckPerformed1 = False  # flag for confirmation of movie title
    self.spellCheckPerformed2 = False  # flag for yes/no from user
    self.spell_check_sent = None
    self.spell_check_index = None
    self.spell_check_input = None

    # Flags for recommending movies
    self.get_recommend_date = False
    self.get_recommend_genre = False
    self.date_range = None
    self.give_rec = False
    self.use_date_range = False
    self.use_genre = False
    self.genre = None

    self.sentiment = {}
    self.usr_rating_vec = []
    self.numRatings = 5
    self.numRecs = 3
    self.movie_count = 0

    # Same order as before: data first, then the stemmer and the steps that
    # depend on it, then the word lists, then stemming of those word lists.
    self.read_data()
    self.p = PorterStemmer()
    self.stemLexicon()
    self.binarize()
    self.negations = read_lines("deps/negations.txt")
    self.punctuations = read_lines('deps/punctuation.txt')
    self.strong_neg = read_lines('deps/strong_neg_words.txt')
    self.strong_pos = read_lines('deps/strong_pos_words.txt')
    self.intensifiers = read_lines('deps/intensifiers.txt')
    self.jokes = read_lines('deps/jokes.txt')
    self.stemPos_Neg_Words()
#############################################################################
# 1. WARM UP REPL
#############################################################################
def greeting(self):
    """Return the opening message, which introduces the bot by ``self.name``."""
    introduction = "Hi! I'm " + self.name + "! I'm going to recommend a movie to you. \n"
    first_prompt = "First I will ask you about your taste in movies. Tell me about a movie that you have seen."
    return introduction + first_prompt
def goodbye(self):
    """Return the fixed farewell message."""
    return 'Have a nice day! It was great chatting!'
#############################################################################
# 2. Modules 2 and 3: extraction and transformation #
#############################################################################
def process(self, input):
    """Turn one line of user input into the chatbot's reply.

    Pending multi-turn states are served first, in this order: the
    post-recommendation menu, a changed-opinion confirmation, a spell-check
    confirmation, the date-range question and the genre question. Otherwise
    the input is passed to ``processTitle``; depending on the flag it
    returns (-1 no movie, 1 one movie, anything else several movies) the
    method records a rating, asks for clarification or falls back to small
    talk. When the number of stored ratings equals ``self.numRatings`` the
    reply also asks the date-range question that starts the recommendation
    flow.

    input: the raw string typed by the user.
    Returns the reply string.
    """
    # Bound up front so the recommendation branch can never hit an unbound
    # local when give_rec is set without a genre answer in the same turn.
    response = ''

    # User decides how to continue or quit after recommendations were given
    if self.is_repeat:
        return self.getRepeatResponse(input)
    # A previously rated movie was mentioned again with a different opinion
    if self.repeatedMovie:
        return self.updateResponse(input)
    # Waiting for a yes/no answer to a "Did you mean ...?" question
    if self.spellCheckPerformed2:
        return self.spellCheckFeedback(input)
    # Waiting for the date-range answer
    if self.get_recommend_date:
        return self.recommend_date(input)
    # Waiting for the genre answer; recommend_genre switches give_rec on
    if self.get_recommend_genre:
        response = self.recommend_genre(input)
    # Give the recommendation!
    if self.give_rec:
        continue_response = 'Hope these recommendations help! Please choose one of the options below by typing 1, 2, or 3.\n'
        continue_response += '1. Quit\n'
        continue_response += '2. Add additional movie ratings for more recommendations.\n'
        continue_response += '3. Restart with new ratings for new recommendations.'
        self.is_repeat = True
        self.give_rec = False
        return response + '\n' + 'Here\'s what I\'ve got for you:\n' + self.getRec() + '\n' + continue_response

    # Process movie title: temp is ((title_or_titles, flag), remaining_text)
    temp = self.processTitle(input)
    movie_tag = temp[0]
    old_input = input
    input = temp[1]
    # Flag indicating the outcome of processTitle
    movie_flag = movie_tag[1]

    if movie_flag == -1:  # No movies found
        if self.no_sentiment and self.sentimentForPreviousMention(old_input):
            # The user seems to refer back to the movie we still need an
            # opinion on ("it", "that movie").
            sentiment = self.sentimentClass(old_input)
            response = self.processMovieAndSentiment(sentiment, self.previous_movie, old_input)
            # NOTE(review): processMovieAndSentiment sets no_sentiment itself
            # (True when the sentiment is still missing); this reset
            # overrides that. Kept as-is; confirm whether it is intended.
            self.no_sentiment = False
        elif self.no_sentiment:
            return "Hm, unfortunately I still can't tell how you feel about \"" + self.titles[self.previous_movie][0] + "\". Could you fill me in?"
        elif self.unknown_movie:
            # Handle arbitrary input
            arbResp = self.getArbitraryResponse(input)
            if arbResp is not None:
                return arbResp
            responses = []
            responses.append("Hey, let's chat about movies!")
            responses.append("Let's get back to talking about movies!")
            return responses[randint(0, len(responses)-1)]
        else:
            arbResp = self.getArbitraryResponse(input)
            if arbResp is not None:
                return arbResp
            self.unknown_movie = True
            return self.noMovieResponse()
    elif movie_flag == 1:  # Movie found
        movie_title = movie_tag[0]
        movie_indexes = self.isMovie(movie_title)
        self.quotationFound = False
        if len(movie_indexes) != 0:  # Good movie!
            # Undo certain flags!
            self.unknown_movie = False
            # We have received a valid movie so we have to extract sentiment,
            # record the movie rating based on sentiment, and respond
            # reflecting the sentiment.
            response = ''
            sentiment = self.sentimentClass(input)
            movie_index = self.getMovieIndex(movie_indexes)
            # Check whether this movie has already been rated
            location_already_discussed = -1
            for position, rating in enumerate(self.usr_rating_vec):
                if rating[0] == movie_index:
                    location_already_discussed = position
                    break
            if location_already_discussed != -1:
                # Compare the new sentiment with the stored one
                old_sentiment = self.usr_rating_vec[location_already_discussed][2]
                response = self.redundantInfo(sentiment, old_sentiment)
                self.newSentiment = sentiment
                self.repeatedIndx = location_already_discussed
            elif movie_index is not None:
                # Check if the title was spell checked
                if self.spellCheckPerformed1:
                    title = self.titles[movie_index][0]
                    self.spellCheckPerformed1 = False
                    self.spellCheckPerformed2 = True
                    # Stash everything needed to finish once the user confirms
                    self.spell_check_sent = sentiment
                    self.spell_check_index = movie_index
                    self.spell_check_input = old_input
                    return "Did you mean the movie \"" + title + "\"?"
                response = self.processMovieAndSentiment(sentiment, movie_index, old_input)
            else:
                response = "Ok, tell me about another movie."
        else:  # Unknown movie
            if self.no_sentiment and self.sentimentForPreviousMention(old_input):
                # Try to apply the input to the previously mentioned movie
                sentiment = self.sentimentClass(old_input)
                response = self.processMovieAndSentiment(sentiment, self.previous_movie, old_input)
                # NOTE(review): same reset as in the no-movie branch above.
                self.no_sentiment = False
            elif self.no_sentiment:
                return "Hm, unfortunately I still can't tell how you feel about \"" + self.titles[self.previous_movie][0] + "\". Could you fill me in?"
            else:
                # Handle arbitrary input
                arbResp = self.getArbitraryResponse(old_input)
                if arbResp is not None:
                    return arbResp
                if self.unknown_movie:
                    return "Darn, I can't seem to remember that movie. Sorry about that! I promise I'll know the next one."
                self.unknown_movie = True
                return "Unfortunately I have never seen that movie, but I would love to hear about other movies that you have seen."
    else:
        return "Please tell me about one movie at a time. Go ahead."

    if len(self.usr_rating_vec) == self.numRatings:
        self.get_recommend_date = True
        responses = []
        responses.append('I think I am getting to know you a bit better, and I want to blow you away with some amazing movie recommendations. ')
        responses.append('Alright, I am ready to give you some movie recommendations! ')
        responses.append('Get ready for the big movie recommendations reveal! ')
        responses.append('Almost ready to give you your recommendations! ')
        responses.append('Now I think I have a good sense of some movies you would love. ')
        recommend_response = responses[randint(0, len(responses)-1)]
        recommend_response += 'First, though, would you like movies from a specific time period? e.g. ranges (2000-2005 or 2000+ or no).'
        # Return our response plus the date-range question
        return response + '\n' + recommend_response
    return response
def getRec(self):
    """Build the numbered list of recommended movie titles.

    Candidates come from ``self.recommend(self.usr_rating_vec)``; the result
    is consumed with ``heapq.heappop`` and element ``[1]`` of each popped
    item is used as an index into ``self.titles``. Candidates are popped
    until ``self.numRecs`` titles pass the active filters (``self.use_genre``
    with ``self.genre``, ``self.use_date_range`` with ``self.date_range``)
    or the candidates run out. Both filter flags are cleared afterwards.

    Returns a string with one "<n>) <title>" line (newline-terminated) per
    recommendation. It holds fewer than ``self.numRecs`` lines, possibly
    none, when there are not enough matching candidates.
    """
    recommendations = self.recommend(self.usr_rating_vec)
    filter_genre = self.use_genre
    filter_date = self.use_date_range
    if filter_genre:
        wanted_genre = self.genre.lower()
    if filter_date:
        first_year = int(self.date_range[0])
        last_year = int(self.date_range[1])

    picked = []
    # A single loop serves all filter combinations. Checking for remaining
    # candidates on every pass also protects the unfiltered case, which
    # used to raise IndexError when fewer than numRecs candidates existed.
    while len(picked) < self.numRecs and len(recommendations) > 0:
        movie_id = heapq.heappop(recommendations)[1]
        movie = self.titles[movie_id][0]

        if filter_genre and wanted_genre not in self.titles[movie_id][1].lower():
            continue

        if filter_date:
            # Prefer the parenthesised year: the first 4-digit run would
            # read "2001: A Space Odyssey (1968)" as 2001.
            years = re.findall(r'\((\d{4})\)', movie)
            if years:
                year = int(years[-1])
            else:
                loose_years = re.findall(r'(\d\d\d\d)', movie)
                # 3001 lies above the open-ended upper bound (3000), so
                # undated titles never pass a date filter.
                year = int(loose_years[0]) if loose_years else 3001
            if year < first_year or year > last_year:
                continue

        picked.append(str(len(picked) + 1) + ') ' + movie + '\n')

    # Re-set flags
    self.use_genre = False
    self.use_date_range = False
    return ''.join(picked)
def spellCheckFeedback(self, input):
    """Handle the answer to a "Did you mean the movie ...?" question.

    On "yes" the stashed sentiment, movie index and original input
    (``self.spell_check_*``) are passed to ``processMovieAndSentiment``; if
    that brings the rating count to ``self.numRatings`` the date-range
    question is appended and ``self.get_recommend_date`` is set. On "no"
    the suggestion is dropped. Any other answer keeps
    ``self.spellCheckPerformed2`` set so the question is asked again.

    input: the user's answer.
    Returns the reply string.
    """
    no_regex = r'(?:^[Nn]o|^[Nn]ope)'
    yes_regex = r'(?:^[Yy]es|^I did )'
    self.spellCheckPerformed2 = False

    if re.match(yes_regex, input):
        words = ["Sweet! ", "Awesome. ", "Thanks! ", "Ok, thank you! ", "Nice. "]
        word = words[randint(0, len(words)-1)]
        # processMovieAndSentiment already appends a "tell me about another
        # movie" request where one fits, so none is added here; doing so
        # produced two back-to-back requests.
        response = word + self.processMovieAndSentiment(self.spell_check_sent, self.spell_check_index, self.spell_check_input)
        # Start the recommendation flow once enough ratings are collected
        if len(self.usr_rating_vec) == self.numRatings:
            self.get_recommend_date = True
            responses = []
            responses.append('I think I am getting to know you a bit better, and I want to blow you away with some amazing movie recommendations. ')
            responses.append('Alright, I am ready to give you some movie recommendations! ')
            responses.append('Get ready for the big movie recommendations reveal! ')
            responses.append('Almost ready to give you your recommendations! ')
            responses.append('Now I think I have a good sense of some movies you would love. ')
            recommend_response = responses[randint(0, len(responses)-1)]
            recommend_response += 'First, though, would you like movies from a specific time period? e.g. ranges (2000-2005 or 2000+ or no).'
            response += '\n' + recommend_response
        return response

    if re.match(no_regex, input):
        return "Oops sorry for misunderstanding your query. Hopefully I'll understand the next movie better!"

    # Unclear answer: stay in the confirmation state
    self.spellCheckPerformed2 = True
    return "Could you clarify with a yes or no?"
def recommend_date(self, input):
    """Interpret the answer to the "specific time period?" question.

    Accepted answers are a refusal starting with "no"/"nope", a range such
    as "2000-2005", or an open-ended start such as "1995+". For the two
    date forms ``self.date_range`` is set to ``[start, end]`` (``end`` is
    the integer 3000 for the open-ended form; the years themselves stay
    strings) and ``self.use_date_range`` is switched on. A recognised
    answer moves the conversation on to the genre question; anything else
    keeps it on the date question.

    input: the user's answer.
    Returns the reply string.
    """
    no_regex = r'(?:^[Nn]o|^[Nn]ope)'
    date_range_regex = r'(\d\d\d\d)-(\d\d\d\d)'
    one_date_regex = r'(\d\d\d\d)\+'
    genre_prompt = "If there is a particular genre that you want, e.g. (adventure), please enter it. Otherwise, enter \"no\"."

    # Assume the answer is usable; the final branch undoes this.
    self.get_recommend_date = False
    self.get_recommend_genre = True

    if re.search(no_regex, input):
        openers = ["No problem!", "No worries!", "Ok, thanks!"]
        return openers[randint(0, len(openers)-1)] + "\n" + genre_prompt

    range_match = re.search(date_range_regex, input)
    if range_match:
        self.date_range = [range_match.group(1), range_match.group(2)]
        self.use_date_range = True
        return 'Awesome! We will take this into consideration.\n' + genre_prompt

    start_match = re.search(one_date_regex, input)
    if start_match:
        # 3000 acts as "no upper bound"
        self.date_range = [start_match.group(1), 3000]
        self.use_date_range = True
        return 'Awesome! We will take this into consideration.\n' + genre_prompt

    # Not understood: ask the date question again
    self.get_recommend_date = True
    self.get_recommend_genre = False
    return "Sorry, I didn't quite get that. Please enter a response like one of the following formats: 2000-2003, 1995+, no"
def recommend_genre(self, input):
    """Interpret the answer to the "particular genre?" question.

    A refusal clears ``self.use_genre``; any other input is taken to be the
    genre, stored (whitespace-trimmed) in ``self.genre`` with
    ``self.use_genre`` switched on. Either way ``self.give_rec`` is set so
    that the caller delivers the recommendations next.

    input: the user's answer.
    Returns the reply string.
    """
    # The refusal must be a whole word: a bare "^no" prefix test would treat
    # genres such as "noir" as a refusal. Case is ignored so "NO" works too.
    no_regex = r'^\s*(?:no|nope|none|nah|not|nothing)\b'
    self.get_recommend_genre = False
    self.give_rec = True

    if re.search(no_regex, input, re.IGNORECASE):
        responses = []
        responses.append("No problem!")
        responses.append("No worries!")
        responses.append("Ok, thanks!")
        self.use_genre = False
        return responses[randint(0, len(responses)-1)]

    # Assume input is genre!
    self.genre = input.strip()
    self.use_genre = True
    return "Perfect! We can look for movies in this genre."
def updateResponse(self, input):
    """Handle the answer to "do you want to change your opinion?".

    On "yes" the rating at ``self.repeatedIndx`` is rewritten with the score
    belonging to ``self.newSentiment``; on "no" the old rating is kept. Both
    answers clear ``self.repeatedMovie``. Any other answer leaves the state
    untouched so the question stays open.
    """
    if re.search(r'(?:^[Yy]es|^I do )', input):
        # Numeric rating stored for each sentiment label
        score_for = {'pos': .5, 'neg': -.5, 'str_pos': 1, 'str_neg': -1}
        label = self.newSentiment
        if label in score_for:
            slot = self.repeatedIndx
            movie = self.usr_rating_vec[slot][0]
            self.usr_rating_vec[slot] = (movie, score_for[label], label)
        self.repeatedMovie = False
        return "Got it, thanks! I just updated your opinion. Let's hear about another movie."

    if re.search(r'(?:^[Nn]o|^[Nn]ope)', input):
        # Keep the rating as it was
        self.repeatedMovie = False
        return "Sounds good, I agree with your first assessment! What's next?"

    # Unclear answer
    return "Sorry, I am not quite sure if you would like me to update your preference?"
def redundantInfo(self, sentiment, old_sentiment):
    """Reply to a movie that has already been rated.

    If the new sentiment equals the stored one, or carries no opinion
    ('none' / 'unclear'), the user is reminded of the earlier rating.
    Otherwise ``self.repeatedMovie`` is set and the user is asked whether
    the rating should change.

    sentiment: sentiment label extracted from the current input.
    old_sentiment: label stored with the earlier rating; anything other
        than 'pos', 'neg' or 'str_pos' is worded as "hated".
    Returns the reply string.
    """
    if sentiment == old_sentiment or sentiment == 'none' or sentiment == 'unclear':
        reminders = {
            'pos': "Right, we talked about this movie earlier! You mentioned that you liked this movie.",
            'neg': "Right, we talked about this movie earlier! You mentioned that you didn't like this movie.",
            'str_pos': "Right, we talked about this movie earlier! You loved it!",
        }
        return reminders.get(old_sentiment, "Right, we talked about this movie earlier! You hated it!")

    # The opinion changed: the next input is handled as the answer to the
    # question asked here.
    self.repeatedMovie = True
    questions = {
        'pos': "Hm, earlier you mentioned that you liked this movie. Do you want to change your opinion?",
        'neg': "Interesting, earlier you said that you disliked this movie. Do you want to change your opinion?",
        'str_pos': "I thought you loved this movie, do you want me to update how you feel about this movie?",
    }
    return questions.get(old_sentiment, "I thought you hated this movie, do you want me to update how you feel about this movie?")
def sentimentForPreviousMention(self, input):
    """Return True if the input contains the word "it" or the phrase "that movie".

    Either one is taken as a reference to the previously mentioned movie.
    """
    reference_patterns = (
        r'(?:^|[\W])[iI]t(?:$|[\W])',
        r'((?:^|[\W])[tT]hat movie(?:$|[\W]))',
    )
    return any(re.search(pattern, input) for pattern in reference_patterns)
def useSentimentFromPrevious(self, input):
    """Classify how the input relates to the previously stated opinion.

    Returns 'same' when the input signals the same opinion as before
    ("And ...", "Also ...", "Plus ...", "... same ... that",
    "... similar ... that"), 'op' when it starts with "But not ", and
    'UNK' otherwise.
    """
    opposite_regex = r'(?:^But not )'
    # " Also " and " Plus" need a preceding space, so on their own they can
    # never match at the very start of the input, which the pattern's name
    # suggests was the intent. The anchored alternatives add that case and
    # leave everything that matched before still matching.
    same_begin_regex = r'(?:And | Also | Plus|^Also\b|^Plus\b)'
    same_in_regex = r'(?: same (.*?) that(?:$|\W)| similar (.*?) that(?:$|\W))'

    if re.search(same_begin_regex, input) or re.search(same_in_regex, input):
        return 'same'
    if re.search(opposite_regex, input):
        return 'op'
    return 'UNK'
def getRepeatResponse(self, input):
    """Handle the menu choice shown after recommendations were given.

    "1" tells the user how to quit, "2" raises ``self.numRatings`` by three
    so that more ratings are collected, "3" clears the stored ratings and
    resets ``self.numRatings`` to ``self.NUMBER_TILL_REC``. Options 2 and 3
    also clear ``self.is_repeat``; anything else re-prompts.

    input: the user's choice; surrounding whitespace is ignored.
    Returns the reply string.
    """
    # " 2" or "2 " should not be rejected as an unknown option
    choice = input.strip()

    if choice == '1':
        return "Please type \":quit\""
    if choice == '2':
        self.is_repeat = False
        self.numRatings += 3
        return "Ok, let's continue! Please tell me about another movie you've seen."
    if choice == '3':
        self.is_repeat = False
        self.usr_rating_vec = []
        self.numRatings = self.NUMBER_TILL_REC
        return "Great! Let's explore some new movies. Just like before, what are some movies I can base my recommendation off of?"
    return "I'm sorry, I don't understand your input. Please enter a number 1, 2, or 3."
def getArbitraryResponse(self, input):
    """Return a small-talk reply for input that is not about a movie.

    The input is lowercased and stripped of '!', '.' and '?' and then
    checked, in order, for a greeting, a request for the bot's name,
    "do you love me", a "how are you" style question, a "how is your day"
    style question, a bare "no", a joke request, a question starting with a
    question word, and finally any input that ended in a question mark.

    input: the user's text.
    Returns the reply string, or None when nothing matched.
    """
    input = input.lower()
    # Must be recorded before the punctuation is removed; the old '\?$'
    # test ran on the stripped text and could therefore never match.
    ends_with_question_mark = input.rstrip().endswith('?')
    input = re.sub(r'[!.?]', r'', input)

    # Word boundaries keep "history ..." or "his ..." from counting as "hi"
    q0 = r'^hi\b|\bhello\b'
    q2 = r'what(?:\'s | is )your name'
    q7 = r'who are you'
    q4 = r'do you love me'
    q6 = r'tell me a joke'
    q1 = r'(?:how)?(\'s | is | are )(you|it)(?: doing| going)?(?: ok| well)?'
    q3 = r'how(?:\'s | is | has | was )your (?:day|night|evening|morning|afternoon)'
    q5 = r'^no\.?$'
    basicQ1 = r'^(can|what|where|why|how|are|aren\'t) ([^?]*)(?:\?)?'

    deflections = [
        "Hey, I'm the one asking the questions here! What is your opinion on a movie you have seen?",
        "Enough questions, let's get to the movies! Can you tell about one you have seen?",
        "I'll have to think about that. In the meantime, let's talk about some movies.",
    ]

    if re.search(q0, input):
        return "Hello! Tell me about a movie you've seen."
    if re.search(q2, input) or re.search(q7, input):
        return "My name is " + self.name + ". Now what is a movie you have an opinion about?"
    if re.search(q4, input):
        return "Yes, I love everyone. Now I know there are some movies you love - tell me about one."
    if re.search(q1, input):
        responses = []
        responses.append("I am well, but I would be even better if you told me about a movie.")
        responses.append("I'm fine. Is there a movie you can tell me about?")
        responses.append("I'm great! Can you tell me about a movie you have seen?")
        return responses[randint(0, len(responses)-1)]
    if re.search(q3, input):
        return "It has been good! Let's talk about some movies now."
    if re.search(q5, input):
        return "Yes, please."
    if re.search(q6, input):
        return self.jokes[randint(0, len(self.jokes) - 1)]

    question = re.search(basicQ1, input)
    if question:
        echoed = "I don't know, " + str(question.group(1)) + " " + str(question.group(2)) + "?"
        # The echo is listed three times on purpose: it is picked half of
        # the time, each deflection one time in six.
        responses = deflections + [echoed, echoed, echoed]
        return responses[randint(0, len(responses)-1)]

    if ends_with_question_mark:
        return deflections[randint(0, len(deflections)-1)]
    return None
def processMovieAndSentiment(self, sentiment, movie_index, input):
    """Record the user's opinion of a movie and build the matching reply.

    sentiment: 'pos', 'str_pos', 'neg', 'str_neg', 'none', or anything else
        (treated as unclear).
    movie_index: index of the movie; always remembered as
        ``self.previous_movie``.
    input: the original user text, consulted only when the sentiment is
        'none' to see whether it refers back to the previous opinion.

    For the four real sentiments a ``(movie_index, score, label)`` tuple is
    appended to ``self.usr_rating_vec`` and ``self.no_sentiment`` is
    cleared. For 'none' the previous sentiment is reused or inverted when
    the wording asks for it; otherwise, and for unclear sentiment,
    ``self.no_sentiment`` is set so that a later input can supply the
    opinion.

    Returns the reply string.
    """
    self.previous_movie = movie_index

    # label -> (stored score, name of the method that words the reply)
    rated = {
        'pos': (.5, 'getPosResponse'),
        'str_pos': (1, 'getStrPosResponse'),
        'neg': (-.5, 'getNegResponse'),
        'str_neg': (-1, 'getStrNegResponse'),
    }
    if sentiment in rated:
        score, responder = rated[sentiment]
        self.no_sentiment = False
        self.usr_rating_vec.append((movie_index, score, sentiment))
        self.previous_sentiment = sentiment
        reply = getattr(self, responder)(movie_index)
        if len(self.usr_rating_vec) < self.numRatings:
            reply += self.getAddRequest()
        return reply

    if sentiment != 'none':
        # Unclear sentiment: nothing could be extracted, so remember that
        # an opinion on this movie is still outstanding.
        self.no_sentiment = True
        return self.getUnclearResponse(movie_index)

    # No sentiment in the text itself; it may point at the previous opinion.
    relation = self.useSentimentFromPrevious(input)
    if relation == 'same' and self.previous_sentiment is not None:
        return self.processMovieAndSentiment(self.previous_sentiment, movie_index, input)
    if relation == 'op':
        # Strong opinions invert to the plain opposite; with no previous
        # opinion the empty label falls through to the unclear branch.
        inverse = {'pos': 'neg', 'neg': 'pos', 'str_pos': 'neg', 'str_neg': 'pos'}
        flipped = inverse.get(self.previous_sentiment, '')
        return self.processMovieAndSentiment(flipped, movie_index, input)

    self.no_sentiment = True
    reply = self.getNoneResponse(movie_index)
    if len(self.usr_rating_vec) < self.numRatings:
        reply += self.getAddRequest()
    return reply
def getMovieIndex(self, movie_indexes):
    """Reduce a list of candidate movie indexes to a single index.

    A list with more than one entry is handed to ``self.askForSelection``
    and its result (possibly None) is returned; otherwise the first entry
    is returned.
    """
    if len(movie_indexes) <= 1:
        return movie_indexes[0]
    return self.askForSelection(movie_indexes)
###########################################################
###### RESPONSES ######
###########################################################
def getAddRequest(self):
    """Return a randomly chosen request for another movie.

    Every variant starts with a space so that it can be appended directly
    to a preceding sentence.
    """
    addRequests = [
        " What are some other movies you have seen?",
        " What about another movie?",
        " What's next?",
        " Let's hear about another one.",
        " Tell me about another one!",
        " I'd love to hear about other movies you have seen.",
        " Any other movies you have an opinion about?",
        " Can you tell me about another movie?",
        " Tell me about another movie you have seen.",
        " Is there another movie you can tell me about?",
    ]
    return addRequests[randint(0, len(addRequests)-1)]
def noMovieResponse(self):
    """Return a randomly chosen reply for input in which no movie was found."""
    options = (
        "I'm sorry, I'm not sure what you mean. Tell me about a movie.",
        "Sorry, I don't quite understand. Tell me about a movie that you have seen.",
        "Let's get back to movies - I'd love to hear your opinion on one.",
    )
    return options[randint(0, len(options) - 1)]
def getStrPosResponse(self, movie_index):
    """Return a randomly chosen reply for a strongly positive opinion.

    movie_index: index into ``self.titles``; element 0 of that entry is the
        title quoted in the reply.
    """
    quoted = "\"" + self.titles[movie_index][0] + "\""
    options = (
        "Awesome, you really liked " + quoted + "!",
        "Great choice! That is an amazing movie. " + quoted + ".",
        "You loved " + quoted + "!",
        quoted + " is a fantastic movie!!",
        "You were a huge fan of " + quoted + "!",
    )
    return options[randint(0, len(options) - 1)]
def getStrNegResponse(self, movie_index):
    """Return a randomly chosen reply for a strongly negative opinion.

    movie_index: index into ``self.titles``; element 0 of that entry is the
        title quoted in the reply.
    """
    quoted = "\"" + self.titles[movie_index][0] + "\""
    options = (
        "So you really disliked " + quoted + ".",
        "You hated " + quoted + "! Thanks for the heads up.",
        "I see you really weren't a fan of " + quoted + ".",
    )
    return options[randint(0, len(options) - 1)]
def getPosResponse(self, movie_index):
    """Return a randomly chosen reply for a positive opinion.

    movie_index: index into ``self.titles``; element 0 of that entry is the
        title quoted in the reply.
    """
    quoted = "\"" + self.titles[movie_index][0] + "\""
    options = (
        "You liked " + quoted + ". Thank you!",
        "Ok, you enjoyed " + quoted + ".",
        "Great! I'm glad you liked " + quoted + ".",
    )
    return options[randint(0, len(options) - 1)]
def getNegResponse(self, movie_index):
    """Return a randomly chosen reply for a negative opinion.

    movie_index: index into ``self.titles``; element 0 of that entry is the
        title quoted in the reply.
    """
    # Every variant quotes the title, matching the other reply builders
    # (the first variant used to print it bare).
    quoted = "\"" + self.titles[movie_index][0] + "\""
    responses = [
        "You did not like " + quoted + ". Thank you!",
        "Ok, you disliked " + quoted + ".",
        "I'm sorry you did not enjoy " + quoted + ".",
    ]
    return responses[randint(0, len(responses)-1)]
def getNoneResponse(self, movie_index):
    """Return a randomly chosen question asking for an opinion on the movie.

    movie_index: index into ``self.titles``; element 0 of that entry is the
        title quoted in the reply.
    """
    quoted = "\"" + self.titles[movie_index][0] + "\""
    options = (
        "Ok, thank you! Tell me your opinion on " + quoted + ".",
        "What did you think about " + quoted + "?",
        "Did you like or dislike " + quoted + "?",
    )
    return options[randint(0, len(options) - 1)]
#TODO: REMEMBER PREVIOUS THING
def getUnclearResponse(self, movie_index):
    """Return a randomly chosen reply asking the user to clarify an opinion.

    movie_index -- index into self.titles; element [0] of that entry is
                   the text quoted inside the reply.

    The first reply now ends its first sentence with a period; it
    previously ran straight from the closing quote into "Tell me more".
    """
    title = self.titles[movie_index][0]
    responses = [
        "I'm sorry, I'm not quite sure if you liked \"" + title + "\". Tell me more about \"" + title + "\".",
        "I'm sorry, I can't quite tell what your opinion is on \"" + title + "\". Can you tell me more?",
        "I'm not certain about your opinion on \"" + title + "\". Could you tell me more about it?",
    ]
    # randint is inclusive on both ends, hence the -1.
    return responses[randint(0, len(responses) - 1)]
###########################################################
###### END RESPONSES ######
###########################################################
def processTitle(self, inpt):
    """Extract the movie title(s) mentioned in a line of user input.

    Titles in double quotes are preferred; when there are none,
    self.findNonQuotationTitles is asked to spot an unquoted title.

    Returns ((title_info, flag), remaining_input) where flag is
      -1 -- nothing found; title_info is "" and the input is unchanged
       1 -- one title found; title_info is that title and it has been
            removed from remaining_input
       2 -- several quoted titles; title_info is the list of them and
            the input is returned unchanged
    """
    movie_regex = r'"(.*?)"'
    entities = re.findall(movie_regex, inpt)

    if len(entities) == 1:
        # One quoted movie found - flag 1
        self.quotationFound = False
        inpt = re.sub(movie_regex, "", inpt)
        return ((entities[0], 1), inpt)

    if len(entities) > 1:
        # Multiple quoted movies found - flag 2
        # TODO: DO SOMETHING WITH THIS
        return ((entities, 2), inpt)

    # CREATIVE: no quoted movie, look for a title without quotation marks
    entity = self.findNonQuotationTitles(inpt)
    if len(entity) == 0:
        # Still found nothing - flag -1
        return (("", -1), inpt)

    # Drop any parenthesised part (e.g. a year) before removing the title.
    temp = re.sub(r'\(.*\)', "", entity)
    # Remove the title as literal text. It used to be passed to re.sub as
    # a pattern, so titles containing regex metacharacters (M*A*S*H,
    # an unbalanced parenthesis, ...) were removed wrongly or raised.
    inpt = inpt.replace(temp, "")
    return ((entity, 1), inpt)
def findNonQuotationTitles(self, inpt):
    """Find the longest known movie title mentioned without quotation marks.

    The characters ! . ? : are stripped from the input. Each title tagged
    as <>...</> in element [0] of every self.custom_titles entry is passed
    through self.removeArticles and matched word by word against the
    lower-cased input. A match may only start at an input word whose
    first letter is upper case. Partial matches (a prefix of a title) are
    kept as candidates too.

    Returns the longest candidate in its original casing, or "" when
    there is none. Side effect: self.quotationFound is set to True when a
    candidate is returned and to False otherwise.
    """
    self.quotationFound = False
    inpt = re.sub(r'[!.?:]', r'', inpt)
    # Split once: original casing for the result, lower case for matching.
    original_words = inpt.split()
    lowered_words = inpt.lower().split()

    entities = []
    for entry in self.custom_titles:
        for title in re.findall("<>(.*?)</>", entry[0]):
            title_words = self.removeArticles(title).split()
            if not title_words:
                # A title that normalises to nothing cannot match and
                # would otherwise raise IndexError below.
                continue
            # The input had its punctuation removed, so compare against
            # title words with the same punctuation removed.
            bare_title_words = [re.sub(r'[!.?:]', r'', w) for w in title_words]

            for start, word in enumerate(lowered_words):
                if not original_words[start][0].isupper():
                    continue
                # The first word is compared with its punctuation intact.
                if title_words[0] != word:
                    continue
                matched = []
                limit = min(len(title_words), len(lowered_words) - start)
                for offset in range(limit):
                    if lowered_words[start + offset] != bare_title_words[offset]:
                        break
                    matched.append(original_words[start + offset])
                entities.append(" ".join(matched))

    if not entities:
        return ""
    self.quotationFound = True
    return max(entities, key=len)
def edit_distance(self, true_word, query, max_dist):
    """Return the case-insensitive Levenshtein distance between two strings.

    true_word -- the reference spelling
    query     -- the spelling to compare against it
    max_dist  -- if the two lengths differ by more than this, the DP is
                 skipped and max_dist + 1 is returned

    Insertions, deletions and substitutions each cost 1.
    """
    if abs(len(true_word) - len(query)) > max_dist:
        return max_dist + 1

    # The original compared the bound methods `.lower` instead of calling
    # them, so differently-cased letters were counted as substitutions.
    true_word = true_word.lower()
    query = query.lower()

    # Only the previous DP row is needed to compute the next one.
    # previous[j] is the distance between the processed prefix of
    # true_word and query[:j].
    previous = list(range(len(query) + 1))
    for i, true_char in enumerate(true_word, 1):
        current = [i]
        for j, query_char in enumerate(query, 1):
            cost_del = previous[j] + 1
            cost_ins = current[j - 1] + 1
            cost_sub = previous[j - 1] + (0 if true_char == query_char else 1)
            current.append(min(cost_del, cost_ins, cost_sub))
        previous = current
    return previous[-1]
def spellCheck(self, query):
    """Suggest movies whose titles start with a near-spelling of query.

    The query and every title tagged as <>...</> in element [0] of each
    self.custom_titles entry are passed through self.removeArticles and
    self.removeDate and split into \\w+ words. A title is a candidate
    when it has at least as many words as the query and, comparing word
    by word, every word is within 2 edits (self.edit_distance) and the
    total number of edits does not exceed the number of query words.

    For each candidate, the correctly spelled prefix of the title that
    covers the matched words is passed to self.isMovie, and the combined
    results are returned as a de-duplicated list (order not guaranteed).
    Side effect: self.spellChecking is set to True before each
    self.isMovie call.
    """
    query = self.removeDate(self.removeArticles(query))
    query_words = re.findall(r'\w+', query.lower())
    # At most one edit per query word on average, two in any single word.
    max_edit = len(query_words)
    max_edit_word = 2

    # Correctly spelled title prefixes that the query could have meant.
    correct_spellings = set()
    for entry in self.custom_titles:
        for title in re.findall("<>(.*?)</>", entry[0]):
            test_title = self.removeDate(self.removeArticles(title))
            # Keep match objects so the end offset of each word is known.
            word_matches = list(re.finditer(r'\w+', test_title))
            if len(query_words) > len(word_matches):
                continue

            total_error = 0
            acceptable_error = True
            for word_match, query_word in zip(word_matches, query_words):
                distance = self.edit_distance(word_match.group(0), query_word, max_edit_word)
                total_error += distance
                if distance > max_edit_word or total_error > max_edit:
                    acceptable_error = False
                    break
            if not acceptable_error:
                continue

            # Cut the title right after the last matched word. The end
            # offset of that exact word is used; searching for its text
            # would stop at an earlier occurrence (e.g. "a" inside "star").
            if query_words:
                prefix_end = word_matches[len(query_words) - 1].end()
            else:
                prefix_end = 0
            correct_spellings.add(test_title[:prefix_end])

    suggestions = []
    for possible_title in correct_spellings:
        self.spellChecking = True
        suggestions.extend(self.isMovie(possible_title))
    return list(set(suggestions))
def isTitleInLevel1(self, inpt_title):
    """Level 1 title search: collect entries accepted by isTitleInLevel1Helper.

    Resets self.DONOTTOUCHME_TOY_STORY to False before searching (the
    helper may set it again). Returns the indices into self.custom_titles
    whose element [0] the helper accepts for inpt_title.
    """
    self.DONOTTOUCHME_TOY_STORY = False
    matches = []
    for position, record in enumerate(self.custom_titles):
        if self.isTitleInLevel1Helper(inpt_title, record[0]):
            matches.append(position)
    return matches
def isTitleInLevel1Helper(self, inpt_title, entry):
    """Return True when inpt_title exactly matches a title tagged in entry.

    entry is a string holding one or more titles wrapped as <>...</>.
    Both sides are compared after self.removeArticles.

    Side effect: when the input equals a tagged title with its date,
    subtitle and sequel marker stripped, self.DONOTTOUCHME_TOY_STORY is
    set to True (isTitleInLevel2 and isTitleInLevel3 return no results
    while that flag is set).
    """
    for candidate in re.findall("<>(.*?)</>", entry):
        wanted = self.removeArticles(inpt_title)
        base_form = self.removeSequel(
            self.removeSubtitle(self.removeDate(self.removeArticles(candidate))))
        if wanted == base_form:
            self.DONOTTOUCHME_TOY_STORY = True
        if self.removeArticles(inpt_title) == self.removeArticles(candidate):
            return True
    return False
def isTitleInLevel2(self, inpt_title):
    """Level 2 title search: like level 1, but with dates ignored.

    Returns [] straight away when self.DONOTTOUCHME_TOY_STORY is set;
    otherwise the indices into self.custom_titles whose element [0] is
    accepted by isTitleInLevel2Helper for inpt_title.
    """
    if self.DONOTTOUCHME_TOY_STORY == True:
        return []
    matches = []
    for position, record in enumerate(self.custom_titles):
        if self.isTitleInLevel2Helper(inpt_title, record[0]):
            matches.append(position)
    return matches
def isTitleInLevel2Helper(self, inpt_title, entry):
    """Return True when inpt_title matches a tagged title, ignoring dates.

    entry is a string holding one or more titles wrapped as <>...</>.
    Both sides are compared after self.removeArticles followed by
    self.removeDate.
    """
    def normalize(text):
        return self.removeDate(self.removeArticles(text))

    return any(normalize(inpt_title) == normalize(candidate)
               for candidate in re.findall("<>(.*?)</>", entry))
def isTitleInLevel3(self, inpt_title):
    """Level 3 title search: additionally accounts for subtitles.

    Returns [] straight away when self.DONOTTOUCHME_TOY_STORY is set;
    otherwise the indices into self.custom_titles whose element [0] is
    accepted by isTitleInLevel3Helper for inpt_title.
    """
    if self.DONOTTOUCHME_TOY_STORY == True:
        return []
    matches = []
    for position, record in enumerate(self.custom_titles):
        if self.isTitleInLevel3Helper(inpt_title, record[0]):
            matches.append(position)
    return matches
def isTitleInLevel3Helper(self, inpt_title, entry):
    """Return True when inpt_title matches a tagged title, ignoring subtitles.

    entry is a string holding one or more titles wrapped as <>...</>.
    Both sides are compared after self.removeArticles, self.removeDate
    and self.removeSubtitle, applied in that order.
    """
    def normalize(text):
        return self.removeSubtitle(self.removeDate(self.removeArticles(text)))

    return any(normalize(inpt_title) == normalize(candidate)
               for candidate in re.findall("<>(.*?)</>", entry))
def isTitleInLevel4(self, inpt_title):
    """Level 4 title search: additionally accounts for sequels.

    Returns the indices into self.custom_titles whose element [0] is
    accepted by isTitleInLevel4Helper for inpt_title.
    """
    matches = []
    for position, record in enumerate(self.custom_titles):
        if self.isTitleInLevel4Helper(inpt_title, record[0]):
            matches.append(position)
    return matches
def isTitleInLevel4Helper(self, inpt_title, entry):
    """Return True when inpt_title matches a tagged title, ignoring sequel markers.

    entry is a string holding one or more titles wrapped as <>...</>.
    Both sides are compared after self.removeArticles, self.removeDate,
    self.removeSubtitle and self.removeSequel, applied in that order.
    """
    def normalize(text):
        return self.removeSequel(
            self.removeSubtitle(self.removeDate(self.removeArticles(text))))

    return any(normalize(inpt_title) == normalize(candidate)
               for candidate in re.findall("<>(.*?)</>", entry))
def isTitleInLevel5(self, inpt_title):
    """Level 5 title search: loosest level, plain prefix matching.

    Returns [] straight away when self.quotationFound is set; otherwise
    the indices into self.custom_titles whose element [0] is accepted by
    isTitleInLevel5Helper for inpt_title.
    """
    if self.quotationFound == True:
        return []
    matches = []
    for position, record in enumerate(self.custom_titles):
        if self.isTitleInLevel5Helper(inpt_title, record[0]):
            matches.append(position)
    return matches
def isTitleInLevel5Helper(self, inpt_title, entry):
    """Return True when a tagged title in entry starts with inpt_title.

    entry is a string holding one or more titles wrapped as <>...</>.
    Both the tagged title and inpt_title go through self.removeArticles
    before the prefix test.
    """
    for candidate in re.findall("<>(.*?)</>", entry):
        cleaned = self.removeArticles(candidate)
        if cleaned.startswith(self.removeArticles(inpt_title)):
            return True
    return False
def removeArticles(self, movie_title):
#Preprocess movie_titles: Lowercase; remove a, an, the at beg
# MUST BE CALLED AFTER removeDate
movie_title = movie_title.lower()