-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext_analyzer.py
374 lines (301 loc) · 13.4 KB
/
text_analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
import json
import re
import time
import logging
from typing import List, Tuple, Dict
class TextAnalyzer:
    """
    A class for analyzing text content and generating visual style guidelines.
    This class processes text through multiple stages:
    1. Content filtering
    2. Context analysis
    3. Segment analysis
    4. Visual concept generation
    Uses either OpenAI or Anthropic LLMs for text analysis.
    """
    # Prompt template for overall style guide generation.
    # Filled via str.format(text=...); literal JSON braces are escaped as '{{'/'}}'
    # so only {text} is substituted.
    CONTEXT_PROMPT = '''
You are a visual art director analyzing text to create a cohesive visual narrative.
YOUR TASK:
Analyze the full text and create a consistent visual style guide that will unify all images.
FULL TEXT:
{text}
RESPOND WITH A JSON OBJECT CONTAINING:
{{
"overall_theme": "Main thematic elements and emotional tone",
"visual_style": {{
"art_style": "Consistent artistic approach",
"color_palette": "3-5 key colors that represent the theme",
"composition": "Preferred composition guidelines",
"lighting": "Lighting style that matches the mood",
"symbolic_elements": ["Recurring symbols or motifs to use"],
"environment": "Common environmental elements"
}},
"mood_progression": {{
"start": "Opening emotional tone",
"middle": "Development of mood",
"end": "Concluding emotional tone"
}}
}}
'''
    # Prompt template for individual segment analysis.
    # Filled via str.format(style_guide=..., text=...); braces inside the
    # substituted style_guide JSON are safe because format only parses the template.
    SEGMENT_PROMPT = '''
You are a visual scene creator working within an established style guide.
STYLE GUIDE:
{style_guide}
TEXT TO VISUALIZE:
{text}
CREATE A SCENE THAT:
1. Follows the established visual style
2. Uses the defined color palette
3. Incorporates recurring symbolic elements
4. Maintains consistency with the overall theme
5. Reflects the appropriate mood for this point in the narrative
RESPOND WITH A JSON ARRAY:
[
{{
"summary": "Brief description of this moment",
"prompt": "Detailed DALL-E prompt incorporating style guide elements",
"style_notes": "Specific style considerations for this scene"
}}
]
'''
def __init__(self, config: dict, verbose: bool = False):
"""
Initialize the TextAnalyzer with configuration.
Required config structure:
{
"text_analysis": {
"provider": str, # "openai" or "anthropic"
"api_key": str, # Required for chosen provider
"model": str, # Model name to use
"max_tokens": int, # Maximum tokens for LLM response
"max_segments": int # Target number of segments to generate
}
}
Args:
config (dict): Configuration dictionary
verbose (bool, optional): Enable detailed logging. Defaults to False.
"""
self.config = config
self.verbose = verbose
# Setup logging
logging.basicConfig(
level=logging.INFO if verbose else logging.WARNING,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('text_analyzer.log'),
logging.StreamHandler()
]
)
self.logger = logging.getLogger(__name__)
def filter_text(self, text: str) -> str:
"""
Filter sensitive content from text.
Args:
text (str): Raw input text
Returns:
str: Text with sensitive content replaced by appropriate placeholders
"""
profanity = {
# Korean profanity mappings
'ㅅㅂ': '[expression]',
'ㅆㅂ': '[expression]',
'시발': '[expression]',
'씨발': '[expression]',
'병신': '[expression]',
'지랄': '[expression]',
'좆': '[expression]',
'존나': '[very]',
'니미': '[expression]',
'엿': '[expression]',
# English profanity mappings
'fuck': '[expression]',
'shit': '[expression]',
'damn': '[expression]',
'bitch': '[person]',
'bastard': '[person]',
'ass': '[expression]',
# Common variants
'f*ck': '[expression]',
's*it': '[expression]',
'f***': '[expression]',
'sh*t': '[expression]',
'b*tch': '[person]',
}
filtered_text = text.lower()
for word, replacement in profanity.items():
pattern = re.compile(re.escape(word), re.IGNORECASE)
filtered_text = pattern.sub(replacement, filtered_text)
return filtered_text
def analyze_context(self, segments: List[dict]) -> dict:
"""
Analyze the overall context and generate visual style guide.
Args:
segments (List[dict]): List of text segments with keys:
- text (str): Segment content
- start (float): Start time
- end (float): End time
Returns:
dict: Style guide containing:
- overall_theme (str): Main theme
- visual_style (dict): Art style, colors, composition, etc.
- mood_progression (dict): Emotional progression
"""
self.logger.info(f"[Context] Starting analysis of {len(segments)} segments")
self.logger.info("[Context] Filtering and combining text...")
filtered_texts = []
total_chars = 0
for i, segment in enumerate(segments, 1):
filtered_text = self.filter_text(segment["text"])
filtered_texts.append(filtered_text)
total_chars += len(filtered_text)
if i % 10 == 0:
self.logger.info(f"[Context] Processed {i}/{len(segments)} segments")
combined_text = " ".join(filtered_texts)
self.logger.info(f"[Context] Combined text: {total_chars} characters")
self.logger.info(f"[Context] Using {self.config['text_analysis']['provider'].upper()} model: {self.config['text_analysis']['model']}")
response = self._get_llm_response(self.CONTEXT_PROMPT.format(text=combined_text))
style_guide = json.loads(response)
self.logger.info("\n=== Style Guide Generated ===")
self.logger.info(f"Theme: {style_guide['overall_theme'][:50]}...")
self.logger.info(f"Art Style: {style_guide['visual_style']['art_style']}")
self.logger.info(f"Color Palette: {style_guide['visual_style']['color_palette']}")
return style_guide
def analyze_segment(self, segment: dict, style_guide: dict) -> List[dict]:
"""
Generate visual concepts for a specific text segment.
Args:
segment (dict): Text segment with keys:
- text (str): Segment content
- start (float): Start time
- end (float): End time
style_guide (dict): Visual style guidelines
Returns:
List[dict]: List of visual concepts, each containing:
- summary (str): Scene description
- prompt (str): DALL-E generation prompt
- style_notes (str): Additional style guidance
"""
self.logger.info(f"[Segment] Processing segment ({len(segment['text'])} chars)")
filtered_text = self.filter_text(segment["text"])
self.logger.info("[Segment] Text filtered, generating concepts...")
formatted_prompt = self.SEGMENT_PROMPT.format(
style_guide=json.dumps(style_guide, indent=2),
text=filtered_text
)
response = self._get_llm_response(formatted_prompt)
concepts = json.loads(response)
self.logger.info(f"[Segment] Generated {len(concepts)} concepts")
for i, concept in enumerate(concepts, 1):
self.logger.info(f"[Segment] Concept {i}: {concept['summary'][:50]}...")
return concepts
def select_segments(self, segments: List[dict], max_segments: int) -> List[dict]:
"""
Merge segments into specified number of larger segments.
Args:
segments (List[dict]): Original text segments
max_segments (int): Target number of segments
Returns:
List[dict]: Merged segments with original timing and combined text
"""
if len(segments) <= max_segments:
return segments
total_segments = len(segments)
segments_per_group = total_segments // max_segments
merged_segments = []
start_idx = 0
for i in range(max_segments):
if i == max_segments - 1:
group = segments[start_idx:]
else:
end_idx = start_idx + segments_per_group
group = segments[start_idx:end_idx]
start_idx = end_idx
merged_segment = {
"start": group[0]["start"],
"end": group[-1]["end"],
"text": " ".join(seg["text"] for seg in group)
}
merged_segments.append(merged_segment)
self.logger.info(f"\n--- Segment Group {i+1}/{max_segments} ---")
self.logger.info(f"Combined {len(group)} segments")
self.logger.info(f"Time range: {merged_segment['start']:.1f}s - {merged_segment['end']:.1f}s")
self.logger.info(f"Text length: {len(merged_segment['text'])} chars")
return merged_segments
def process_content(self, segments: List[dict]) -> Tuple[dict, List[dict], List[dict]]:
"""
Process all content through the complete analysis pipeline.
Args:
segments (List[dict]): Text segments to process
Returns:
Tuple containing:
- dict: Visual style guide
- List[dict]: Generated visual concepts
- List[dict]: Selected/merged segments used for generation
"""
self.logger.info("\n=== Starting Content Analysis Pipeline ===")
self.logger.info(f"Total segments: {len(segments)}")
# Generate style guide
self.logger.info("\n=== Phase 1: Style Guide Generation ===")
style_guide = self.analyze_context(segments)
# Generate visual concepts
self.logger.info("\n=== Phase 2: Visual Concept Generation ===")
# Select segments for visualization
selected_segments = self.select_segments(
segments,
self.config["text_analysis"]["max_segments"]
)
self.logger.info(f"Selected {len(selected_segments)} segments")
all_concepts = []
for i, segment in enumerate(selected_segments, 1):
self.logger.info(f"\n--- Processing Segment {i}/{len(selected_segments)} ---")
self.logger.info(f"Time range: {segment['start']:.2f}s - {segment['end']:.2f}s")
concepts = self.analyze_segment(segment, style_guide)
all_concepts.extend(concepts)
self.logger.info(f"Total concepts: {len(all_concepts)}")
self.logger.info("\n=== Content Analysis Complete ===")
self.logger.info("Style guide generated")
self.logger.info(f"Total concepts: {len(all_concepts)}")
return style_guide, all_concepts, selected_segments
def _get_llm_response(self, prompt: str) -> str:
"""
Get response from configured language model.
Args:
prompt (str): Prompt to send to LLM
Returns:
str: LLM response text
Raises:
Exception: If LLM request fails
"""
text_config = self.config["text_analysis"]
self.logger.info(f"[LLM] Request to {text_config['provider'].upper()}")
self.logger.info(f"[LLM] Max tokens: {text_config['max_tokens']}")
start_time = time.time()
try:
if text_config["provider"] == "anthropic":
response = text_config["client"].messages.create(
model=text_config["model"],
max_tokens=text_config["max_tokens"],
messages=[{
"role": "user",
"content": prompt
}],
temperature=0.1
)
result = response.content[0].text.strip()
else: # OpenAI
response = self.openai_client.chat.completions.create(
model=text_config["model"],
messages=[{"role": "user", "content": prompt}],
max_tokens=text_config["max_tokens"],
temperature=0.1
)
result = response.choices[0].message.content
elapsed_time = time.time() - start_time
self.logger.info(f"[LLM] Response received in {elapsed_time:.2f}s")
self.logger.info(f"[LLM] Response length: {len(result)} chars")
return result
except Exception as e:
self.logger.error(f"[LLM] Error: {str(e)}")
raise