-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdefault.yaml
87 lines (71 loc) · 2.69 KB
/
default.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Run-level settings for the captioning pipeline: where and how artifacts are
# saved, whether queries are batched, and which query types are included.
pipeline_config:
  # Artifact output
  artifact_format: .jsonl
  save_frequency: 100
  save_dir: tests/outputs/captioning/gbc_wiki_images
  save_callback: null
  save_images: false

  # Batching
  batch_query: false
  batch_size: 32  # ignored if batch_query is false

  # Query types to include; the names match the entity / composition /
  # relation entries under `queries`.
  include_entity_query: true
  include_composition_query: true
  include_relation_query: true

  # NOTE(review): presumably a ratio in [0, 1]; confirm against the consumer.
  mask_inside_threshold: 0.85
# Definitions written once here and pulled into the `queries` section through
# `${...shared.<name>}` interpolations.
#
# NOTE(review): `_target_` / `_partial_` look like Hydra `instantiate` keys
# (dotted import path, optional partial application) - confirm against the
# code that loads this file.
shared:
  # Text helper referenced by the image and entity queries.
  suitable_for_detection_func:
    _target_: gbc.texts.text_helpers.suitable_for_detection
    _partial_: true

  # Text helper referenced by the entity query.
  potential_same_object_func:
    _target_: gbc.texts.text_helpers.potential_same_object
    _partial_: true

  # Detector referenced by both detection actions.
  detection_model:
    _target_: gbc.captioning.detection.grounding_dino.GroundingDinoDetection
    model_name: IDEA-Research/grounding-dino-tiny

  # Model settings passed as `model_kwargs` to every Pixtral query.
  pixtral:
    model_name: nm-testing/pixtral-12b-FP8-dynamic
    gpu_memory_utilization: 0.8  # can be set to 0.3 if you have 80GB GPU memory
# One entry per query / action. Each entry names its implementation with
# `_target_` and reuses definitions from the top-level `shared` section.
#
# `${...shared.<name>}` is a relative interpolation: with entries nested two
# levels below the root, the three leading dots climb from the entry's own
# mapping up to the document root. Moving an entry to a different depth
# requires adjusting the number of dots.
queries:
  # --- Pixtral captioning queries -----------------------------------------
  image_query:
    _target_: gbc.captioning.mllm.pixtral.PixtralImageQuery
    model_kwargs: ${...shared.pixtral}
    system_file: prompts/captioning/system_image.txt
    query_file: prompts/captioning/query_image.txt
    suitable_for_detection_func: ${...shared.suitable_for_detection_func}

  entity_query:
    _target_: gbc.captioning.mllm.pixtral.PixtralEntityQuery
    model_kwargs: ${...shared.pixtral}
    system_file: prompts/captioning/system_entity.txt
    query_file: prompts/captioning/query_entity.txt
    suitable_for_detection_func: ${...shared.suitable_for_detection_func}
    potential_same_object_func: ${...shared.potential_same_object_func}
    # Only the entity query sets explicit query kwargs.
    query_kwargs:
      max_tokens: 512

  relation_query:
    _target_: gbc.captioning.mllm.pixtral.PixtralRelationQuery
    model_kwargs: ${...shared.pixtral}
    system_file: prompts/captioning/system_relation.txt
    query_file: prompts/captioning/query_relation.txt

  composition_query:
    _target_: gbc.captioning.mllm.pixtral.PixtralCompositionQuery
    model_kwargs: ${...shared.pixtral}
    system_file: prompts/captioning/system_composition.txt
    query_file: prompts/captioning/query_composition.txt

  # --- Detection actions ---------------------------------------------------
  # The two actions below use the same class and the same values; they differ
  # only in `max_rel_area` (null here, 0.8 for `detection_from_entity`).
  detection_from_image:
    _target_: gbc.captioning.detection.detection_action.DetectionAction
    detection_model: ${...shared.detection_model}
    # NOTE(review): the trailing `0.05` is presumably an alternative value
    # that was tried - confirm before relying on it.
    score_threshold: 0.35  # 0.05
    nms_single_threshold: 0.05
    nms_multiple_threshold: 0.2
    select_high_conf_tolerance: 0.05
    topk: 6
    min_abs_area: 5000
    max_rel_area: null

  detection_from_entity:
    _target_: gbc.captioning.detection.detection_action.DetectionAction
    detection_model: ${...shared.detection_model}
    # NOTE(review): the trailing `0.05` is presumably an alternative value
    # that was tried - confirm before relying on it.
    score_threshold: 0.35  # 0.05
    nms_single_threshold: 0.05
    nms_multiple_threshold: 0.2
    select_high_conf_tolerance: 0.05
    topk: 6
    min_abs_area: 5000
    max_rel_area: 0.8