forked from Ak-Gautam/indexify-extractors
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextractors.json
32 lines (32 loc) · 2.61 KB
/
extractors.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
[
{"type": "embedding", "module_name": "clip_embedding.openai_clip_extractor:ClipEmbeddingExtractor"},
{"type": "embedding", "module_name": "colbert.colbertv2:ColBERTv2Base"},
{"type": "embedding", "module_name": "e5_embedding.e5_small_v2:E5SmallEmbeddings"},
{"type": "embedding", "module_name": "flag_embedding.bge_base:BGEBase"},
{"type": "embedding", "module_name": "hash-embedding.identity_hash_embedding:IdentityHashEmbedding"},
{"type": "embedding", "module_name": "jina_base_en.jina_base_en:JinaEmbeddingsBase"},
{"type": "embedding", "module_name": "minilm-l6.minilm_l6:MiniLML6Extractor"},
{"type": "embedding", "module_name": "mpnet.mpnet_base_v2:MPNetV2"},
{"type": "embedding", "module_name": "openai-embedding.openai_embedding:OpenAIEmbeddingExtractor"},
{"type": "embedding", "module_name": "scibert.scibert_uncased:SciBERTExtractor"},
{"type": "audio", "module_name": "whisper-asr.whisper_extractor:WhisperExtractor"},
{"type": "audio", "module_name": "whisper-mlx.whisper_extractor:WhisperExtractor", "skip_deploy": true},
{"type": "audio", "module_name": "whisper-diarization.whisper_diarization:WhisperDiarizationExtractor"},
{"type": "video", "module_name": "audio-extractor.audio_extractor:AudioExtractor"},
{"type": "video", "module_name": "face-extractor.face_extractor:FaceExtractor", "skip_deploy": true},
{"type": "video", "module_name": "keyframes.key_frame_extractor:KeyFrameExtractor"},
{"type": "html", "module_name": "wikipedia.wikipedia:WikipediaExtractor"},
{"type": "image", "module_name": "yolo.yolo_extractor:YoloExtractor"},
{"type": "image", "module_name": "groundingdino.grounding_dino:GroundingDinoExtractor"},
{"type": "image", "module_name": "moondream.moondream_extractor:MoondreamExtractor"},
{"type": "invoices", "module_name": "donut_cord.base_cord_v2:DonutBaseV2", "skip_deploy": true},
{"type": "invoices", "module_name": "donut_invoice.donut_base_invoice:SimpleInvoiceParserExtractor"},
{"type": "pdf", "module_name": "pdf-extractor.pdf_extractor:PDFExtractor"},
{"type": "pdf", "module_name": "layoutlm_document_qa.layoutlm_document_qa:LayoutLMDocumentQA"},
{"type": "pdf", "module_name": "ocrmypdf.ocr_my_pdf:OCRMyPDFExtractor"},
{"type": "pdf", "module_name": "unstructuredio.unstructured_pdf:UnstructuredIOExtractor"},
{"type": "text", "module_name": "chunking.chunk_extractor:ChunkExtractor"},
{"type": "text", "module_name": "summarization.summary_extractor:SummaryExtractor"},
{"type": "text", "module_name": "schema.schema_extractor:SchemaExtractor"},
{"type": "text", "module_name": "text-lid.language_extractor:LanguageExtractor", "skip_deploy": true}
]