Examples of Milvus Collections
Table Of Contents:
- Example-1
- Example-2
(1) Example-1
Step – 1: Import Required Libraries
import json
import pandas as pd
from datasets import load_dataset
from pymilvus.model.hybrid import BGEM3EmbeddingFunction
from pymilvus import (
FieldSchema,
CollectionSchema,
DataType,
Collection,
AnnSearchRequest,
RRFRanker,
connections,
)
Step – 2: Reading Input Data
# Read the image metadata: parse the JSON file into a Python object.
image_path = "F:\\smartsearchmodelling\\vector_files\\Images.json"
with open(image_path, 'r') as file:
    image_data = json.load(file)

# Read the product text data and keep only the columns that feed the embedding.
text_path = "F:\\smartsearchmodelling\\vector_files\\Sample_Audio_v5.parquet"
text_data = pd.read_parquet(text_path)
required_columns = [
    'exact_product_name',
    'title',
    'features',
    'description',
    'details',
    'All_key_words_combined',
]
# .copy() gives an independent frame, so adding a column below does not write
# into a slice of text_data (avoids pandas' SettingWithCopyWarning).
textual_data = text_data[required_columns].copy()

# Build one newline-separated document per row.
# Calling str() on a Series stringifies the whole column (its repr), which
# would give every row the same blob; astype(str) converts element-wise so
# each row is merged from its own values.
text_columns = textual_data[required_columns].astype(str)
merged_text = text_columns[required_columns[0]]
for column_name in required_columns[1:]:
    merged_text = merged_text + "\n" + text_columns[column_name]
textual_data["merged_text"] = merged_text
docs = textual_data["merged_text"].to_list()
Step – 3: Embedding The Input Text
# Build the BGE-M3 embedding function, configured for CPU with fp16 disabled.
ef = BGEM3EmbeddingFunction(
    device="cpu",
    use_fp16=False,
)
# Dense embedding dimension reported by the embedding function; it sizes the
# dense_vector field of the collection schema.
dense_dim = ef.dim["dense"]
docs_embeddings = ef(docs)
Step – 4: Setting Up Milvus Database
from pymilvus import connections
connections.connect(uri="http://localhost:19530")
Step – 5: Create Fields For Milvus Collection
# Primary key: auto-generated VARCHAR id (auto_id=True), up to 100 characters.
pk_field = FieldSchema(
    name="pk",
    dtype=DataType.VARCHAR,
    is_primary=True,
    auto_id=True,
    max_length=100,
)
# Raw document text, VARCHAR with max_length=8192.
text_field = FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=8192)
# Sparse embedding of the document.
sparse_field = FieldSchema(
    name="sparse_vector",
    dtype=DataType.SPARSE_FLOAT_VECTOR,
)
# Dense embedding of the document; its dimension comes from dense_dim.
dense_field = FieldSchema(
    name="dense_vector",
    dtype=DataType.FLOAT_VECTOR,
    dim=dense_dim,
)

# Field order defines the schema order: pk, text, sparse_vector, dense_vector.
fields = [pk_field, text_field, sparse_field, dense_field]
Step – 6: Create Collection Schema
schema = CollectionSchema(fields, "")
Step – 7: Create Collection
col = Collection("sparse_dense_demo", schema)
Step – 8: Create Indexes For Collection Vector Fields
# Index parameters for the two vector fields:
#   sparse_vector -> SPARSE_INVERTED_INDEX with the IP metric
#   dense_vector  -> FLAT with the COSINE metric
sparse_index = dict(index_type="SPARSE_INVERTED_INDEX", metric_type="IP")
dense_index = dict(index_type="FLAT", metric_type="COSINE")

# Create the index on the sparse vector field.
col.create_index("sparse_vector", sparse_index)
col.create_index("dense_vector", dense_index)
Step – 9: Insert Data Into Collection
# Insert the documents with their embeddings.
# The original text of this step repeated Step 8's index creation verbatim
# and never inserted anything.
# Column-based insert: one column per non-auto-generated field, in schema
# order (text, sparse_vector, dense_vector). The "pk" field is omitted
# because it is declared with auto_id=True.
# docs_embeddings is the output of ef(docs), holding the "sparse" and "dense"
# embeddings for the same documents, in the same order as docs.
entities = [docs, docs_embeddings["sparse"], docs_embeddings["dense"]]
col.insert(entities)
# Flush so the inserted rows are persisted before the collection is used.
col.flush()
