Examples Of Milvus Collections

Table Of Contents:

  1. Example-1
  2. Example-2

(1) Example-1

Step – 1: Import Required Libraries

import json
import pandas as pd
from datasets import load_dataset
from pymilvus.model.hybrid import BGEM3EmbeddingFunction
from pymilvus import (
    FieldSchema,
    CollectionSchema,
    DataType,
    Collection,
    AnnSearchRequest,
    RRFRanker,
    connections,
)

Step – 2: Reading Input Data

# Load the image metadata (JSON) and the product text table (Parquet), then
# build one merged text document per product row for embedding.
image_path = "F:\\smartsearchmodelling\\vector_files\\Images.json"
# JSON is UTF-8 by specification; state it explicitly so the platform's
# default encoding is not used when decoding the file.
with open(image_path, 'r', encoding="utf-8") as file:
    # Parse the JSON file into Python objects.
    image_data = json.load(file)

text_path = "F:\\smartsearchmodelling\\vector_files\\Sample_Audio_v5.parquet"
text_data = pd.read_parquet(text_path)

required_columns = ['exact_product_name', 'title', 'features',
                    'description', 'details', 'All_key_words_combined']

# Take an explicit copy: adding "merged_text" to a plain column selection of
# text_data triggers pandas' SettingWithCopyWarning.
textual_data = text_data[required_columns].copy()

# Convert every column element-wise with .astype(str). Calling str() on a
# whole Series yields the printed representation of the entire column, which
# would give every row the same (truncated) text instead of its own values.
textual_data["merged_text"] = (
    textual_data["exact_product_name"].astype(str)
    + "\n" + textual_data["title"].astype(str)
    + "\n" + textual_data["features"].astype(str)
    + "\n" + textual_data["description"].astype(str)
    + "\n" + textual_data["details"].astype(str)
    + "\n" + textual_data["All_key_words_combined"].astype(str)
)
# One merged string per row, in row order.
docs = textual_data["merged_text"].to_list()

Step – 3: Embedding The Input Text

# Build the BGE-M3 embedding function on CPU with fp16 disabled, then embed
# every merged document in `docs`.
ef = BGEM3EmbeddingFunction(device="cpu", use_fp16=False)
docs_embeddings = ef(docs)
# Dense vector size reported by the embedding function; the dense vector
# field of the collection is declared with this dimension.
dense_dim = ef.dim["dense"]

Step – 4: Setting Up Milvus Database

from pymilvus import connections

# Address of the Milvus server used by this example.
milvus_uri = "http://localhost:19530"
# Open the connection that the Collection calls in the later steps use.
connections.connect(uri=milvus_uri)

Step – 5: Create Fields For Milvus Collection

# Primary key: auto-generated id stored as a VARCHAR of at most 100 chars.
pk_field = FieldSchema(
    name="pk",
    dtype=DataType.VARCHAR,
    is_primary=True,
    auto_id=True,
    max_length=100,
)
# Source text of each document (at most 8192 characters).
text_field = FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=8192)
# Sparse embedding of the document.
sparse_field = FieldSchema(name="sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR)
# Dense embedding of the document; its size comes from the embedding function.
dense_field = FieldSchema(
    name="dense_vector", dtype=DataType.FLOAT_VECTOR, dim=dense_dim
)

# Field order defines the column order of the collection.
fields = [pk_field, text_field, sparse_field, dense_field]

Step – 6: Create Collection Schema

# Wrap the field definitions in a collection schema with an empty description.
schema = CollectionSchema(fields=fields, description="")

Step – 7: Create Collection

# Create a handle to the "sparse_dense_demo" collection using the schema above.
col = Collection(name="sparse_dense_demo", schema=schema)

Step – 8: Create Indexes For The Collection's Vector Fields

# Index parameters for each vector field: inner product (IP) on the sparse
# inverted index, cosine similarity on the FLAT dense index.
sparse_index = dict(index_type="SPARSE_INVERTED_INDEX", metric_type="IP")
dense_index = dict(index_type="FLAT", metric_type="COSINE")

# Create the sparse index first, then the dense one.
for field_name, index_params in (
    ("sparse_vector", sparse_index),
    ("dense_vector", dense_index),
):
    col.create_index(field_name, index_params)

Step – 9: Insert Data Into Collection

# Insert the documents together with their sparse and dense embeddings.
# Columns are given in schema field order (text, sparse_vector, dense_vector);
# the auto-generated "pk" field is not supplied.
# NOTE(review): for a large dataset consider inserting in batches — confirm
# against the Milvus request size limits of your deployment.
entities = [docs, docs_embeddings["sparse"], docs_embeddings["dense"]]
col.insert(entities)
# Flush so the inserted rows are persisted before the collection is used.
col.flush()

Leave a Reply

Your email address will not be published. Required fields are marked *