# Embed a small batch of sentences in a single API call and report the count.
texts = [
    "Machine learning is a branch of AI",
    "Deep learning uses neural networks",
    "NLP enables machines to understand human language",
]

# One request carries the whole batch; the response preserves input order,
# so response.data[i] corresponds to texts[i].
response = client.embeddings.create(
    model="text-embedding-3-small",
    input=texts,
)
embeddings = [item.embedding for item in response.data]
print(f"Processed {len(embeddings)} texts")
import numpy as np


def cosine_similarity(a, b):
    """Return the cosine similarity between two vectors.

    Args:
        a: 1-D array-like of floats.
        b: 1-D array-like of floats, same length as ``a``.

    Returns:
        float in [-1.0, 1.0]. Returns 0.0 when either vector has zero
        norm — the unguarded formula would divide by zero and yield
        NaN (with a runtime warning) for degenerate input.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        # Treat a zero vector as orthogonal to everything rather than raise.
        return 0.0
    return float(np.dot(a, b) / denom)


# Demo: semantically related sentences should score higher than unrelated ones.
text1 = "The weather is nice today"
text2 = "It's a sunny day"
text3 = "I like apples"

# NOTE(review): get_embedding is not defined in this chunk — presumably a
# helper wrapping the embeddings API; confirm it exists earlier in the file.
emb1 = get_embedding(text1)
emb2 = get_embedding(text2)
emb3 = get_embedding(text3)

print(f"Text1 vs Text2: {cosine_similarity(emb1, emb2):.4f}")
print(f"Text1 vs Text3: {cosine_similarity(emb1, emb3):.4f}")
documents = [
    "Python is an interpreted programming language",
    "JavaScript is commonly used for web development",
    "Machine learning is a branch of AI",
]


def semantic_search(query, docs, doc_embeddings, top_k=3):
    """Rank documents by cosine similarity to the query.

    Args:
        query: text to search for; embedded via ``get_embedding``.
        docs: document strings, index-aligned with ``doc_embeddings``.
        doc_embeddings: precomputed embedding vector per document.
        top_k: number of best matches to return (default 3).

    Returns:
        List of ``(document, similarity)`` pairs, highest similarity first.
    """
    query_embedding = get_embedding(query)
    ranked = sorted(
        (
            (idx, cosine_similarity(query_embedding, emb))
            for idx, emb in enumerate(doc_embeddings)
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [(docs[idx], score) for idx, score in ranked[:top_k]]


# Search
# NOTE(review): `doc_embeddings` is not defined in this chunk — presumably
# computed earlier from `documents`; verify before running this standalone.
results = semantic_search("What is AI?", documents, doc_embeddings)