"""Pattern embedding module for vectorizing patterns."""

from typing import List, Union
import numpy as np
from sentence_transformers import SentenceTransformer
from pathlib import Path



class PatternEmbedder:
    """Convert feature-type patterns into vectors with a sentence transformer.

    A pattern is a list of feature-type strings. It is embedded by joining its
    items with single spaces and encoding the resulting text with the loaded
    model.
    """

    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        """Load the sentence-transformer model.

        The copy bundled at ``<project root>/models/all-MiniLM-L6-v2`` is used
        whenever that directory exists, so existing offline setups keep
        loading exactly the same model. ``model_name`` is the fallback that is
        handed to ``SentenceTransformer`` when the bundled directory is
        missing.

        Args:
            model_name: Name or path of the sentence transformer model to load
                when no bundled copy is available.
        """
        # Per the original note, this file lives in src/learning/, so three
        # parents up from the file is the project root.
        bundled_dir = (
            Path(__file__).resolve().parent.parent.parent
            / "models"
            / "all-MiniLM-L6-v2"
        )
        # Prefer the bundled copy; only fall back to the requested name when
        # it is absent instead of failing on a non-existent path.
        model_source = str(bundled_dir) if bundled_dir.is_dir() else model_name
        self.model = SentenceTransformer(model_source)

    def encode_pattern(self, pattern: List[str]) -> np.ndarray:
        """Encode a single pattern to a vector.

        Args:
            pattern: List of feature types in the pattern.

        Returns:
            Whatever the model's ``encode`` returns for the space-joined text
            (the vector representation of the pattern).
        """
        text = " ".join(pattern)
        return self.model.encode(text)

    def encode_patterns(self, patterns: List[List[str]]) -> np.ndarray:
        """Encode multiple patterns to vectors.

        Args:
            patterns: List of patterns, each a list of feature types.

        Returns:
            Array of vectors, shape (n_patterns, embedding_dim). An empty
            ``patterns`` list yields an array of shape (0, embedding_dim).
        """
        texts = [" ".join(p) for p in patterns]
        if not texts:
            # Encoding an empty batch would not produce a 2-D result, so build
            # the documented (0, embedding_dim) shape explicitly.
            # NOTE(review): float32 is assumed to match the model's output
            # dtype -- confirm if a different precision is configured.
            dim = self.get_sentence_embedding_dimension() or 0
            return np.empty((0, dim), dtype=np.float32)
        return self.model.encode(texts)

    def get_sentence_embedding_dimension(self) -> int:
        """Get the dimension of sentence embeddings.

        Returns:
            The value reported by the model's
            ``get_sentence_embedding_dimension``.
        """
        return self.model.get_sentence_embedding_dimension()


