
    @i                         d Z ddlZddlmZmZmZmZ ddlZddl	m
Z
 ddlmZ  ej                  e      Z G d d      Zy)z8Dataset for contrastive learning with triplet structure.    N)ListTupleDictAny)MemoryStore)PatternEmbedderc                      e Zd ZdZddededefdZdej                  fdZ
deeej                  ej                  ej                  f      fdZdefd	Zd
edeej                  ej                  ej                  f   fdZy)PreferenceDatasetzDataset for triplet-based contrastive learning.
    
    Each sample is (anchor, positive, negative):
    - anchor: User preference vector
    - positive: Liked pattern vector
    - negative: Disliked pattern vector
    memory_storeembedderuser_idc                    || _         || _        || _        |j                         | _        | j                  j                  |      | _        | j                  (t        j                  d| d       g | _	        g | _
        n| j                  j                  dg       D cg c]  }t        j                  |       c}| _	        | j                  j                  dg       D cg c]  }t        j                  |       c}| _
        t        j                  dt        | j                         dt        | j                         d       yc c}w c c}w )	zInitialize preference dataset.
        
        Args:
            memory_store: Memory store instance
            embedder: Pattern embedder for encoding patterns
            user_id: User identifier
        NzUser z not found in memorypositivenegativezDataset initialized: z positive, z negative vectors)r   r   r   load_user_memoryuser_memorygetuser_profileloggerwarningpositive_vectorsnegative_vectorsnparrayinfolen)selfr   r   r   vs        G   /home/ubuntu/codebase/yexijia/保研/colocation_mvp/learning/dataset.py__init__zPreferenceDataset.__init__   s+    )  (88: ,,009$NNU7)+?@A$&D!$&D! ;?:K:K:O:OPZ\^:_$`:_QRXXa[:_$`D!:>:K:K:O:OPZ\^:_$`:_QRXXa[:_$`D!+C0E0E,F+G{$//011BD 	E %a$`s   EEreturnc                    t        | j                        dk(  rQt        | j                        dk(  r9| j                  j                  j                         }t        j                  |      S t        | j                        dkD  r"t        j                  | j                  d      }n?| j                  r| j                  d   j                  d   nd}t        j                  |      }t        | j                        dkD  r"t        j                  | j                  d      }n?| j                  r| j                  d   j                  d   nd}t        j                  |      }||z
  }|S )zuBuild user anchor vector.
        
        Returns:
            User preference vector (mean_pos - mean_neg)
        r   )axisi  )
r   r   r   r   model get_sentence_embedding_dimensionr   zerosmeanshape)r   dimmean_posmean_neguser_vectors        r   build_user_vectorz#PreferenceDataset.build_user_vector2   s    t$$%*s43H3H/IQ/N--%%FFHC88C=  t$$%)wwt441=H7;7L7L$''*003RUCxx}H t$$%)wwt441=H7;7L7L$''*003RUCxx}H )    c                 h   t        | j                        dk(  st        | j                        dk(  rt        j	                  d       g S | j                         }g }| j                  D ]'  }| j                  D ]  }|j                  |||f        ) t        j                  dt        |       d       |S )zvGenerate triplet samples.
        
        Returns:
            List of (anchor, positive, negative) triplets
        r   z(Insufficient data for triplet generationz
Generated z	 triplets)r   r   r   r   r   r-   appendr   )r   anchortripletspos_vecneg_vecs        r   get_tripletszPreferenceDataset.get_tripletsP   s     t$$%*c$2G2G.HA.MNNEFI'') ,,G00' :; 1 - 	jXy9:r.   c                 8    | j                         }t        |      S )zGet dataset size.)r5   r   )r   r2   s     r   __len__zPreferenceDataset.__len__e   s    $$&8}r.   idxc                 |    | j                         }|t        |      k\  rt        d| dt        |             ||   S )zGet a triplet sample.
        
        Args:
            idx: Sample index
            
        Returns:
            (anchor, positive, negative) triplet
        zIndex z" out of range for dataset of size )r5   r   
IndexError)r   r8   r2   s      r   __getitem__zPreferenceDataset.__getitem__j   sF     $$&#h-vcU*LSQY]O\]]}r.   N)user_001)__name__
__module____qualname____doc__r   r   strr    r   ndarrayr-   r   r   r5   intr7   r;    r.   r   r
   r
      s    E[ EO EVY E82:: <d5RZZ)K#LM * 
s uRZZRZZ-O'P r.   r
   )r@   numpyr   typingr   r   r   r   loggingmemory.storer   learning.embedderr   	getLoggerr=   r   r
   rD   r.   r   <module>rK      s8    >  ) )  $ -			8	$j jr.   