
    @i                         S r SSKrSSKJrJrJrJr  SSKrSSK	J
r
  SSKJr  \R                  " \5      r " S S5      rg)z8Dataset for contrastive learning with triplet structure.    N)ListTupleDictAny)MemoryStore)PatternEmbedderc                      \ rS rSrSrSS\S\S\4S jjrS\	R                  4S jrS\\\	R                  \	R                  \	R                  4      4S	 jrS\4S
 jrS\S\\	R                  \	R                  \	R                  4   4S jrSrg)PreferenceDataset   zDataset for triplet-based contrastive learning.

Each sample is (anchor, positive, negative):
- anchor: User preference vector
- positive: Liked pattern vector
- negative: Disliked pattern vector
memory_storeembedderuser_idc                    Xl         X l        X0l        UR                  5       U l        U R                  R                  U5      U l        U R                  c(  [        R                  SU S35        / U l	        / U l
        OU R                  R                  S/ 5       Vs/ s H  n[        R                  " U5      PM     snU l	        U R                  R                  S/ 5       Vs/ s H  n[        R                  " U5      PM     snU l
        [        R                  S[        U R                  5       S[        U R                  5       S35        gs  snf s  snf )	zInitialize preference dataset.

Args:
    memory_store: Memory store instance
    embedder: Pattern embedder for encoding patterns
    user_id: User identifier
NzUser z not found in memorypositivenegativezDataset initialized: z positive, z negative vectors)r   r   r   load_user_memoryuser_memorygetuser_profileloggerwarningpositive_vectorsnegative_vectorsnparrayinfolen)selfr   r   r   vs        C   /home/ubuntu/codebase/yexijia/保研/iCoLoc/src/learning/dataset.py__init__PreferenceDataset.__init__   s%    )  (88: ,,009$NNU7)+?@A$&D!$&D! ;?:K:K:O:OPZ\^:_$`:_QRXXa[:_$`D!:>:K:K:O:OPZ\^:_$`:_QRXXa[:_$`D!+C0E0E,F+G{$//011BD 	E %a$`s    E! Ereturnc                    [        U R                  5      S:X  aS  [        U R                  5      S:X  a:  U R                  R                  R                  5       n[        R                  " U5      $ [        U R                  5      S:  a   [        R                  " U R                  SS9nOEU R                  (       a  U R                  S   R                  S   OSn[        R                  " U5      n[        U R                  5      S:  a   [        R                  " U R                  SS9nOEU R                  (       a  U R                  S   R                  S   OSn[        R                  " U5      nX#-
  nU$ )zUBuild user anchor vector.

Returns:
    User preference vector (mean_pos - mean_neg)
r   )axisi  )
r   r   r   r   model get_sentence_embedding_dimensionr   zerosmeanshape)r   dimmean_posmean_neguser_vectors        r    build_user_vector#PreferenceDataset.build_user_vector2   s    t$$%*s43H3H/IQ/N--%%FFHC88C=  t$$%)wwt441=H7;7L7L$''*003RUCxx}H t$$%)wwt441=H7;7L7L$''*003RUCxx}H )    c                 r   [        U R                  5      S:X  d  [        U R                  5      S:X  a  [        R	                  S5        / $ U R                  5       n/ nU R                   H)  nU R                   H  nUR                  XU45        M     M+     [        R                  S[        U5       S35        U$ )zVGenerate triplet samples.

Returns:
    List of (anchor, positive, negative) triplets
r   z(Insufficient data for triplet generationz
Generated z	 triplets)r   r   r   r   r   r/   appendr   )r   anchortripletspos_vecneg_vecs        r    get_tripletsPreferenceDataset.get_tripletsP   s     t$$%*c$2G2G.HA.MNNEFI'') ,,G00' :; 1 - 	jXy9:r1   c                 8    U R                  5       n[        U5      $ )zGet dataset size.)r8   r   )r   r5   s     r    __len__PreferenceDataset.__len__e   s    $$&8}r1   idxc                 |    U R                  5       nU[        U5      :  a  [        SU S[        U5       35      eX!   $ )zjGet a triplet sample.

Args:
    idx: Sample index
    
Returns:
    (anchor, positive, negative) triplet
zIndex z" out of range for dataset of size )r8   r   
IndexError)r   r=   r5   s      r    __getitem__PreferenceDataset.__getitem__j   sD     $$&#h-vcU*LSQY]O\]]}r1   )r   r   r   r   r   r   r   N)user_001)__name__
__module____qualname____firstlineno____doc__r   r   strr!   r   ndarrayr/   r   r   r8   intr;   r@   __static_attributes__ r1   r    r
   r
      s    E[ EO EVY E82:: <d5RZZ)K#LM * 
s uRZZRZZ-O'P r1   r
   )rG   numpyr   typingr   r   r   r   loggingmemory.storer   learning.embedderr   	getLoggerrC   r   r
   rL   r1   r    <module>rS      s6    >  ) )  $ -			8	$j jr1   