#!/usr/bin/env python3
"""Convert CSV file to JSON format for co-location mining."""

import csv
import json
import math
import sys
from collections import Counter
from pathlib import Path


def convert_csv_to_json(csv_path: str, json_path: str) -> list:
    """Convert a CSV file of spatial instances to a JSON array.

    CSV format: type,id,x,y (columns beyond the fourth are ignored)
    JSON format: [{"id": int, "type": str, "x": float, "y": float}, ...]

    Blank lines are skipped silently. Any other row that cannot be
    converted (too few columns, non-integer id, non-numeric or non-finite
    coordinates -- which includes a header row) is skipped with a warning
    printed to stdout. After writing the JSON file, a summary and the
    per-type record counts are printed.

    Args:
        csv_path: Path to input CSV file
        json_path: Path to output JSON file

    Returns:
        The list of record dicts that was written to ``json_path``.
    """
    data = []

    # 'utf-8-sig' drops a leading byte-order mark (otherwise it would be
    # glued onto the first record's type); newline='' lets the csv module
    # do its own newline handling, as its documentation requires.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            if len(row) < 4:
                print(f"Warning: Skipping invalid row: {row} - expected at least 4 columns")
                continue
            try:
                instance_id = int(row[1].strip())
                x = float(row[2].strip())
                y = float(row[3].strip())
                # float() accepts "nan"/"inf", but json.dump would emit them
                # as NaN/Infinity, which is not valid JSON.
                if not (math.isfinite(x) and math.isfinite(y)):
                    raise ValueError("coordinates must be finite numbers")
            except ValueError as e:
                print(f"Warning: Skipping invalid row: {row} - {e}")
                continue

            data.append({
                "id": instance_id,
                "type": row[0].strip(),
                "x": x,
                "y": y,
            })

    # Save as JSON; ensure_ascii=False keeps non-ASCII type names readable.
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"Successfully converted {len(data)} records from {csv_path} to {json_path}")

    # Summary statistics: number of records per type, in sorted type order.
    type_counts = Counter(item["type"] for item in data)

    print("\nType distribution:")
    for type_name, count in sorted(type_counts.items()):
        print(f"  {type_name}: {count}")

    return data


if __name__ == "__main__":
    # Default input and output paths, used when no command-line arguments
    # are given.
    default_csv_path = "/home/ubuntu/codebase/yexijia/保研/APWb-WAIM-APES/APES_Mining/Beijing_haidian.csv"
    default_json_path = "/home/ubuntu/codebase/yexijia/保研/colocation_mvp/data/beijing_haidian.json"

    # Optional positional overrides: script.py [csv_path] [json_path].
    # This lets the script run on machines where the default paths do not exist.
    cli_args = sys.argv[1:]
    csv_path = cli_args[0] if len(cli_args) >= 1 else default_csv_path
    json_path = cli_args[1] if len(cli_args) >= 2 else default_json_path

    # Make sure the output directory exists.
    output_dir = Path(json_path).parent
    output_dir.mkdir(parents=True, exist_ok=True)

    # Run the conversion.
    convert_csv_to_json(csv_path, json_path)

    print(f"\nOutput saved to: {json_path}")

