# twitter-checker/cookie_parser.py
# Last modified: 2026-01-21 04:25:00 +02:00

import json
from pathlib import Path
from typing import Dict, Optional

import debug_logger as log
def clean_cookie_content(content: str) -> str:
    """Strip trailing garbage from raw cookie-file content.

    If the content starts with a JSON value ('[' or '{'), return just that
    JSON prefix and drop anything appended after it. Otherwise treat the
    content as Netscape cookie-jar text and keep only comment lines and
    tab-separated records whose expiry field (column 5) parses as an
    integer.

    Returns the original content unchanged when nothing survives the
    line-based cleaning.
    """
    content_stripped = content.strip()

    # JSON case: let the stdlib decoder find where the first complete JSON
    # value ends. Unlike naive bracket/brace counting, raw_decode is not
    # fooled by '[' / '{' / ']' / '}' characters inside string values.
    if content_stripped.startswith(('[', '{')):
        try:
            _, json_end = json.JSONDecoder().raw_decode(content_stripped)
            return content_stripped[:json_end]
        except json.JSONDecodeError:
            # Not actually valid JSON; fall through to line-based cleaning.
            pass

    cleaned_lines = []
    for line in content_stripped.split('\n'):
        line = line.strip()
        if not line:
            continue
        if line.startswith('#'):
            # Preserve Netscape header/comment lines verbatim.
            cleaned_lines.append(line)
            continue
        parts = line.split('\t')
        if len(parts) >= 7:
            try:
                int(parts[4])  # expiry column must be numeric
                cleaned_lines.append(line)
            except ValueError:
                continue
    return '\n'.join(cleaned_lines) if cleaned_lines else content
def netscape_to_dict(netscape_content: str) -> Dict[str, str]:
    """Convert Netscape cookie-jar text into a {name: value} mapping.

    Comment lines ('#') and blank lines are ignored; each remaining line is
    split on tabs, and the name (column 6) / value (column 7) pair is kept
    when the line has at least seven fields.
    """
    result: Dict[str, str] = {}
    for entry in netscape_content.strip().split('\n'):
        # Skip header/comment lines and empty lines.
        if entry.startswith('#') or not entry.strip():
            continue
        fields = entry.split('\t')
        if len(fields) >= 7:
            result[fields[5]] = fields[6]
    return result
def json_to_dict(json_content: str) -> Dict[str, str]:
    """Parse a JSON cookie export into a {name: value} mapping.

    Accepts either a list of cookie objects (browser-export style, each
    with 'name' and 'value' keys) or a plain name->value object. Any other
    JSON value yields an empty dict. Raises json.JSONDecodeError on
    invalid JSON.
    """
    parsed = json.loads(json_content)
    if isinstance(parsed, dict):
        # Already in name->value form.
        return parsed
    result: Dict[str, str] = {}
    if isinstance(parsed, list):
        for entry in parsed:
            if 'name' in entry and 'value' in entry:
                result[entry['name']] = entry['value']
    return result
def dict_to_cookie_string(cookies: Dict[str, str]) -> str:
    """Serialize a cookie mapping into a Cookie-header value string."""
    pairs = (f"{name}={value}" for name, value in cookies.items())
    return "; ".join(pairs)
def read_cookies_file(file_path: str) -> Optional[str]:
    """Read a cookie file (JSON or Netscape format) and build a Cookie header string.

    Tries JSON first (browser exports), then falls back to Netscape
    cookie-jar parsing.

    Returns:
        The "name=value; ..." cookie string, or None when the file is
        missing, unreadable, empty after cleaning, unparseable, or yields
        no cookies.
    """
    log.debug(f"read_cookies_file called for: {file_path}")
    path = Path(file_path)
    if not path.exists():
        log.error(f"Cookie file does not exist: {file_path}")
        return None
    try:
        content = path.read_text(encoding='utf-8')
        log.debug(f"File content read, length: {len(content)}")
        log.debug(f"First 200 chars: {content[:200]}")
    except Exception as e:
        log.error(f"Failed to read file {file_path}: {e}")
        return None
    # Strip trailing garbage / non-cookie lines before parsing.
    content = clean_cookie_content(content)
    log.debug(f"After cleaning, content length: {len(content) if content else 0}")
    if not content:
        log.error(f"Content is empty after cleaning for {file_path}")
        return None
    cookies_dict = {}
    try:
        log.debug("Attempting to parse as JSON...")
        cookies_dict = json_to_dict(content)
        log.debug(f"Parsed as JSON, {len(cookies_dict)} cookies found")
    except json.JSONDecodeError as e:
        # Not JSON — fall back to Netscape cookie-jar format.
        log.debug(f"Not JSON format: {e}, trying Netscape format...")
        try:
            cookies_dict = netscape_to_dict(content)
            log.debug(f"Parsed as Netscape, {len(cookies_dict)} cookies found")
        except Exception as e:
            log.error(f"Failed to parse as Netscape: {e}")
            return None
    if not cookies_dict:
        log.error(f"No cookies found in dictionary for {file_path}")
        return None
    log.debug(f"Cookie names found: {list(cookies_dict.keys())}")
    cookie_string = dict_to_cookie_string(cookies_dict)
    log.debug(f"Final cookie string length: {len(cookie_string)}")
    return cookie_string