The aim of the scripts below is to scrape data from my local node and try to find a correlation between Tor and clearnet connections.
The script successfully fetches peer data from the victim node. However, the retrieved IP addresses tend to remain relatively constant between requests
- create_connection (creates a socket connection to my local node)
create_message (Defines Version Message details and then create version message using the payload) and send the message via our socketThis goes on until we have no new new_addresses
I wanted to collect as much data from both connections (tor and clearnet) as possible
My observation is that both clearnet and tor return the same addresses over and over, the range from the count is constantly between 253 and 260 even after running the script multiple times
 
        
        
            
                import socket
                import struct
                import time
                import random
                from hashlib import sha256
                import ipaddress
                from datetime import datetime
                import socks
                
                def decode_netaddr(data, with_time=True):
                    pos = 0
                    if with_time:
                        timestamp = struct.unpack('<I', data[pos:pos+4])[0]
                        pos += 4
                    else:
                        timestamp = int(time.time())
                    
                    services = struct.unpack('<Q', data[pos:pos+8])[0]
                    pos += 8
                    ipbytes = data[pos:pos+16]
                    pos += 16
                
                    if ipbytes.startswith(b'\x00' * 10 + b'\xff\xff'):
                        ip = str(ipaddress.IPv4Address(ipbytes[-4:]))
                    else:
                        ip = str(ipaddress.IPv6Address(ipbytes))
                    
                    port = struct.unpack('>H', data[pos:pos+2])[0]
                
                    return {
                        'timestamp': datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S'),
                        'services': f"0x{services:x}",
                        'ip': ip,
                        'port': port
                    }
                
                def decode_addr_message(payload):
                    addresses = []
                    pos = 0
                    # count of address 
                    count = int(payload[pos])
                    pos += 1
                    for _ in range(count):
                        if pos + 30 > len(payload):
                            break
                        
                        addr = decode_netaddr(payload[pos:pos+30])
                        addresses.append(addr)
                        pos += 30
                    return addresses
                
                
                def create_message(command, payload=b''):
                    magic = 0xD9B4BEF9
                    command_bytes = command.encode('ascii') + b'\0' * (12 - len(command))
                    length = len(payload)
                    
                    # Checksum is first 4 bytes of double SHA256 of payload
                    if length > 0:
                        checksum = sha256(sha256(payload).digest()).digest()[:4]
                    else:
                        checksum = b'\x5D\xF6\xE0\xE2'  # Checksum for empty payload
                    # Construct message
                    message = struct.pack('<L12sL4s', magic, command_bytes, length, checksum)
                    if payload:
                        message += payload
                    return message
                
                
                def parse_message_header(data):
                    if len(data) < 24:  # Header is 24 bytes
                        return None, None, None, None
                    magic, command, length, checksum = struct.unpack('<L12sL4s', data[:24])
                    command = command.strip(b'\0').decode('ascii')
                    return magic, command, length, checksum
                
                
                def create_connection(use_tor=False, target_host='127.0.0.1', port=8333):
                    """Create either a regular socket or Tor socket connection"""
                    if use_tor:
                        sock = socks.socksocket()
                        sock.set_proxy(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9050)
                    else:
                        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                    sock.settimeout(30)
                    return sock
                
                
                def send_getaddr(use_tor=False, target_host='127.0.0.1', port=8333):
                    addresses = []
                    try:
                        sock = create_connection(use_tor)
                        print(f"Connecting to {target_host}:{port} via {'Tor' if use_tor else 'Clearnet'}...")
                        sock.connect((target_host, port))
                
                        # sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                        # sock.settimeout(30)
                        # print("Connecting to node...")
                        # sock.connect(('127.0.0.1', 8333))
                        
                        # Send version message
                        version = 70015
                        services = 0
                        timestamp = int(time.time())
                        addr_recv_services = struct.pack('<Q', services)
                        addr_trans_services = struct.pack('<Q', services)
                        addr_recv_ip = b'\x00' * 10 + b'\xff\xff' + socket.inet_aton('127.0.0.1')
                        addr_recv_port = struct.pack('>H', 8333)
                        addr_trans_ip = b'\x00' * 10 + b'\xff\xff' + socket.inet_aton('127.0.0.1')
                        addr_trans_port = struct.pack('>H', 8333)
                        
                        nonce = random.getrandbits(64)
                        user_agent = b"/TestNode:0.0.1/"
                        start_height = 0
                        relay = True
                
                        version_payload = struct.pack('<IQQ', version, services, timestamp)
                        version_payload += addr_recv_services + addr_recv_ip + addr_recv_port
                        version_payload += addr_trans_services + addr_trans_ip + addr_trans_port
                        version_payload += struct.pack('<Q', nonce)
                        version_payload += bytes([len(user_agent)]) + user_agent
                        version_payload += struct.pack('<I?', start_height, relay)
                
                        print("Sending version message...")
                        sock.send(create_message('version', version_payload))
                        
                        # Process responses
                        buffer = b''
                        handshake_complete = False
                        addresses = []
                        
                        while True:
                            data = sock.recv(1024)
                            if not data:
                                break
                                
                            buffer += data
                            while len(buffer) >= 24:  # Header size
                                magic, command, length = struct.unpack('<L12sL', buffer[:20])
                                command = command.strip(b'\0').decode('ascii')
                                
                                if len(buffer) < 24 + length:
                                    break
                                    
                                message = buffer[24:24+length]
                                buffer = buffer[24+length:]
                                
                                print(f"Received command: {command}")
                                
                                if command == 'version':
                                    print("Sending verack...")
                                    sock.send(create_message('verack'))
                                elif command == 'verack':
                                    if not handshake_complete:
                                        print("Sending getaddr...")
                                        sock.send(create_message('getaddr'))
                                        handshake_complete = True
                                elif command == 'addr':
                                    new_addresses = decode_addr_message(message)
                                    addresses.extend(new_addresses)
                                    print(f"Decoded {len(new_addresses)} addresses")
                                    for addr in new_addresses:
                                        print(f"\nIP: {addr['ip']}:{addr['port']}")
                                        print(f"Time: {addr['timestamp']}")
                                        print(f"Services: {addr['services']}")
                                    
                    except Exception as e:
                        print(f"Error: {str(e)}")
                    finally:
                        sock.close()
                        print("Connection closed")
                        return addresses
                
                if __name__ == "__main__":
                    # Test clearnet
                    print("Testing clearnet connection...")
                    clearnet_addresses = send_getaddr(use_tor=False)
                    print("\nTotal clearnet addresses received:", len(clearnet_addresses))
                
                    # Test Tor (if you have an onion address to test)
                    print("\nTesting Tor connection...")
                    tor_addresses = send_getaddr(use_tor=True, target_host='hs26bmwa22yst7s7eyolyerj3ffiy6e26ayhw7g3qzlunndiwaty7ryd.onion')
                    print("\nTotal Tor addresses received:", len(tor_addresses))
                            
        
        
        
                
                    IP: 0:2600:1700:22d9:981f:e8af:1273:3e5b:9555                                                                                                                                            
                    Time: 2056-05-01 03:47:28                                                                                                                                                                
                    Services: 0xc09673d                                                                                                                                                                      
                                                                                                                                                                                                            
                    IP: ::ffff:4d39:21072                                                                                                                                                                    
                    Time: 2030-07-01 22:42:24                                                                                                                                                                
                    Services: 0xc496732                                                                                                                                                                      
                                                                                                                                                                                                            
                    IP: 0:2409:8a55:213:1060:73ef:9f47:beff:65089                                                                                                                                            
                    Time: 2091-02-22 04:53:36                                                                                                                                                                
                    Services: 0xc086748                                                                                                                                                                      
                                                                                                                                                                                                            
                    IP: 0:2001:1c01:3016:f700:ecc4:dca6:bcdb:58440                                                                                                                                           
                    Time: 2022-05-29 18:11:28                                                                                                                                                                
                    Services: 0x4086738                                                                                                                                                                      
                                                                                                                                                                                                            
                    IP: ::ffff:5004:6335                                                                                                                                                                     
                    Time: 2001-07-14 03:06:56                                                                                                                                                                
                    Services: 0xc08673e                                                                                                                                                                      
                                                                                                                                                                                                            
                    IP: ::ffff:179a:45340                                                                                                                                                                    
                    Time: 2034-04-27 14:53:04                                                                                                                                                                
                    Services: 0xc7d674d                                                                                                                                                                      
                    Error: timed out                                                                                                                                                                         
                    Connection closed                                                                                                                                                                        
                                                                                                                                                                                                            
                    Total clearnet addresses received: 253                                                                                                                                                   
                                                                                                                                                                                                            
                    Testing Tor connection...                                                                                                                                                                
                    Connecting to hs26bmwa22yst7s7eyolyerj3ffiy6e26ayhw7g3qzlunndiwaty7ryd.onion:8333 via Tor...                                                                                             
                    Sending version message...                                                                                                                                                               
                    Received command: version                                                                                                                                                                
                    Sending verack...                                                                                                                                                                        
                    Received command: verack                                                                                                                                                                 
                    Sending getaddr...                                                                                                                                                                       
                    Received command: sendcmpct                                                                                                                                                              
                    Received command: ping                                                                                                                                                                   
                    Received command: feefilter                                                                                                                                                              
                    Received command: addr                                                                                                                                                                   
                    Decoded 253 addresses 
                    IP: ::ffff:4832:1365
                    Time: 2077-03-27 09:43:44
                    Services: 0x4096740
                    
                    IP: 0:2600:1700:22d9:981f:e8af:1273:3e5b:9555
                    Time: 2056-05-01 03:47:28
                    Services: 0xc09673d
                    
                    IP: ::ffff:4d39:21072
                    Time: 2030-07-01 22:42:24
                    Services: 0xc496732
                    
                    IP: 0:2409:8a55:213:1060:73ef:9f47:beff:65089
                    Time: 2091-02-22 04:53:36
                    Services: 0xc086748
                    
                    IP: 0:2001:1c01:3016:f700:ecc4:dca6:bcdb:58440
                    Time: 2022-05-29 18:11:28
                    Services: 0x4086738
                    
                    IP: ::ffff:5004:6335
                    Time: 2001-07-14 03:06:56
                    Services: 0xc08673e
                    
                    IP: ::ffff:179a:45340
                    Time: 2034-04-27 14:53:04
                    Services: 0xc7d674d
                    Received command: addr
                    Decoded 2 addresses
                    
                    IP: 84.32.186.158:8333
                    Time: 2024-12-03 23:00:57
                    Services: 0x409
                    
                    IP: 209.204.29.11:8333
                    Time: 2024-12-03 22:56:22
                    Services: 0x409
                    Error: timed out
                    Connection closed
                    
                    Total Tor addresses received: 255
                
            
        
            
                from datetime import datetime
                import numpy as np
                from collections import defaultdict
                from getaddr_test import send_getaddr    
                import time
                
                
                def collect_samples(delay_between=60, num_samples=5):
                    """Collect multiple samples from both clearnet and Tor"""
                    all_samples = []
                    
                    for i in range(num_samples):
                        print(f"\nCollecting sample {i+1}/{num_samples}")
                        sample = {
                            'timestamp': datetime.now(),
                            'clearnet': send_getaddr(use_tor=False),
                            'tor': send_getaddr(
                                use_tor=True, 
                                target_host='hs26bmwa22yst7s7eyolyerj3ffiy6e26ayhw7g3qzlunndiwaty7ryd.onion'
                            )
                        }
                        all_samples.append(sample)
                        
                        if i < num_samples - 1:
                            print(f"Waiting {delay_between} seconds...")
                            time.sleep(delay_between)
                    
                    return all_samples
                
                def analyze_timestamps(addresses, time_threshold=300):
                    timestamp_groups = defaultdict(list)
                    
                    for addr in addresses:
                        try:
                            # Parse the timestamp string into a datetime object
                            dt = datetime.strptime(addr['timestamp'], '%Y-%m-%d %H:%M:%S')
                            unix_ts = int(dt.timestamp())
                            
                            timestamp_groups[unix_ts].append({
                                'ip': addr['ip'],
                                'port': addr['port'],
                                'services': addr['services']
                            })
                        except (ValueError, TypeError) as e:
                            print(f"Error processing timestamp for address: {addr}")
                            continue
                    
                    print(f"\nAnalyzing {len(addresses)} addresses for timestamp correlations...")
                    print(f"Using time threshold of {time_threshold} seconds")
                    
                    # Find related timestamps
                    correlated_nodes = []
                    timestamps = sorted(timestamp_groups.keys())
                    
                    for i in range(len(timestamps)):
                        current_group = []
                        base_ts = timestamps[i]
                        
                        for j in range(i + 1, len(timestamps)):
                            if timestamps[j] - base_ts <= time_threshold:
                                if not current_group:
                                    current_group.extend(timestamp_groups[base_ts])
                                current_group.extend(timestamp_groups[timestamps[j]])
                            else:
                                break
                        
                        if current_group:
                            correlated_nodes.append({
                                'timestamp': datetime.fromtimestamp(base_ts).strftime('%Y-%m-%d %H:%M:%S'),
                                'nodes': current_group,
                                'count': len(current_group)
                            })
                    
                    # Print findings
                    if correlated_nodes:
                        print("\nFound potentially correlated nodes:")
                        for group in correlated_nodes:
                            print(f"\nTimestamp cluster at {group['timestamp']} with {group['count']} nodes:")
                            for node in group['nodes'][:5]:  # Show first 5 nodes in each group
                                print(f"  IP: {node['ip']}:{node['port']} Services: {node['services']}")
                            if len(group['nodes']) > 5:
                                print(f"  ... and {len(group['nodes']) - 5} more nodes")
                    else:
                        print("\nNo suspicious timestamp correlations found")
                    
                    return correlated_nodes
                
                
                def detect_fingerprinting(addresses):
                    print("\nAnalyzing for potential fingerprinting patterns...")
                    
                    # Group by /16 subnets
                    subnet_groups = defaultdict(list)
                    for addr in addresses:
                        try:
                            if '::ffff:' in addr['ip']:  # Handle IPv4-mapped-IPv6 addresses
                                ip_parts = addr['ip'].split(':')[-1].split('.')
                                subnet = '.'.join(ip_parts[:2])
                                subnet_groups[subnet].append(addr)
                        except Exception as e:
                            print(f"Error processing IP address: {addr}")
                            continue
                    
                    suspicious_patterns = []
                    
                    for subnet, nodes in subnet_groups.items():
                        if len(nodes) < 3:  # Need at least 3 nodes for meaningful analysis
                            continue
                        
                        # Analyze timestamp patterns
                        timestamps = []
                        for node in nodes:
                            try:
                                dt = datetime.strptime(node['timestamp'], '%Y-%m-%d %H:%M:%S')
                                timestamps.append(int(dt.timestamp()))
                            except (ValueError, TypeError) as e:
                                print(f"Error processing timestamp in subnet analysis: {node}")
                                continue
                        
                        if len(timestamps) < 3:  # Skip if we don't have enough valid timestamps
                            continue
                            
                        timestamps.sort()
                        time_diffs = np.diff(timestamps)
                        
                        if len(time_diffs) > 0:
                            mean_diff = np.mean(time_diffs)
                            std_diff = np.std(time_diffs)
                            
                            # Check for suspiciously regular patterns
                            if std_diff < mean_diff * 0.1:
                                suspicious_patterns.append({
                                    'subnet': subnet,
                                    'node_count': len(nodes),
                                    'mean_time_diff': mean_diff,
                                    'std_diff': std_diff,
                                    'nodes': nodes
                                })
                    
                    # Print findings
                    if suspicious_patterns:
                        print("\nFound suspicious patterns that might indicate fingerprinting:")
                        for pattern in suspicious_patterns:
                            print(f"\nSubnet: {pattern['subnet']}")
                            print(f"Node count: {pattern['node_count']}")
                            print(f"Mean time difference: {pattern['mean_time_diff']:.2f} seconds")
                            print(f"Time difference standard deviation: {pattern['std_diff']:.2f}")
                            print("Sample nodes:")
                            for node in pattern['nodes'][:3]:  # Show first 3 nodes
                                print(f"  IP: {node['ip']}:{node['port']} Time: {node['timestamp']}")
                    else:
                        print("\nNo suspicious fingerprinting patterns detected")
                    
                    return suspicious_patterns
                
                
                
                def analyze_network_differences(samples):
                    """Analyze differences between clearnet and Tor responses"""
                    for sample in samples:
                        clearnet_addrs = sample['clearnet']
                        tor_addrs = sample['tor']
                        
                        print(f"\nAnalyzing sample from {sample['timestamp']}:")
                        print(f"Clearnet addresses: {len(clearnet_addrs)}")
                        print(f"Tor addresses: {len(tor_addrs)}")
                        
                        # Analyze each network separately
                        print("\nClearnet Analysis:")
                        analyze_timestamps(clearnet_addrs)
                        detect_fingerprinting(clearnet_addrs)
                        
                        print("\nTor Analysis:")
                        analyze_timestamps(tor_addrs)
                        detect_fingerprinting(tor_addrs)
                
                def analyze_cross_network_correlations(clearnet_addrs, tor_addrs, time_threshold=300):
                    """
                    Compare addresses between clearnet and Tor to find correlations
                    """
                    print("\nAnalyzing cross-network correlations...")
                    print(f"Comparing {len(clearnet_addrs)} clearnet addresses with {len(tor_addrs)} Tor addresses")
                    
                    # Store timestamps and services for both networks
                    correlations = []
                    
                    for c_addr in clearnet_addrs:
                        c_time = datetime.strptime(c_addr['timestamp'], '%Y-%m-%d %H:%M:%S')
                        c_unix = int(c_time.timestamp())
                        c_services = c_addr['services']
                        
                        for t_addr in tor_addrs:
                            t_time = datetime.strptime(t_addr['timestamp'], '%Y-%m-%d %H:%M:%S')
                            t_unix = int(t_time.timestamp())
                            t_services = t_addr['services']
                            
                            # Check for similar timestamps and matching services
                            if abs(c_unix - t_unix) <= time_threshold:
                                correlations.append({
                                    'clearnet_addr': c_addr,
                                    'tor_addr': t_addr,
                                    'time_difference': abs(c_unix - t_unix),
                                    'services_match': c_services == t_services
                                })
                
                    if correlations:
                        print(f"\nFound {len(correlations)} cross-network correlations:")
                        for corr in correlations:
                            print(f"\nCorrelation:")
                            print(f"Clearnet: {corr['clearnet_addr']['ip']}:{corr['clearnet_addr']['port']}")
                            print(f"Tor: {corr['tor_addr']['ip']}:{corr['tor_addr']['port']}")
                            print(f"Time difference: {corr['time_difference']} seconds")
                            print(f"Services match: {corr['services_match']}")
                            print(f"Clearnet services: {corr['clearnet_addr']['services']}")
                            print(f"Tor services: {corr['tor_addr']['services']}")
                    else:
                        print("\nNo cross-network correlations found")
                    
                    return correlations
                
                if __name__ == "__main__":
                    print("Starting data collection...")
                    samples = collect_samples(delay_between=300, num_samples=3)
                    
                    all_correlations = []
                    for sample in samples:
                        print(f"\nAnalyzing sample from {sample['timestamp']}:")
                        
                        # Cross-network analysis
                        correlations = analyze_cross_network_correlations(
                            sample['clearnet'],
                            sample['tor']
                        )
                        all_correlations.extend(correlations)
                        
                        # Individual network analysis
                        analyze_network_differences(samples)
                
                    # Final summary
                    print("\nFinal Analysis Summary:")
                    print(f"Total samples analyzed: {len(samples)}")
                    print(f"Total correlations found: {len(all_correlations)}")
                    
                    if all_correlations:
                        matching_services = sum(1 for c in all_correlations if c['services_match'])
                        print(f"Correlations with matching services: {matching_services}")
                        avg_time_diff = sum(c['time_difference'] for c in all_correlations) / len(all_correlations)
                        print(f"Average time difference: {avg_time_diff:.2f} seconds")
            
        
    
        
    Received 253 addresses
    Analyzing 253 addresses for timestamp correlations...
    Using time threshold of 300 seconds
    No suspicious timestamp correlations found
    Analyzing for potential fingerprinting patterns...
    No suspicious fingerprinting patterns detected
    Analysis Summary:
    Total addresses analyzed: 253
    Correlated timestamp groups found: 0
    Suspicious fingerprinting patterns found: 0
    No suspicious timestamp correlations found
    Analyzing for potential fingerprinting patterns...
    No suspicious fingerprinting patterns detected
    Analyzing sample from 2024-12-03 15:36:47.778225:
    Clearnet addresses: 253
    Tor addresses: 0
    Clearnet Analysis:
    Analyzing 253 addresses for timestamp correlations...
    Using time threshold of 300 seconds
    No suspicious timestamp correlations found
    Analyzing for potential fingerprinting patterns...
    No suspicious fingerprinting patterns detected
    Tor Analysis:
    Analyzing 0 addresses for timestamp correlations...
    Using time threshold of 300 seconds
    No suspicious timestamp correlations found
    Analyzing for potential fingerprinting patterns...
    No suspicious fingerprinting patterns detected
    Final Analysis Summary:
    Total samples analyzed: 3
    Total correlations found: 0