The aim of the scripts below is to scrape peer data from my local node and try to find a correlation between its Tor and clearnet connections.
The script successfully fetches peer data from the victim node. However, the retrieved IP addresses tend to remain relatively constant between requests.
- create_connection: creates a socket connection to my local node.
- create_message: builds a protocol message from a command name and a payload. The script defines the version message details, creates the version message from that payload, and sends it via our socket. This goes on until we have no new addresses (`new_addresses`).
I wanted to collect as much data as possible from both connections (Tor and clearnet).
My observation is that both clearnet and Tor return the same addresses over and over; the address count stays between 253 and 260, even after running the script multiple times.
import socket
import struct
import time
import random
from hashlib import sha256
import ipaddress
from datetime import datetime
import socks
def decode_netaddr(data, with_time=True):
    """Decode one serialized network-address entry.

    Layout read from `data`: an optional 4-byte little-endian timestamp
    (only when `with_time` is true), an 8-byte little-endian services
    bitfield, 16 address bytes and a 2-byte big-endian port.

    Returns a dict with the keys 'timestamp' (local time formatted as
    '%Y-%m-%d %H:%M:%S'), 'services' (hex string), 'ip' and 'port'.
    """
    offset = 0
    if with_time:
        (raw_time,) = struct.unpack_from('<I', data, offset)
        offset += 4
    else:
        # No timestamp on the wire: fall back to the current time.
        raw_time = int(time.time())

    (service_bits,) = struct.unpack_from('<Q', data, offset)
    offset += 8

    address_bytes = data[offset:offset + 16]
    offset += 16

    # Addresses carrying the 00..00 ff ff prefix are IPv4-mapped; their
    # last four bytes are the IPv4 address.
    mapped_prefix = b'\x00' * 10 + b'\xff\xff'
    if address_bytes[:len(mapped_prefix)] == mapped_prefix:
        host = str(ipaddress.IPv4Address(address_bytes[-4:]))
    else:
        host = str(ipaddress.IPv6Address(address_bytes))

    # The port is the only big-endian field of the entry.
    (port_number,) = struct.unpack_from('>H', data, offset)

    readable_time = datetime.fromtimestamp(raw_time).strftime('%Y-%m-%d %H:%M:%S')
    return {
        'timestamp': readable_time,
        'services': f"0x{service_bits:x}",
        'ip': host,
        'port': port_number,
    }
def _read_compact_size(payload, pos=0):
    """Read a CompactSize (variable-length) integer from `payload` at `pos`.

    A first byte below 0xFD is the value itself; 0xFD, 0xFE and 0xFF
    announce a little-endian integer of 2, 4 and 8 bytes respectively.

    Returns `(value, next_pos)`, or `(None, pos)` when `payload` is too
    short to contain the integer.
    """
    if pos >= len(payload):
        return None, pos
    first = payload[pos]
    if first < 0xFD:
        return first, pos + 1
    width = {0xFD: 2, 0xFE: 4, 0xFF: 8}[first]
    end = pos + 1 + width
    if end > len(payload):
        return None, pos
    return int.from_bytes(payload[pos + 1:end], 'little'), end


def decode_addr_message(payload):
    """Decode the payload of an `addr` message into a list of address dicts.

    The payload starts with a CompactSize entry count followed by
    30-byte entries (timestamp, services, address, port), each decoded
    with `decode_netaddr`.

    Reading the count as a single byte is wrong for 253 or more entries:
    the first byte is then the 0xFD marker, not the count, and every
    entry would be decoded two bytes out of phase.

    Decoding is best-effort: an empty or truncated payload yields the
    entries that could be read completely (possibly none).
    """
    entry_size = 30
    count, pos = _read_compact_size(payload)
    if count is None:
        return []

    addresses = []
    for _ in range(count):
        # Stop at the first incomplete entry instead of decoding garbage.
        if pos + entry_size > len(payload):
            break
        addresses.append(decode_netaddr(payload[pos:pos + entry_size]))
        pos += entry_size
    return addresses
def create_message(command, payload=b''):
    """Serialize a P2P message: 24-byte header followed by `payload`.

    Header fields, in order: little-endian magic 0xD9B4BEF9, the ASCII
    command padded with NUL bytes to 12 bytes, the little-endian payload
    length, and a 4-byte checksum.
    """
    network_magic = 0xD9B4BEF9
    name_field = command.encode('ascii').ljust(12, b'\0')
    payload_size = len(payload)

    # The checksum is the first four bytes of SHA256(SHA256(payload)).
    # For an empty payload this evaluates to 5D F6 E0 E2, so no special
    # case is required.
    digest = sha256(sha256(payload if payload_size > 0 else b'').digest()).digest()
    header = struct.pack('<L12sL4s', network_magic, name_field, payload_size, digest[:4])

    return header + payload if payload else header
def parse_message_header(data):
    """Split the 24-byte message header at the start of `data`.

    Returns `(magic, command, length, checksum)`, where `command` is the
    ASCII command name with NUL padding removed. When fewer than 24
    bytes are available, all four values are None.
    """
    header_size = 24
    if len(data) < header_size:
        return (None,) * 4

    magic, padded_name, length, checksum = struct.unpack_from('<L12sL4s', data)
    name = padded_name.strip(b'\0').decode('ascii')
    return magic, name, length, checksum
def create_connection(use_tor=False, target_host='127.0.0.1', port=8333):
    """Return an unconnected socket with a 30-second timeout.

    With `use_tor` set, the socket is a SOCKS5 socket routed through the
    proxy at 127.0.0.1:9050; otherwise it is a plain TCP/IPv4 socket.
    `target_host` and `port` are accepted but not used here: the caller
    performs the actual connect.
    """
    if not use_tor:
        conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    else:
        conn = socks.socksocket()
        conn.set_proxy(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9050)

    conn.settimeout(30)
    return conn
def _build_version_payload(port):
    """Serialize the payload of the `version` message that opens the handshake.

    `port` is advertised as the port of the receiving node.
    """
    version = 70015
    services = 0
    timestamp = int(time.time())

    # Both address fields carry 127.0.0.1 in IPv4-mapped form.
    mapped_localhost = b'\x00' * 10 + b'\xff\xff' + socket.inet_aton('127.0.0.1')
    addr_recv = struct.pack('<Q', services) + mapped_localhost + struct.pack('>H', port)
    addr_trans = struct.pack('<Q', services) + mapped_localhost + struct.pack('>H', 8333)

    nonce = random.getrandbits(64)
    user_agent = b"/TestNode:0.0.1/"
    start_height = 0
    relay = True

    return b''.join([
        struct.pack('<IQQ', version, services, timestamp),
        addr_recv,
        addr_trans,
        struct.pack('<Q', nonce),
        bytes([len(user_agent)]) + user_agent,
        struct.pack('<I?', start_height, relay),
    ])


def send_getaddr(use_tor=False, target_host='127.0.0.1', port=8333):
    """Handshake with a node, send `getaddr` and collect the returned addresses.

    Args:
        use_tor: connect through the Tor SOCKS proxy instead of directly.
        target_host: host name or address of the node to query.
        port: TCP port of the node.

    Returns:
        The list of address dicts decoded from every `addr` message
        received before the connection ended. Errors are printed, not
        raised; whatever was collected up to that point is returned.

    The receive loop has no explicit stop condition: it ends when the
    peer closes the connection or when a read raises (including the
    socket timeout), which is reported through the generic error print.
    """
    addresses = []
    sock = None  # stays None if creating the socket itself fails
    try:
        sock = create_connection(use_tor)
        print(f"Connecting to {target_host}:{port} via {'Tor' if use_tor else 'Clearnet'}...")
        sock.connect((target_host, port))

        print("Sending version message...")
        # sendall: a plain send() may transmit only part of the message.
        sock.sendall(create_message('version', _build_version_payload(port)))

        buffer = b''
        handshake_complete = False
        while True:
            data = sock.recv(1024)
            if not data:
                break
            buffer += data

            # Consume every complete message currently buffered.
            while len(buffer) >= 24:  # Header size
                _magic, command, length, _checksum = parse_message_header(buffer)
                if len(buffer) < 24 + length:
                    break  # payload not fully received yet
                message = buffer[24:24 + length]
                buffer = buffer[24 + length:]
                print(f"Received command: {command}")

                if command == 'version':
                    print("Sending verack...")
                    sock.sendall(create_message('verack'))
                elif command == 'verack':
                    # Request addresses once, right after the handshake.
                    if not handshake_complete:
                        print("Sending getaddr...")
                        sock.sendall(create_message('getaddr'))
                        handshake_complete = True
                elif command == 'addr':
                    new_addresses = decode_addr_message(message)
                    addresses.extend(new_addresses)
                    print(f"Decoded {len(new_addresses)} addresses")
                    for addr in new_addresses:
                        print(f"\nIP: {addr['ip']}:{addr['port']}")
                        print(f"Time: {addr['timestamp']}")
                        print(f"Services: {addr['services']}")
    except Exception as e:
        print(f"Error: {str(e)}")
    finally:
        if sock is not None:
            sock.close()
            print("Connection closed")
    return addresses
if __name__ == "__main__":
    # Onion service queried for the Tor run.
    onion_host = 'hs26bmwa22yst7s7eyolyerj3ffiy6e26ayhw7g3qzlunndiwaty7ryd.onion'

    # Clearnet run against the default target.
    print("Testing clearnet connection...")
    clearnet_addresses = send_getaddr(use_tor=False)
    clearnet_total = len(clearnet_addresses)
    print("\nTotal clearnet addresses received:", clearnet_total)

    # Tor run (requires an onion address to test against).
    print("\nTesting Tor connection...")
    tor_addresses = send_getaddr(use_tor=True, target_host=onion_host)
    tor_total = len(tor_addresses)
    print("\nTotal Tor addresses received:", tor_total)
IP: 0:2600:1700:22d9:981f:e8af:1273:3e5b:9555
Time: 2056-05-01 03:47:28
Services: 0xc09673d
IP: ::ffff:4d39:21072
Time: 2030-07-01 22:42:24
Services: 0xc496732
IP: 0:2409:8a55:213:1060:73ef:9f47:beff:65089
Time: 2091-02-22 04:53:36
Services: 0xc086748
IP: 0:2001:1c01:3016:f700:ecc4:dca6:bcdb:58440
Time: 2022-05-29 18:11:28
Services: 0x4086738
IP: ::ffff:5004:6335
Time: 2001-07-14 03:06:56
Services: 0xc08673e
IP: ::ffff:179a:45340
Time: 2034-04-27 14:53:04
Services: 0xc7d674d
Error: timed out
Connection closed
Total clearnet addresses received: 253
Testing Tor connection...
Connecting to hs26bmwa22yst7s7eyolyerj3ffiy6e26ayhw7g3qzlunndiwaty7ryd.onion:8333 via Tor...
Sending version message...
Received command: version
Sending verack...
Received command: verack
Sending getaddr...
Received command: sendcmpct
Received command: ping
Received command: feefilter
Received command: addr
Decoded 253 addresses
IP: ::ffff:4832:1365
Time: 2077-03-27 09:43:44
Services: 0x4096740
IP: 0:2600:1700:22d9:981f:e8af:1273:3e5b:9555
Time: 2056-05-01 03:47:28
Services: 0xc09673d
IP: ::ffff:4d39:21072
Time: 2030-07-01 22:42:24
Services: 0xc496732
IP: 0:2409:8a55:213:1060:73ef:9f47:beff:65089
Time: 2091-02-22 04:53:36
Services: 0xc086748
IP: 0:2001:1c01:3016:f700:ecc4:dca6:bcdb:58440
Time: 2022-05-29 18:11:28
Services: 0x4086738
IP: ::ffff:5004:6335
Time: 2001-07-14 03:06:56
Services: 0xc08673e
IP: ::ffff:179a:45340
Time: 2034-04-27 14:53:04
Services: 0xc7d674d
Received command: addr
Decoded 2 addresses
IP: 84.32.186.158:8333
Time: 2024-12-03 23:00:57
Services: 0x409
IP: 209.204.29.11:8333
Time: 2024-12-03 22:56:22
Services: 0x409
Error: timed out
Connection closed
Total Tor addresses received: 255
from datetime import datetime
import numpy as np
from collections import defaultdict
from getaddr_test import send_getaddr
import time
def collect_samples(delay_between=60, num_samples=5):
    """Run `num_samples` rounds of a clearnet query followed by a Tor query.

    Each round is stored as a dict with the keys 'timestamp' (when the
    round started), 'clearnet' and 'tor' (the two `send_getaddr`
    results). The function sleeps `delay_between` seconds between
    rounds, but not after the last one.
    """
    onion_host = 'hs26bmwa22yst7s7eyolyerj3ffiy6e26ayhw7g3qzlunndiwaty7ryd.onion'
    collected = []
    for round_index in range(num_samples):
        print(f"\nCollecting sample {round_index + 1}/{num_samples}")

        started_at = datetime.now()
        clearnet_result = send_getaddr(use_tor=False)
        tor_result = send_getaddr(use_tor=True, target_host=onion_host)
        collected.append({
            'timestamp': started_at,
            'clearnet': clearnet_result,
            'tor': tor_result,
        })

        is_last_round = not (round_index < num_samples - 1)
        if is_last_round:
            continue
        print(f"Waiting {delay_between} seconds...")
        time.sleep(delay_between)
    return collected
def analyze_timestamps(addresses, time_threshold=300):
    """Report clusters of addresses whose timestamps lie close together.

    Addresses are bucketed by their timestamp (parsed from the
    '%Y-%m-%d %H:%M:%S' string). For every distinct timestamp, all later
    timestamps at most `time_threshold` seconds away are merged with it
    into one cluster; a timestamp without such a neighbour produces no
    cluster. Entries with an unparsable timestamp are reported and
    skipped.

    Returns a list of dicts with the keys 'timestamp' (cluster start),
    'nodes' (ip/port/services dicts) and 'count'.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    by_second = defaultdict(list)
    for entry in addresses:
        try:
            # Convert the formatted time back to whole epoch seconds.
            epoch = int(datetime.strptime(entry['timestamp'], time_format).timestamp())
            by_second[epoch].append(
                {field: entry[field] for field in ('ip', 'port', 'services')}
            )
        except (ValueError, TypeError):
            print(f"Error processing timestamp for address: {entry}")
            continue

    print(f"\nAnalyzing {len(addresses)} addresses for timestamp correlations...")
    print(f"Using time threshold of {time_threshold} seconds")

    # Walk the distinct timestamps in ascending order and gather each
    # one's close followers.
    ordered = sorted(by_second)
    clusters = []
    for index, anchor in enumerate(ordered):
        members = []
        for later in ordered[index + 1:]:
            if not (later - anchor <= time_threshold):
                break  # sorted input: everything after is further away
            if not members:
                # Seed the cluster with the anchor's own nodes once.
                members.extend(by_second[anchor])
            members.extend(by_second[later])
        if not members:
            continue
        clusters.append({
            'timestamp': datetime.fromtimestamp(anchor).strftime(time_format),
            'nodes': members,
            'count': len(members),
        })

    if not clusters:
        print("\nNo suspicious timestamp correlations found")
        return clusters

    print("\nFound potentially correlated nodes:")
    for cluster in clusters:
        print(f"\nTimestamp cluster at {cluster['timestamp']} with {cluster['count']} nodes:")
        shown = cluster['nodes'][:5]  # only the first 5 nodes are listed
        for node in shown:
            print(f" IP: {node['ip']}:{node['port']} Services: {node['services']}")
        hidden = len(cluster['nodes']) - 5
        if hidden > 0:
            print(f" ... and {hidden} more nodes")
    return clusters
def detect_fingerprinting(addresses):
    """Flag /16 IPv4 subnets whose node timestamps are suspiciously evenly spaced.

    IPv4 addresses are grouped by their first two octets. Both plain
    dotted-quad strings and IPv4-mapped IPv6 strings ('::ffff:a.b.c.d'
    or its hex form) are recognised; native IPv6 addresses are ignored.
    A substring test for '::ffff:' is not used because it misses plain
    dotted-quad strings and mis-splits hex-form mapped addresses.

    For each subnet with at least 3 nodes (and 3 parsable timestamps),
    the gaps between sorted timestamps are computed; the subnet is
    flagged when the standard deviation of the gaps is below 10% of
    their mean.

    Returns a list of dicts with the keys 'subnet', 'node_count',
    'mean_time_diff', 'std_diff' and 'nodes'.
    """
    # Imported locally: ipaddress is not among this script's imports.
    import ipaddress

    print("\nAnalyzing for potential fingerprinting patterns...")

    # Group by /16 subnets
    subnet_groups = defaultdict(list)
    for addr in addresses:
        try:
            parsed = ipaddress.ip_address(addr['ip'])
        except (KeyError, TypeError, ValueError):
            print(f"Error processing IP address: {addr}")
            continue
        ipv4 = parsed if parsed.version == 4 else parsed.ipv4_mapped
        if ipv4 is None:
            continue  # native IPv6: no /16 IPv4 subnet to assign
        subnet = '.'.join(str(ipv4).split('.')[:2])
        subnet_groups[subnet].append(addr)

    suspicious_patterns = []
    for subnet, nodes in subnet_groups.items():
        if len(nodes) < 3:  # Need at least 3 nodes for meaningful analysis
            continue

        timestamps = []
        for node in nodes:
            try:
                dt = datetime.strptime(node['timestamp'], '%Y-%m-%d %H:%M:%S')
                timestamps.append(int(dt.timestamp()))
            except (ValueError, TypeError):
                print(f"Error processing timestamp in subnet analysis: {node}")
                continue
        if len(timestamps) < 3:  # Skip if we don't have enough valid timestamps
            continue

        timestamps.sort()
        # At least 3 timestamps guarantees at least 2 gaps here.
        time_diffs = np.diff(timestamps)
        mean_diff = np.mean(time_diffs)
        std_diff = np.std(time_diffs)

        # Check for suspiciously regular patterns
        if std_diff < mean_diff * 0.1:
            suspicious_patterns.append({
                'subnet': subnet,
                'node_count': len(nodes),
                'mean_time_diff': mean_diff,
                'std_diff': std_diff,
                'nodes': nodes,
            })

    # Print findings
    if suspicious_patterns:
        print("\nFound suspicious patterns that might indicate fingerprinting:")
        for pattern in suspicious_patterns:
            print(f"\nSubnet: {pattern['subnet']}")
            print(f"Node count: {pattern['node_count']}")
            print(f"Mean time difference: {pattern['mean_time_diff']:.2f} seconds")
            print(f"Time difference standard deviation: {pattern['std_diff']:.2f}")
            print("Sample nodes:")
            for node in pattern['nodes'][:3]:  # Show first 3 nodes
                print(f" IP: {node['ip']}:{node['port']} Time: {node['timestamp']}")
    else:
        print("\nNo suspicious fingerprinting patterns detected")
    return suspicious_patterns
def analyze_network_differences(samples):
    """Run the timestamp and fingerprinting analyses on every sample.

    For each sample, the address counts of both networks are printed,
    then `analyze_timestamps` and `detect_fingerprinting` are run first
    on the clearnet addresses and then on the Tor addresses. Nothing is
    returned; the results are only printed by those functions.
    """
    for sample in samples:
        clearnet_addrs, tor_addrs = sample['clearnet'], sample['tor']

        print(f"\nAnalyzing sample from {sample['timestamp']}:")
        print(f"Clearnet addresses: {len(clearnet_addrs)}")
        print(f"Tor addresses: {len(tor_addrs)}")

        # Same two analyses per network, clearnet first.
        sections = (
            ("\nClearnet Analysis:", clearnet_addrs),
            ("\nTor Analysis:", tor_addrs),
        )
        for heading, network_addrs in sections:
            print(heading)
            analyze_timestamps(network_addrs)
            detect_fingerprinting(network_addrs)
def analyze_cross_network_correlations(clearnet_addrs, tor_addrs, time_threshold=300):
    """Pair clearnet and Tor addresses whose timestamps are close together.

    Every (clearnet, Tor) pair whose timestamps differ by at most
    `time_threshold` seconds is reported. Whether the two service
    strings are equal is recorded in the result but does not affect
    which pairs are selected.

    Args:
        clearnet_addrs: address dicts obtained over clearnet.
        tor_addrs: address dicts obtained over Tor.
        time_threshold: maximum timestamp distance in seconds.

    Returns:
        A list of dicts with the keys 'clearnet_addr', 'tor_addr',
        'time_difference' and 'services_match', ordered by clearnet
        address and then by Tor address.

    Raises:
        ValueError: if a timestamp is not in '%Y-%m-%d %H:%M:%S' format.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    def to_unix(addr):
        return int(datetime.strptime(addr['timestamp'], time_format).timestamp())

    print("\nAnalyzing cross-network correlations...")
    print(f"Comparing {len(clearnet_addrs)} clearnet addresses with {len(tor_addrs)} Tor addresses")

    # Parse each Tor timestamp once, not once per clearnet address:
    # strptime inside the nested loop would run len(clearnet) * len(tor) times.
    tor_times = [(to_unix(t_addr), t_addr) for t_addr in tor_addrs]

    correlations = []
    for c_addr in clearnet_addrs:
        c_unix = to_unix(c_addr)
        c_services = c_addr['services']
        for t_unix, t_addr in tor_times:
            time_difference = abs(c_unix - t_unix)
            # Selection is by timestamp distance only.
            if time_difference <= time_threshold:
                correlations.append({
                    'clearnet_addr': c_addr,
                    'tor_addr': t_addr,
                    'time_difference': time_difference,
                    'services_match': c_services == t_addr['services'],
                })

    if correlations:
        print(f"\nFound {len(correlations)} cross-network correlations:")
        for corr in correlations:
            print(f"\nCorrelation:")
            print(f"Clearnet: {corr['clearnet_addr']['ip']}:{corr['clearnet_addr']['port']}")
            print(f"Tor: {corr['tor_addr']['ip']}:{corr['tor_addr']['port']}")
            print(f"Time difference: {corr['time_difference']} seconds")
            print(f"Services match: {corr['services_match']}")
            print(f"Clearnet services: {corr['clearnet_addr']['services']}")
            print(f"Tor services: {corr['tor_addr']['services']}")
    else:
        print("\nNo cross-network correlations found")
    return correlations
if __name__ == "__main__":
    print("Starting data collection...")
    samples = collect_samples(delay_between=300, num_samples=3)

    # Cross-network analysis, one sample at a time.
    all_correlations = []
    for sample in samples:
        print(f"\nAnalyzing sample from {sample['timestamp']}:")
        all_correlations.extend(
            analyze_cross_network_correlations(sample['clearnet'], sample['tor'])
        )

    # Individual network analysis
    analyze_network_differences(samples)

    # Final summary
    total_correlations = len(all_correlations)
    print("\nFinal Analysis Summary:")
    print(f"Total samples analyzed: {len(samples)}")
    print(f"Total correlations found: {total_correlations}")
    if all_correlations:
        matching_services = len([c for c in all_correlations if c['services_match']])
        print(f"Correlations with matching services: {matching_services}")
        summed_diff = sum(c['time_difference'] for c in all_correlations)
        avg_time_diff = summed_diff / total_correlations
        print(f"Average time difference: {avg_time_diff:.2f} seconds")
Received 253 addresses
Analyzing 253 addresses for timestamp correlations...
Using time threshold of 300 seconds
No suspicious timestamp correlations found
Analyzing for potential fingerprinting patterns...
No suspicious fingerprinting patterns detected
Analysis Summary:
Total addresses analyzed: 253
Correlated timestamp groups found: 0
Suspicious fingerprinting patterns found: 0
No suspicious timestamp correlations found
Analyzing for potential fingerprinting patterns...
No suspicious fingerprinting patterns detected
Analyzing sample from 2024-12-03 15:36:47.778225:
Clearnet addresses: 253
Tor addresses: 0
Clearnet Analysis:
Analyzing 253 addresses for timestamp correlations...
Using time threshold of 300 seconds
No suspicious timestamp correlations found
Analyzing for potential fingerprinting patterns...
No suspicious fingerprinting patterns detected
Tor Analysis:
Analyzing 0 addresses for timestamp correlations...
Using time threshold of 300 seconds
No suspicious timestamp correlations found
Analyzing for potential fingerprinting patterns...
No suspicious fingerprinting patterns detected
Final Analysis Summary:
Total samples analyzed: 3
Total correlations found: 0