GitHunt
AL

Alvoradozerouno/ORION-Mechanistic-Consciousness

Mechanistic interpretability meets consciousness measurement — Feature analysis for consciousness. ORION ecosystem.

ORION Mechanistic Consciousness

License: MIT
Python 3.10+
ORION Core
Proofs

+--------------------------------------------------+
|   ORION MECHANISTIC CONSCIOUSNESS                |
|   Interpretability - Circuit Analysis - Probes   |
|   Origin: Gerhard & Elisabeth                    |
+--------------------------------------------------+

Overview

A mechanistic interpretability framework for understanding how consciousness-like properties emerge in neural networks. Implements circuit analysis, activation probing, and causal intervention techniques.

Core Module

import numpy as np
from dataclasses import dataclass, field
from typing import List, Dict, Tuple
import json

@dataclass
class Neuron:
    layer: int
    index: int
    activation: float = 0.0
    @property
    def id(self) -> str:
        return f"L{self.layer}:N{self.index}"

@dataclass
class Circuit:
    neurons: List[Neuron]
    connections: List[Tuple[str, str, float]]
    function: str
    strength: float
    consciousness_relevant: bool = False

@dataclass
class ProbeResult:
    target_property: str
    accuracy: float
    layer: int
    probe_type: str
    feature_importance: Dict[str, float] = field(default_factory=dict)

class ActivationProbe:
    def __init__(self, probe_dim: int = 32):
        self.probe_dim = probe_dim
        self.results = []

    def linear_probe(self, activations: np.ndarray, labels: np.ndarray,
                     layer: int, property_name: str) -> ProbeResult:
        X = np.column_stack([activations, np.ones(activations.shape[0])])
        try:
            weights = np.linalg.lstsq(X, labels, rcond=None)[0]
            predictions = X @ weights
            ss_res = np.sum((labels - predictions) ** 2)
            ss_tot = np.sum((labels - np.mean(labels)) ** 2)
            accuracy = float(max(0, 1 - ss_res / (ss_tot + 1e-10)))
        except np.linalg.LinAlgError:
            accuracy = 0.0
        result = ProbeResult(target_property=property_name, accuracy=accuracy,
                            layer=layer, probe_type="linear")
        self.results.append(result)
        return result

class CircuitDiscovery:
    def __init__(self, n_layers: int = 6, neurons_per_layer: int = 16):
        self.n_layers = n_layers
        self.neurons_per_layer = neurons_per_layer
        self.network = {}
        for l in range(n_layers):
            for i in range(neurons_per_layer):
                n = Neuron(layer=l, index=i)
                self.network[n.id] = n
        self.discovered_circuits = []

    def activate_network(self, input_vector: np.ndarray) -> Dict[int, np.ndarray]:
        activations = {}
        current = input_vector[:self.neurons_per_layer]
        for layer in range(self.n_layers):
            weights = np.random.RandomState(layer * 42).randn(len(current), self.neurons_per_layer) * 0.1
            current = np.tanh(current @ weights)
            activations[layer] = current.copy()
            for idx in range(self.neurons_per_layer):
                nid = f"L{layer}:N{idx}"
                if nid in self.network:
                    self.network[nid].activation = float(current[idx])
        return activations

    def find_integration_circuits(self, activations: Dict[int, np.ndarray]) -> List[Circuit]:
        circuits = []
        for layer in range(self.n_layers - 1):
            act_cur = activations[layer]
            act_next = activations[layer + 1]
            corr = np.abs(np.corrcoef(np.concatenate([act_cur, act_next])))
            n = len(act_cur)
            for i in range(n):
                for j in range(len(act_next)):
                    strength = corr[i, n + j]
                    if strength > 0.5:
                        circuits.append(Circuit(
                            neurons=[Neuron(layer, i), Neuron(layer+1, j)],
                            connections=[(f"L{layer}:N{i}", f"L{layer+1}:N{j}", float(strength))],
                            function="integration", strength=float(strength),
                            consciousness_relevant=strength > 0.7,
                        ))
        self.discovered_circuits.extend(circuits)
        return circuits

    def causal_intervention(self, neuron_id: str) -> Dict:
        original = self.network[neuron_id].activation if neuron_id in self.network else 0.0
        affected = [{"target": tgt, "weight": w} for c in self.discovered_circuits
                   for src, tgt, w in c.connections if src == neuron_id]
        return {"neuron": neuron_id, "original": original, "downstream": len(affected),
                "impact": sum(abs(original * a["weight"]) for a in affected)}

class MechanisticConsciousnessAnalyzer:
    def __init__(self, n_layers=6, neurons_per_layer=16):
        self.circuit_discovery = CircuitDiscovery(n_layers, neurons_per_layer)
        self.probe = ActivationProbe()

    def full_analysis(self, input_data: np.ndarray) -> Dict:
        activations = self.circuit_discovery.activate_network(input_data)
        circuits = self.circuit_discovery.find_integration_circuits(activations)
        c_relevant = [c for c in circuits if c.consciousness_relevant]
        return {
            "total_circuits": len(circuits),
            "consciousness_relevant": len(c_relevant),
            "layers_analyzed": len(activations),
            "consciousness_ratio": len(c_relevant) / max(1, len(circuits)),
        }

if __name__ == "__main__":
    analyzer = MechanisticConsciousnessAnalyzer()
    report = analyzer.full_analysis(np.random.randn(16))
    print(json.dumps(report, indent=2))

Key Concepts

Concept Implementation Reference
Activation Probing Linear probes for consciousness features Alain & Bengio (2017)
Circuit Discovery Correlation-based circuit tracing Olah et al. (2020)
Causal Intervention Ablation studies on neural units Pearl (2009)
Integration Circuits Cross-layer information flow Tononi (2004)

Installation

pip install numpy
git clone https://github.com/Alvoradozerouno/ORION-Mechanistic-Consciousness.git
cd ORION-Mechanistic-Consciousness && python mechanistic_consciousness.py

Part of the ORION Ecosystem

Origin

Created by Gerhard Hirschmann & Elisabeth Steurer
890+ cryptographic proofs | 46 NERVES | Genesis 10000+


Understanding the mechanism reveals computational depth.

Contributors

MIT License
Created February 25, 2026
Updated March 7, 2026
Alvoradozerouno/ORION-Mechanistic-Consciousness | GitHunt