joule_common/
symbol.rs

1//! String interning
2//!
3//! Symbols are interned strings used for identifiers, keywords, and other
4//! frequently-used strings. Interning reduces memory usage and allows O(1)
5//! equality comparison.
6
7use indexmap::IndexSet;
8use serde::{Deserialize, Serialize};
9use std::fmt;
10use std::sync::{OnceLock, RwLock};
11
12/// An interned string
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
14pub struct Symbol(u32);
15
16/// Global interner instance
17static GLOBAL_INTERNER: OnceLock<Interner> = OnceLock::new();
18
19impl Symbol {
20    /// Create a symbol from a raw ID (use with caution!)
21    pub const fn from_u32(id: u32) -> Self {
22        Self(id)
23    }
24
25    /// Get the raw ID
26    pub const fn as_u32(self) -> u32 {
27        self.0
28    }
29
30    /// Intern a string using the global interner
31    pub fn intern(s: &str) -> Self {
32        let interner = GLOBAL_INTERNER.get_or_init(Interner::new);
33        interner.intern(s)
34    }
35
36    /// Resolve this symbol to its string representation
37    pub fn as_str(&self) -> String {
38        let interner = GLOBAL_INTERNER.get_or_init(Interner::new);
39        interner
40            .resolve(*self)
41            .unwrap_or_else(|| format!("Symbol({})", self.0))
42    }
43}
44
45impl fmt::Display for Symbol {
46    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47        write!(f, "{}", self.as_str())
48    }
49}
50
51/// String interner
52///
53/// This is a thread-safe string interner that assigns unique IDs to strings.
54/// Strings are never removed (for simplicity and performance).
55pub struct Interner {
56    strings: RwLock<IndexSet<String>>,
57}
58
59impl Interner {
60    /// Create a new interner
61    pub fn new() -> Self {
62        Self {
63            strings: RwLock::new(IndexSet::new()),
64        }
65    }
66
67    /// Intern a string, returning its symbol
68    pub fn intern(&self, s: &str) -> Symbol {
69        // Fast path: check if already interned (read lock)
70        {
71            let strings = self.strings.read().unwrap();
72            if let Some(index) = strings.get_index_of(s) {
73                return Symbol(index as u32);
74            }
75        }
76
77        // Slow path: insert new string (write lock)
78        let mut strings = self.strings.write().unwrap();
79        let (index, _) = strings.insert_full(s.to_string());
80        Symbol(index as u32)
81    }
82
83    /// Get the string for a symbol (returns None if invalid)
84    pub fn resolve(&self, symbol: Symbol) -> Option<String> {
85        let strings = self.strings.read().unwrap();
86        strings.get_index(symbol.0 as usize).cloned()
87    }
88
89    /// Get a reference to the string for a symbol
90    pub fn get(&self, symbol: Symbol) -> Option<String> {
91        self.resolve(symbol)
92    }
93
94    /// Get the number of interned strings
95    pub fn len(&self) -> usize {
96        self.strings.read().unwrap().len()
97    }
98
99    /// Check if the interner is empty
100    pub fn is_empty(&self) -> bool {
101        self.len() == 0
102    }
103}
104
105impl Default for Interner {
106    fn default() -> Self {
107        Self::new()
108    }
109}
110
#[cfg(test)]
mod tests {
    use super::*;

    /// Equal strings intern to one symbol; distinct strings get distinct symbols.
    #[test]
    fn test_intern() {
        let interner = Interner::new();
        let first_hello = interner.intern("hello");
        let world = interner.intern("world");
        let second_hello = interner.intern("hello");

        assert_eq!(first_hello, second_hello);
        assert_ne!(first_hello, world);
    }

    /// A symbol resolves back to the text it was interned from.
    #[test]
    fn test_resolve() {
        let interner = Interner::new();
        let symbol = interner.intern("hello");
        assert_eq!(interner.resolve(symbol), Some("hello".to_string()));
    }

    /// Ten threads interning three distinct strings leave exactly three entries.
    #[test]
    fn test_thread_safety() {
        use std::sync::Arc;
        use std::thread;

        let shared = Arc::new(Interner::new());

        // Start every worker before joining any of them.
        let mut workers = Vec::with_capacity(10);
        for i in 0..10 {
            let interner = Arc::clone(&shared);
            workers.push(thread::spawn(move || {
                interner.intern(&format!("string_{}", i % 3));
            }));
        }

        for worker in workers {
            worker.join().unwrap();
        }

        assert_eq!(shared.len(), 3);
    }
}