smart_config/
pat.rs

1//! String pattern matching.
2//!
3//! # Overview
4//!
5//! In the library, pattern matching is used to initialize delimiters for deserializers, like [`Delimited`]
6//! and [`DelimitedEntries`], and for [validating](crate::validation) string config params. Pattern matching
7//! essentially generalizes the [`Pattern`](std::str::pattern::Pattern) trait from the standard library.
8//! Since the trait itself is unstable, we don't use it directly; instead, we define the [`Split`] trait
9//! and implement it for `&str` and `[char; _]` (via `Pattern`) and for [`Regex`]es from the eponymous crate
10//! (via the [`LazyRegex`] wrapper; see its docs for why this wrapper is needed).
11//!
12//! [`Delimited`]: crate::de::Delimited
13//! [`DelimitedEntries`]: crate::de::DelimitedEntries
14//!
15//! # Examples
16//!
17//! - See [`Delimited`](crate::de::Delimited#examples) and [`DelimitedEntries`](crate::de::DelimitedEntries#examples)
18//!   for the examples of usage of delimiters.
19//! - See the [`validation`](crate::validation) module for the examples of string validation using [`LazyRegex`].
20
21use std::{fmt, ops, sync::LazyLock};
22
23pub use regex::Regex;
24
/// Human-readable (for people familiar with regexes) representation of a compiled pattern.
///
/// Returned by [`Split::display()`]. The [`Display`](fmt::Display) implementation renders each
/// variant differently; see the variant docs for details.
#[doc(hidden)] // not stable yet
#[non_exhaustive]
#[derive(Debug, Clone)]
pub enum PatternDisplay {
    /// Pattern is an exact string match. Displayed using the `Debug` formatting of the string
    /// (i.e., quoted and escaped).
    Exact(&'static str),
    /// Pattern is a regular expression conforming to the syntax supported by the `regex` crate.
    /// Displayed as `Regex(..)` with the regex source rendered as a raw string literal.
    Regex(String),
    /// Pattern is generic `Debug` representation (e.g., an array of chars). Displayed verbatim.
    Generic(String),
}
37
38impl fmt::Display for PatternDisplay {
39    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
40        match self {
41            Self::Exact(s) => write!(formatter, "{s:?}"),
42            Self::Regex(regex) => write!(formatter, "Regex({})", RawStr(regex)),
43            Self::Generic(s) => formatter.write_str(s),
44        }
45    }
46}
47
/// Wrapper for strings that outputs a string as a raw string literal, like `r"\s+"`.
///
/// Both the `Debug` and `Display` implementations produce the same raw-literal output.
/// The literal is surrounded with as many `#` chars as necessary for it to be terminated
/// correctly (none if the string contains no double quotes).
#[doc(hidden)] // reused in the `commands` crate; logically private
#[derive(Clone, Copy)]
pub struct RawStr<'a>(pub &'a str);

impl fmt::Debug for RawStr<'_> {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `Debug` intentionally mirrors `Display`: the raw literal is already unambiguous.
        fmt::Display::fmt(self, formatter)
    }
}

impl fmt::Display for RawStr<'_> {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let hash_count = self.hash_count();

        // Opening delimiter: `r`, then `hash_count` hashes, then the quote.
        formatter.write_str("r")?;
        for _ in 0..hash_count {
            formatter.write_str("#")?;
        }
        write!(formatter, "\"{}\"", self.0)?;
        // The closing delimiter must carry the same number of hashes as the opening one.
        for _ in 0..hash_count {
            formatter.write_str("#")?;
        }
        Ok(())
    }
}

impl RawStr<'_> {
    /// Determines the number of `#` chars necessary for the raw string specifier.
    ///
    /// Returns 0 if the string contains no double quotes (a plain `r"..."` literal suffices).
    /// Otherwise, returns the length of the longest run of consecutive `#` chars in the string
    /// plus one. This is a conservative bound: a raw literal is only terminated by `"` followed
    /// by the full number of hashes, and no run in the string is that long.
    fn hash_count(self) -> usize {
        if !self.0.contains('"') {
            return 0;
        }

        // Splitting on every non-`#` char leaves exactly the (possibly empty) runs of hashes,
        // *including* a run at the very end of the string. `#` is a single byte in UTF-8,
        // so the byte length of a run is equal to the number of hashes in it.
        let longest_run = self
            .0
            .split(|ch: char| ch != '#')
            .map(str::len)
            .max()
            .unwrap_or(0);
        longest_run + 1
    }
}
96
/// Pattern usable for splitting strings. Used in [`Delimited`](crate::de::Delimited)
/// and [`DelimitedEntries`](crate::de::DelimitedEntries) deserializers.
///
/// Implementors must be `Send + Sync + 'static`.
///
/// # Standard implementations
///
/// - `&str`: matches a string exactly
/// - `[char; _]`: matches any of the chars
/// - [`LazyRegex`]\: matches a regular expression
pub trait Split: Send + Sync + 'static {
    /// Splits the given `haystack` at most once from its start. This generalizes [`str::split_once()`].
    ///
    /// Returns the parts of the `haystack` before and after the first match of the pattern,
    /// or `None` if the pattern does not match.
    fn split_once<'s>(&self, haystack: &'s str) -> Option<(&'s str, &'s str)>;
    /// Splits the given `haystack`. This generalizes [`str::split()`].
    ///
    /// The returned iterator yields substrings of the `haystack` separated by matches of the pattern.
    fn split<'s>(&self, haystack: &'s str) -> impl Iterator<Item = &'s str>;

    /// Returns a human-readable representation of this pattern.
    #[doc(hidden)]
    fn display(&self) -> PatternDisplay;
}
114
115impl<const N: usize> Split for [char; N] {
116    fn split_once<'s>(&self, haystack: &'s str) -> Option<(&'s str, &'s str)> {
117        haystack.split_once(self)
118    }
119
120    fn split<'s>(&self, haystack: &'s str) -> impl Iterator<Item = &'s str> {
121        haystack.split(self)
122    }
123
124    fn display(&self) -> PatternDisplay {
125        PatternDisplay::Generic(format!("{self:?}"))
126    }
127}
128
129impl Split for &'static str {
130    fn split_once<'s>(&self, haystack: &'s str) -> Option<(&'s str, &'s str)> {
131        haystack.split_once(self)
132    }
133
134    fn split<'s>(&self, haystack: &'s str) -> impl Iterator<Item = &'s str> {
135        haystack.split(self)
136    }
137
138    fn display(&self) -> PatternDisplay {
139        PatternDisplay::Exact(self)
140    }
141}
142
/// Transparent wrapper around a type dereferencing to a [`Regex`]. Can be used as [a separator](Split),
/// or in [param validation](crate::validation).
///
/// By default, the wrapped type is [`LazyLock<Regex>`](LazyLock).
///
/// # Why a separate type?
///
/// A separate type is necessary to circumvent orphaning rules. We want to implement [`Split`]
/// and [`Validate`](crate::validation::Validate) for any type (e.g., [`LazyLock`]) that lazily initializes a `Regex`,
/// since a `Regex` on its own cannot be initialized at compile time. Similarly, such a type cannot
/// be dereferenced at compile time, which rules out implementing these traits for `&'static Regex`.
///
/// # Examples
///
/// The easiest way to initialize a wrapper is the [`lazy_regex!`] macro.
///
/// ```
/// use smart_config::{de::Delimited, pat::{lazy_regex, LazyRegex}};
/// # use smart_config::{DescribeConfig, DeserializeConfig};
///
/// static NAME_REGEX: LazyRegex = lazy_regex!(r"^[a-z][-a-z0-9]*$");
///
/// #[derive(DescribeConfig, DeserializeConfig)]
/// struct TestConfig {
///     #[config(validate(NAME_REGEX))]
///     app: String,
///     // The macro also can be inlined!
///     #[config(with = Delimited::new(lazy_regex!(ref r"\s*,\s*")))]
///     numbers: Vec<u64>,
/// }
/// ```
pub struct LazyRegex<T = LazyLock<Regex>>(pub T);
173
/// Creates a [`LazyRegex`].
///
/// - If supplied a string literal, it will create [`LazyRegex`] from it.
/// - If the literal is prepended with `ref`, this will create a private static and reference it
///   (i.e., return `&'static LazyRegex`). This is useful for single-use regexes inlined into `config` attributes.
///
/// # Panics
///
/// The regex is compiled inside the [`LazyLock`](std::sync::LazyLock) initializer and the compilation
/// result is unwrapped. Hence, an invalid regex leads to a panic when the lazy value is initialized,
/// rather than to a compile-time error.
///
/// # Examples
///
/// See [`LazyRegex` docs](LazyRegex#examples) for the examples of usage.
#[macro_export]
macro_rules! lazy_regex {
    // Plain form: expands to a `LazyRegex` value usable as a `static` initializer.
    ($regex:tt) => {
        $crate::pat::LazyRegex(::std::sync::LazyLock::new(|| {
            $crate::pat::Regex::new($regex).unwrap()
        }))
    };
    // `ref` form: defines a hidden static in a block and evaluates to a reference to it.
    (ref $regex:tt) => {{
        static __REGEX: $crate::pat::LazyRegex = $crate::pat::lazy_regex!($regex);
        const { &__REGEX }
    }};
}
195
196pub use lazy_regex;
197
198impl<T: fmt::Debug> fmt::Debug for LazyRegex<T> {
199    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
200        fmt::Debug::fmt(&self.0, formatter)
201    }
202}
203
204impl<T> Split for &'static LazyRegex<T>
205where
206    T: ops::Deref<Target = Regex> + Send + Sync,
207{
208    fn split_once<'s>(&self, haystack: &'s str) -> Option<(&'s str, &'s str)> {
209        let mut it = self.0.splitn(haystack, 2);
210        let head = it.next()?;
211        let tail = it.next()?;
212        Some((head, tail))
213    }
214
215    fn split<'s>(&self, haystack: &'s str) -> impl Iterator<Item = &'s str> {
216        Regex::split(&self.0, haystack)
217    }
218
219    fn display(&self) -> PatternDisplay {
220        PatternDisplay::Regex(self.0.as_str().to_owned())
221    }
222}
223
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn hash_count_for_raw_strings_is_correct() {
        // Each case: (input, expected number of hashes, expected raw string literal).
        let cases = [
            // No double quotes: hashes are never needed...
            ("Hello, world!", 0, "r\"Hello, world!\""),
            // ...even if the string itself consists of hashes.
            ("####", 0, "r\"####\""),
            // Double quotes without hashes require a single hash.
            (r#"x="1""#, 1, "r#\"x=\"1\"\"#"),
            // A hash inside the string bumps the required count.
            (r##"x="#1""##, 2, "r##\"x=\"#1\"\"##"),
        ];

        for (input, expected_hashes, expected_literal) in cases {
            let raw = RawStr(input);
            assert_eq!(raw.hash_count(), expected_hashes);
            assert_eq!(raw.to_string(), expected_literal);
        }
    }
}