smart_config/source/
mod.rs

1use std::{
2    any,
3    collections::{BTreeMap, HashSet},
4    iter,
5    marker::PhantomData,
6    sync::Arc,
7};
8
9pub use self::{env::Environment, json::Json, yaml::Yaml};
10use crate::{
11    DescribeConfig, DeserializeConfig, DeserializeConfigError, ParseError, ParseErrors,
12    de::{DeserializeContext, DeserializerOptions},
13    fallback::Fallbacks,
14    metadata::{BasicTypes, ConfigTag, ConfigVariant, TypeSuffixes},
15    schema::{ConfigData, ConfigRef, ConfigSchema},
16    utils::{EnumVariant, JsonObject, merge_json},
17    value::{Map, Pointer, Value, ValueOrigin, WithOrigin},
18    visit::Serializer,
19};
20
21#[macro_use]
22mod macros;
23mod env;
24mod json;
25#[cfg(test)]
26mod tests;
27mod yaml;
28
/// Kind of a [`ConfigSource`].
///
/// The trait has `crate::utils::Sealed` as a supertrait. The kinds defined in this module
/// are [`Hierarchical`] and [`Flat`].
pub trait ConfigSourceKind: crate::utils::Sealed {
    /// `true` for flat (key–value) sources, `false` for hierarchical ones.
    #[doc(hidden)] // implementation detail
    const IS_FLAT: bool;
}
34
/// Marker for hierarchical configuration sources (e.g. JSON or YAML files).
///
/// The only field is a private unit value, so the marker cannot be constructed outside this module.
#[derive(Debug)]
pub struct Hierarchical(());

impl crate::utils::Sealed for Hierarchical {}
impl ConfigSourceKind for Hierarchical {
    // Hierarchical sources are not flat.
    const IS_FLAT: bool = false;
}
43
/// Marker for key–value / flat configuration sources (e.g., env variables or command-line args).
///
/// The only field is a private unit value, so the marker cannot be constructed outside this module.
#[derive(Debug)]
pub struct Flat(());

impl crate::utils::Sealed for Flat {}
impl ConfigSourceKind for Flat {
    // Flat sources are nested by the repository (via `WithOrigin::nest_kvs()`) before further processing.
    const IS_FLAT: bool = true;
}
52
/// Source of configuration parameters that can be added to a [`ConfigRepository`].
pub trait ConfigSource {
    /// Kind of the source ([`Hierarchical`] or [`Flat`]). The repository reads
    /// [`ConfigSourceKind::IS_FLAT`] of the kind to decide how to treat the contents.
    type Kind: ConfigSourceKind;
    /// Converts this source into config contents: a map of values together with its origin.
    fn into_contents(self) -> WithOrigin<Map>;
}
60
/// Wraps a hierarchical source into a prefix.
#[derive(Debug, Clone)]
pub struct Prefixed<T> {
    // The wrapped source.
    inner: T,
    // Path under which the contents of `inner` are placed; interpreted as a `Pointer`.
    prefix: String,
}
67
68impl<T: ConfigSource<Kind = Hierarchical>> Prefixed<T> {
69    /// Wraps the provided source.
70    pub fn new(inner: T, prefix: impl Into<String>) -> Self {
71        Self {
72            inner,
73            prefix: prefix.into(),
74        }
75    }
76}
77
78impl<T: ConfigSource<Kind = Hierarchical>> ConfigSource for Prefixed<T> {
79    type Kind = Hierarchical;
80
81    fn into_contents(self) -> WithOrigin<Map> {
82        let contents = self.inner.into_contents();
83
84        let origin = Arc::new(ValueOrigin::Synthetic {
85            source: contents.origin.clone(),
86            transform: format!("prefixed with `{}`", self.prefix),
87        });
88
89        if let Some((parent, key_in_parent)) = Pointer(&self.prefix).split_last() {
90            let mut root = WithOrigin::new(Value::Object(Map::new()), origin.clone());
91            root.ensure_object(parent, |_| origin.clone())
92                .insert(key_in_parent.to_owned(), contents.map(Value::Object));
93            root.map(|value| match value {
94                Value::Object(map) => map,
95                _ => unreachable!(), // guaranteed by `ensure_object`
96            })
97        } else {
98            contents
99        }
100    }
101}
102
103/// Prioritized list of configuration sources. Can be used to push multiple sources at once
104/// into a [`ConfigRepository`].
105#[derive(Debug, Clone, Default)]
106pub struct ConfigSources {
107    inner: Vec<(WithOrigin<Map>, bool)>,
108}
109
110impl ConfigSources {
111    /// Pushes a configuration source at the end of the list.
112    pub fn push<S: ConfigSource>(&mut self, source: S) {
113        self.inner
114            .push((source.into_contents(), <S::Kind>::IS_FLAT));
115    }
116}
117
/// Information about a source returned from [`ConfigRepository::sources()`].
///
/// One entry is recorded per source inserted into the repository.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct SourceInfo {
    /// Origin of the source, as reported by its contents.
    pub origin: Arc<ValueOrigin>,
    /// Number of params in the source after it has undergone preprocessing (i.e., merging aliases etc.).
    pub param_count: usize,
}
127
/// Configuration serialization options.
///
/// The default options serialize all params hierarchically and output secrets as-is.
#[derive(Debug, Clone, Default)]
pub struct SerializerOptions {
    // If set, only params with values differing from the default value are serialized.
    pub(crate) diff_with_default: bool,
    // Placeholder string for secret params; `None` means that secrets are output as-is.
    pub(crate) secret_placeholder: Option<String>,
    // If set, a flat structure is used instead of the hierarchical one.
    pub(crate) flat: bool,
}
135
136impl SerializerOptions {
137    /// Will serialize only params with values differing from the default value.
138    pub fn diff_with_default() -> Self {
139        Self {
140            diff_with_default: true,
141            secret_placeholder: None,
142            flat: false,
143        }
144    }
145
146    /// Use flat config structure, as opposed to the default hierarchical one.
147    ///
148    /// In the flat structure, all params are placed in a single JSON object with full dot-separated param paths
149    /// (e.g., `api.http.port`) used as keys. Because param serializations can still be objects or arrays,
150    /// the produced object may not be completely flat.
151    ///
152    /// Use
153    #[must_use]
154    pub fn flat(mut self, flat: bool) -> Self {
155        self.flat = flat;
156        self
157    }
158
159    /// Sets the placeholder string value for secret params. By default, secrets will be output as-is.
160    #[must_use]
161    pub fn with_secret_placeholder(mut self, placeholder: impl Into<String>) -> Self {
162        self.secret_placeholder = Some(placeholder.into());
163        self
164    }
165
166    /// Serializes a config to JSON, recursively visiting its nested configs.
167    pub fn serialize<C: DescribeConfig>(self, config: &C) -> JsonObject {
168        let mut visitor = Serializer::new(&C::DESCRIPTION, "", self);
169        config.visit_config(&mut visitor);
170        visitor.into_inner()
171    }
172}
173
/// Configuration repository containing zero or more [configuration sources](ConfigSource).
/// Sources are preprocessed and merged according to the provided [`ConfigSchema`].
///
/// # Merging sources
///
/// [`Self::with()`] merges a new source into this repo. The new source has higher priority and will overwrite
/// values defined in old sources, including via parameter aliases.
///
/// # Type coercion
///
/// When processing [`ConfigSource`]s, values can be *coerced* depending on the [expected type](BasicTypes)
/// at the corresponding location [as indicated](crate::de::DeserializeParam::EXPECTING) by the param deserializer.
/// Currently, coercion only happens if the original value is a string.
///
/// - If the expected type is [`BasicTypes::INTEGER`], [`BasicTypes::FLOAT`], or [`BasicTypes::BOOL`],
///   the number / Boolean is [parsed](str::parse()) from the string. If parsing succeeds, the value is replaced.
///
/// Coercion is not performed if the param deserializer doesn't specify an expected type.
///
/// This means that it's possible to supply values for structured params from env vars without much hassle:
///
/// ```rust
/// # use std::collections::HashMap;
/// use smart_config::{testing, DescribeConfig, DeserializeConfig, Environment};
///
/// #[derive(Debug, DescribeConfig, DeserializeConfig)]
/// struct CoercingConfig {
///     flag: bool,
///     ints: Vec<u64>,
///     map: HashMap<String, u32>,
/// }
///
/// let mut env = Environment::from_iter("APP_", [
///     ("APP_FLAG", "true"),
///     ("APP_INTS__JSON", "[2, 3, 5]"),
///     ("APP_MAP__JSON", r#"{ "value": 5 }"#),
/// ]);
/// // Coerce `__json`-suffixed env vars to JSON
/// env.coerce_json()?;
/// // `testing` functions create a repository internally
/// let config: CoercingConfig = testing::test(env)?;
/// assert!(config.flag);
/// assert_eq!(config.ints, [2, 3, 5]);
/// assert_eq!(config.map, HashMap::from([("value".into(), 5)]));
/// # anyhow::Ok(())
/// ```
///
/// # Other preprocessing
///
/// Besides type coercion, sources undergo a couple of additional transforms:
///
/// - **Garbage collection:** All values not corresponding to params or their ancestor objects
///   are removed.
/// - **Hiding secrets:** Values corresponding to [secret params](crate::de#secrets) are wrapped in
///   opaque, zero-on-drop wrappers.
#[derive(Debug, Clone)]
pub struct ConfigRepository<'a> {
    // Schema that guides preprocessing and merging of sources.
    schema: &'a ConfigSchema,
    // Paths from `schema.iter_ll()` together with all their ancestors, plus the root path `""`.
    // Used during garbage collection to decide which non-param values to descend into.
    prefixes_for_canonical_configs: HashSet<Pointer<'a>>,
    // Options passed to the deserialization context when parsing configs.
    de_options: DeserializerOptions,
    // Info about each inserted source, in insertion order.
    sources: Vec<SourceInfo>,
    // Result of merging all inserted sources (after preprocessing); starts as an empty object.
    merged: WithOrigin,
}
237
238impl<'a> ConfigRepository<'a> {
239    /// Creates an empty config repo based on the provided schema.
240    pub fn new(schema: &'a ConfigSchema) -> Self {
241        let prefixes_for_canonical_configs: HashSet<_> = schema
242            .iter_ll()
243            .flat_map(|(path, _)| path.with_ancestors())
244            .chain([Pointer("")])
245            .collect();
246
247        let this = Self {
248            schema,
249            prefixes_for_canonical_configs,
250            de_options: DeserializerOptions::default(),
251            sources: vec![],
252            merged: WithOrigin {
253                inner: Value::Object(Map::default()),
254                origin: Arc::default(),
255            },
256        };
257        if let Some(fallbacks) = Fallbacks::new(schema) {
258            this.with(fallbacks)
259        } else {
260            this
261        }
262    }
263
264    /// Returns the wrapped configuration schema.
265    pub fn schema(&self) -> &'a ConfigSchema {
266        self.schema
267    }
268
269    /// Accesses options used during `serde`-powered deserialization.
270    pub fn deserializer_options(&mut self) -> &mut DeserializerOptions {
271        &mut self.de_options
272    }
273
274    /// Extends this environment with a new configuration source.
275    #[must_use]
276    pub fn with<S: ConfigSource>(mut self, source: S) -> Self {
277        self.insert_inner(source.into_contents(), <S::Kind>::IS_FLAT);
278        self
279    }
280
281    #[tracing::instrument(
282        level = "debug",
283        name = "ConfigRepository::insert",
284        skip(self, contents)
285    )]
286    fn insert_inner(&mut self, contents: WithOrigin<Map>, is_flat: bool) {
287        let mut source_value = if is_flat {
288            WithOrigin::nest_kvs(contents.inner, self.schema, &contents.origin)
289        } else {
290            WithOrigin {
291                inner: Value::Object(contents.inner),
292                origin: contents.origin.clone(),
293            }
294        };
295
296        let param_count =
297            source_value.preprocess_source(self.schema, &self.prefixes_for_canonical_configs);
298        tracing::debug!(param_count, "Inserted source into config repo");
299        self.merged
300            .guided_merge(source_value, self.schema, Pointer(""));
301        self.sources.push(SourceInfo {
302            origin: contents.origin,
303            param_count,
304        });
305    }
306
307    ///  Extends this environment with a multiple configuration sources.
308    #[must_use]
309    pub fn with_all(mut self, sources: ConfigSources) -> Self {
310        for (contents, is_flat) in sources.inner {
311            self.insert_inner(contents, is_flat);
312        }
313        self
314    }
315
316    /// Provides information about sources merged in this repository.
317    pub fn sources(&self) -> &[SourceInfo] {
318        &self.sources
319    }
320
321    #[doc(hidden)] // not stable yet
322    pub fn merged(&self) -> &WithOrigin {
323        &self.merged
324    }
325
326    /// Returns canonical JSON for all configurations contained in the schema, with values filled both from the contained sources
327    /// and from defaults.
328    ///
329    /// This method differs from [`Self::merged()`] by taking defaults into account.
330    ///
331    /// # Errors
332    ///
333    /// If parsing any of the configs in the schema fails, returns parsing errors early (i.e., errors are **not** exhaustive).
334    /// Importantly, missing config / parameter errors are swallowed provided this is the only kind of errors for the config,
335    /// and the corresponding config serialization is skipped.
336    #[doc(hidden)] // not stable yet
337    pub fn canonicalize(&self, options: &SerializerOptions) -> Result<JsonObject, ParseErrors> {
338        let mut json = serde_json::Map::new();
339        for config_parser in self.iter() {
340            if !config_parser.config().is_top_level() {
341                // The config should be serialized as a part of the parent config.
342                continue;
343            }
344
345            let parsed = match config_parser.parse_opt() {
346                Ok(Some(config)) => config,
347                Ok(None) => continue,
348                Err(err) => return Err(err),
349            };
350
351            let metadata = config_parser.config().metadata();
352            let prefix = config_parser.config().prefix();
353            let mut visitor = Serializer::new(metadata, prefix, options.clone());
354            (metadata.visitor)(parsed.as_ref(), &mut visitor);
355            let serialized = visitor.into_inner();
356
357            if options.flat {
358                json.extend(serialized);
359            } else {
360                merge_json(&mut json, metadata, prefix, serialized);
361            }
362        }
363        Ok(json)
364    }
365
366    /// Iterates over parsers for all configs in the schema.
367    pub fn iter(&self) -> impl Iterator<Item = ConfigParser<'_, ()>> + '_ {
368        self.schema.iter().map(|config_ref| ConfigParser {
369            repo: self,
370            config_ref,
371            _config: PhantomData,
372        })
373    }
374
375    /// Returns a parser for the single configuration of the specified type.
376    ///
377    /// # Errors
378    ///
379    /// Errors if the config is not a part of the schema or is mounted to multiple locations.
380    pub fn single<C: DeserializeConfig>(&self) -> anyhow::Result<ConfigParser<'_, C>> {
381        let config_ref = self.schema.single(&C::DESCRIPTION)?;
382        Ok(ConfigParser {
383            repo: self,
384            config_ref,
385            _config: PhantomData,
386        })
387    }
388
389    /// Gets a parser for a configuration of the specified type mounted at the canonical `prefix`.
390    /// If the config is not present at `prefix`, returns `None`.
391    pub fn get<'s, C: DeserializeConfig>(&'s self, prefix: &'s str) -> Option<ConfigParser<'s, C>> {
392        let config_ref = self.schema.get(&C::DESCRIPTION, prefix)?;
393        Some(ConfigParser {
394            repo: self,
395            config_ref,
396            _config: PhantomData,
397        })
398    }
399}
400
/// Parser of configuration input in a [`ConfigRepository`].
///
/// The type param `C` is the parsed config type; parsers returned by [`ConfigRepository::iter()`]
/// use `()` instead and produce boxed configs.
#[derive(Debug)]
pub struct ConfigParser<'a, C> {
    // Repository providing the merged input and deserializer options.
    repo: &'a ConfigRepository<'a>,
    // Reference to the parsed config in the schema.
    config_ref: ConfigRef<'a>,
    // Ties the parser to the config type without storing a config.
    _config: PhantomData<C>,
}
408
409impl ConfigParser<'_, ()> {
410    /// Attempts to parse the related config from the repository input. Returns the boxed parsed config.
411    ///
412    /// # Errors
413    ///
414    /// Returns parsing errors if any.
415    #[doc(hidden)] // not stable yet
416    #[allow(clippy::redundant_closure_for_method_calls)] // false positive because of lifetimes
417    pub fn parse(&self) -> Result<Box<dyn any::Any>, ParseErrors> {
418        self.with_context(|ctx| ctx.deserialize_any_config())
419    }
420
421    /// Attempts to parse an optional config from the repository input. Returns the boxed parsed config.
422    /// If there's no data for the config, returns `Ok(None)`. This includes the case when some required params are missing,
423    /// and this is the only type of errors encountered.
424    ///
425    /// # Errors
426    ///
427    /// Returns parsing errors if any.
428    #[doc(hidden)] // not stable yet
429    #[allow(clippy::redundant_closure_for_method_calls)] // false positive because of lifetimes
430    pub fn parse_opt(&self) -> Result<Option<Box<dyn any::Any>>, ParseErrors> {
431        self.with_context(|ctx| ctx.deserialize_any_config_opt())
432    }
433}
434
435impl<'a, C> ConfigParser<'a, C> {
436    /// Returns a reference to the configuration.
437    pub fn config(&self) -> ConfigRef<'a> {
438        self.config_ref
439    }
440
441    fn with_context<R>(
442        &self,
443        action: impl FnOnce(DeserializeContext<'_>) -> Result<R, DeserializeConfigError>,
444    ) -> Result<R, ParseErrors> {
445        let mut errors = ParseErrors::default();
446        let prefix = self.config_ref.prefix();
447        let metadata = self.config_ref.data.metadata;
448        let ctx = DeserializeContext::new(
449            &self.repo.de_options,
450            &self.repo.merged,
451            prefix.to_owned(),
452            metadata,
453            &mut errors,
454        );
455        action(ctx).map_err(|_| {
456            if errors.len() == 0 {
457                errors.push(ParseError::generic(prefix.to_owned(), metadata));
458            }
459            errors
460        })
461    }
462}
463
464impl<C: DeserializeConfig> ConfigParser<'_, C> {
465    /// Performs parsing.
466    ///
467    /// # Errors
468    ///
469    /// Returns errors encountered during parsing. This list of errors is as full as possible (i.e.,
470    /// there is no short-circuiting on encountering an error).
471    #[allow(clippy::redundant_closure_for_method_calls)] // doesn't work as an fn pointer because of the context lifetime
472    pub fn parse(self) -> Result<C, ParseErrors> {
473        self.with_context(|ctx| ctx.deserialize_config::<C>())
474    }
475
476    /// Parses an optional config. Returns `None` if the config object is not present (i.e., none of the config params / sub-configs
477    /// are set); otherwise, tries to perform parsing.
478    ///
479    /// # Errors
480    ///
481    /// Returns errors encountered during parsing.
482    #[allow(clippy::redundant_closure_for_method_calls)] // doesn't work as an fn pointer because of the context lifetime
483    pub fn parse_opt(self) -> Result<Option<C>, ParseErrors> {
484        self.with_context(|ctx| ctx.deserialize_config_opt::<C>())
485    }
486}
487
488impl WithOrigin {
    /// Runs all preprocessing transforms on a source value before it is merged into a repository.
    ///
    /// Returns the count reported by the final garbage collection pass, i.e. the number of values
    /// retained in the source at canonical param locations.
    fn preprocess_source(
        &mut self,
        schema: &ConfigSchema,
        prefixes_for_canonical_configs: &HashSet<Pointer<'_>>,
    ) -> usize {
        // Copy values from alias locations to canonical locations first, so that the following
        // transforms can operate on canonical locations.
        self.copy_aliased_values(schema);
        // Wrap string values of secret params.
        self.mark_secrets(schema);
        // Add tags for enum configs specified in the serde-like form.
        self.convert_serde_enums(schema);
        // Move suffixed values (e.g., `param_ms`) inside the corresponding object params / nested configs.
        self.nest_object_params_and_sub_configs(schema);
        // NOTE(review): presumably the array counterpart of the previous step; the implementation
        // is defined elsewhere in this file.
        self.nest_array_params(schema);
        // Finally, drop everything that doesn't correspond to canonical params or their ancestors.
        self.collect_garbage(schema, prefixes_for_canonical_configs, Pointer(""))
    }
501
502    #[tracing::instrument(level = "debug", skip_all)]
503    fn copy_aliased_values(&mut self, schema: &ConfigSchema) {
504        for (prefix, config_data) in schema.iter_ll() {
505            let (new_values, new_map_origin) = self.copy_aliases_for_config(config_data);
506            if new_values.is_empty() {
507                continue;
508            }
509
510            let new_map_origin = new_map_origin.map(|source| {
511                Arc::new(ValueOrigin::Synthetic {
512                    source,
513                    transform: format!("copy to '{prefix}' per aliasing rules"),
514                })
515            });
516            // `unwrap()` below is safe: if there is no `current_map`, `new_values` are obtained from the alias maps,
517            // meaning that `new_map_origin` has been set.
518            self.ensure_object(prefix, |_| new_map_origin.clone().unwrap())
519                .extend(new_values);
520        }
521    }
522
523    #[must_use = "returned map should be inserted into the config"]
524    fn copy_aliases_for_config(&self, config: &ConfigData) -> (Map, Option<Arc<ValueOrigin>>) {
525        let prefix = config.prefix();
526        let canonical_map = match self.get(prefix).map(|val| &val.inner) {
527            Some(Value::Object(map)) => Some(map),
528            Some(_) => {
529                tracing::warn!(
530                    prefix = prefix.0,
531                    config = ?config.metadata.ty,
532                    "canonical config location contains a non-object"
533                );
534                return (Map::new(), None);
535            }
536            None => None,
537        };
538
539        let mut new_values = Map::new();
540        let mut new_map_origin = None;
541
542        for param in config.metadata.params {
543            // Create a prioritized iterator of all candidate paths
544            let all_paths = config.all_paths_for_param(param);
545
546            for (path, alias_options) in all_paths {
547                let (prefix, name) = Pointer(&path)
548                    .split_last()
549                    .expect("param paths are never empty");
550                let Some(map) = self.get(prefix) else {
551                    continue;
552                };
553                let map_origin = &map.origin;
554                let Some(map) = map.inner.as_object() else {
555                    continue;
556                };
557
558                // Find all values in `map` that either match `name` exactly, or have the `{name}_{type_suffix}` form.
559                let matching_values: Vec<_> =
560                    if let Some(suffixes) = param.type_description().suffixes() {
561                        let matching_values = map.iter().filter_map(|(key, val)| {
562                            let suffix = if key == name {
563                                None // Exact match
564                            } else {
565                                let key_suffix = Self::strip_prefix(key, name)?;
566                                if !suffixes.contains(key_suffix) {
567                                    return None;
568                                }
569                                Some(key_suffix)
570                            };
571                            Some((suffix, val))
572                        });
573                        matching_values.collect()
574                    } else if let Some(val) = map.get(name) {
575                        // Shortcut: we only need to check the exact param name if no suffixes are defined by the param deserializer.
576                        vec![(None, val)]
577                    } else {
578                        vec![]
579                    };
580
581                // Copy the found values.
582                for (suffix, val) in matching_values {
583                    let canonical_key_string;
584                    let canonical_key = if let Some(suffix) = suffix {
585                        canonical_key_string = format!("{}_{suffix}", param.name);
586                        &canonical_key_string
587                    } else {
588                        param.name
589                    };
590
591                    if canonical_map.is_some_and(|map| map.contains_key(canonical_key)) {
592                        // Key is already present in the original map
593                        continue;
594                    }
595
596                    if !new_values.contains_key(canonical_key) {
597                        if alias_options.is_deprecated {
598                            tracing::warn!(
599                                path,
600                                origin = %val.origin,
601                                config = ?config.metadata.ty,
602                                param = param.rust_field_name,
603                                canonical_path = prefix.join(canonical_key),
604                                "using deprecated alias; please use canonical_path instead"
605                            );
606                        }
607
608                        tracing::trace!(
609                            prefix = prefix.0,
610                            config = ?config.metadata.ty,
611                            param = param.rust_field_name,
612                            name,
613                            origin = ?map_origin,
614                            canonical_key,
615                            "copied aliased param"
616                        );
617                        new_values.insert(canonical_key.to_owned(), val.clone());
618                        if new_map_origin.is_none() {
619                            new_map_origin = Some(map_origin.clone());
620                        }
621                    }
622                }
623            }
624        }
625
626        (new_values, new_map_origin)
627    }
628
629    fn strip_prefix<'s>(s: &'s str, prefix: &str) -> Option<&'s str> {
630        s.strip_prefix(prefix)?
631            .strip_prefix('_')
632            .filter(|suffix| !suffix.is_empty())
633    }
634
635    /// Wraps secret string values into `Value::SecretString(_)`.
636    fn mark_secrets(&mut self, schema: &ConfigSchema) {
637        for (prefix, config_data) in schema.iter_ll() {
638            let Some(Self {
639                inner: Value::Object(config_object),
640                ..
641            }) = self.get_mut(prefix)
642            else {
643                continue;
644            };
645
646            for param in config_data.metadata.params {
647                if !param.type_description().contains_secrets() {
648                    continue;
649                }
650                let Some(value) = config_object.get_mut(param.name) else {
651                    continue;
652                };
653
654                if let Value::String(str) = &mut value.inner {
655                    tracing::trace!(
656                        prefix = prefix.0,
657                        config = ?config_data.metadata.ty,
658                        param = param.rust_field_name,
659                        "marked param as secret"
660                    );
661                    str.make_secret();
662                } else {
663                    tracing::warn!(
664                        prefix = prefix.0,
665                        config = ?config_data.metadata.ty,
666                        param = param.rust_field_name,
667                        "param marked as secret has non-string value"
668                    );
669                }
670            }
671        }
672    }
673
674    #[tracing::instrument(level = "debug", skip_all)]
675    fn convert_serde_enums(&mut self, schema: &ConfigSchema) {
676        for config_data in schema.iter() {
677            let config_meta = config_data.metadata();
678            let prefix = Pointer(config_data.prefix());
679
680            let Some(tag) = &config_meta.tag else {
681                continue; // Not an enum config, nothing to do.
682            };
683            if !config_data.data.coerce_serde_enums {
684                continue;
685            }
686
687            let canonical_map = self.get(prefix).and_then(|val| val.inner.as_object());
688            let alias_maps = config_data
689                .aliases()
690                .filter_map(|(alias, _)| self.get(Pointer(alias))?.inner.as_object());
691
692            if canonical_map.is_some_and(|map| map.contains_key(tag.param.name)) {
693                // The source contains the relevant tag. It's sufficient to check the canonical map only since we've performed de-aliasing for tags already.
694                continue;
695            }
696
697            let _span_guard = tracing::info_span!(
698                "convert_serde_enum",
699                config = ?config_meta.ty,
700                prefix = prefix.0,
701                tag = tag.param.name,
702            )
703            .entered();
704
705            if let Some((variant, variant_content)) =
706                Self::detect_serde_enum_variant(canonical_map, alias_maps, tag)
707            {
708                tracing::debug!(
709                    variant = variant.name,
710                    origin = %variant_content.origin,
711                    "adding detected tag variant"
712                );
713                let origin = ValueOrigin::Synthetic {
714                    source: variant_content.origin.clone(),
715                    transform: "coercing serde enum".to_owned(),
716                };
717
718                let canonical_map = self.ensure_object(prefix, |_| {
719                    Arc::new(ValueOrigin::Synthetic {
720                        source: Arc::default(),
721                        transform: "enum coercion".to_string(),
722                    })
723                });
724                canonical_map.insert(
725                    tag.param.name.to_owned(),
726                    WithOrigin::new(variant.name.to_owned().into(), Arc::new(origin)),
727                );
728            }
729        }
730    }
731
732    fn detect_serde_enum_variant<'a>(
733        canonical_map: Option<&'a Map>,
734        alias_maps: impl Iterator<Item = &'a Map>,
735        tag: &'static ConfigTag,
736    ) -> Option<(&'static ConfigVariant, &'a Self)> {
737        let all_variant_names = tag.variants.iter().flat_map(|variant| {
738            iter::once(variant.name)
739                .chain(variant.aliases.iter().copied())
740                .filter_map(move |name| Some((EnumVariant::new(name)?.to_snake_case(), variant)))
741        });
742
743        // We need to look for variant fields in the alias maps because they were not copied during de-aliasing.
744        let mut variant_match = None;
745        for map in canonical_map.into_iter().chain(alias_maps) {
746            for (candidate_field_name, variant) in all_variant_names.clone() {
747                if map.contains_key(&candidate_field_name) {
748                    if let Some((_, prev_field, _)) = &variant_match {
749                        if *prev_field != candidate_field_name {
750                            tracing::info!(
751                                prev_field,
752                                field = candidate_field_name,
753                                "multiple serde-like variant fields present"
754                            );
755                            return None;
756                        }
757                    }
758                    variant_match = Some((map, candidate_field_name, variant));
759                }
760            }
761        }
762
763        let Some((map, field_name, variant)) = variant_match else {
764            return None; // No matches found
765        };
766        let variant_content = map.get(&field_name).unwrap();
767        if !matches!(&variant_content.inner, Value::Object(_)) {
768            tracing::info!(
769                field = field_name,
770                "variant contents is not an object, skipping"
771            );
772            return None;
773        }
774        Some((variant, variant_content))
775    }
776
777    /// Removes all values that do not correspond to canonical params or their ancestors.
778    fn collect_garbage(
779        &mut self,
780        schema: &ConfigSchema,
781        prefixes_for_canonical_configs: &HashSet<Pointer<'_>>,
782        at: Pointer<'_>,
783    ) -> usize {
784        if schema.contains_canonical_param(at) {
785            1
786        } else if prefixes_for_canonical_configs.contains(&at) {
787            if let Value::Object(map) = &mut self.inner {
788                let mut count = 0;
789                map.retain(|key, value| {
790                    let child_path = at.join(key);
791                    let descendant_count = value.collect_garbage(
792                        schema,
793                        prefixes_for_canonical_configs,
794                        Pointer(&child_path),
795                    );
796                    count += descendant_count;
797                    descendant_count > 0
798                });
799                count
800            } else {
801                // Retain a (probably erroneous) non-object value at config location to provide more intelligent errors.
802                1
803            }
804        } else {
805            // The object is neither a param nor a config or a config ancestor; remove it.
806            0
807        }
808    }
809
810    /// Nests values inside matching object params that have defined suffixes, or nested configs.
811    ///
812    /// For example, we have an object param at `test.param` and a source with a value at `test.param_ms`.
813    /// This transform will copy this value to `test.param.ms` (i.e., inside the param object), provided that
814    /// the source doesn't contain `test.param` or contains an object at this path.
815    #[tracing::instrument(level = "debug", skip_all)]
816    fn nest_object_params_and_sub_configs(&mut self, schema: &ConfigSchema) {
817        for (prefix, config_data) in schema.iter_ll() {
818            let Some(config_object) = self.get_mut(prefix) else {
819                continue;
820            };
821            let config_origin = &config_object.origin;
822            let Value::Object(config_object) = &mut config_object.inner else {
823                continue;
824            };
825
826            let params_with_suffixes = config_data.metadata.params.iter().filter_map(|param| {
827                let suffixes = param.type_description().suffixes()?;
828                Some((param.name, suffixes))
829            });
830            let nested_configs = config_data
831                .metadata
832                .nested_configs
833                .iter()
834                .filter_map(|nested| {
835                    (!nested.name.is_empty()).then_some((nested.name, TypeSuffixes::All))
836                });
837            let mut insertions = vec![];
838
839            // ms: $value -> $value // suffix: 'ms'
840
841            for (child_name, suffixes) in params_with_suffixes.chain(nested_configs) {
842                let target_object = match config_object.get(child_name) {
843                    None => None,
844                    Some(WithOrigin {
845                        inner: Value::Object(obj),
846                        ..
847                    }) => Some(obj),
848                    // Never overwrite non-objects with an object value.
849                    Some(_) => continue,
850                };
851
852                let matching_fields: Vec<_> = config_object
853                    .iter()
854                    .filter_map(|(name, field)| {
855                        let suffix = Self::strip_prefix(name, child_name)?;
856                        if !suffixes.contains(suffix) {
857                            return None;
858                        }
859                        if let Some(param_object) = target_object {
860                            if param_object.contains_key(suffix) {
861                                return None; // Never overwrite existing fields
862                            }
863                        }
864                        Some((suffix.to_owned(), field.clone()))
865                    })
866                    .collect();
867                if matching_fields.is_empty() {
868                    continue;
869                }
870
871                tracing::trace!(
872                    prefix = prefix.0,
873                    config = ?config_data.metadata.ty,
874                    child_name,
875                    fields = ?matching_fields.iter().map(|(name, _)| name).collect::<Vec<_>>(),
876                    "nesting for object param / config"
877                );
878                insertions.push((child_name, matching_fields));
879            }
880
881            for (child_name, matching_fields) in insertions {
882                if !config_object.contains_key(child_name) {
883                    let origin = Arc::new(ValueOrigin::Synthetic {
884                        source: config_origin.clone(),
885                        transform: format!("nesting for object param '{child_name}'"),
886                    });
887                    let val = Self::new(Value::Object(Map::new()), origin);
888                    config_object.insert(child_name.to_owned(), val);
889                }
890
891                let Value::Object(target_object) =
892                    &mut config_object.get_mut(child_name).unwrap().inner
893                else {
894                    unreachable!(); // Due to the checks above
895                };
896                target_object.extend(matching_fields);
897            }
898        }
899    }
900
901    /// Nests values inside matching array params.
902    ///
903    /// For example, we have an array param at `test.param` and a source with values at `test.param_0`, `test.param_1`, `test.param_2`
904    /// (and no `test.param`). This transform will copy these values as a 3-element array at `test.param`.
905    #[tracing::instrument(level = "debug", skip_all)]
906    fn nest_array_params(&mut self, schema: &ConfigSchema) {
907        for (prefix, config_data) in schema.iter_ll() {
908            let Some(config_object) = self.get_mut(prefix) else {
909                continue;
910            };
911            let config_origin = &config_object.origin;
912            let Value::Object(config_object) = &mut config_object.inner else {
913                continue;
914            };
915
916            for param in config_data.metadata.params {
917                if !param.expecting.contains(BasicTypes::ARRAY)
918                    || param.expecting.contains(BasicTypes::OBJECT)
919                {
920                    // If a param expects an object, a transform is ambiguous; `_${i}` suffix could be either an array index
921                    // or an object key.
922                    continue;
923                }
924                if config_object.contains_key(param.name) {
925                    // Unlike objects, we never extend existing arrays.
926                    continue;
927                }
928
929                let matching_fields: BTreeMap<_, _> = config_object
930                    .iter()
931                    .filter_map(|(name, field)| {
932                        let stripped_name = Self::strip_prefix(name, param.name)?;
933                        let idx: usize = stripped_name.parse().ok()?;
934                        Some((idx, field.clone()))
935                    })
936                    .collect();
937                let Some(&last_idx) = matching_fields.keys().next_back() else {
938                    continue; // No matching fields
939                };
940
941                if last_idx != matching_fields.len() - 1 {
942                    tracing::info!(
943                        prefix = prefix.0,
944                        config = ?config_data.metadata.ty,
945                        param = param.rust_field_name,
946                        indexes = ?matching_fields.keys().copied().collect::<Vec<_>>(),
947                        "indexes for array nesting are not sequential"
948                    );
949                    continue;
950                }
951
952                tracing::trace!(
953                    prefix = prefix.0,
954                    config = ?config_data.metadata.ty,
955                    param = param.rust_field_name,
956                    len = matching_fields.len(),
957                    "nesting for array param"
958                );
959
960                let origin = Arc::new(ValueOrigin::Synthetic {
961                    source: config_origin.clone(),
962                    transform: format!("nesting for array param '{}'", param.name),
963                });
964                let array_items = matching_fields.into_values().collect();
965                let val = Self::new(Value::Array(array_items), origin);
966                config_object.insert(param.name.to_owned(), val);
967            }
968        }
969    }
970
971    /// Nests a flat key–value map into a structured object using the provided `schema`.
972    ///
973    /// Has complexity `O(kvs.len() * log(n_params))`, which seems about the best possible option if `kvs` is not presorted.
974    #[tracing::instrument(level = "debug", skip_all)]
975    fn nest_kvs(kvs: Map, schema: &ConfigSchema, source_origin: &Arc<ValueOrigin>) -> Self {
976        let mut dest = Self {
977            inner: Value::Object(Map::new()),
978            origin: source_origin.clone(),
979        };
980
981        for (key, value) in kvs {
982            // Get all params with full paths matching a prefix of `key` split on one of `_`s. E.g.,
983            // for `key = "very_long_prefix_value"`, we'll try "very_long_prefix_value", "very_long_prefix", ..., "very".
984            // If any of these prefixes corresponds to a param, we'll nest the value to align with the param.
985            // For example, if `very.long_prefix.value` is a param, we'll nest the value to `very.long_prefix.value`,
986            // and if `very_long.prefix.value` is a param as well, we'll copy the value to both places.
987            //
988            // For prefixes, we only copy the value if the param supports objects; e.g. if `very_long.prefix` is a param,
989            // then we'll copy the value to `very_long.prefix_value`.
990            let mut key_prefix = key.as_str();
991            while !key_prefix.is_empty() {
992                for (param_path, expecting) in schema.params_with_kv_path(key_prefix) {
993                    let should_copy = key_prefix == key || expecting.contains(BasicTypes::OBJECT);
994                    if should_copy {
995                        tracing::trace!(
996                            param_path = param_path.0,
997                            ?expecting,
998                            key,
999                            key_prefix,
1000                            "copied key–value entry"
1001                        );
1002                        dest.copy_kv_entry(source_origin, param_path, &key, value.clone());
1003                    }
1004                }
1005
1006                key_prefix = match key_prefix.rsplit_once('_') {
1007                    Some((prefix, _)) => prefix,
1008                    None => break,
1009                };
1010            }
1011
1012            // Allow for array params.
1013            let Some((key_prefix, maybe_idx)) = key.rsplit_once('_') else {
1014                continue;
1015            };
1016            if !maybe_idx.bytes().all(|ch| ch.is_ascii_digit()) {
1017                continue;
1018            }
1019            for (param_path, expecting) in schema.params_with_kv_path(key_prefix) {
1020                if expecting.contains(BasicTypes::ARRAY) && !expecting.contains(BasicTypes::OBJECT)
1021                {
1022                    dest.copy_kv_entry(source_origin, param_path, &key, value.clone());
1023                }
1024            }
1025        }
1026        dest
1027    }
1028
1029    fn copy_kv_entry(
1030        &mut self,
1031        source_origin: &Arc<ValueOrigin>,
1032        param_path: Pointer<'_>,
1033        key: &str,
1034        value: WithOrigin,
1035    ) {
1036        // `unwrap()` is safe: params have non-empty paths
1037        let (parent, _) = param_path.split_last().unwrap();
1038        let field_name_start = if parent.0.is_empty() {
1039            parent.0.len()
1040        } else {
1041            parent.0.len() + 1 // skip `_` after the parent
1042        };
1043        let field_name = key[field_name_start..].to_owned();
1044
1045        let origin = Arc::new(ValueOrigin::Synthetic {
1046            source: source_origin.clone(),
1047            transform: format!("nesting kv entries for '{param_path}'"),
1048        });
1049        self.ensure_object(parent, |_| origin.clone())
1050            .insert(field_name, value);
1051    }
1052
1053    /// Deep merge stopped at params (i.e., params are always merged atomically).
1054    fn guided_merge(&mut self, overrides: Self, schema: &ConfigSchema, current_path: Pointer<'_>) {
1055        match (&mut self.inner, overrides.inner) {
1056            (Value::Object(this), Value::Object(other))
1057                if !schema.contains_canonical_param(current_path) =>
1058            {
1059                for (key, value) in other {
1060                    if let Some(existing_value) = this.get_mut(&key) {
1061                        let child_path = current_path.join(&key);
1062                        existing_value.guided_merge(value, schema, Pointer(&child_path));
1063                    } else {
1064                        this.insert(key, value);
1065                    }
1066                }
1067            }
1068            (this, value) => {
1069                *this = value;
1070                self.origin = overrides.origin;
1071            }
1072        }
1073    }
1074}