clu_middleware_tron/
parser.rs

1/*
2 *  @Author: José Sánchez-Gallego (gallegoj@uw.edu)
3 *  @Date: 2025-11-22
4 *  @Filename: parser.rs
5 *  @License: BSD 3-clause (http://www.opensource.org/licenses/BSD-3-Clause)
6 */
7
8use regex::bytes::Regex;
9use serde_json::Value;
10use std::{collections::BTreeMap, str::FromStr, string::FromUtf8Error};
11
12/// Represents a parsed reply from a Tron-style bytes string.
13#[derive(Debug)]
14pub struct Reply {
15    /// The user ID. This is the internal actor ID
16    ///   for the TCP client that sent the message. 0 for broadcast messages.
17    pub user_id: u16,
18    /// The command ID associated with the reply.
19    pub command_id: u32,
20    /// The reply code as a char.
21    pub code: char,
22    /// A map of keyword-value pairs parsed from the reply.
23    pub keywords: BTreeMap<String, serde_json::Value>,
24}
25
26/// Adds a key and its associated values to the `keywords` field of a [Reply] struct.
27///
28/// # Arguments
29/// * `reply` - The Reply struct to which the keyword will be added.
30/// * `key` - The key as a mutable byte vector.
31/// * `raw_values` - A mutable vector of byte arrays representing the raw values of the keyword.
32/// * `has_equal` - A boolean indicating whether the keyword had an equal sign (i.e., had a value).
33///   If false, the keyword is treated as having a Null value.
34///
35pub fn add_to_reply_keywords(
36    reply: &mut Reply,
37    key: &mut Vec<u8>,
38    raw_values: &mut Vec<Vec<u8>>,
39    has_equal: bool,
40) -> Result<(), FromUtf8Error> {
41    // If the key is empty, do nothing.
42    if key.is_empty() {
43        return Ok(());
44    }
45
46    // Convert key to string.
47    let key_string = String::from_utf8(key.clone())?;
48
49    // If there are no values or if the keyword was of the form "key;" (no equal sign), set to Null.
50    // Otherwise, parse each raw value into a serde_json::Value.
51    if raw_values.is_empty() || !has_equal {
52        reply.keywords.insert(key_string, Value::Null);
53    } else {
54        let mut values: Vec<Value> = Vec::new();
55        for raw_value in raw_values.iter() {
56            let raw_str = String::from_utf8(raw_value.clone())?;
57            let value = if raw_str.eq_ignore_ascii_case("None") {
58                Value::Null
59            } else if raw_str.eq_ignore_ascii_case("T") {
60                Value::Bool(true)
61            } else if raw_str.eq_ignore_ascii_case("F") {
62                Value::Bool(false)
63            } else if let Ok(int_val) = i64::from_str(&raw_str) {
64                Value::Number(int_val.into())
65            } else if let Ok(float_val) = f64::from_str(&raw_str) {
66                Value::Number(serde_json::Number::from_f64(float_val).unwrap())
67            } else {
68                Value::String(raw_str)
69            };
70            values.push(value);
71        }
72
73        // Insert the keyword-value pair into the reply. If there is only one value,
74        // insert it directly; otherwise, insert the array of values.
75        reply.keywords.insert(
76            key_string,
77            if values.len() == 1 {
78                values.remove(0)
79            } else {
80                Value::Array(values)
81            },
82        );
83    }
84
85    // Clear temporary storage.
86    key.clear();
87    raw_values.clear();
88
89    Ok(())
90}
91
92/// Processes the keywords section of a reply line and fills the `keyword` field in the [Reply] struct.
93///
94/// # Arguments
95/// * `keywords` - A byte slice containing the keywords section of the reply.
96/// * `reply` - A mutable reference to the Reply struct to be populated.
97///
98pub fn process_keywords(keywords: &[u8], reply: &mut Reply) -> Result<(), FromUtf8Error> {
99    // Track if we are inside quotes or parsing a value (as opposed to a key).
100    let mut in_double_quotes = false;
101    let mut in_single_quotes = false;
102    let mut in_value = false;
103    let mut has_equal = false;
104
105    // Temporary values.
106    let mut key: Vec<u8> = Vec::new();
107    let mut raw_values: Vec<Vec<u8>> = Vec::new(); // In case the value is a list.
108    let mut raw_value: Vec<u8> = Vec::new();
109
110    // Iterate over each character in the keywords byte slice and populate the key-value pairs.
111    // In particular we need to pay attention to double quotes, semicolons (which, if outside
112    // a quote indicate the keyword is over), commas (which separate elements in a list),
113    // equal signs (separate keys and values, but we could have a key without a value in which case
114    // there won't be an equal), and spaces (which are ignored except if inside a quote).
115    for char in keywords {
116        match char {
117            b'=' => {
118                if !in_double_quotes && !in_single_quotes && !in_value {
119                    in_value = true;
120                    has_equal = true;
121                } else if in_value {
122                    raw_value.push(*char);
123                }
124            }
125            b'"' => {
126                if !in_single_quotes {
127                    in_double_quotes = !in_double_quotes;
128                } else if in_value {
129                    raw_value.push(*char);
130                }
131            }
132            b'\'' => {
133                if !in_double_quotes {
134                    in_single_quotes = !in_single_quotes;
135                } else if in_value {
136                    raw_value.push(*char);
137                }
138            }
139            b';' => {
140                if !in_double_quotes && !in_single_quotes && !key.is_empty() {
141                    // End of keyword. Add the current value to the list of raw values and
142                    // process the keyword. Reset temporary variables.
143                    raw_values.push(raw_value.clone());
144                    add_to_reply_keywords(reply, &mut key, &mut raw_values, has_equal)?;
145                    in_value = false;
146                    has_equal = false;
147                    raw_value.clear();
148                } else {
149                    raw_value.push(*char);
150                }
151            }
152            b' ' => {
153                if in_double_quotes || in_single_quotes {
154                    raw_value.push(*char);
155                }
156            }
157            b',' => {
158                if in_double_quotes || in_single_quotes {
159                    raw_value.push(*char);
160                } else if in_value {
161                    raw_values.push(raw_value.clone());
162                    raw_value.clear();
163                }
164            }
165            _ => {
166                if in_value {
167                    raw_value.push(*char);
168                } else {
169                    key.push(*char);
170                }
171            }
172        }
173    }
174
175    // Handle the final keyword if any.
176    if !key.is_empty() {
177        raw_values.push(raw_value.clone());
178        add_to_reply_keywords(reply, &mut key, &mut raw_values, has_equal)?;
179    }
180
181    Ok(())
182}
183
184/// Parses a raw reply byte slice into a [Reply] struct.
185///
186/// # Arguments
187/// * `raw_reply` - A byte slice containing the raw reply line.
188///
189pub fn parse_reply(raw_reply: &[u8]) -> Option<Reply> {
190    // Regular expression to parse the main components of the reply line.
191    let line_regex = Regex::new(
192        r"^(?<user_id>\d+)\s+(?<commandId>\d+)\s+(?<code>[diwfe:DIWFE>])\s*(?<keywords>.+)?$",
193    )
194    .unwrap();
195
196    let caps = line_regex.captures(raw_reply)?;
197
198    // Extract user_id and command ID, and initialize the Reply struct with an empty keywords map.
199    let user_id = String::from_utf8(caps.name("user_id")?.as_bytes().to_vec()).ok()?;
200    let command_id = String::from_utf8(caps.name("commandId")?.as_bytes().to_vec()).ok()?;
201
202    let mut reply = Reply {
203        user_id: user_id.parse::<u16>().unwrap(),
204        command_id: command_id.parse::<u32>().unwrap(),
205        code: caps.name("code")?.as_bytes()[0] as char,
206        keywords: BTreeMap::new(),
207    };
208
209    // If there are no keywords, return the reply as is.
210    let keywords_raw = match caps.name("keywords") {
211        None => return Some(reply),
212        Some(kws) => kws.as_bytes(),
213    };
214
215    // Process keywords.
216    if let Err(err) = process_keywords(keywords_raw, &mut reply) {
217        log::error!("Failed to parse keywords: {}", err);
218        return None;
219    }
220
221    Some(reply)
222}
223
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses one line exercising every supported value form and checks the header
    /// fields and each keyword against its expected JSON value.
    #[test]
    fn test_parse_reply() {
        let raw_line: &[u8] = b"1 20 : key1=value1; key2=42; key3=3.14; key4=T; key5=None; key6=1,F,three,\"a string; with; semicolons\"; key7=\"A string with spaces\";key8 ; key9=\"A string; with; semicolons\"; key10=\"A sentence with 'a quotation'\"";
        let parsed = parse_reply(raw_line).unwrap();

        // Header fields.
        assert_eq!(parsed.user_id, 1);
        assert_eq!(parsed.command_id, 20);
        assert_eq!(parsed.code, ':');

        assert!(!parsed.keywords.is_empty());

        // Shorthand for building string values.
        let text = |s: &str| Value::String(s.to_string());

        // Keyword name paired with the value it must have been parsed into.
        let expected = [
            ("key1", text("value1")),
            ("key2", Value::Number(42.into())),
            (
                "key3",
                Value::Number(serde_json::Number::from_f64(3.14).unwrap()),
            ),
            ("key4", Value::Bool(true)),
            ("key5", Value::Null),
            (
                "key6",
                Value::Array(vec![
                    Value::Number(1.into()),
                    Value::Bool(false),
                    text("three"),
                    text("a string; with; semicolons"),
                ]),
            ),
            ("key7", text("A string with spaces")),
            ("key8", Value::Null),
            ("key9", text("A string; with; semicolons")),
            ("key10", text("A sentence with 'a quotation'")),
        ];

        for (name, value) in &expected {
            assert!(parsed.keywords.contains_key(*name));
            assert_eq!(parsed.keywords.get(*name).unwrap(), value);
        }
    }
}