1use std::collections::HashMap;
34use std::fs;
35use std::path::Path;
36
37static BUNDLED_EMOJI_DATA: &str =
40 include_str!("../data/emoji-data.txt");
41
42pub fn bundled_emoji_sequences() -> HashMap<String, String> {
57 parse_emoji_sequences(BUNDLED_EMOJI_DATA)
58}
59
/// Parses emoji data text into a map from emoji string to a hyphenated,
/// lowercase label.
///
/// Every line is trimmed first. Blank lines and lines starting with `#` are
/// ignored. Any other line is expected to look like:
///
/// ```text
/// 1F1E6 1F1FA ; emoji ; L1 ; none ; j # V6.0 (🇦🇺) FLAG FOR AUSTRALIA
/// ```
///
/// * **Key** – the first `;`-separated field of the part before the first
///   `#`. It is a whitespace-separated list of hexadecimal code points which
///   are decoded and concatenated into one `String`.
/// * **Label** – the text following the first `)` of the trailing comment
///   (or the whole comment when it contains no `)`), lowercased, with each
///   run of whitespace replaced by a single `-`. A line without a comment
///   gets an empty label.
///
/// A line is skipped entirely when its code point field is empty or when
/// *any* token in it is not a hexadecimal Unicode scalar value (unparseable
/// text, surrogates, values above `10FFFF`). Decoding is all-or-nothing so a
/// malformed sequence can never be stored under a truncated key.
///
/// When the same emoji occurs on several lines, the last one wins.
pub fn parse_emoji_sequences(
    contents: &str,
) -> HashMap<String, String> {
    let mut map = HashMap::new();

    for raw_line in contents.lines() {
        let line = raw_line.trim();

        // Blank lines and whole-line comments carry no data.
        if line.is_empty() || line.starts_with('#') {
            continue;
        }

        // Everything after the first `#` is the trailing comment.
        let (data_part, comment_part) =
            line.split_once('#').unwrap_or((line, ""));

        // `split` always yields at least one item, so the fallback is only
        // there to avoid an `unwrap`.
        let hex_seq = data_part.split(';').next().unwrap_or("");

        // Collecting into `Option<String>` short-circuits to `None` on the
        // first token that is not a valid scalar value, rejecting the whole
        // line instead of silently dropping the bad code point.
        let decoded: Option<String> = hex_seq
            .split_whitespace()
            .map(|hex| {
                u32::from_str_radix(hex, 16).ok().and_then(char::from_u32)
            })
            .collect();

        let emoji_string = match decoded {
            Some(sequence) if !sequence.is_empty() => sequence,
            _ => continue,
        };

        // The label follows the parenthesised emoji in the comment; without
        // a `)` the whole comment is treated as the label.
        let raw_label = comment_part
            .split_once(')')
            .map_or(comment_part, |(_, after_paren)| after_paren);

        // `split_whitespace` also discards leading/trailing whitespace, so
        // no explicit trimming is needed.
        let short_label = raw_label
            .to_lowercase()
            .split_whitespace()
            .collect::<Vec<_>>()
            .join("-");

        let _ = map.insert(emoji_string, short_label);
    }

    map
}
147
148pub fn load_emoji_sequences<P: AsRef<Path>>(
177 filepath: P,
178) -> Result<HashMap<String, String>, std::io::Error> {
179 let contents = fs::read_to_string(filepath)?;
180 Ok(parse_emoji_sequences(&contents))
181}
182
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file holding exactly `content`.
    fn create_temp_file(content: &str) -> NamedTempFile {
        let mut tmp =
            NamedTempFile::new().expect("Failed to create temporary file");
        tmp.write_all(content.as_bytes())
            .expect("Failed to write to temporary file");
        tmp
    }

    /// Writes `content` to a temporary file and loads it back through
    /// `load_emoji_sequences`, panicking if the load fails.
    fn load_from_text(content: &str) -> HashMap<String, String> {
        let tmp = create_temp_file(content);
        load_emoji_sequences(tmp.path()).unwrap()
    }

    /// Builds the expected map from `(emoji, label)` pairs.
    fn expected_map(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(emoji, label)| (emoji.to_string(), label.to_string()))
            .collect()
    }

    #[test]
    fn test_load_emoji_sequences_basic() {
        let loaded = load_from_text(
            r#"
            26A1 ; emoji ; L1 ; none ; a j # V4.0 (⚡) HIGH VOLTAGE SIGN
            1F600 ; emoji ; L1 ; none ; j # V6.0 (😀) GRINNING FACE
        "#,
        );

        assert_eq!(
            loaded,
            expected_map(&[
                ("⚡", "high-voltage-sign"),
                ("😀", "grinning-face"),
            ])
        );
    }

    #[test]
    fn test_load_emoji_sequences_empty_file() {
        assert!(load_from_text("").is_empty());
    }

    #[test]
    fn test_load_emoji_sequences_with_comments_and_blanks() {
        let loaded = load_from_text(
            r#"
            # This is a comment

            1F44D ; emoji ; L1 ; none ; j # V6.0 (👍) THUMBS UP SIGN

            # Another comment here

"#,
        );

        assert_eq!(loaded, expected_map(&[("👍", "thumbs-up-sign")]));
    }

    #[test]
    fn test_load_emoji_sequences_no_comment_label() {
        let loaded = load_from_text(
            r#"
            1F4AF ; emoji ; L1 ; none ; j # V6.0 (💯) HUNDRED POINTS SYMBOL
            1F602 ; emoji ; L1 ; none ; j
"#,
        );

        // A line without a trailing comment is kept with an empty label.
        assert_eq!(
            loaded,
            expected_map(&[("💯", "hundred-points-symbol"), ("😂", "")])
        );
    }

    #[test]
    fn test_load_emoji_sequences_invalid_hex_code() {
        let loaded = load_from_text(
            r#"
            26A1 ; emoji ; L1 ; none ; a j # V4.0 (⚡) HIGH VOLTAGE SIGN
            INVALID_HEX ; emoji ; L1 ; none ; j # Invalid hex code
"#,
        );

        assert_eq!(loaded, expected_map(&[("⚡", "high-voltage-sign")]));
    }

    #[test]
    fn test_load_emoji_sequences_multi_codepoint() {
        let loaded = load_from_text(
            r#"
            1F1E6 1F1FA ; emoji ; L1 ; none ; j # V6.0 (🇦🇺) FLAG FOR AUSTRALIA
"#,
        );

        assert_eq!(loaded, expected_map(&[("🇦🇺", "flag-for-australia")]));
    }

    #[test]
    fn test_load_emoji_sequences_missing_label() {
        let loaded = load_from_text(
            r#"
            1F44D ; emoji ; L1 ; none ; j # V6.0 (👍) THUMBS UP SIGN
            1F602 ; emoji ; L1 ; none ; j
            1F600 ; emoji ; L1 ; none ; j #
"#,
        );

        // Both a missing comment and an empty comment give an empty label.
        assert_eq!(
            loaded,
            expected_map(&[
                ("👍", "thumbs-up-sign"),
                ("😂", ""),
                ("😀", ""),
            ])
        );
    }

    #[test]
    fn test_load_emoji_sequences_handles_empty_and_whitespace() {
        let loaded = load_from_text(
            r#"

            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS OF JOY

        "#,
        );

        assert_eq!(loaded, expected_map(&[("😂", "face-with-tears-of-joy")]));
    }

    #[test]
    fn test_load_emoji_sequences_handles_trailing_whitespace() {
        let loaded = load_from_text(
            r#"
            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS OF JOY
        "#,
        );

        assert_eq!(loaded, expected_map(&[("😂", "face-with-tears-of-joy")]));
    }

    #[test]
    fn test_load_emoji_sequences_skip_invalid_lines() {
        let loaded = load_from_text(
            r#"
            # Comment line
            ; invalid line ; no hex code ; # Just semicolons
            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS OF JOY
        "#,
        );

        assert_eq!(loaded, expected_map(&[("😂", "face-with-tears-of-joy")]));
    }

    #[test]
    fn test_load_emoji_sequences_split_behavior() {
        let loaded = load_from_text(
            r#"
            26A1;emoji;L1;none;a j# V4.0 (⚡) HIGH VOLTAGE SIGN
            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS OF JOY
            26A1 ; emoji ; L1 ; none ; a j # V4.0 (⚡) HIGH VOLTAGE SIGN
        "#,
        );

        assert_eq!(
            loaded,
            expected_map(&[
                ("⚡", "high-voltage-sign"),
                ("😂", "face-with-tears-of-joy"),
            ])
        );
    }

    #[test]
    fn test_load_emoji_sequences_parenthesis_variations() {
        let loaded = load_from_text(
            r#"
            26A1 ; emoji ; L1 ; none ; a j # (⚡) HIGH VOLTAGE
            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS
            1F603 ; emoji ; L1 ; none ; j # V6.0 (😃) SMILEY FACE
            1F604 ; emoji ; L1 ; none ; j # V6.0 (😄) GRINNING FACE
        "#,
        );

        assert_eq!(
            loaded,
            expected_map(&[
                ("⚡", "high-voltage"),
                ("😂", "face-with-tears"),
                ("😃", "smiley-face"),
                ("😄", "grinning-face"),
            ])
        );
    }

    #[test]
    fn test_load_emoji_sequences_unparseable_sequences() {
        let loaded = load_from_text(
            r#"
            110000 ; emoji ; L1 ; none ; j # Above Unicode range INVALID
            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS OF JOY
            D800 ; emoji ; L1 ; none ; j # Surrogate code point
        "#,
        );

        assert_eq!(loaded, expected_map(&[("😂", "face-with-tears-of-joy")]));
    }

    #[test]
    fn test_load_emoji_sequences_empty_fields() {
        let loaded = load_from_text(
            r#"
            ; ; ; ; ; # Empty fields should be skipped
            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS OF JOY
            #
        "#,
        );

        assert_eq!(loaded, expected_map(&[("😂", "face-with-tears-of-joy")]));
    }

    #[test]
    fn test_load_emoji_sequences_whitespace_variations() {
        let loaded = load_from_text(
            r#"
            1F602;emoji;L1;none;j# V6.0 (😂) FACE WITH TEARS OF JOY
            1F603 ; emoji ; L1 ; none ; j # V6.0 (😃) SMILEY FACE
        "#,
        );

        assert_eq!(
            loaded,
            expected_map(&[
                ("😂", "face-with-tears-of-joy"),
                ("😃", "smiley-face"),
            ])
        );
    }
}