-
Hi all, I'm having some trouble matching a UTF-8 string that ends in partial bytes (due to the incoming string being split on a number of bytes). use regex::bytes::Regex;
fn main() {
let pattern = r"^(?P<timestamp>.*) (?P<stream>(stdout|stderr)) (?P<multiline_tag>(P|F)) (?P<message>(.*))$";
let pattern = Regex::new(&pattern).expect("Failed to build pattern");
let message = "2021-08-05T17:35:26.640507539Z stdout P metaDescription\":\"Пицца от 245 рублей. Быстрая бесплатная доставка д";
let bytes = message.as_bytes();
println!("{:?}", bytes);
// this is the same bytes with additional two - d0 0a, (208, 10)
let vector: Vec<u8> = vec![
50, 48, 50, 49, 45, 48, 56, 45, 48, 53, 84, 49, 55, 58, 51, 53, 58, 50, 54, 46, 54, 52, 48,
53, 48, 55, 53, 51, 57, 90, 32, 115, 116, 100, 111, 117, 116, 32, 80, 32, 109, 101, 116,
97, 68, 101, 115, 99, 114, 105, 112, 116, 105, 111, 110, 34, 58, 34, 208, 159, 208, 184,
209, 134, 209, 134, 208, 176, 32, 208, 190, 209, 130, 32, 50, 52, 53, 32, 209, 128, 209,
131, 208, 177, 208, 187, 208, 181, 208, 185, 46, 32, 208, 145, 209, 139, 209, 129, 209,
130, 209, 128, 208, 176, 209, 143, 32, 208, 177, 208, 181, 209, 129, 208, 191, 208, 187,
208, 176, 209, 130, 208, 189, 208, 176, 209, 143, 32, 208, 180, 208, 190, 209, 129, 209,
130, 208, 176, 208, 178, 208, 186, 208, 176, 32, 208, 180, 208, 10,
];
let bytes = vector.as_slice();
println!("{:?}", bytes);
let captures = pattern.captures(bytes).expect("no match");
println!("{:?}", captures)
} In this particular case, I'm expecting the string to match given I am using I also tried to simplify the pattern to just I feel like I'm missing something simple. Any help is appreciated! |
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 4 replies
-
You probably missed the important part from the beginning of cited example: The description below explains further:
That is, by default the |
Beta Was this translation helpful? Give feedback.
You probably missed the important part from the beginning of cited example:
(?-u)
The description below explains further:
That is, by default the
bytes::Regex
only allows UTF-8, just like regularRegex
, the difference is that it allows you to opt-out and search for bytes too.