From 334a9174b9e50baf5e0d5824bb14b0e5f1bb8fb3 Mon Sep 17 00:00:00 2001 From: Jessica Rodriguez Date: Sat, 30 Nov 2024 11:37:51 -0500 Subject: [PATCH 1/2] fix versionsort chunk split on non-ASCII numerics --- src/sort.rs | 11 ++++++- .../source/versionsort_non_ascii_numerics.rs | 33 +++++++++++++++++++ .../target/versionsort_non_ascii_numerics.rs | 33 +++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 tests/source/versionsort_non_ascii_numerics.rs create mode 100644 tests/target/versionsort_non_ascii_numerics.rs diff --git a/src/sort.rs b/src/sort.rs index 670f664a119..0cdd937b4f5 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -65,7 +65,7 @@ impl<'a> VersionChunkIter<'a> { break; } - if !c.is_numeric() { + if !c.is_ascii_digit() { continue; } @@ -283,6 +283,10 @@ mod test { source: "009" }) ); + + // '๙' = U+0E59 THAI DIGIT NINE, General Category Nd + let mut iter = VersionChunkIter::new("x๙v"); + assert_eq!(iter.next(), Some(VersionChunk::Str("x๙v"))); } #[test] @@ -297,6 +301,11 @@ mod test { input.sort_by(|a, b| version_sort(a, b)); assert_eq!(input, expected); + let mut input = vec!["x๙x", "xéx", "x0x"]; + let expected = vec!["x0x", "xéx", "x๙x"]; + input.sort_by(|a, b| version_sort(a, b)); + assert_eq!(input, expected); + let mut input = vec!["applesauce", "apple"]; let expected = vec!["apple", "applesauce"]; input.sort_by(|a, b| version_sort(a, b)); diff --git a/tests/source/versionsort_non_ascii_numerics.rs b/tests/source/versionsort_non_ascii_numerics.rs new file mode 100644 index 00000000000..a12a0361789 --- /dev/null +++ b/tests/source/versionsort_non_ascii_numerics.rs @@ -0,0 +1,33 @@ +use std::cmp::Ordering; +use print๙msg::print as first_print; +use print0msg::print as second_print; +use printémsg::print as third_print; + +fn main() { + first_print(); + second_print(); + third_print(); + + assert_eq!("print๙msg".cmp("printémsg"), Ordering::Greater); +} + +/// '๙' = 0E59;THAI DIGIT NINE;Nd; +mod print๙msg { + pub fn print() { + println!("Non-ASCII Decimal_Number") + } +} + +/// '0' = 0030;DIGIT ZERO;Nd; +mod print0msg { + pub fn print() { + println!("ASCII Decimal_Number") + } +} + +/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; +mod printémsg { + pub fn print() { + println!("Lowercase_Letter") + } +} \ No newline at end of file diff --git a/tests/target/versionsort_non_ascii_numerics.rs b/tests/target/versionsort_non_ascii_numerics.rs new file mode 100644 index 00000000000..f027ea93869 --- /dev/null +++ b/tests/target/versionsort_non_ascii_numerics.rs @@ -0,0 +1,33 @@ +use print0msg::print as second_print; +use printémsg::print as third_print; +use print๙msg::print as first_print; +use std::cmp::Ordering; + +fn main() { + first_print(); + second_print(); + third_print(); + + assert_eq!("print๙msg".cmp("printémsg"), Ordering::Greater); +} + +/// '๙' = 0E59;THAI DIGIT NINE;Nd; +mod print๙msg { + pub fn print() { + println!("Non-ASCII Decimal_Number") + } +} + +/// '0' = 0030;DIGIT ZERO;Nd; +mod print0msg { + pub fn print() { + println!("ASCII Decimal_Number") + } +} + +/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; +mod printémsg { + pub fn print() { + println!("Lowercase_Letter") + } +} From e87447dba47ff661747399fbd503aab7abd2afef Mon Sep 17 00:00:00 2001 From: Jessica Rodriguez Date: Sat, 30 Nov 2024 17:34:13 -0500 Subject: [PATCH 2/2] update non-ascii digit import sorting tests --- ...on_ascii_numerics_import_asciibetically.rs | 17 ++++++++++ .../non_ascii_numerics_import_versionsort.rs | 23 +++++++++++++ .../source/versionsort_non_ascii_numerics.rs | 33 ------------------- ...on_ascii_numerics_import_asciibetically.rs | 17 ++++++++++ .../non_ascii_numerics_import_versionsort.rs | 23 +++++++++++++ .../target/versionsort_non_ascii_numerics.rs | 33 ------------------- 6 files changed, 80 insertions(+), 66 deletions(-) create mode 100644 tests/source/non_ascii_numerics_import_asciibetically.rs create mode 100644 tests/source/non_ascii_numerics_import_versionsort.rs delete mode 100644 tests/source/versionsort_non_ascii_numerics.rs create mode 100644 tests/target/non_ascii_numerics_import_asciibetically.rs create mode 100644 tests/target/non_ascii_numerics_import_versionsort.rs delete mode 100644 tests/target/versionsort_non_ascii_numerics.rs diff --git a/tests/source/non_ascii_numerics_import_asciibetically.rs b/tests/source/non_ascii_numerics_import_asciibetically.rs new file mode 100644 index 00000000000..bada80dc2fb --- /dev/null +++ b/tests/source/non_ascii_numerics_import_asciibetically.rs @@ -0,0 +1,17 @@ +// rustfmt-style_edition: 2015 + +// ascii-betically sorted +pub use print๙msg; +pub use print0msg; +pub use printémsg; + +fn main() {} + +/// '๙' = 0E59;THAI DIGIT NINE;Nd; (Non-ASCII Decimal_Number, sorts third) +mod print๙msg {} + +/// '0' = 0030;DIGIT ZERO;Nd; (ASCII Decimal_Number, sorts first) +mod print0msg {} + +/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; (Lowercase_Letter, sorts second) +mod printémsg {} \ No newline at end of file diff --git a/tests/source/non_ascii_numerics_import_versionsort.rs b/tests/source/non_ascii_numerics_import_versionsort.rs new file mode 100644 index 00000000000..e82c4a12aa8 --- /dev/null +++ b/tests/source/non_ascii_numerics_import_versionsort.rs @@ -0,0 +1,23 @@ +// rustfmt-style_edition: 2024 + +// versionsorted +pub use print๙msg; +pub use print0msg; +pub use printémsg; + +fn main() {} + +/// '๙' = 0E59;THAI DIGIT NINE;Nd; (Non-ASCII Decimal_Number, one string chunk) +/// +/// U+0E59 > U+00E9, sorts third +mod print๙msg {} + +/// '0' = 0030;DIGIT ZERO;Nd; (ASCII Decimal_Number, splits into 3 chunks ("print",0,"msg")) +/// +/// shortest chunk "print", sorts first +mod print0msg {} + +/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; (Lowercase_Letter, one string chunk) +/// +/// U+00E9 < U+0E59, sorts second +mod printémsg {} \ No newline at end of file diff --git a/tests/source/versionsort_non_ascii_numerics.rs b/tests/source/versionsort_non_ascii_numerics.rs deleted file mode 100644 index a12a0361789..00000000000 --- a/tests/source/versionsort_non_ascii_numerics.rs +++ /dev/null @@ -1,33 +0,0 @@ -use std::cmp::Ordering; -use print๙msg::print as first_print; -use print0msg::print as second_print; -use printémsg::print as third_print; - -fn main() { - first_print(); - second_print(); - third_print(); - - assert_eq!("print๙msg".cmp("printémsg"), Ordering::Greater); -} - -/// '๙' = 0E59;THAI DIGIT NINE;Nd; -mod print๙msg { - pub fn print() { - println!("Non-ASCII Decimal_Number") - } -} - -/// '0' = 0030;DIGIT ZERO;Nd; -mod print0msg { - pub fn print() { - println!("ASCII Decimal_Number") - } -} - -/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; -mod printémsg { - pub fn print() { - println!("Lowercase_Letter") - } -} \ No newline at end of file diff --git a/tests/target/non_ascii_numerics_import_asciibetically.rs b/tests/target/non_ascii_numerics_import_asciibetically.rs new file mode 100644 index 00000000000..1e5718a01ff --- /dev/null +++ b/tests/target/non_ascii_numerics_import_asciibetically.rs @@ -0,0 +1,17 @@ +// rustfmt-style_edition: 2015 + +// ascii-betically sorted +pub use print0msg; +pub use printémsg; +pub use print๙msg; + +fn main() {} + +/// '๙' = 0E59;THAI DIGIT NINE;Nd; (Non-ASCII Decimal_Number, sorts third) +mod print๙msg {} + +/// '0' = 0030;DIGIT ZERO;Nd; (ASCII Decimal_Number, sorts first) +mod print0msg {} + +/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; (Lowercase_Letter, sorts second) +mod printémsg {} diff --git a/tests/target/non_ascii_numerics_import_versionsort.rs b/tests/target/non_ascii_numerics_import_versionsort.rs new file mode 100644 index 00000000000..88b5dff8177 --- /dev/null +++ b/tests/target/non_ascii_numerics_import_versionsort.rs @@ -0,0 +1,23 @@ +// rustfmt-style_edition: 2024 + +// versionsorted +pub use print0msg; +pub use printémsg; +pub use print๙msg; + +fn main() {} + +/// '๙' = 0E59;THAI DIGIT NINE;Nd; (Non-ASCII Decimal_Number, one string chunk) +/// +/// U+0E59 > U+00E9, sorts third +mod print๙msg {} + +/// '0' = 0030;DIGIT ZERO;Nd; (ASCII Decimal_Number, splits into 3 chunks ("print",0,"msg")) +/// +/// shortest chunk "print", sorts first +mod print0msg {} + +/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; (Lowercase_Letter, one string chunk) +/// +/// U+00E9 < U+0E59, sorts second +mod printémsg {} diff --git a/tests/target/versionsort_non_ascii_numerics.rs b/tests/target/versionsort_non_ascii_numerics.rs deleted file mode 100644 index f027ea93869..00000000000 --- a/tests/target/versionsort_non_ascii_numerics.rs +++ /dev/null @@ -1,33 +0,0 @@ -use print0msg::print as second_print; -use printémsg::print as third_print; -use print๙msg::print as first_print; -use std::cmp::Ordering; - -fn main() { - first_print(); - second_print(); - third_print(); - - assert_eq!("print๙msg".cmp("printémsg"), Ordering::Greater); -} - -/// '๙' = 0E59;THAI DIGIT NINE;Nd; -mod print๙msg { - pub fn print() { - println!("Non-ASCII Decimal_Number") - } -} - -/// '0' = 0030;DIGIT ZERO;Nd; -mod print0msg { - pub fn print() { - println!("ASCII Decimal_Number") - } -} - -/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; -mod printémsg { - pub fn print() { - println!("Lowercase_Letter") - } -}