Thanks for the answers above, and special thanks to those who helped in the comment section; it helped a lot.
I forgot to post the solution I came up with at the time, so here is the Rust implementation. I also wrote a Next.js implementation. My idea was to extend it to other similar languages, but I got busy with other projects. :(
use unicode_segmentation::UnicodeSegmentation;
/// Iterator state for walking a string akshara by akshara.
///
/// Wraps the grapheme-cluster iterator of the source text in a `Peekable`
/// so the cluster after the current one can be inspected before it is
/// consumed.
struct DevanagariSplitter<'text> {
    /// Grapheme clusters of the source text that have not been consumed yet.
    graphemes: std::iter::Peekable<unicode_segmentation::Graphemes<'text>>,
}
/// Yields the aksharas of the underlying text one at a time as owned `String`s.
///
/// Each item starts out as a single grapheme cluster. While the item ends in
/// a virama (U+094D) and the following cluster begins with an alphabetic
/// character, that following cluster is appended, so a conjunct stays in one
/// piece no matter how many consonants it chains together.
impl<'a> Iterator for DevanagariSplitter<'a> {
    type Item = String;

    /// Returns the next akshara, or `None` once every grapheme cluster has
    /// been consumed.
    fn next(&mut self) -> Option<Self::Item> {
        /// DEVANAGARI SIGN VIRAMA: marks a consonant that joins the next one.
        const VIRAMA: char = '\u{094D}';

        // An exhausted grapheme iterator ends the iteration.
        let mut akshara = self.graphemes.next()?.to_string();

        // Loop rather than check once: in a conjunct of three or more
        // consonants the cluster that was just appended may itself end in a
        // virama, and the akshara is only complete when it no longer does.
        while akshara.ends_with(VIRAMA) {
            // Consume the following cluster only if it starts with a letter;
            // otherwise leave it in place for the next call.
            match self
                .graphemes
                .next_if(|next| next.starts_with(char::is_alphabetic))
            {
                Some(next) => akshara.push_str(next),
                // Trailing virama at end of input or before a non-letter:
                // nothing to join with, so emit the akshara as it stands.
                None => break,
            }
        }

        Some(akshara)
    }
}
/// Creates a [`DevanagariSplitter`] over `s`.
///
/// The text is segmented with `UnicodeSegmentation::graphemes(true)` and the
/// resulting iterator is made peekable so the splitter can look one cluster
/// ahead. The returned splitter borrows from `s`; the `'_` in the return type
/// makes that borrow visible in the signature.
fn aksharas(s: &str) -> DevanagariSplitter<'_> {
    DevanagariSplitter {
        graphemes: s.graphemes(true).peekable(),
    }
}
/// Demo entry point: splits a sample Devanagari phrase into aksharas and
/// prints them on one line, separated by single spaces.
fn main() {
    // Sample input in Devanagari script, made of several space-separated words.
    let input = "हिन्दी मुख्यमंत्री हिमंत";

    // The splitter yields the spaces between words as items of their own.
    // Drop those whitespace-only items so that joining with " " gives exactly
    // one separator between aksharas and no trailing space.
    let parts: Vec<String> = aksharas(input)
        .filter(|akshara| !akshara.trim().is_empty())
        .collect();

    println!("{}", parts.join(" "));
}
// The output of this code is:
// "हि न्दी मु ख्य मं त्री हि मं त"