Skip to content

Double pinyin query #2427

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 22 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions Flow.Launcher.Infrastructure/IAlphabet.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
namespace Flow.Launcher.Infrastructure
{
/// <summary>
/// Translate a language to English letters using a given rule.
/// </summary>
public interface IAlphabet
{
/// <summary>
/// Translate a string to English letters, using a given rule.
/// </summary>
/// <param name="stringToTranslate">String to translate.</param>
/// <returns>
/// A tuple of the translated text (<c>translation</c>) and a <see cref="TranslationMapping"/> (<c>map</c>).
/// NOTE(review): presumably <c>map</c> relates positions in the translation back to the original string,
/// and may be <c>null</c> when nothing was translated - confirm against the implementations.
/// </returns>
public (string translation, TranslationMapping map) Translate(string stringToTranslate);

/// <summary>
/// Determine if a string should be translated to English letters with this Alphabet.
/// </summary>
/// <param name="stringToTranslate">String to translate.</param>
/// <returns><c>true</c> if the string should be translated with this alphabet; otherwise <c>false</c>.</returns>
public bool ShouldTranslate(string stringToTranslate);
}
}
290 changes: 144 additions & 146 deletions Flow.Launcher.Infrastructure/PinyinAlphabet.cs
Original file line number Diff line number Diff line change
@@ -1,209 +1,207 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Collections.ObjectModel;
using System.Text;
using JetBrains.Annotations;
using CommunityToolkit.Mvvm.DependencyInjection;
using Flow.Launcher.Infrastructure.UserSettings;
using ToolGood.Words.Pinyin;
using CommunityToolkit.Mvvm.DependencyInjection;

namespace Flow.Launcher.Infrastructure
{
/// <summary>
/// <see cref="IAlphabet"/> implementation that converts Chinese characters to pinyin,
/// either full pinyin or two-letter "double pinyin" codes depending on
/// <c>Settings.UseDoublePinyin</c>.
/// </summary>
public class PinyinAlphabet : IAlphabet
{
    // Full-pinyin and double-pinyin results are cached separately so that toggling
    // UseDoublePinyin at runtime never serves a translation built for the other scheme.
    private readonly ConcurrentDictionary<string, (string translation, TranslationMapping map)> _pinyinCache =
        new();

    private readonly ConcurrentDictionary<string, (string translation, TranslationMapping map)> _doublePinyinCache =
        new();

    private readonly Settings _settings;

    /// <summary>
    /// Creates the alphabet, resolving <c>Settings</c> from the default IoC container.
    /// </summary>
    public PinyinAlphabet()
    {
        _settings = Ioc.Default.GetRequiredService<Settings>();
    }

    /// <summary>
    /// Determine whether a string should be translated with this alphabet.
    /// </summary>
    /// <param name="stringToTranslate">String to inspect.</param>
    /// <returns>
    /// <c>false</c> if the string contains Chinese characters. Otherwise <c>true</c>, except that
    /// in double-pinyin mode the string must also have an even length.
    /// </returns>
    public bool ShouldTranslate(string stringToTranslate)
    {
        if (WordsHelper.HasChinese(stringToTranslate))
        {
            return false;
        }

        // Each double-pinyin syllable is exactly two letters, so an odd-length string is rejected.
        return !_settings.UseDoublePinyin || stringToTranslate.Length % 2 == 0;
    }

    /// <summary>
    /// Translate <paramref name="content"/> to pinyin, reusing a cached result when available.
    /// </summary>
    /// <param name="content">String to translate.</param>
    /// <returns>
    /// The translation and its <see cref="TranslationMapping"/>. When pinyin is disabled in the
    /// settings, or the content has no Chinese characters, the input is returned unchanged with a
    /// <c>null</c> map.
    /// </returns>
    public (string translation, TranslationMapping map) Translate(string content)
    {
        if (!_settings.ShouldUsePinyin)
        {
            return (content, null);
        }

        // Read the mode once so the cache lookup and the cache fill agree even if the
        // setting changes while this call is running.
        var useDoublePinyin = _settings.UseDoublePinyin;
        var cache = useDoublePinyin ? _doublePinyinCache : _pinyinCache;

        return cache.TryGetValue(content, out var cached)
            ? cached
            : BuildCacheFromContent(content, useDoublePinyin, cache);
    }

    /// <summary>
    /// Build the translation for <paramref name="content"/> and store it in <paramref name="cache"/>.
    /// </summary>
    /// <param name="content">String to translate.</param>
    /// <param name="useDoublePinyin">Whether each syllable is converted to its double-pinyin code.</param>
    /// <param name="cache">Cache that receives the result (only when the content has Chinese characters).</param>
    /// <returns>The translation and its mapping, or the unchanged input with a <c>null</c> map.</returns>
    private static (string translation, TranslationMapping map) BuildCacheFromContent(
        string content,
        bool useDoublePinyin,
        ConcurrentDictionary<string, (string translation, TranslationMapping map)> cache)
    {
        if (!WordsHelper.HasChinese(content))
        {
            return (content, null);
        }

        // NOTE(review): the loop indexes content[i] with the pinyin list index, so it assumes
        // GetPinyinList yields one entry per UTF-16 code unit of content - confirm.
        var resultList = WordsHelper.GetPinyinList(content);

        var resultBuilder = new StringBuilder();
        var map = new TranslationMapping();

        // True while the previous character was a translated Chinese character; used to insert
        // a single separating space before the following untranslated text.
        var pre = false;

        for (var i = 0; i < resultList.Length; i++)
        {
            // Code units in U+3400..U+9FD5 are treated as Chinese characters to translate.
            if (content[i] >= 0x3400 && content[i] <= 0x9FD5)
            {
                string dp = useDoublePinyin ? ToDoublePin(resultList[i]) : resultList[i];

                // The mapped length is one longer than the syllable because of the space
                // that is appended in front of it.
                map.AddNewIndex(i, resultBuilder.Length, dp.Length + 1);
                resultBuilder.Append(' ');
                resultBuilder.Append(dp);
                pre = true;
            }
            else
            {
                if (pre)
                {
                    pre = false;
                    resultBuilder.Append(' ');
                }

                resultBuilder.Append(resultList[i]);
            }
        }

        map.endConstruct();

        var key = resultBuilder.ToString();

        return cache[content] = (key, map);
    }

    #region Double Pinyin

    // Lookups are case-insensitive: the keys of the first two tables are capitalised while the
    // finals are lower-case, so a syllable matches whichever casing the pinyin source produces.

    // Whole syllables that start with a vowel and have a dedicated two-letter code.
    private static readonly ReadOnlyDictionary<string, string> SpecialSyllables = new(
        new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            {"A", "aa"},
            {"Ai", "ai"},
            {"An", "an"},
            {"Ang", "ah"},
            {"Ao", "ao"},
            {"E", "ee"},
            {"Ei", "ei"},
            {"En", "en"},
            {"Er", "er"},
            {"O", "oo"},
            {"Ou", "ou"}
        });

    // Two-letter initials and the single key that replaces each of them.
    private static readonly ReadOnlyDictionary<string, string> DoubleLetterInitials = new(
        new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            {"Ch", "i"},
            {"Sh", "u"},
            {"Zh", "v"}
        });

    // Finals (the part of a syllable after the initial) and the single key for each.
    private static readonly ReadOnlyDictionary<string, string> Finals = new(
        new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            {"ua", "x"},
            {"ei", "w"},
            {"e", "e"},
            {"ou", "z"},
            {"iu", "q"},
            {"ve", "t"},
            {"ue", "t"},
            {"u", "u"},
            {"i", "i"},
            {"o", "o"},
            {"uo", "o"},
            {"ie", "p"},
            {"a", "a"},
            {"ong", "s"},
            {"iong", "s"},
            {"ai", "d"},
            {"ing", "k"},
            {"uai", "k"},
            {"ang", "h"},
            {"uan", "r"},
            {"an", "j"},
            {"en", "f"},
            {"ia", "x"},
            {"iang", "l"},
            {"uang", "l"},
            {"eng", "g"},
            {"in", "b"},
            {"ao", "c"},
            {"v", "v"},
            {"ui", "v"},
            {"un", "y"},
            {"iao", "n"},
            {"ian", "m"}
        });

    /// <summary>
    /// Convert one full-pinyin syllable to its double-pinyin code.
    /// </summary>
    /// <param name="fullPinyin">A single pinyin syllable, e.g. <c>Zhong</c>.</param>
    /// <returns>
    /// The code for the syllable: a special-case code for the listed vowel-initial syllables,
    /// otherwise the initial key followed by the final key. Parts that are not in the tables
    /// are copied through unchanged. An empty string is returned for null or empty input.
    /// </returns>
    private static string ToDoublePin(string fullPinyin)
    {
        if (string.IsNullOrEmpty(fullPinyin))
        {
            return string.Empty;
        }

        // Vowel-initial syllables (a, o, e, ...) are looked up as a whole.
        if (SpecialSyllables.TryGetValue(fullPinyin, out var wholeSyllable))
        {
            return wholeSyllable;
        }

        var doublePin = new StringBuilder(2);
        string tail;

        if (fullPinyin.Length >= 2 && DoubleLetterInitials.TryGetValue(fullPinyin[..2], out var initialKey))
        {
            // Two-letter initial (zh, ch, sh): emit its mapped key, not the letters themselves.
            doublePin.Append(initialKey);
            tail = fullPinyin[2..];
        }
        else
        {
            // Single-letter initial: the initial is its own key.
            doublePin.Append(fullPinyin[0]);
            tail = fullPinyin[1..];
        }

        doublePin.Append(Finals.TryGetValue(tail, out var finalKey) ? finalKey : tail);

        return doublePin.ToString();
    }

    #endregion
}
}
Loading
Loading