Erste Was ist Ihre Datenbanksortierung? Ich gehe davon aus, dass es SQL_Latin1_General_CP1_CS_AS
oder SQL_Latin1_General_CP1_CI_AS
ist. Wenn ja, dann sollte folgendes funktionieren (noch nicht vollständig getestet).
Es sieht aus wie ein schreiben True Unicode-Sortierer ist ein großes Unternehmen. Ich habe Steuercodes gesehen, die einfacher waren als die Spezifikationen. ;-) Es scheint immer um Lookup-Tabelle (n) und mindestens eine 3-Level-Sortierung zu gehen - mit zu ändernden Zeichen und Kontraktionen.
Ich habe folgendes auf die Latin 1, Latin Extended-A beschränkt und Latin Extended-B Tabellen/Sortierung. Der Algorithmus sollte mit diesen Sets ziemlich gut funktionieren, aber ich habe ihn noch nicht vollständig getestet und nicht richtig für die Änderung der Zeichen berücksichtigt (um Geschwindigkeit und Komplexität zu sparen).
Siehe in action at jsbin.com.
Funktion:
function bIgnoreForPrimarySort (iCharCode)
{
/*--- A bunch of characters get ignored for the primary sort weight.
The most important ones are the hyphen and apostrophe characters.
A bunch of control characters and a couple of odds and ends, make up
the rest.
*/
if (iCharCode < 9) return true;
if (iCharCode >= 14 && iCharCode <= 31) return true;
if (iCharCode >= 127 && iCharCode <= 159) return true;
if (iCharCode == 39 || iCharCode == 45 || iCharCode == 173) return true;
return false;
}
function SortByRoughSQL_Latin1_General_CP1_CS_AS (sA, sB)
{
/*--- This Sorts Latin1 and extended Latin1 unicode with an approximation
of SQL's SQL_Latin1_General_CP1_CS_AS collation.
Certain modifying characters or contractions my be off (not tested), we trade-off
perfect accuracy for speed and relative simplicity.
True unicode sorting is devilishly complex and we're not getting paid enough to
fully implement it in Javascript. ;-)
It looks like a definative sort would require painstaking exegesis of documents
such as: http://unicode.org/reports/tr10/
*/
//--- This is the master lookup table for Latin1 code-points. Here through the extended set \u02AF
//--- Make this static?
var aSortOrder = [
-1, 151, 152, 153, 154, 155, 156, 157, 158, 2, 3, 4, 5, 6, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 0, 7, 8, 9, 10, 11, 12, 210,
13, 14, 15, 41, 16, 211, 17, 18, 65, 69, 71, 74, 76, 77, 80, 81, 82, 83, 19, 20,
42, 43, 44, 21, 22, 214, 257, 266, 284, 308, 347, 352, 376, 387, 419, 427, 438, 459, 466, 486,
529, 534, 538, 559, 576, 595, 636, 641, 647, 650, 661, 23, 24, 25, 26, 27, 28, 213, 255, 265,
283, 307, 346, 350, 374, 385, 418, 426, 436, 458, 464, 485, 528, 533, 536, 558, 575, 594, 635, 640,
646, 648, 660, 29, 30, 31, 32, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
1, 33, 53, 54, 55, 56, 34, 57, 35, 58, 215, 46, 59, 212, 60, 36, 61, 45, 72, 75,
37, 62, 63, 64, 38, 70, 487, 47, 66, 67, 68, 39, 219, 217, 221, 231, 223, 233, 250, 276,
312, 310, 316, 318, 392, 390, 395, 397, 295, 472, 491, 489, 493, 503, 495, 48, 511, 599, 597, 601,
603, 652, 590, 573, 218, 216, 220, 230, 222, 232, 249, 275, 311, 309, 315, 317, 391, 389, 394, 396,
294, 471, 490, 488, 492, 502, 494, 49, 510, 598, 596, 600, 602, 651, 589, 655, 229, 228, 227, 226,
235, 234, 268, 267, 272, 271, 270, 269, 274, 273, 286, 285, 290, 287, 324, 323, 322, 321, 314, 313,
326, 325, 320, 319, 358, 357, 362, 361, 356, 355, 364, 363, 378, 377, 380, 379, 405, 404, 403, 402,
401, 400, 407, 406, 393, 388, 417, 416, 421, 420, 432, 431, 428, 440, 439, 447, 446, 444, 443, 442,
441, 450, 449, 468, 467, 474, 473, 470, 469, 477, 484, 483, 501, 500, 499, 498, 507, 506, 527, 526,
540, 539, 544, 543, 542, 541, 561, 560, 563, 562, 567, 566, 565, 564, 580, 579, 578, 577, 593, 592,
611, 610, 609, 608, 607, 606, 613, 612, 617, 616, 615, 614, 643, 642, 654, 653, 656, 663, 662, 665,
664, 667, 666, 574, 258, 260, 262, 261, 264, 263, 281, 278, 277, 304, 292, 289, 288, 297, 335, 337,
332, 348, 349, 369, 371, 382, 415, 409, 434, 433, 448, 451, 462, 476, 479, 509, 521, 520, 524, 523,
531, 530, 552, 572, 571, 569, 570, 583, 582, 581, 585, 632, 631, 634, 638, 658, 657, 669, 668, 673,
677, 676, 678, 73, 79, 78, 680, 644, 50, 51, 52, 40, 303, 302, 301, 457, 456, 455, 482, 481,
480, 225, 224, 399, 398, 497, 496, 605, 604, 626, 625, 620, 619, 624, 623, 622, 621, 334, 241, 240,
237, 236, 254, 253, 366, 365, 360, 359, 430, 429, 505, 504, 515, 514, 675, 674, 422, 300, 299, 298,
354, 353, 84, 85, 86, 87, 239, 238, 252, 251, 513, 512, 243, 242, 245, 244, 328, 327, 330, 329,
411, 410, 413, 412, 517, 516, 519, 518, 547, 546, 549, 548, 628, 627, 630, 629, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 246, 247, 248, 259, 279, 280, 293, 291,
339, 336, 338, 331, 340, 341, 342, 423, 367, 373, 351, 370, 372, 383, 381, 384, 408, 414, 386, 445,
453, 452, 454, 461, 463, 460, 475, 478, 465, 508, 522, 525, 532, 550, 553, 554, 555, 545, 556, 557,
537, 551, 568, 333, 424, 343, 344, 586, 584, 618, 633, 637, 639, 645, 659, 649, 670, 671, 672, 679,
681, 682, 683, 282, 686, 256, 345, 368, 375, 425, 435, 437, 535, 684, 685, 305, 296, 306, 591, 587,
588, 144, 145, 146, 147, 148, 149, 150
];
var iLenA = sA.length, iLenB = sB.length;
var jA = 0, jB = 0;
var sIgnoreBuff_A = [], sIgnoreBuff_B = [];
function iSortIgnoreBuff()
{
var iIgLenA = sIgnoreBuff_A.length, iIgLenB = sIgnoreBuff_B.length;
var kA = 0, kB = 0;
while (kA < iIgLenA && kB < iIgLenB)
{
var igA = sIgnoreBuff_A [kA++], igB = sIgnoreBuff_B [kB++];
if (aSortOrder[igA] > aSortOrder[igB]) return 1;
if (aSortOrder[igA] < aSortOrder[igB]) return -1;
}
//--- All else equal, longest string loses
if (iIgLenA > iIgLenB) return 1;
if (iIgLenA < iIgLenB) return -1;
return 0;
}
while (jA < iLenA && jB < iLenB)
{
var cA = sA.charCodeAt (jA++);
var cB = sB.charCodeAt (jB++);
if (cA == cB)
{
continue;
}
while (bIgnoreForPrimarySort (cA))
{
sIgnoreBuff_A.push (cA);
if (jA < iLenA)
cA = sA.charCodeAt (jA++);
else
break;
}
while (bIgnoreForPrimarySort (cB))
{
sIgnoreBuff_B.push (cB);
if (jB < iLenB)
cB = sB.charCodeAt (jB++);
else
break;
}
/*--- Have we reached the end of one or both strings, ending on an ignore char?
The strings were equal, up to that point.
If one of the strings is NOT an ignore char, while the other is, it wins.
*/
if (bIgnoreForPrimarySort (cA))
{
if (! bIgnoreForPrimarySort (cB)) return -1;
}
else if (bIgnoreForPrimarySort (cB))
{
return 1;
}
else
{
if (aSortOrder[cA] > aSortOrder[cB])
return 1;
if (aSortOrder[cA] < aSortOrder[cB])
return -1;
//--- We are equal, so far, on the main chars. Where there ignore chars?
var iBuffSort = iSortIgnoreBuff();
if (iBuffSort) return iBuffSort;
//--- Still here? Reset the ignore arrays.
sIgnoreBuff_A = [];
sIgnoreBuff_B = [];
}
} //-- while (jA < iLenA && jB < iLenB)
/*--- We have gone through all of at least one string and they are still both
equal barring ignore chars or unequal lengths.
*/
var iBuffSort = iSortIgnoreBuff();
if (iBuffSort) return iBuffSort;
//--- All else equal, longest string loses
if (iLenA > iLenB) return 1;
if (iLenA < iLenB) return -1;
return 0;
} //-- function SortByRoughSQL_Latin1_General_CP1_CS_AS
Test:
var aPhrases = [
'Grails Found',
'--Exhibit Visitors',
'-Exhibit Visitors',
'Exhibit Visitors',
'Calls Received',
'Ëxhibit Visitors',
'Brochures distributed',
'exhibit visitors',
'bags of Garbage',
'^&$Grails Found',
'?test'
];
aPhrases.sort (SortByRoughSQL_Latin1_General_CP1_CS_AS);
console.log (aPhrases.join ('\n'));
Ergebnisse:
?test
^&$Grails Found
bags of Garbage
Brochures distributed
Calls Received
exhibit visitors
Exhibit Visitors
-Exhibit Visitors
--Exhibit Visitors
Ëxhibit Visitors
Grails Found
Also, aus dieser Frage, möchten Sie die JavaScript jetzt sortieren Unicode 'A' vor Unicode' -a'? –
@Brock - richtig, obwohl genauer gesagt, ich möchte einen Javascript Sortieralgorithmus, der das auf der Serverseite (ich stelle mir vor, es gibt mehr als "-" Zeichen) – DanP