2016-08-23 2 views

Antwort

0

Mit einer skalaren JS-UDF (Standard-SQL) – das wäre meine Wahl

-- Collapses consecutive repeats in a '->'-delimited pathway string.
-- Example: 'Item1->Item2->Item2' becomes 'Item1->Item2(x2)'.
-- A run of length 1 is emitted without a counter.
-- Returns NULL when pathway is NULL.
CREATE TEMPORARY FUNCTION collapse_repeated(pathway STRING)
RETURNS STRING
LANGUAGE js AS """
  // A SQL NULL arrives as JavaScript null; return NULL instead of throwing in split().
  if (pathway === null || pathway === undefined) {
    return null;
  }
  // All state is declared with var: no implicit globals, and the reserved
  // word 'short' is no longer used as an identifier.
  var items = pathway.split('->');
  var runs = [];
  var runStart = 0;
  for (var i = 1; i <= items.length; i++) {
    // A run ends at the end of the list or where the item changes.
    if (i === items.length || items[i] !== items[runStart]) {
      var runLength = i - runStart;
      runs.push(runLength > 1
        ? items[runStart] + '(x' + runLength + ')'
        : items[runStart]);
      runStart = i;
    }
  }
  // join() also keeps the separator next to empty items (e.g. a leading '->').
  return runs.join('->');
""";

WITH YourTable AS (
    SELECT "Item1->Item2->Item2->Item2->Item3->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item2->Item3->Item3->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item4" AS pathway 
    UNION ALL SELECT "Item1->Item2->Item2->Item3->Item1->Item1->Item1->Item2->Item3->Item3->Item2->Item2->Item2->Item1->Item4" AS pathway 
    UNION ALL SELECT "Item1->Item1->Item1" AS pathway 
    UNION ALL SELECT "Item1->Item2->Item2" AS pathway 
) 
SELECT collapse_repeated(pathway) AS shorten_pathway, pathway 
FROM YourTable 

Hinweis: Dasselbe JS lässt sich leicht in eine JS-UDF für Legacy-SQL „übersetzen“.

Window Functions (Legacy-SQL)

SELECT GROUP_CONCAT_UNQUOTED(IF(repeats=1, item, CONCAT(item, "(x", STRING(repeats), ")")), "->"), pathway 
FROM (
    SELECT MIN(pos) AS ord, MIN(item) AS item, COUNT(1) AS repeats, pathway 
    FROM (
    SELECT item, pos, IFNULL(grp, 0)AS grp, pathway FROM (
     SELECT item, pos, SUM(change) OVER(PARTITION BY pathway ORDER BY pos ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS grp, pathway 
     FROM (
     SELECT item, pos, IF(item=next_item, 0, 1) AS change, pathway FROM (
      SELECT item, pos, LEAD(item) OVER(PARTITION BY pathway ORDER BY pos) AS next_item, pathway 
      FROM (
      SELECT item, POSITION(item) AS pos, pathway FROM (
       SELECT SPLIT(pathway, "->") AS item, pathway FROM 
       (SELECT "Item1->Item2->Item2->Item2->Item3->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item2->Item3->Item3->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item4" AS pathway), 
       (SELECT "Item1->Item2->Item2->Item3->Item1->Item1->Item1->Item2->Item3->Item3->Item2->Item2->Item2->Item1->Item4" AS pathway), 
       (SELECT "Item1->Item1->Item1" AS pathway), 
       (SELECT "Item1->Item2->Item2" AS pathway)    
      ) 
     ) 
     ) 
    ) 
    ) 
) 
    GROUP BY grp, pathway 
    ORDER BY ord 
) 
GROUP BY pathway 
1

Ich wollte mich davon überzeugen, dass dies allein mit Array-Verarbeitung möglich ist (unter Verwendung von Standard-SQL), und bin auf eine Lösung gekommen. Eine alternative Möglichkeit, das Problem zu lösen, wäre die Verwendung von Analysefunktionen, mit denen sich Änderungen der Elemente entlang des Pfads erkennen lassen.

-- Renders an array of run starts as a collapsed pathway string.
--
-- parts_and_offsets: one STRUCT per run, holding the run's item (part) and the
--   offset of its first occurrence (off), followed by one trailing sentinel
--   element whose off marks the end of the last run. The sentinel itself is
--   never rendered (see the WHERE clause).
-- Returns the runs joined with "->"; a run longer than one item gets a
--   "(xN)" suffix. Returns NULL when there is nothing to render.
CREATE TEMPORARY FUNCTION PartsToString(
    parts_and_offsets ARRAY<STRUCT<part STRING, off INT64>>) AS ((
  SELECT
    STRING_AGG(
      CONCAT(
        run.part,
        -- Run length = start offset of the next element minus this run's start.
        IF(parts_and_offsets[OFFSET(idx + 1)].off - run.off = 1,
           "",
           CONCAT("(x", CAST(parts_and_offsets[OFFSET(idx + 1)].off - run.off AS STRING), ")"))),
      -- Explicit delimiter (the default would be ",") and explicit ordering
      -- (aggregation order is otherwise not guaranteed).
      "->" ORDER BY idx)
  FROM UNNEST(parts_and_offsets) AS run WITH OFFSET AS idx
  -- Skip the trailing sentinel: it has no following element to measure against.
  WHERE idx + 1 < ARRAY_LENGTH(parts_and_offsets)
));

-- Splits a "->"-delimited pathway into its runs of consecutive equal items.
--
-- pathway: the delimited string to analyse.
-- Returns an array with one STRUCT(part, off) per run, where off is the
--   position of the run's first item in the split pathway, ordered by off,
--   followed by a sentinel STRUCT("", <total number of items>) so that a
--   consumer can derive the length of the last run.
CREATE TEMPORARY FUNCTION PathwayToParts(pathway STRING) AS ((
  SELECT
    ARRAY_CONCAT(
      -- ORDER BY makes the element order deterministic; without it the
      -- order of ARRAY_AGG output is not guaranteed.
      ARRAY_AGG(STRUCT(part, off) ORDER BY off),
      [STRUCT("" AS part, ARRAY_LENGTH(ANY_VALUE(parts)) AS off)]
    ) AS parts_and_offsets
  FROM (SELECT SPLIT(pathway, "->") AS parts) AS split_pathway
  CROSS JOIN UNNEST(split_pathway.parts) AS part WITH OFFSET AS off
  -- Keep only the first item of each run: the very first item, or any item
  -- that differs from its predecessor.
  WHERE off = 0 OR part != parts[OFFSET(off - 1)]
));

WITH YourTable AS (
    SELECT "Item1->Item2->Item2->Item2->Item3->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item2->Item3->Item3->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item2->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item1->Item4" AS pathway 
    UNION ALL SELECT "Item1->Item2->Item2->Item3->Item1->Item1->Item1->Item2->Item3->Item3->Item2->Item2->Item2->Item1->Item4" AS pathway 
    UNION ALL SELECT "Item1->Item1->Item1" AS pathway 
    UNION ALL SELECT "Item1->Item2->Item2" AS pathway 
    UNION ALL SELECT "Item1->Item1->Item2" AS pathway 
    UNION ALL SELECT "Item1->Item2->Item3" AS pathway 
) 
SELECT PartsToString(PathwayToParts(pathway)) AS parts_string 
FROM YourTable; 
+0

Ja, ich weiß, das ist dein Lieblingsgebiet :o), also habe ich es dir überlassen. Ich halte die JS-UDF-Version nach wie vor für die lesbarste und sauberste, aber trotzdem +1 für deine Version. Übrigens stammt die Frage von meinen internen Benutzern – ich hoffe also, dass sie auch diese Option ausprobieren werden –

Verwandte Themen