关键路径转化


  • 需求

    在一条指定的业务流程中,各个步骤的完成人数及相对上一个步骤的百分比

    【Hadoop离线基础总结】关键路径转化率分析(漏斗模型)-LMLPHP
  • 模型设计

    定义好业务流程中的页面标识
    Step1、  /item
    Step2、 /category
    Step3、 /index
    Step4、 /order
    CREATE TABLE dw_oute_numbs AS
    SELECT 'step1' AS step,COUNT(DISTINCT remote_addr) AS numbs FROM ods_click_pageviews
    WHERE datestr = '20130920'
    AND request LIKE '/item%'
    UNION ALL
    SELECT 'step2' AS step,COUNT(DISTINCT remote_addr) AS numbs FROM ods_click_pageviews
    WHERE datestr = '20130920'
    AND request LIKE '/category%'
    UNION ALL
    SELECT 'step3' AS step,COUNT(DISTINCT remote_addr) AS numbs FROM ods_click_pageviews
    WHERE datestr = '20130920'
    AND request LIKE '/order%'
    UNION ALL
    SELECT 'step4' AS step,COUNT(DISTINCT remote_addr) AS numbs FROM ods_click_pageviews
    WHERE datestr = '20130920'
    AND request LIKE '/index%';
    +---------------------+----------------------+--+
    | dw_oute_numbs.step | dw_oute_numbs.numbs |
    +---------------------+----------------------+--+
    | step1 | 1029 |
    | step2 | 1029 |
    | step3 | 1028 |
    | step4 | 1018 |
    +---------------------+----------------------+--+

  • 求取每一步相对于上一步的转化率

    SELECT t.rnnumbs/t.rrnumbs AS rate
    FROM (
    SELECT
    rn.step AS rnstep,
    rn.numbs AS rnnumbs,
    rr.step AS rrstep,
    rr.numbs AS rrnumbs
    FROM dw_oute_numbs rn
    INNER JOIN dw_oute_numbs rr
    WHERE rr.step = 'step1' ) t;
  • 求取每一步相对于第一步的转化率

    SELECT t.rrnumbs/t.rnnumbs AS rate
    FROM (
    SELECT
    rn.step AS rnstep,
    rn.numbs AS rnnumbs,
    rr.step AS rrstep,
    rr.numbs AS rrnumbs
    FROM dw_oute_numbs rn
    INNER JOIN dw_oute_numbs rr
    WHERE CAST(substr(rn.step,5,1) AS INT) = CAST(SUBSTR(rr.step,5,1) AS INT) - 1 ) t;
  • 也可以将两个合并到一起

    SELECT abs.step,abs.numbs,abs.rate AS abs_ratio,
    rel.rate AS leakage_rate
    FROM (
    SELECT tmp.rnstep AS step,tmp.rnnumbs AS numbs, tmp.rnnumbs/tmp.rrnumbs AS rate
    FROM (
    SELECT
    rn.step AS rnstep,
    rn.numbs AS rnnumbs,
    rr.step AS rrstep,
    rr.numbs AS rrnumbs
    FROM dw_oute_numbs rn
    INNER JOIN dw_oute_numbs rr ) tmp
    WHERE tmp.rrstep = 'step1' ) abs
    LEFT OUTER JOIN (
    SELECT tmp.rrstep AS step,tmp.rrnumbs/tmp.rnnumbs AS rate
    FROM (
    SELECT
    rn.step AS rnstep,
    rn.numbs AS rnnumbs,
    rr.step AS rrstep,
    rr.numbs AS rrnumbs
    FROM dw_oute_numbs rn
    INNER JOIN dw_oute_numbs rr ) tmp
    WHERE CAST(substr(tmp.rnstep,5,1) AS INT) = CAST(SUBSTR(tmp.rrstep,5,1) AS INT) - 1 ) rel
    ON abs.step=rel.step;
05-18 11:11