Research:Rolling surviving new active editor
- = 5 edits
- = 30 days
SET @n = 5; /* edits threshold */
SET @u = 30; /* activity unit in days */
SET @T = "20140102"; /* February 1st, 2014 before midnight */
/* Create a temp table of last month's new active editors */
CREATE TEMPORARY TABLE staging.last_new_actives
SELECT
user_id,
user_name,
user_registration
FROM
(
/* Get revisions to content pages that are still visible */
SELECT
user_id,
user_name,
user_registration,
SUM(rev_id IS NOT NULL) AS revisions
FROM user
INNER JOIN logging ON /* Filter users not created manually */
log_user = user_id AND
log_type = "newusers" AND
log_action = "create"
LEFT JOIN revision ON
rev_user = user_id
WHERE
user_registration BETWEEN
DATE_FORMAT(DATE_SUB(@T, INTERVAL @u*2 DAY), "%Y%m%d%H%i%S") AND
DATE_FORMAT(DATE_SUB(@T, INTERVAL @u DAY), "%Y%m%d%H%i%S") AND
rev_timestamp BETWEEN
DATE_FORMAT(DATE_SUB(@T, INTERVAL @u*2 DAY), "%Y%m%d%H%i%S") AND
DATE_FORMAT(DATE_SUB(@T, INTERVAL @u DAY), "%Y%m%d%H%i%S")
GROUP BY 1,2,3
UNION ALL
/* Get revisions to content pages that have been archived */
SELECT
user_id,
user_name,
user_registration,
SUM(ar_id IS NOT NULL) AS revisions /* Note that ar_rev_id is sometimes set to NULL :( */
FROM user
INNER JOIN logging ON /* Filter users not created manually */
log_user = user_id AND
log_type = "newusers" AND
log_action = "create"
LEFT JOIN archive ON
ar_user = user_id
WHERE
user_registration BETWEEN
DATE_FORMAT(DATE_SUB(@T, INTERVAL @u*2 DAY), "%Y%m%d%H%i%S") AND
DATE_FORMAT(DATE_SUB(@T, INTERVAL @u DAY), "%Y%m%d%H%i%S") AND
ar_timestamp BETWEEN
DATE_FORMAT(DATE_SUB(@T, INTERVAL @u*2 DAY), "%Y%m%d%H%i%S") AND
DATE_FORMAT(DATE_SUB(@T, INTERVAL @u DAY), "%Y%m%d%H%i%S")
GROUP BY 1,2,3
) AS last_newcomer_activity
GROUP BY 1,2,3
HAVING SUM(revisions) >= @n;
CREATE TEMPORARY TABLE staging.last_new_actives2 SELECT * FROM staging.last_new_actives;
SELECT
user_id,
user_name,
user_registration
FROM (
/* Get revisions to content pages that are still visible */
SELECT
user_id,
user_name,
user_registration,
SUM(rev_id IS NOT NULL) AS revisions
FROM staging.last_new_actives
LEFT JOIN revision ON
rev_user = user_id
WHERE
rev_timestamp BETWEEN DATE_FORMAT(DATE_SUB(@T, INTERVAL @u DAY), "%Y%m%d%H%i%S") AND @T
GROUP BY 1,2,3
UNION ALL
/* Get revisions to content pages that have been archived */
SELECT
user_id,
user_name,
user_registration,
SUM(ar_id IS NOT NULL) AS revisions /* Note that ar_rev_id is sometimes set to NULL :( */
FROM staging.last_new_actives2
LEFT JOIN archive ON
ar_user = user_id
WHERE
ar_timestamp BETWEEN DATE_FORMAT(DATE_SUB(@T, INTERVAL @u DAY), "%Y%m%d%H%i%S") AND @T
GROUP BY 1,2,3
) AS surviving_new_activity
WHERE user_id NOT IN (SELECT ug_user FROM user_groups WHERE ug_group = "bot")
GROUP BY 1,2,3
HAVING SUM(revisions) >= @n;
Rolling surviving new active editors are non-bot editors who registered recently and saved a minimum threshold of edits in the previous time period and continue to be active in the current time period. Specifically, editors who registered between and days and saved at least edits in both time periods are classified.
Discussion
[edit]The n edits threshold
[edit]The edits necessary to cross this threshold, the fewer editors will meet the criteria. Historically, "active editors" were considered to be editors who made 5 edits to specific types of pages over the course of a calendar month. So for historical purposes, we recommend .
The u activity unit
[edit]The wider this unit is set, the more casually editors will be considered "active". Historically, "active editors" were considered during calendar months. So for historical purposes, we recommend .
Time lag
[edit]This metric has a built-in time lag of for the time period in question. However, new users who registered up to days ago may be included.
Bot filtering
[edit]Bots are filtered using the bot flag method.