-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.

-- The C-language test helpers below are loaded from :MODULE_PATHNAME and are
-- registered while connected as the superuser role.
\c :TEST_DBNAME :ROLE_SUPERUSER

-- Runs the test scheduler and waits until it has finished.
CREATE OR REPLACE FUNCTION ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(
    timeout INT DEFAULT -1,
    mock_start_time INT DEFAULT 0
)
RETURNS VOID
LANGUAGE C VOLATILE
AS :MODULE_PATHNAME;

-- Creates / destroys the background-worker test parameters.
CREATE OR REPLACE FUNCTION ts_bgw_params_create()
RETURNS VOID
LANGUAGE C VOLATILE
AS :MODULE_PATHNAME;

CREATE OR REPLACE FUNCTION ts_bgw_params_destroy()
RETURNS VOID
LANGUAGE C VOLATILE
AS :MODULE_PATHNAME;

-- Resets the mock time used by the test scheduler to set_time.
CREATE OR REPLACE FUNCTION ts_bgw_params_reset_time(
    set_time BIGINT DEFAULT 0,
    wait BOOLEAN DEFAULT false
)
RETURNS VOID
LANGUAGE C VOLATILE
AS :MODULE_PATHNAME;

-- Dedicated login role for this test. Its sessions use the UTC timezone and a
-- mocked TimescaleDB current timestamp of 2025-03-11 00:00:00 UTC.
CREATE ROLE test_cagg_refresh_policy_user LOGIN;
ALTER ROLE test_cagg_refresh_policy_user
    SET timezone = 'UTC';
ALTER ROLE test_cagg_refresh_policy_user
    SET timescaledb.current_timestamp_mock = '2025-03-11 00:00:00+00';
GRANT ALL PRIVILEGES ON SCHEMA public TO test_cagg_refresh_policy_user;

-- Reconnect as the new role; the rest of the test runs as this user.
\c :TEST_DBNAME test_cagg_refresh_policy_user

-- Log table inspected by this test after every scheduler run.
CREATE TABLE public.bgw_log (
    msg_no           INTEGER,
    mock_time        BIGINT,
    application_name TEXT,
    msg              TEXT
);

-- Deterministic view over bgw_log:
--   * numbers following "Wait until", "started at" and "execution time" are
--     replaced by "(RANDOM)" so the output does not depend on wall-clock values;
--   * the first 'background worker "<name>"' occurrence is replaced by
--     "connection";
--   * rows are ordered by mock time, application name (C collation), msg_no.
CREATE VIEW sorted_bgw_log AS
SELECT
    l.msg_no,
    l.mock_time,
    l.application_name,
    regexp_replace(
        regexp_replace(
            l.msg,
            '(Wait until|started at|execution time) [0-9]+(\.[0-9]+)?',
            '\1 (RANDOM)',
            'g'),
        'background worker "[^"]+"',
        'connection') AS msg
FROM
    bgw_log AS l
ORDER BY
    l.mock_time,
    l.application_name COLLATE "C",
    l.msg_no;

-- Single-column table seeded with handle 0, followed by the call that creates
-- the background-worker test parameters.
-- NOTE(review): presumably ts_bgw_params_create() stores its handle in this
-- table - confirm against the C helper.
CREATE TABLE public.bgw_dsm_handle_store (
    handle BIGINT
);
INSERT INTO public.bgw_dsm_handle_store (handle)
VALUES (0);
SELECT ts_bgw_params_create();

-- Source hypertable, range-partitioned on "time".
CREATE TABLE conditions (
    time        TIMESTAMPTZ NOT NULL,
    device_id   INT,
    temperature NUMERIC
);

SELECT FROM create_hypertable('conditions', by_range('time'));

-- Seed hourly readings (temperature 10) for devices 1..5, from
-- 2025-02-05 00:00 UTC to 2025-03-05 00:00 UTC (both endpoints included).
INSERT INTO conditions (time, device_id, temperature)
SELECT
    ts.t,
    dev.d,
    10
FROM
    generate_series(
        '2025-02-05 00:00:00+00',
        '2025-03-05 00:00:00+00',
        '1 hour'::interval) AS ts(t)
CROSS JOIN
    generate_series(1, 5) AS dev(d);

-- Daily continuous aggregate over conditions, grouped by day bucket and
-- device. Created WITH NO DATA, so it is empty until a refresh runs.
CREATE MATERIALIZED VIEW conditions_by_day
WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS
SELECT
    time_bucket('1 day', time),
    device_id,
    count(*),
    min(temperature),
    max(temperature),
    avg(temperature),
    sum(temperature)
FROM
    conditions
GROUP BY
    1, 2
WITH NO DATA;

-- Hourly refresh policy with both offsets NULL and buckets_per_batch = 10.
-- The returned job id is stored in the psql variable :job_id.
SELECT
    add_continuous_aggregate_policy(
        'conditions_by_day',
        start_offset => NULL,
        end_offset => NULL,
        schedule_interval => INTERVAL '1 h',
        buckets_per_batch => 10
    ) AS job_id \gset

-- Store the job's initial config in the psql variable :config; the later
-- alter_job() calls build their modified configs from this value.
SELECT
    config
FROM
    timescaledb_information.jobs
WHERE
    job_id = :'job_id' \gset

-- Reset the mock time to 0 and run the test scheduler (timeout argument 25),
-- then print the normalized worker log and the materialization ranges catalog.
SELECT ts_bgw_params_reset_time(0, true);
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
SELECT * FROM sorted_bgw_log;
SELECT * FROM _timescaledb_catalog.continuous_aggs_materialization_ranges;

-- Reference continuous aggregate with the same definition as
-- conditions_by_day. It is refreshed manually over the whole range and used
-- as the expected result for the policy-driven refreshes.
CREATE MATERIALIZED VIEW conditions_by_day_manual_refresh
WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS
SELECT
    time_bucket('1 day', time),
    device_id,
    count(*),
    min(temperature),
    max(temperature),
    avg(temperature),
    sum(temperature)
FROM
    conditions
GROUP BY
    1, 2
WITH NO DATA;

-- NULL window bounds: refresh without a lower or upper limit.
CALL refresh_continuous_aggregate('conditions_by_day_manual_refresh', NULL, NULL);

-- Row counts of the policy-refreshed and the manually refreshed aggregate.
SELECT count(*) FROM conditions_by_day;
SELECT count(*) FROM conditions_by_day_manual_refresh;

-- Should have no differences.
-- The comparison is symmetric: a row that exists in only one of the two
-- continuous aggregates (in either direction) makes has_diff true.
SELECT
    count(*) > 0 AS has_diff
FROM
    (
        (SELECT * FROM conditions_by_day_manual_refresh
         EXCEPT
         SELECT * FROM conditions_by_day)
        UNION ALL
        (SELECT * FROM conditions_by_day
         EXCEPT
         SELECT * FROM conditions_by_day_manual_refresh)
    ) AS diff;

-- Start from an empty log and an empty policy-refreshed aggregate.
TRUNCATE bgw_log, conditions_by_day;

-- Set max_batches_per_execution = 2 on top of the initial config held in the
-- psql variable :config.
SELECT
    config
FROM
    alter_job(
        :'job_id',
        config => jsonb_set(:'config', '{max_batches_per_execution}', '2')
    );

-- advance time by 1h so that job runs one more time
-- (3600 s * 1000000: the mock time argument is presumably in microseconds)
SELECT ts_bgw_params_reset_time(extract(epoch from interval '1 hour')::bigint * 1000000, true);

SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
SELECT * FROM sorted_bgw_log;
SELECT * FROM _timescaledb_catalog.continuous_aggs_materialization_ranges;

-- Row counts of the policy-refreshed and the manually refreshed aggregate.
SELECT count(*) FROM conditions_by_day;
SELECT count(*) FROM conditions_by_day_manual_refresh;

-- Should have differences: has_diff is true when at least one row of the
-- manually refreshed aggregate is missing from conditions_by_day.
SELECT
    EXISTS (
        SELECT * FROM conditions_by_day_manual_refresh
        EXCEPT
        SELECT * FROM conditions_by_day
    ) AS has_diff;

-- advance time by 2h so that job runs one more time
-- (max_batches_per_execution is still 2 from the preceding alter_job call)
SELECT ts_bgw_params_reset_time(extract(epoch from interval '2 hour')::bigint * 1000000, true);

-- Run the test scheduler, then print the normalized worker log and the
-- materialization ranges catalog.
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
SELECT * FROM sorted_bgw_log;
SELECT * FROM _timescaledb_catalog.continuous_aggs_materialization_ranges;

-- Should have no differences.
-- The comparison is symmetric: a row that exists in only one of the two
-- continuous aggregates (in either direction) makes has_diff true.
SELECT
    count(*) > 0 AS has_diff
FROM
    (
        (SELECT * FROM conditions_by_day_manual_refresh
         EXCEPT
         SELECT * FROM conditions_by_day)
        UNION ALL
        (SELECT * FROM conditions_by_day
         EXCEPT
         SELECT * FROM conditions_by_day_manual_refresh)
    ) AS diff;

-- Set max_batches_per_execution to 10
-- (again derived from the initial config held in the psql variable :config)
SELECT
    config
FROM
    alter_job(
        :'job_id',
        config => jsonb_set(:'config', '{max_batches_per_execution}', '10')
    );

TRUNCATE bgw_log;

-- Insert data into the past
-- (hourly readings for devices 1..5, 2020-02-05 through 2020-03-05)
INSERT INTO conditions
SELECT
    t, d, 10
FROM
    generate_series(
        '2020-02-05 00:00:00+00',
        '2020-03-05 00:00:00+00',
        '1 hour'::interval) AS t,
    generate_series(1,5) AS d;

-- advance time by 3h so that job runs one more time
SELECT ts_bgw_params_reset_time(extract(epoch from interval '3 hour')::bigint * 1000000, true);

-- Should process all four batches in the past
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
SELECT * FROM sorted_bgw_log;
SELECT * FROM _timescaledb_catalog.continuous_aggs_materialization_ranges;

-- Counts before the manual refresh: only the policy job has seen the
-- backfilled rows at this point.
SELECT count(*) FROM conditions_by_day;
SELECT count(*) FROM conditions_by_day_manual_refresh;

-- Bring the reference aggregate up to date with the backfilled rows.
CALL refresh_continuous_aggregate('conditions_by_day_manual_refresh', NULL, NULL);

-- Counts after the manual refresh.
SELECT count(*) FROM conditions_by_day;
SELECT count(*) FROM conditions_by_day_manual_refresh;

-- Should have no differences.
-- The comparison is symmetric: a row that exists in only one of the two
-- continuous aggregates (in either direction) makes has_diff true.
SELECT
    count(*) > 0 AS has_diff
FROM
    (
        (SELECT * FROM conditions_by_day_manual_refresh
         EXCEPT
         SELECT * FROM conditions_by_day)
        UNION ALL
        (SELECT * FROM conditions_by_day
         EXCEPT
         SELECT * FROM conditions_by_day_manual_refresh)
    ) AS diff;

-- Check invalid configurations
-- Both alter_job calls below are expected to fail, so psql is told to continue
-- after errors and to print them with default verbosity.
\set ON_ERROR_STOP 0
\set VERBOSITY default
-- Negative max_batches_per_execution
SELECT
    config
FROM
    alter_job(
        :'job_id',
        config => jsonb_set(:'config', '{max_batches_per_execution}', '-1')
    );
-- Negative buckets_per_batch
SELECT
    config
FROM
    alter_job(
        :'job_id',
        config => jsonb_set(:'config', '{buckets_per_batch}', '-1')
    );
-- Back to terse error output and stop-on-error for the rest of the test.
\set VERBOSITY terse
\set ON_ERROR_STOP 1

-- Truncate all data from the original hypertable
-- (and clear the worker log so only the next run is shown)
TRUNCATE bgw_log, conditions;

-- advance time by 4h so that job runs one more time
SELECT ts_bgw_params_reset_time(extract(epoch from interval '4 hour')::bigint * 1000000, true);

-- Should fallback to single batch processing because there's no data to be refreshed on the original hypertable
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
SELECT * FROM sorted_bgw_log;
SELECT * FROM _timescaledb_catalog.continuous_aggs_materialization_ranges;

-- Should return zero rows
SELECT count(*) FROM conditions_by_day;

-- 1 day of data
-- (hourly readings for devices 1..5 from 2020-02-05 00:00 to 2020-02-06 00:00,
-- both endpoints included)
INSERT INTO conditions
SELECT
    t, d, 10
FROM
    generate_series(
        '2020-02-05 00:00:00+00',
        '2020-02-06 00:00:00+00',
        '1 hour'::interval) AS t,
    generate_series(1,5) AS d;

TRUNCATE bgw_log;

-- advance time by 5h so that job runs one more time
SELECT ts_bgw_params_reset_time(extract(epoch from interval '5 hour')::bigint * 1000000, true);

-- Should fallback to single batch processing because the refresh size is too small
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
SELECT * FROM sorted_bgw_log;
SELECT * FROM _timescaledb_catalog.continuous_aggs_materialization_ranges;

-- Should return 10 rows: presumably 2 daily buckets (2020-02-05 and
-- 2020-02-06, since the series end is inclusive) x 5 devices. The refreshed
-- range is smaller than one batch (bucket width `1 day`, buckets per batch `10`).
SELECT count(*) FROM conditions_by_day;

-- Start over with an empty aggregate, hypertable and worker log.
TRUNCATE conditions_by_day, conditions, bgw_log;

-- Less than 1 day of data (smaller than the bucket width)
-- A single reading for device 1.
INSERT INTO conditions
VALUES ('2020-02-05 00:00:00+00', 1, 10);

-- advance time by 6h so that job runs one more time
SELECT ts_bgw_params_reset_time(extract(epoch from interval '6 hour')::bigint * 1000000, true);

-- Should fallback to single batch processing because the refresh size is too small
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
SELECT * FROM sorted_bgw_log;
SELECT * FROM _timescaledb_catalog.continuous_aggs_materialization_ranges;

-- Should return 1 row
SELECT count(*) FROM conditions_by_day;

-- Remove the open-ended policy; the next policies use a 15-day start offset.
SELECT delete_job(:job_id);

-- Batched policy (5 buckets per batch) on conditions_by_day; the new job id
-- replaces the psql variable :job_id.
SELECT
    add_continuous_aggregate_policy(
        'conditions_by_day',
        start_offset => INTERVAL '15 days',
        end_offset => NULL,
        schedule_interval => INTERVAL '1 h',
        buckets_per_batch => 5,
        refresh_newest_first => true -- explicitly set to true to test the default behavior
    ) AS job_id \gset

-- Non-batched policy with the same window on the reference aggregate; its job
-- id is stored in the psql variable :job_id_manual.
SELECT
    add_continuous_aggregate_policy(
        'conditions_by_day_manual_refresh',
        start_offset => INTERVAL '15 days',
        end_offset => NULL,
        schedule_interval => INTERVAL '1 h',
        buckets_per_batch => 0 -- 0 means no batching, so it will refresh all buckets in one go
    ) AS job_id_manual \gset

-- Empty the log, both aggregates and the hypertable before loading new data.
TRUNCATE bgw_log, conditions_by_day, conditions_by_day_manual_refresh, conditions;

-- Hourly readings for devices 1..5 covering the 30 days up to
-- 2025-03-11 00:00 UTC (the mocked current timestamp of the test role).
INSERT INTO conditions
SELECT
    t, d, 10
FROM
    generate_series(
        '2025-03-11 00:00:00+00'::timestamptz - INTERVAL '30 days',
        '2025-03-11 00:00:00+00'::timestamptz,
        '1 hour'::interval) AS t,
    generate_series(1,5) AS d;

-- Reset the mock time to 0 and let the scheduler run both policy jobs.
SELECT ts_bgw_params_reset_time(0, true);
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
SELECT * FROM sorted_bgw_log;
SELECT * FROM _timescaledb_catalog.continuous_aggs_materialization_ranges;

-- Both continuous aggregates should have the same data
SELECT count(*) FROM conditions_by_day;
SELECT count(*) FROM conditions_by_day_manual_refresh;

-- Should have no differences.
-- The comparison is symmetric: a row that exists in only one of the two
-- continuous aggregates (in either direction) makes has_diff true.
SELECT
    count(*) > 0 AS has_diff
FROM
    (
        (SELECT * FROM conditions_by_day_manual_refresh
         EXCEPT
         SELECT * FROM conditions_by_day)
        UNION ALL
        (SELECT * FROM conditions_by_day
         EXCEPT
         SELECT * FROM conditions_by_day_manual_refresh)
    ) AS diff;

-- Testing with explicit refresh_newest_first = false (from oldest to newest)
-- Both existing policy jobs are removed first; conditions_by_day_manual_refresh
-- keeps the data from the previous run and serves as the reference.
SELECT delete_job(:job_id);
SELECT delete_job(:job_id_manual);

-- Same window and batch size as before, but with refresh_newest_first = false.
SELECT
    add_continuous_aggregate_policy(
        'conditions_by_day',
        start_offset => INTERVAL '15 days',
        end_offset => NULL,
        schedule_interval => INTERVAL '1 h',
        buckets_per_batch => 5,
        refresh_newest_first => false
    ) AS job_id \gset

-- Show the stored job config.
SELECT
    config
FROM
    timescaledb_information.jobs
WHERE
    job_id = :'job_id';

-- Empty the log and the policy-refreshed aggregate before the run.
TRUNCATE bgw_log, conditions_by_day;

SELECT ts_bgw_params_reset_time(0, true);
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
SELECT * FROM sorted_bgw_log;
SELECT * FROM _timescaledb_catalog.continuous_aggs_materialization_ranges;

-- Both continuous aggregates should have the same data
SELECT count(*) FROM conditions_by_day;
SELECT count(*) FROM conditions_by_day_manual_refresh;

-- Should have no differences.
-- The comparison is symmetric: a row that exists in only one of the two
-- continuous aggregates (in either direction) makes has_diff true.
SELECT
    count(*) > 0 AS has_diff
FROM
    (
        (SELECT * FROM conditions_by_day_manual_refresh
         EXCEPT
         SELECT * FROM conditions_by_day)
        UNION ALL
        (SELECT * FROM conditions_by_day
         EXCEPT
         SELECT * FROM conditions_by_day_manual_refresh)
    ) AS diff;


-- Tests with variable-sized buckets (monthly continuous aggregate)
SELECT delete_job(:job_id);
TRUNCATE conditions;

-- Hourly readings for devices 1..5 from 2025-01-01 through 2025-10-08.
INSERT INTO conditions
SELECT
    t, d, 10
FROM
    generate_series(
        '2025-01-01 00:00:00+00',
        '2025-10-08 00:00:00+00',
        '1 hour'::interval) AS t,
    generate_series(1,5) AS d;

-- Monthly continuous aggregate with the same columns as conditions_by_day.
CREATE MATERIALIZED VIEW conditions_by_month
WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS
SELECT
    time_bucket('1 month', time),
    device_id,
    count(*),
    min(temperature),
    max(temperature),
    avg(temperature),
    sum(temperature)
FROM
    conditions
GROUP BY
    1, 2
WITH NO DATA;

-- Daily policy refreshing from 600 days back up to 7 days back, oldest first.
-- buckets_per_batch is not passed, so the policy's default applies.
SELECT
    add_continuous_aggregate_policy(
        'conditions_by_month',
        start_offset => INTERVAL '600 days',
        end_offset => INTERVAL '7 days',
        schedule_interval => INTERVAL '1 day',
        refresh_newest_first => false
    ) AS job_id \gset

-- Show the stored job config.
SELECT
    config
FROM
    timescaledb_information.jobs
WHERE
    job_id = :'job_id';

-- NOTE(review): this truncates conditions_by_day although this section tests
-- conditions_by_month (which is still empty here) - possibly a copy/paste
-- leftover; confirm.
TRUNCATE bgw_log, conditions_by_day;

SELECT ts_bgw_params_reset_time(0, true);
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
SELECT * FROM sorted_bgw_log;
SELECT * FROM _timescaledb_catalog.continuous_aggs_materialization_ranges;

-- Remove the monthly policy so it does not run in the following test.
SELECT delete_job(:job_id);
------------------------------------------------------------------------------------------
-- Test that batched refresh with variable-length buckets doesn't leave remainders
------------------------------------------------------------------------------------------
CREATE TABLE test_data (
    time TIMESTAMPTZ NOT NULL,
    value INT
);

-- Hypertable with one-month chunks.
SELECT public.create_hypertable(
        relation => 'test_data',
        time_column_name => 'time',
        chunk_time_interval => interval '1 months'
);
-- Insert initial data
-- (one row per day from 2024-01-01 through 2024-12-31)
INSERT INTO test_data
SELECT time, 1
FROM generate_series('2024-01-01'::timestamptz, '2024-12-31'::timestamptz, '1 day'::interval) time;

-- Create continuous aggregate with monthly (variable-length) buckets.
-- No explicit timezone argument is passed to time_bucket here.
CREATE MATERIALIZED VIEW batch_test_cagg
WITH (timescaledb.continuous) AS
SELECT
    time_bucket('1 month'::interval, time) AS bucket,
    count(*) as count
FROM test_data
GROUP BY bucket
WITH NO DATA;


-- Add a policy to enable batched refresh (batch size is 30 days by default for monthly buckets)
-- The job id is stored in the psql variable :job_id.
SELECT add_continuous_aggregate_policy('batch_test_cagg',
    start_offset =>null,
    end_offset => INTERVAL '1 month',
    schedule_interval => INTERVAL '1 hour',
    buckets_per_batch => 1

) AS job_id \gset

-- Run the policy job - this uses batched processing, 1 bucket per batch
TRUNCATE bgw_log;
SELECT ts_bgw_params_reset_time(0, true);
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
-- Verify that invalidation log has no entries other than the left and right ends with -/+ infinity
-- (rows starting at the minimum or ending at the maximum bigint value are
-- filtered out, so any row returned here is a leftover in between)
SELECT materialization_id,
       _timescaledb_functions.to_timestamp(lowest_modified_value) as low,
       _timescaledb_functions.to_timestamp(greatest_modified_value) as high
FROM _timescaledb_catalog.continuous_aggs_materialization_invalidation_log
WHERE materialization_id IN
      (SELECT mat_hypertable_id FROM _timescaledb_catalog.continuous_agg
       WHERE user_view_name = 'batch_test_cagg')
  AND lowest_modified_value != -9223372036854775808 --  -infinity
  AND greatest_modified_value != 9223372036854775807 -- +infinity
ORDER BY low;

-- Verify that there are no duplicate/overlapping refreshes.
-- Note that batch 1 and batch 12 contain 2 buckets instead of the 1 bucket set in the policy.
-- This is because we currently cut batches of 30 days for a monthly cagg,
-- so the first batch and the batch containing February can have 2 buckets. Once we have a cleaner
-- solution to cut an exact batch size for variable-length buckets, this should be fixed.

SELECT * FROM sorted_bgw_log;

-- Now run the refresh again; it should not do anything.
TRUNCATE bgw_log;
SELECT ts_bgw_params_reset_time(extract(epoch from interval '1 hour')::bigint * 1000000, true);
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
SELECT * FROM sorted_bgw_log;

-- Clean up: drop the batch-test table together with its dependent objects.
DROP TABLE test_data CASCADE;

-- Back as superuser: hand over the objects still owned by the test role,
-- revoke its privileges on schema public and finally drop the role.
\c :TEST_DBNAME :ROLE_SUPERUSER
REASSIGN OWNED BY test_cagg_refresh_policy_user TO :ROLE_SUPERUSER;
REVOKE ALL PRIVILEGES ON SCHEMA public FROM test_cagg_refresh_policy_user;
DROP ROLE test_cagg_refresh_policy_user;