From 85ba069bb67bd9e397c528aabb7199914a974690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Grom?= Date: Tue, 31 Mar 2026 22:22:29 +0100 Subject: [PATCH 1/6] feat: repository analytics & repo populated & repo health score & health score refactor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gašper Grom --- .../project_insights_copy_ds.datasource | 8 +- .../repo_health_score_copy_ds.datasource | 71 ++++++++++ .../repositories_populated_ds.datasource | 27 ++++ .../health_score_active_contributors.incl | 16 +++ .../includes/health_score_active_days.incl | 16 +++ ...core_contributions_outside_work_hours.incl | 16 +++ .../health_score_contributor_dependency.incl | 53 +++++++ .../tinybird/includes/health_score_forks.incl | 16 +++ .../health_score_issues_resolution.incl | 16 +++ .../health_score_merge_lead_time.incl | 16 +++ .../health_score_organization_dependency.incl | 53 +++++++ .../includes/health_score_pull_requests.incl | 16 +++ .../includes/health_score_retention.incl | 34 +++++ .../tinybird/includes/health_score_stars.incl | 16 +++ .../health_score_active_contributors.pipe | 22 +-- .../pipes/health_score_active_days.pipe | 22 +-- ...core_contributions_outside_work_hours.pipe | 22 +-- .../health_score_contributor_dependency.pipe | 59 +------- .../tinybird/pipes/health_score_forks.pipe | 22 +-- .../pipes/health_score_issues_resolution.pipe | 22 +-- .../pipes/health_score_merge_lead_time.pipe | 22 +-- .../health_score_organization_dependency.pipe | 58 +------- .../pipes/health_score_pull_requests.pipe | 22 +-- .../pipes/health_score_retention.pipe | 47 +------ .../tinybird/pipes/health_score_stars.pipe | 22 +-- .../libs/tinybird/pipes/project_insights.pipe | 2 +- .../tinybird/pipes/project_insights_copy.pipe | 107 +++++++++++++- .../tinybird/pipes/project_repo_insights.pipe | 90 ++++++++++++ ...repo_health_score_active_contributors.pipe | 43 ++++++ .../pipes/repo_health_score_active_days.pipe | 39 
++++++ ...core_contributions_outside_work_hours.pipe | 50 +++++++ ...o_health_score_contributor_dependency.pipe | 45 ++++++ .../pipes/repo_health_score_copy.pipe | 131 ++++++++++++++++++ .../pipes/repo_health_score_forks.pipe | 37 +++++ .../repo_health_score_issues_resolution.pipe | 42 ++++++ .../repo_health_score_merge_lead_time.pipe | 40 ++++++ ..._health_score_organization_dependency.pipe | 43 ++++++ .../repo_health_score_pull_requests.pipe | 41 ++++++ .../pipes/repo_health_score_retention.pipe | 91 ++++++++++++ .../pipes/repo_health_score_security.pipe | 48 +++++++ .../pipes/repo_health_score_stars.pipe | 37 +++++ .../pipes/repositories_populated_copy.pipe | 75 ++++++++++ 42 files changed, 1342 insertions(+), 333 deletions(-) create mode 100644 services/libs/tinybird/datasources/repo_health_score_copy_ds.datasource create mode 100644 services/libs/tinybird/datasources/repositories_populated_ds.datasource create mode 100644 services/libs/tinybird/includes/health_score_active_contributors.incl create mode 100644 services/libs/tinybird/includes/health_score_active_days.incl create mode 100644 services/libs/tinybird/includes/health_score_contributions_outside_work_hours.incl create mode 100644 services/libs/tinybird/includes/health_score_contributor_dependency.incl create mode 100644 services/libs/tinybird/includes/health_score_forks.incl create mode 100644 services/libs/tinybird/includes/health_score_issues_resolution.incl create mode 100644 services/libs/tinybird/includes/health_score_merge_lead_time.incl create mode 100644 services/libs/tinybird/includes/health_score_organization_dependency.incl create mode 100644 services/libs/tinybird/includes/health_score_pull_requests.incl create mode 100644 services/libs/tinybird/includes/health_score_retention.incl create mode 100644 services/libs/tinybird/includes/health_score_stars.incl create mode 100644 services/libs/tinybird/pipes/project_repo_insights.pipe create mode 100644 
services/libs/tinybird/pipes/repo_health_score_active_contributors.pipe create mode 100644 services/libs/tinybird/pipes/repo_health_score_active_days.pipe create mode 100644 services/libs/tinybird/pipes/repo_health_score_contributions_outside_work_hours.pipe create mode 100644 services/libs/tinybird/pipes/repo_health_score_contributor_dependency.pipe create mode 100644 services/libs/tinybird/pipes/repo_health_score_copy.pipe create mode 100644 services/libs/tinybird/pipes/repo_health_score_forks.pipe create mode 100644 services/libs/tinybird/pipes/repo_health_score_issues_resolution.pipe create mode 100644 services/libs/tinybird/pipes/repo_health_score_merge_lead_time.pipe create mode 100644 services/libs/tinybird/pipes/repo_health_score_organization_dependency.pipe create mode 100644 services/libs/tinybird/pipes/repo_health_score_pull_requests.pipe create mode 100644 services/libs/tinybird/pipes/repo_health_score_retention.pipe create mode 100644 services/libs/tinybird/pipes/repo_health_score_security.pipe create mode 100644 services/libs/tinybird/pipes/repo_health_score_stars.pipe create mode 100644 services/libs/tinybird/pipes/repositories_populated_copy.pipe diff --git a/services/libs/tinybird/datasources/project_insights_copy_ds.datasource b/services/libs/tinybird/datasources/project_insights_copy_ds.datasource index 0a48399a57..bcc867c41c 100644 --- a/services/libs/tinybird/datasources/project_insights_copy_ds.datasource +++ b/services/libs/tinybird/datasources/project_insights_copy_ds.datasource @@ -2,7 +2,9 @@ DESCRIPTION > - `project_insights_copy_ds` contains materialized project insights data. - Populated by `project_insights_copy.pipe` copy pipe. - Includes project metadata, health score, first commit, and activity metrics for last 365 days and previous 365 days. - - `id` column is the primary key identifier for the project. + - `id` column is the primary key identifier for the project or repository. 
+ - `type` column indicates the record type: 'project' for project insights or 'repo' for repository insights. + - `repoUrl` column is the full repository URL for repo type records (empty string for project type). - `name` column is the human-readable project name. - `slug` column is the URL-friendly identifier used in routing and filtering. - `logoUrl` column is the URL to the project's logo image. @@ -35,6 +37,8 @@ TAGS "Project insights", "Metrics" SCHEMA > `id` String, + `type` String, + `repoUrl` String, `name` String, `slug` String, `logoUrl` String, @@ -64,4 +68,4 @@ SCHEMA > `activeOrganizationsPrevious365Days` UInt64 ENGINE MergeTree -ENGINE_SORTING_KEY id +ENGINE_SORTING_KEY type, id diff --git a/services/libs/tinybird/datasources/repo_health_score_copy_ds.datasource b/services/libs/tinybird/datasources/repo_health_score_copy_ds.datasource new file mode 100644 index 0000000000..168ed13032 --- /dev/null +++ b/services/libs/tinybird/datasources/repo_health_score_copy_ds.datasource @@ -0,0 +1,71 @@ +DESCRIPTION > + - `repo_health_score_copy_ds` contains comprehensive health score metrics and benchmarks per repository. + - Created via copy pipe with computed health metrics for repository-level analytics. + - Aggregates multiple health dimensions including contributors, popularity, development activity, and security. + - `channel` is the repository URL used as the primary key. + - `activeContributors` is the unique contributor count for the previous quarter. + - `activeContributorsBenchmark` is the benchmark score (0-5) for active contributors. + - `contributorDependencyCount` measures contributor concentration risk (bus factor). + - `contributorDependencyPercentage` is the combined contribution percentage of dependent contributors. + - `contributorDependencyBenchmark` is the benchmark score (0-5) for contributor dependency. + - `organizationDependencyCount` measures organizational concentration risk. 
+ - `organizationDependencyPercentage` is the combined contribution percentage of dependent organizations. + - `organizationDependencyBenchmark` is the benchmark score (0-5) for organization dependency. + - `retentionRate` is the quarter-over-quarter contributor retention percentage. + - `retentionBenchmark` is the benchmark score (0-5) for retention. + - `stars` is the total star count for the repository. + - `starsBenchmark` is the benchmark score (0-5) for stars. + - `forks` is the total fork count for the repository. + - `forksBenchmark` is the benchmark score (0-5) for forks. + - `issueResolution` is the average days to close issues (nullable for repos without issues). + - `issueResolutionBenchmark` is the benchmark score (0-5) for issue resolution. + - `pullRequests` is the PR count in the last 365 days. + - `pullRequestsBenchmark` is the benchmark score (0-5) for pull requests. + - `mergeLeadTime` is the average days to merge PRs (nullable for repos without PRs). + - `mergeLeadTimeBenchmark` is the benchmark score (0-5) for merge lead time. + - `activeDaysCount` is the count of distinct active days in the last 365 days. + - `activeDaysBenchmark` is the benchmark score (0-5) for active days. + - `contributionsOutsideWorkHours` is the percentage of contributions outside work hours. + - `contributionsOutsideWorkHoursBenchmark` is the benchmark score (0-5) for outside work hours. + - `securityPercentage` is the health score percentage for the security category (0-100). + - `contributorPercentage` is the health score percentage for the contributors category (0-100). + - `popularityPercentage` is the health score percentage for the popularity category (0-100). + - `developmentPercentage` is the health score percentage for the development category (0-100). + - `overallScore` is the computed overall health score combining all dimensions. 
+ +TAGS "Repository health", "Metrics" + +SCHEMA > + `channel` String, + `activeContributors` UInt64, + `activeContributorsBenchmark` UInt64, + `contributorDependencyCount` UInt64, + `contributorDependencyPercentage` Float64, + `contributorDependencyBenchmark` UInt64, + `organizationDependencyCount` UInt64, + `organizationDependencyPercentage` Float64, + `organizationDependencyBenchmark` UInt64, + `retentionRate` Float64, + `retentionBenchmark` UInt64, + `stars` UInt64, + `starsBenchmark` UInt64, + `forks` UInt64, + `forksBenchmark` UInt64, + `issueResolution` Nullable(Float64), + `issueResolutionBenchmark` UInt64, + `pullRequests` UInt64, + `pullRequestsBenchmark` UInt64, + `mergeLeadTime` Nullable(Float64), + `mergeLeadTimeBenchmark` UInt64, + `activeDaysCount` UInt64, + `activeDaysBenchmark` UInt64, + `contributionsOutsideWorkHours` Float64, + `contributionsOutsideWorkHoursBenchmark` UInt64, + `securityPercentage` Float64, + `contributorPercentage` Float64, + `popularityPercentage` Float64, + `developmentPercentage` Float64, + `overallScore` Float64 + +ENGINE MergeTree +ENGINE_SORTING_KEY channel diff --git a/services/libs/tinybird/datasources/repositories_populated_ds.datasource b/services/libs/tinybird/datasources/repositories_populated_ds.datasource new file mode 100644 index 0000000000..7efd132d0b --- /dev/null +++ b/services/libs/tinybird/datasources/repositories_populated_ds.datasource @@ -0,0 +1,27 @@ +DESCRIPTION > + - `repositories_populated_ds` contains enriched repository data with computed metrics. + - Populated by `repositories_populated_copy.pipe` copy pipe. + - Extends base repository data with contributor counts, software valuation, and first commit timestamp. + - `id` is the primary key identifier for the repository record. + - `url` is the full repository URL. + - `segmentId` links to the segment this repository belongs to. + - `insightsProjectId` links to the insights project this repository is associated with. 
+ - `contributorCount` is the total number of unique contributors for the repository. + - `organizationCount` is the total number of unique organizations for the repository. + - `softwareValue` is the estimated economic value of the repository software. + - `firstCommit` is the timestamp of the first commit in the repository (nullable). + +TAGS "Repository metadata", "Analytics enrichment" + +SCHEMA > + `id` String, + `url` String, + `segmentId` String, + `insightsProjectId` String, + `contributorCount` UInt64, + `organizationCount` UInt64, + `softwareValue` UInt64, + `firstCommit` Nullable(DateTime64(3)) + +ENGINE MergeTree +ENGINE_SORTING_KEY id, url diff --git a/services/libs/tinybird/includes/health_score_active_contributors.incl b/services/libs/tinybird/includes/health_score_active_contributors.incl new file mode 100644 index 0000000000..7ef04b49b4 --- /dev/null +++ b/services/libs/tinybird/includes/health_score_active_contributors.incl @@ -0,0 +1,16 @@ +NODE health_score_active_contributors_benchmark +SQL > + % + SELECT + $GROUP_COL, + activeContributors, + CASE + WHEN activeContributors BETWEEN 0 AND 1 THEN 0 + WHEN activeContributors BETWEEN 2 AND 3 THEN 1 + WHEN activeContributors BETWEEN 4 AND 6 THEN 2 + WHEN activeContributors BETWEEN 7 AND 10 THEN 3 + WHEN activeContributors BETWEEN 11 AND 20 THEN 4 + WHEN activeContributors > 20 THEN 5 + ELSE 0 + END AS activeContributorsBenchmark + FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_active_days.incl b/services/libs/tinybird/includes/health_score_active_days.incl new file mode 100644 index 0000000000..3e1500dd0a --- /dev/null +++ b/services/libs/tinybird/includes/health_score_active_days.incl @@ -0,0 +1,16 @@ +NODE health_score_active_days_benchmark +SQL > + % + SELECT + $GROUP_COL, + activeDaysCount, + CASE + WHEN activeDaysCount BETWEEN 0 AND 5 THEN 0 + WHEN activeDaysCount BETWEEN 6 AND 10 THEN 1 + WHEN activeDaysCount BETWEEN 11 AND 15 THEN 2 + WHEN activeDaysCount BETWEEN 
16 AND 20 THEN 3 + WHEN activeDaysCount BETWEEN 21 AND 26 THEN 4 + WHEN activeDaysCount > 26 THEN 5 + ELSE 0 + END AS activeDaysBenchmark + FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_contributions_outside_work_hours.incl b/services/libs/tinybird/includes/health_score_contributions_outside_work_hours.incl new file mode 100644 index 0000000000..c52cee5a41 --- /dev/null +++ b/services/libs/tinybird/includes/health_score_contributions_outside_work_hours.incl @@ -0,0 +1,16 @@ +NODE health_score_contributions_outside_work_hours_benchmark +SQL > + % + SELECT + $GROUP_COL, + contributionsOutsideWorkHours, + CASE + WHEN contributionsOutsideWorkHours >= 75 THEN 0 + WHEN contributionsOutsideWorkHours BETWEEN 50 AND 74 THEN 1 + WHEN contributionsOutsideWorkHours BETWEEN 40 AND 49 THEN 2 + WHEN contributionsOutsideWorkHours BETWEEN 30 AND 39 THEN 3 + WHEN contributionsOutsideWorkHours BETWEEN 20 AND 29 THEN 4 + WHEN contributionsOutsideWorkHours BETWEEN 0 AND 19 THEN 5 + ELSE 0 + END AS contributionsOutsideWorkHoursBenchmark + FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_contributor_dependency.incl b/services/libs/tinybird/includes/health_score_contributor_dependency.incl new file mode 100644 index 0000000000..67c4a73be9 --- /dev/null +++ b/services/libs/tinybird/includes/health_score_contributor_dependency.incl @@ -0,0 +1,53 @@ +NODE health_score_contributor_dependency_pct +SQL > + % + SELECT + $GROUP_COL, + memberId, + contributionCount, + ROUND(contributionCount * 100.0 / SUM(contributionCount) OVER (PARTITION BY $GROUP_COL), 2) AS contributionPercentage + FROM $SOURCE_NODE + ORDER BY contributionPercentage DESC + +NODE health_score_contributor_dependency_running +SQL > + % + SELECT + $GROUP_COL, + memberId, + contributionPercentage, + SUM(contributionPercentage) OVER ( + PARTITION BY $GROUP_COL ORDER BY contributionPercentage DESC, memberId + ) AS contributionPercentageRunningTotal + FROM 
health_score_contributor_dependency_pct + +NODE health_score_contributor_dependency_score +SQL > + % + SELECT + $GROUP_COL, + count() AS contributorDependencyCount, + round(sum(contributionPercentage)) AS contributorDependencyPercentage + FROM health_score_contributor_dependency_running + WHERE + contributionPercentageRunningTotal < 51 + OR (contributionPercentageRunningTotal - contributionPercentage < 51) + GROUP BY $GROUP_COL + +NODE health_score_contributor_dependency_benchmark +SQL > + % + SELECT + $GROUP_COL, + contributorDependencyCount, + contributorDependencyPercentage, + CASE + WHEN contributorDependencyCount BETWEEN 0 AND 1 THEN 0 + WHEN contributorDependencyCount = 2 THEN 1 + WHEN contributorDependencyCount BETWEEN 3 AND 4 THEN 2 + WHEN contributorDependencyCount BETWEEN 5 AND 6 THEN 3 + WHEN contributorDependencyCount BETWEEN 7 AND 9 THEN 4 + WHEN contributorDependencyCount > 9 THEN 5 + ELSE 0 + END AS contributorDependencyBenchmark + FROM health_score_contributor_dependency_score diff --git a/services/libs/tinybird/includes/health_score_forks.incl b/services/libs/tinybird/includes/health_score_forks.incl new file mode 100644 index 0000000000..a487d13d35 --- /dev/null +++ b/services/libs/tinybird/includes/health_score_forks.incl @@ -0,0 +1,16 @@ +NODE health_score_forks_benchmark +SQL > + % + SELECT + $GROUP_COL, + forks, + CASE + WHEN forks BETWEEN 0 AND 4 THEN 0 + WHEN forks BETWEEN 5 AND 9 THEN 1 + WHEN forks BETWEEN 10 AND 19 THEN 2 + WHEN forks BETWEEN 20 AND 39 THEN 3 + WHEN forks BETWEEN 40 AND 79 THEN 4 + WHEN forks >= 80 THEN 5 + ELSE 0 + END AS forksBenchmark + FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_issues_resolution.incl b/services/libs/tinybird/includes/health_score_issues_resolution.incl new file mode 100644 index 0000000000..098cfea164 --- /dev/null +++ b/services/libs/tinybird/includes/health_score_issues_resolution.incl @@ -0,0 +1,16 @@ +NODE health_score_issues_resolution_benchmark +SQL > + % + 
SELECT + $GROUP_COL, + issueResolution, + CASE + WHEN issueResolution >= 61 THEN 0 + WHEN issueResolution BETWEEN 51 AND 60 THEN 1 + WHEN issueResolution BETWEEN 36 AND 50 THEN 2 + WHEN issueResolution BETWEEN 22 AND 35 THEN 3 + WHEN issueResolution BETWEEN 8 AND 21 THEN 4 + WHEN issueResolution BETWEEN 0 AND 7 THEN 5 + ELSE 0 + END AS issueResolutionBenchmark + FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_merge_lead_time.incl b/services/libs/tinybird/includes/health_score_merge_lead_time.incl new file mode 100644 index 0000000000..66ba4eb496 --- /dev/null +++ b/services/libs/tinybird/includes/health_score_merge_lead_time.incl @@ -0,0 +1,16 @@ +NODE health_score_merge_lead_time_benchmark +SQL > + % + SELECT + $GROUP_COL, + mergeLeadTime, + CASE + WHEN mergeLeadTime >= 30 THEN 0 + WHEN mergeLeadTime BETWEEN 21 AND 30 THEN 1 + WHEN mergeLeadTime BETWEEN 15 AND 20 THEN 2 + WHEN mergeLeadTime BETWEEN 7 AND 14 THEN 3 + WHEN mergeLeadTime BETWEEN 3 AND 6 THEN 4 + WHEN mergeLeadTime BETWEEN 0 AND 2 THEN 5 + ELSE 0 + END AS mergeLeadTimeBenchmark + FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_organization_dependency.incl b/services/libs/tinybird/includes/health_score_organization_dependency.incl new file mode 100644 index 0000000000..03c5dc960d --- /dev/null +++ b/services/libs/tinybird/includes/health_score_organization_dependency.incl @@ -0,0 +1,53 @@ +NODE health_score_organization_dependency_pct +SQL > + % + SELECT + $GROUP_COL, + organizationId, + contributionCount, + (contributionCount * 100.0 / SUM(contributionCount) OVER (PARTITION BY $GROUP_COL)) AS contributionPercentage + FROM $SOURCE_NODE + ORDER BY contributionPercentage DESC + +NODE health_score_organization_dependency_running +SQL > + % + SELECT + $GROUP_COL, + organizationId, + contributionPercentage, + SUM(contributionPercentage) OVER ( + PARTITION BY $GROUP_COL ORDER BY contributionPercentage DESC, organizationId + ) AS 
contributionPercentageRunningTotal + FROM health_score_organization_dependency_pct + +NODE health_score_organization_dependency_score +SQL > + % + SELECT + $GROUP_COL, + count() AS organizationDependencyCount, + round(sum(contributionPercentage)) AS organizationDependencyPercentage + FROM health_score_organization_dependency_running + WHERE + contributionPercentageRunningTotal < 51 + OR (contributionPercentageRunningTotal - contributionPercentage < 51) + GROUP BY $GROUP_COL + +NODE health_score_organization_dependency_benchmark +SQL > + % + SELECT + $GROUP_COL, + organizationDependencyCount, + organizationDependencyPercentage, + CASE + WHEN organizationDependencyCount BETWEEN 0 AND 1 THEN 0 + WHEN organizationDependencyCount = 2 THEN 1 + WHEN organizationDependencyCount = 3 THEN 2 + WHEN organizationDependencyCount BETWEEN 4 AND 5 THEN 3 + WHEN organizationDependencyCount BETWEEN 6 AND 7 THEN 4 + WHEN organizationDependencyCount >= 8 THEN 5 + ELSE 0 + END AS organizationDependencyBenchmark + FROM health_score_organization_dependency_score diff --git a/services/libs/tinybird/includes/health_score_pull_requests.incl b/services/libs/tinybird/includes/health_score_pull_requests.incl new file mode 100644 index 0000000000..70567d3613 --- /dev/null +++ b/services/libs/tinybird/includes/health_score_pull_requests.incl @@ -0,0 +1,16 @@ +NODE health_score_pull_requests_benchmark +SQL > + % + SELECT + $GROUP_COL, + pullRequests, + CASE + WHEN pullRequests BETWEEN 0 AND 1 THEN 0 + WHEN pullRequests BETWEEN 2 AND 3 THEN 1 + WHEN pullRequests BETWEEN 4 AND 7 THEN 2 + WHEN pullRequests BETWEEN 8 AND 15 THEN 3 + WHEN pullRequests BETWEEN 16 AND 30 THEN 4 + WHEN pullRequests >= 31 THEN 5 + ELSE 0 + END AS pullRequestsBenchmark + FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_retention.incl b/services/libs/tinybird/includes/health_score_retention.incl new file mode 100644 index 0000000000..a44f848ad3 --- /dev/null +++ 
b/services/libs/tinybird/includes/health_score_retention.incl @@ -0,0 +1,34 @@ +NODE health_score_retention_counts +SQL > + % + SELECT + cur.$GROUP_COL AS $GROUP_COL, + if( + length(coalesce(prev.previousQuarterMembers, [])) > 0, + round( + 100 * length(arrayIntersect( + coalesce(cur.currentQuarterMembers, []), + coalesce(prev.previousQuarterMembers, []) + )) / length(coalesce(prev.previousQuarterMembers, [])) + ), + 0 + ) AS retentionRate + FROM $SOURCE_CURRENT AS cur + LEFT JOIN $SOURCE_PREVIOUS AS prev USING ($GROUP_COL) + +NODE health_score_retention_benchmark +SQL > + % + SELECT + $GROUP_COL, + retentionRate, + CASE + WHEN retentionRate BETWEEN 0 AND 2 THEN 0 + WHEN retentionRate BETWEEN 3 AND 5 THEN 1 + WHEN retentionRate BETWEEN 6 AND 9 THEN 2 + WHEN retentionRate BETWEEN 10 AND 14 THEN 3 + WHEN retentionRate BETWEEN 15 AND 19 THEN 4 + WHEN retentionRate >= 20 THEN 5 + ELSE 0 + END AS retentionBenchmark + FROM health_score_retention_counts diff --git a/services/libs/tinybird/includes/health_score_stars.incl b/services/libs/tinybird/includes/health_score_stars.incl new file mode 100644 index 0000000000..1a87169ae0 --- /dev/null +++ b/services/libs/tinybird/includes/health_score_stars.incl @@ -0,0 +1,16 @@ +NODE health_score_stars_benchmark +SQL > + % + SELECT + $GROUP_COL, + stars, + CASE + WHEN stars BETWEEN 0 AND 9 THEN 0 + WHEN stars BETWEEN 10 AND 49 THEN 1 + WHEN stars BETWEEN 50 AND 199 THEN 2 + WHEN stars BETWEEN 200 AND 499 THEN 3 + WHEN stars BETWEEN 500 AND 999 THEN 4 + WHEN stars >= 1000 THEN 5 + ELSE 0 + END AS starsBenchmark + FROM $SOURCE_NODE diff --git a/services/libs/tinybird/pipes/health_score_active_contributors.pipe b/services/libs/tinybird/pipes/health_score_active_contributors.pipe index f7482f8751..75e2e9ec79 100644 --- a/services/libs/tinybird/pipes/health_score_active_contributors.pipe +++ b/services/libs/tinybird/pipes/health_score_active_contributors.pipe @@ -34,24 +34,4 @@ SQL > GROUP BY segmentId {% end %} -NODE 
health_score_active_contributors_with_benchmark -SQL > - SELECT - segmentId, - activeContributors, - CASE - WHEN activeContributors BETWEEN 0 AND 1 - THEN 0 - WHEN activeContributors BETWEEN 2 AND 3 - THEN 1 - WHEN activeContributors BETWEEN 4 AND 6 - THEN 2 - WHEN activeContributors BETWEEN 7 AND 10 - THEN 3 - WHEN activeContributors BETWEEN 11 AND 20 - THEN 4 - WHEN activeContributors > 20 - THEN 5 - ELSE 0 - END AS activeContributorsBenchmark - FROM health_score_active_contributors_score +INCLUDE "../includes/health_score_active_contributors.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_active_contributors_score" diff --git a/services/libs/tinybird/pipes/health_score_active_days.pipe b/services/libs/tinybird/pipes/health_score_active_days.pipe index 60edc4833a..bb3ef13fc1 100644 --- a/services/libs/tinybird/pipes/health_score_active_days.pipe +++ b/services/libs/tinybird/pipes/health_score_active_days.pipe @@ -27,24 +27,4 @@ SQL > GROUP BY segmentId {% end %} -NODE health_score_active_days_with_benchmark -SQL > - SELECT - segmentId, - activeDaysCount, - CASE - WHEN activeDaysCount BETWEEN 0 AND 5 - THEN 0 - WHEN activeDaysCount BETWEEN 6 AND 10 - THEN 1 - WHEN activeDaysCount BETWEEN 11 AND 15 - THEN 2 - WHEN activeDaysCount BETWEEN 16 AND 20 - THEN 3 - WHEN activeDaysCount BETWEEN 21 AND 26 - THEN 4 - WHEN activeDaysCount > 26 - THEN 5 - ELSE 0 - END AS activeDaysBenchmark - FROM health_score_active_days_score +INCLUDE "../includes/health_score_active_days.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_active_days_score" diff --git a/services/libs/tinybird/pipes/health_score_contributions_outside_work_hours.pipe b/services/libs/tinybird/pipes/health_score_contributions_outside_work_hours.pipe index e4384cfb2a..71169a46eb 100644 --- a/services/libs/tinybird/pipes/health_score_contributions_outside_work_hours.pipe +++ b/services/libs/tinybird/pipes/health_score_contributions_outside_work_hours.pipe @@ -29,24 +29,4 @@ SQL > {% end %} GROUP BY 
segmentId -NODE health_score_contributions_outside_work_hours_with_benchmark -SQL > - SELECT - segmentId, - contributionsOutsideWorkHours, - CASE - WHEN contributionsOutsideWorkHours >= 75 - THEN 0 - WHEN contributionsOutsideWorkHours BETWEEN 50 AND 74 - THEN 1 - WHEN contributionsOutsideWorkHours BETWEEN 40 AND 49 - THEN 2 - WHEN contributionsOutsideWorkHours BETWEEN 30 AND 39 - THEN 3 - WHEN contributionsOutsideWorkHours BETWEEN 20 AND 29 - THEN 4 - WHEN contributionsOutsideWorkHours BETWEEN 0 AND 19 - THEN 5 - ELSE 0 - END AS contributionsOutsideWorkHoursBenchmark - FROM health_score_contributions_outside_work_hours_score +INCLUDE "../includes/health_score_contributions_outside_work_hours.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_contributions_outside_work_hours_score" diff --git a/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe b/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe index 605802aba3..76a47a7e94 100644 --- a/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe +++ b/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe @@ -33,61 +33,4 @@ SQL > ORDER by contributionCount DESC {% end %} -NODE health_score_contributor_dependency_contribution_percentage -SQL > - SELECT - segmentId, - memberId, - contributionCount, - ROUND( - contributionCount * 100.0 / SUM(contributionCount) OVER (PARTITION BY segmentId), 2 - ) AS contributionPercentage - FROM health_score_contributor_dependency_contribution_count - ORDER BY contributionPercentage DESC - -NODE health_score_contributor_dependency_contribution_runnning_total -SQL > - SELECT - segmentId, - memberId, - contributionCount, - contributionPercentage, - SUM(contributionPercentage) OVER ( - PARTITION BY segmentId ORDER BY contributionPercentage DESC, memberId - ) AS contributionPercentageRunningTotal - FROM health_score_contributor_dependency_contribution_percentage - -NODE health_score_contributor_dependency_score -SQL > - 
SELECT - segmentId, - count() AS contributorDependencyCount, - round(sum(contributionPercentage)) AS contributorDependencyPercentage - FROM health_score_contributor_dependency_contribution_runnning_total - WHERE - contributionPercentageRunningTotal < 51 - OR (contributionPercentageRunningTotal - contributionPercentage < 51) - GROUP BY segmentId - -NODE health_score_contributor_dependency_with_benchmark -SQL > - SELECT - segmentId, - contributorDependencyCount, - contributorDependencyPercentage, - CASE - WHEN contributorDependencyCount BETWEEN 0 AND 1 - THEN 0 - WHEN contributorDependencyCount = 2 - THEN 1 - WHEN contributorDependencyCount BETWEEN 3 AND 4 - THEN 2 - WHEN contributorDependencyCount BETWEEN 5 AND 6 - THEN 3 - WHEN contributorDependencyCount BETWEEN 7 AND 9 - THEN 4 - WHEN contributorDependencyCount > 9 - THEN 5 - ELSE 0 - END AS contributorDependencyBenchmark - FROM health_score_contributor_dependency_score +INCLUDE "../includes/health_score_contributor_dependency.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_contributor_dependency_contribution_count" diff --git a/services/libs/tinybird/pipes/health_score_forks.pipe b/services/libs/tinybird/pipes/health_score_forks.pipe index c36cc77817..244a82b995 100644 --- a/services/libs/tinybird/pipes/health_score_forks.pipe +++ b/services/libs/tinybird/pipes/health_score_forks.pipe @@ -28,24 +28,4 @@ SQL > GROUP BY segmentId {% end %} -NODE health_score_forks_with_benchmark -SQL > - SELECT - segmentId, - forks, - CASE - WHEN forks BETWEEN 0 AND 4 - THEN 0 - WHEN forks BETWEEN 5 AND 9 - THEN 1 - WHEN forks BETWEEN 10 AND 19 - THEN 2 - WHEN forks BETWEEN 20 AND 39 - THEN 3 - WHEN forks BETWEEN 40 AND 79 - THEN 4 - WHEN forks >= 80 - THEN 5 - ELSE 0 - END AS forksBenchmark - FROM health_score_forks_score +INCLUDE "../includes/health_score_forks.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_forks_score" diff --git a/services/libs/tinybird/pipes/health_score_issues_resolution.pipe 
b/services/libs/tinybird/pipes/health_score_issues_resolution.pipe index 625fdf386d..cf7cc77511 100644 --- a/services/libs/tinybird/pipes/health_score_issues_resolution.pipe +++ b/services/libs/tinybird/pipes/health_score_issues_resolution.pipe @@ -38,24 +38,4 @@ SQL > GROUP BY segmentId {% end %} -NODE health_score_issues_resolution_with_benchmark -SQL > - SELECT - segmentId, - issueResolution, - CASE - WHEN issueResolution >= 61 - THEN 0 - WHEN issueResolution BETWEEN 51 AND 60 - THEN 1 - WHEN issueResolution BETWEEN 36 AND 50 - THEN 2 - WHEN issueResolution BETWEEN 22 AND 35 - THEN 3 - WHEN issueResolution BETWEEN 8 AND 21 - THEN 4 - WHEN issueResolution BETWEEN 0 AND 7 - THEN 5 - ELSE 0 - END AS issueResolutionBenchmark - FROM health_score_issues_resolution_score +INCLUDE "../includes/health_score_issues_resolution.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_issues_resolution_score" diff --git a/services/libs/tinybird/pipes/health_score_merge_lead_time.pipe b/services/libs/tinybird/pipes/health_score_merge_lead_time.pipe index 9b1e4eb7b7..037560cf4c 100644 --- a/services/libs/tinybird/pipes/health_score_merge_lead_time.pipe +++ b/services/libs/tinybird/pipes/health_score_merge_lead_time.pipe @@ -26,24 +26,4 @@ SQL > {% end %} GROUP BY segmentId -NODE health_score_merge_lead_time_with_benchmark -SQL > - SELECT - segmentId, - mergeLeadTime, - CASE - WHEN mergeLeadTime >= 30 - THEN 0 - WHEN mergeLeadTime BETWEEN 21 AND 30 - THEN 1 - WHEN mergeLeadTime BETWEEN 15 AND 20 - THEN 2 - WHEN mergeLeadTime BETWEEN 7 AND 14 - THEN 3 - WHEN mergeLeadTime BETWEEN 3 AND 6 - THEN 4 - WHEN mergeLeadTime BETWEEN 0 AND 2 - THEN 5 - ELSE 0 - END AS mergeLeadTimeBenchmark - FROM health_score_merge_lead_time_score +INCLUDE "../includes/health_score_merge_lead_time.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_merge_lead_time_score" diff --git a/services/libs/tinybird/pipes/health_score_organization_dependency.pipe 
b/services/libs/tinybird/pipes/health_score_organization_dependency.pipe index 4400d0f697..6a44308b77 100644 --- a/services/libs/tinybird/pipes/health_score_organization_dependency.pipe +++ b/services/libs/tinybird/pipes/health_score_organization_dependency.pipe @@ -31,60 +31,4 @@ SQL > GROUP BY segmentId, organizationId {% end %} -NODE health_score_organization_dependency_contribution_percentage -SQL > - SELECT - segmentId, - organizationId, - contributionCount, - ( - contributionCount * 100.0 / SUM(contributionCount) OVER (PARTITION BY segmentId) - ) AS contributionPercentage - FROM health_score_organization_dependency_contribution_count - -NODE health_score_organization_dependency_contribution_runnning_total -SQL > - SELECT - segmentId, - organizationId, - contributionCount, - contributionPercentage, - SUM(contributionPercentage) OVER ( - PARTITION BY segmentId ORDER BY contributionPercentage DESC, organizationId - ) AS contributionPercentageRunningTotal - FROM health_score_organization_dependency_contribution_percentage - -NODE health_score_organization_dependency_score -SQL > - SELECT - segmentId, - count() AS organizationDependencyCount, - round(sum(contributionPercentage)) AS organizationDependencyPercentage - FROM health_score_organization_dependency_contribution_runnning_total - WHERE - contributionPercentageRunningTotal < 51 - OR (contributionPercentageRunningTotal - contributionPercentage < 51) - GROUP BY segmentId - -NODE health_score_organization_dependency_with_benchmark -SQL > - SELECT - segmentId, - organizationDependencyCount, - organizationDependencyPercentage, - CASE - WHEN organizationDependencyCount BETWEEN 0 AND 1 - THEN 0 - WHEN organizationDependencyCount = 2 - THEN 1 - WHEN organizationDependencyCount = 3 - THEN 2 - WHEN organizationDependencyCount BETWEEN 4 AND 5 - THEN 3 - WHEN organizationDependencyCount BETWEEN 6 AND 7 - THEN 4 - WHEN organizationDependencyCount >= 8 - THEN 5 - ELSE 0 - END AS organizationDependencyBenchmark - FROM 
health_score_organization_dependency_score +INCLUDE "../includes/health_score_organization_dependency.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_organization_dependency_contribution_count" diff --git a/services/libs/tinybird/pipes/health_score_pull_requests.pipe b/services/libs/tinybird/pipes/health_score_pull_requests.pipe index 4fc5c7ca80..2045003a04 100644 --- a/services/libs/tinybird/pipes/health_score_pull_requests.pipe +++ b/services/libs/tinybird/pipes/health_score_pull_requests.pipe @@ -40,24 +40,4 @@ SQL > GROUP BY segmentId {% end %} -NODE health_score_pull_requests_with_benchmark -SQL > - SELECT - segmentId, - pullRequests, - CASE - WHEN pullRequests BETWEEN 0 AND 1 - THEN 0 - WHEN pullRequests BETWEEN 2 AND 3 - THEN 1 - WHEN pullRequests BETWEEN 4 AND 7 - THEN 2 - WHEN pullRequests BETWEEN 8 AND 15 - THEN 3 - WHEN pullRequests BETWEEN 16 AND 30 - THEN 4 - WHEN pullRequests >= 31 - THEN 5 - ELSE 0 - END AS pullRequestsBenchmark - FROM health_score_pull_requests_score +INCLUDE "../includes/health_score_pull_requests.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_pull_requests_score" diff --git a/services/libs/tinybird/pipes/health_score_retention.pipe b/services/libs/tinybird/pipes/health_score_retention.pipe index 48b0961fc0..b4999e2225 100644 --- a/services/libs/tinybird/pipes/health_score_retention.pipe +++ b/services/libs/tinybird/pipes/health_score_retention.pipe @@ -109,49 +109,4 @@ SQL > GROUP BY segmentId {% end %} -NODE health_score_retention_counts -SQL > - SELECT - health_score_retention_current_quarter.segmentId as segmentId, - length( - arrayIntersect( - coalesce(health_score_retention_current_quarter.currentQuarterMembers, []), - coalesce(health_score_retention_previous_quarter.previousQuarterMembers, []) - ) - ) AS retained_members, - length( - coalesce(health_score_retention_previous_quarter.previousQuarterMembers, []) - ) AS previous_period_total - FROM health_score_retention_current_quarter - LEFT JOIN 
health_score_retention_previous_quarter USING (segmentId) - -NODE health_score_retention_score -SQL > - SELECT - segmentId, - if( - previous_period_total > 0, round(100 * retained_members / previous_period_total), 0 - ) AS "retentionRate" - FROM health_score_retention_counts - -NODE health_score_retention_with_benchmark -SQL > - SELECT - segmentId, - retentionRate, - CASE - WHEN retentionRate BETWEEN 0 AND 2 - THEN 0 - WHEN retentionRate BETWEEN 3 AND 5 - THEN 1 - WHEN retentionRate BETWEEN 6 AND 9 - THEN 2 - WHEN retentionRate BETWEEN 10 AND 14 - THEN 3 - WHEN retentionRate BETWEEN 15 AND 19 - THEN 4 - WHEN retentionRate >= 20 - THEN 5 - ELSE 0 - END AS retentionBenchmark - FROM health_score_retention_score +INCLUDE "../includes/health_score_retention.incl" "GROUP_COL=segmentId" "SOURCE_CURRENT=health_score_retention_current_quarter" "SOURCE_PREVIOUS=health_score_retention_previous_quarter" diff --git a/services/libs/tinybird/pipes/health_score_stars.pipe b/services/libs/tinybird/pipes/health_score_stars.pipe index 731c463ac2..24396983bf 100644 --- a/services/libs/tinybird/pipes/health_score_stars.pipe +++ b/services/libs/tinybird/pipes/health_score_stars.pipe @@ -28,24 +28,4 @@ SQL > GROUP BY segmentId {% end %} -NODE health_score_stars_with_benchmark -SQL > - SELECT - segmentId, - stars, - CASE - WHEN stars BETWEEN 0 AND 9 - THEN 0 - WHEN stars BETWEEN 10 AND 49 - THEN 1 - WHEN stars BETWEEN 50 AND 199 - THEN 2 - WHEN stars BETWEEN 200 AND 499 - THEN 3 - WHEN stars BETWEEN 500 AND 999 - THEN 4 - WHEN stars >= 1000 - THEN 5 - ELSE 0 - END AS starsBenchmark - FROM health_score_stars_score +INCLUDE "../includes/health_score_stars.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_stars_score" diff --git a/services/libs/tinybird/pipes/project_insights.pipe b/services/libs/tinybird/pipes/project_insights.pipe index 992358a63c..5acfa51530 100644 --- a/services/libs/tinybird/pipes/project_insights.pipe +++ b/services/libs/tinybird/pipes/project_insights.pipe @@ 
-49,7 +49,7 @@ SQL > activeOrganizationsPrevious365Days FROM project_insights_copy_ds WHERE - 1 = 1 + type = 'project' {% if defined(slug) %} AND slug = {{ String(slug, description="Project slug", required=False) }} {% end %} diff --git a/services/libs/tinybird/pipes/project_insights_copy.pipe b/services/libs/tinybird/pipes/project_insights_copy.pipe index 2593d6782b..92ab0891c6 100644 --- a/services/libs/tinybird/pipes/project_insights_copy.pipe +++ b/services/libs/tinybird/pipes/project_insights_copy.pipe @@ -92,13 +92,15 @@ SQL > WHERE timestamp < now() - INTERVAL 365 DAY GROUP BY segmentId -NODE project_insights_copy_results +NODE project_insights_copy_project_results DESCRIPTION > Join all project insights together SQL > SELECT base.id AS id, + 'project' AS type, + '' AS repoUrl, base.name AS name, base.slug AS slug, base.logoUrl AS logoUrl, @@ -132,6 +134,109 @@ SQL > LEFT JOIN project_insights_copy_last_365_days_metrics AS l365 USING (segmentId) LEFT JOIN project_insights_copy_previous_365_days_metrics AS p365 USING (segmentId) +NODE project_insights_copy_repo_base +DESCRIPTION > + Returns base repository information with populated metrics from repositories_populated_ds + +SQL > + SELECT + rp.id AS id, + rp.url AS repoUrl, + rp.url AS name, + '' AS slug, + '' AS logoUrl, + toUInt8(0) AS isLF, + if(r.archived = true, 'archived', 'active') AS status, + rp.contributorCount AS contributorCount, + rp.organizationCount AS organizationCount, + rp.softwareValue AS softwareValue, + rp.firstCommit AS firstCommit + FROM repositories_populated_ds AS rp + JOIN repositories r FINAL ON r.id = rp.id + +NODE project_insights_copy_repo_last_365_days_metrics +DESCRIPTION > + Calculate repository-level metrics for last 365 days grouped by channel (repo URL) + +SQL > + SELECT + channel, + countIf(type = 'star') AS starsLast365Days, + countIf(type = 'fork') AS forksLast365Days, + uniq(CASE WHEN memberId != '' THEN memberId ELSE NULL END) AS activeContributorsLast365Days, + uniq( 
+ CASE WHEN organizationId != '' THEN organizationId ELSE NULL END + ) AS activeOrganizationsLast365Days + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE timestamp <= now() + GROUP BY channel + +NODE project_insights_copy_repo_previous_365_days_metrics +DESCRIPTION > + Calculate repository-level metrics for previous 365 days grouped by channel (repo URL) + +SQL > + SELECT + channel, + countIf(type = 'star') AS starsPrevious365Days, + countIf(type = 'fork') AS forksPrevious365Days, + uniq(CASE WHEN memberId != '' THEN memberId ELSE NULL END) AS activeContributorsPrevious365Days, + uniq( + CASE WHEN organizationId != '' THEN organizationId ELSE NULL END + ) AS activeOrganizationsPrevious365Days + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE timestamp < now() - INTERVAL 365 DAY + GROUP BY channel + +NODE project_insights_copy_repo_results +DESCRIPTION > + Join all repository insights together + +SQL > + SELECT + base.id AS id, + 'repo' AS type, + base.repoUrl AS repoUrl, + base.name AS name, + base.slug AS slug, + base.logoUrl AS logoUrl, + base.isLF AS isLF, + base.status AS status, + base.contributorCount AS contributorCount, + base.organizationCount AS organizationCount, + base.softwareValue AS softwareValue, + toUInt64(0) AS contributorDependencyCount, + toFloat64(0) AS contributorDependencyPercentage, + toUInt64(0) AS organizationDependencyCount, + toFloat64(0) AS organizationDependencyPercentage, + CAST([] AS Array(Tuple(String, UInt64, UInt64))) AS achievements, + CAST(NULL AS Nullable(Float64)) AS healthScore, + CAST(NULL AS Nullable(Float64)) AS contributorHealthScore, + CAST(NULL AS Nullable(Float64)) AS popularityHealthScore, + CAST(NULL AS Nullable(Float64)) AS developmentHealthScore, + CAST(NULL AS Nullable(Float64)) AS securityHealthScore, + base.firstCommit AS firstCommit, + COALESCE(l365.starsLast365Days, 0) AS starsLast365Days, + COALESCE(l365.forksLast365Days, 0) AS forksLast365Days, + 
COALESCE(l365.activeContributorsLast365Days, 0) AS activeContributorsLast365Days, + COALESCE(l365.activeOrganizationsLast365Days, 0) AS activeOrganizationsLast365Days, + COALESCE(p365.starsPrevious365Days, 0) AS starsPrevious365Days, + COALESCE(p365.forksPrevious365Days, 0) AS forksPrevious365Days, + COALESCE(p365.activeContributorsPrevious365Days, 0) AS activeContributorsPrevious365Days, + COALESCE(p365.activeOrganizationsPrevious365Days, 0) AS activeOrganizationsPrevious365Days + FROM project_insights_copy_repo_base AS base + LEFT JOIN project_insights_copy_repo_last_365_days_metrics AS l365 ON base.repoUrl = l365.channel + LEFT JOIN project_insights_copy_repo_previous_365_days_metrics AS p365 ON base.repoUrl = p365.channel + +NODE project_insights_copy_results +DESCRIPTION > + Union of project and repository insights + +SQL > + SELECT * FROM project_insights_copy_project_results + UNION ALL + SELECT * FROM project_insights_copy_repo_results + TYPE COPY TARGET_DATASOURCE project_insights_copy_ds COPY_MODE replace diff --git a/services/libs/tinybird/pipes/project_repo_insights.pipe b/services/libs/tinybird/pipes/project_repo_insights.pipe new file mode 100644 index 0000000000..3e7f5dd93c --- /dev/null +++ b/services/libs/tinybird/pipes/project_repo_insights.pipe @@ -0,0 +1,90 @@ +DESCRIPTION > + - `project_repo_insights.pipe` serves combined project and repository insights data. + - Returns the same metrics as `project_insights.pipe` but includes both project and repository records. + - Filtering by project IDs and repository URLs uses an OR condition to return both types. 
+ - Parameters: + - `ids`: Optional array of project IDs to filter by + - `repoUrls`: Optional array of repository URLs to filter by + - `isLfx`: Optional integer (1 = LFX, 0 = non-LFX) to filter by LFX project status + - `orderByField`: Optional string specifying sort field, defaults to 'name' + - `orderByDirection`: Optional string ('asc' or 'desc'), defaults to 'asc' + - `pageSize`: Optional integer for result limit, defaults to 10 + - `page`: Optional integer for pagination offset calculation, defaults to 0 + - Response: Project and repository records with insights metrics + +TAGS "Insights, Widget", "Project", "Repository" + +NODE project_repo_insights_endpoint +SQL > + % + SELECT + id, + type, + repoUrl, + name, + slug, + logoUrl, + isLF, + status, + contributorCount, + organizationCount, + softwareValue, + contributorDependencyCount, + contributorDependencyPercentage, + organizationDependencyCount, + organizationDependencyPercentage, + achievements, + healthScore, + contributorHealthScore, + popularityHealthScore, + developmentHealthScore, + securityHealthScore, + firstCommit, + starsLast365Days, + forksLast365Days, + activeContributorsLast365Days, + activeOrganizationsLast365Days, + starsPrevious365Days, + forksPrevious365Days, + activeContributorsPrevious365Days, + activeOrganizationsPrevious365Days + FROM project_insights_copy_ds + WHERE + 1 = 1 + {% if defined(ids) or defined(repoUrls) %} + AND ( + 1 = 0 + {% if defined(ids) %} + OR (type = 'project' AND id IN {{ Array(ids, 'String', description="Filter by project id list", required=False) }}) + {% end %} + {% if defined(repoUrls) %} + OR (type = 'repo' AND repoUrl IN {{ Array(repoUrls, 'String', description="Filter by repository URL list", required=False) }}) + {% end %} + ) + {% end %} + {% if defined(isLfx) %} + AND isLF + = {{ + UInt8( + isLfx, description="Filter by LFX project (1 = LFX, 0 = non-LFX)", required=False + ) + }} + {% end %} + ORDER BY + {{ column(String(orderByField, "name", 
description="Order by field.", required=False)) }} + {% if String( + orderByDirection, + 'asc', + description="Order by direction. ASC or DESC", + required=False, + ) == 'asc' or String( + orderByDirection, + 'asc', + description="Order by direction. ASC or DESC", + required=False, + ) == 'ASC' %} ASC + {% else %} DESC + {% end %}, + name ASC + LIMIT {{ Int32(pageSize, 10) }} + OFFSET {{ Int32(page, 0) * Int32(pageSize, 10) }} diff --git a/services/libs/tinybird/pipes/repo_health_score_active_contributors.pipe b/services/libs/tinybird/pipes/repo_health_score_active_contributors.pipe new file mode 100644 index 0000000000..8a3d97594e --- /dev/null +++ b/services/libs/tinybird/pipes/repo_health_score_active_contributors.pipe @@ -0,0 +1,43 @@ +DESCRIPTION > + - Calculates repository-level active contributors health score. + - Returns unique active contributor count and benchmark (0-5) per repository for the previous quarter. + - Parameters: + - `repoUrl`: Optional repository URL for single-repo mode + - `startDate`: Optional DateTime filter for contributions after timestamp + - `endDate`: Optional DateTime filter for contributions before timestamp + +TAGS "Repository health", "Metrics" + +NODE repo_health_score_active_contributors_score +SQL > + % + {% if defined(repoUrl) %} + SELECT channel, COALESCE(uniq(memberId), 0) AS activeContributors + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + memberId != '' + AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) + AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + {% if defined(startDate) %} + AND timestamp + > {{ DateTime(startDate, description="Filter after date", required=False) }} + {% end %} + {% if defined(endDate) %} + AND timestamp + < {{ DateTime(endDate, description="Filter before date", required=False) }} + {% end %} + GROUP BY channel + {% else %} + SELECT 
channel, COALESCE(uniq(memberId), 0) AS activeContributors + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + memberId != '' + AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) + AND timestamp >= toStartOfQuarter(now() - toIntervalQuarter(1)) + AND timestamp < toStartOfQuarter(now()) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + GROUP BY channel + {% end %} + +INCLUDE "../includes/health_score_active_contributors.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_active_contributors_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_active_days.pipe b/services/libs/tinybird/pipes/repo_health_score_active_days.pipe new file mode 100644 index 0000000000..998855dfc0 --- /dev/null +++ b/services/libs/tinybird/pipes/repo_health_score_active_days.pipe @@ -0,0 +1,39 @@ +DESCRIPTION > + - Calculates repository-level active days health score. + - Returns count of distinct active days and benchmark (0-5) per repository for the last 365 days. 
+ - Parameters: + - `repoUrl`: Optional repository URL for single-repo mode + - `startDate`: Optional DateTime filter + - `endDate`: Optional DateTime filter + +TAGS "Repository health", "Metrics" + +NODE repo_health_score_active_days_score +SQL > + % + {% if defined(repoUrl) %} + SELECT channel, countDistinct(DATE(timestamp)) AS activeDaysCount + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + channel = {{ String(repoUrl, description="Repository URL", required=False) }} + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + {% if defined(startDate) %} + AND timestamp + > {{ DateTime(startDate, description="Filter after date", required=False) }} + {% end %} + {% if defined(endDate) %} + AND timestamp + < {{ DateTime(endDate, description="Filter before date", required=False) }} + {% end %} + GROUP BY channel + {% else %} + SELECT channel, countDistinct(DATE(timestamp)) AS activeDaysCount + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + timestamp >= toStartOfDay(now() - toIntervalDay(365)) + AND timestamp < toStartOfDay(now()) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + GROUP BY channel + {% end %} + +INCLUDE "../includes/health_score_active_days.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_active_days_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_contributions_outside_work_hours.pipe b/services/libs/tinybird/pipes/repo_health_score_contributions_outside_work_hours.pipe new file mode 100644 index 0000000000..0f9fe47017 --- /dev/null +++ b/services/libs/tinybird/pipes/repo_health_score_contributions_outside_work_hours.pipe @@ -0,0 +1,50 @@ +DESCRIPTION > + - Calculates repository-level contributions outside work hours health score. + - Returns percentage of contributions outside work hours and benchmark (0-5) per repository. + - Lower percentage (more during work hours) scores higher. 
+ - Parameters: + - `repoUrl`: Optional repository URL for single-repo mode + - `startDate`: Optional DateTime filter + - `endDate`: Optional DateTime filter + +TAGS "Repository health", "Metrics" + +NODE repo_health_score_contributions_outside_work_hours_score +SQL > + % + {% if defined(repoUrl) %} + SELECT + channel, + round( + (100. * countIf((weekday >= 6) OR (two_hours_block >= 18) OR (two_hours_block < 8))) + / count(id) + ) AS contributionsOutsideWorkHours + FROM contributions_with_local_time_ds + WHERE + channel = {{ String(repoUrl, description="Repository URL", required=False) }} + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + {% if defined(startDate) %} + AND timestamp + > {{ DateTime(startDate, description="Filter after date", required=False) }} + {% end %} + {% if defined(endDate) %} + AND timestamp + < {{ DateTime(endDate, description="Filter before date", required=False) }} + {% end %} + GROUP BY channel + {% else %} + SELECT + channel, + round( + (100. * countIf((weekday >= 6) OR (two_hours_block >= 18) OR (two_hours_block < 8))) + / count(id) + ) AS contributionsOutsideWorkHours + FROM contributions_with_local_time_ds + WHERE + timestamp >= toStartOfDay(now() - toIntervalDay(365)) + AND timestamp < toStartOfDay(now() + toIntervalDay(1)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + GROUP BY channel + {% end %} + +INCLUDE "../includes/health_score_contributions_outside_work_hours.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_contributions_outside_work_hours_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_contributor_dependency.pipe b/services/libs/tinybird/pipes/repo_health_score_contributor_dependency.pipe new file mode 100644 index 0000000000..abab682e39 --- /dev/null +++ b/services/libs/tinybird/pipes/repo_health_score_contributor_dependency.pipe @@ -0,0 +1,45 @@ +DESCRIPTION > + - Calculates repository-level contributor dependency (bus factor) health score. 
+ - Returns the number of contributors making up 51% of contributions, their combined percentage, and benchmark (0-5). + - Parameters: + - `repoUrl`: Optional repository URL for single-repo mode + - `startDate`: Optional DateTime filter + - `endDate`: Optional DateTime filter + +TAGS "Repository health", "Metrics" + +NODE repo_health_score_contributor_dependency_count +SQL > + % + {% if defined(repoUrl) %} + SELECT channel, memberId, count() AS contributionCount + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + memberId != '' + AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) + AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + {% if defined(startDate) %} + AND timestamp + > {{ DateTime(startDate, description="Filter after date", required=False) }} + {% end %} + {% if defined(endDate) %} + AND timestamp + < {{ DateTime(endDate, description="Filter before date", required=False) }} + {% end %} + GROUP BY channel, memberId + ORDER BY contributionCount DESC + {% else %} + SELECT channel, memberId, count() AS contributionCount + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + memberId != '' + AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) + AND timestamp >= toStartOfDay(now() - INTERVAL 365 DAY) + AND timestamp < toStartOfDay(now() + INTERVAL 1 DAY) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + GROUP BY channel, memberId + ORDER BY contributionCount DESC + {% end %} + +INCLUDE "../includes/health_score_contributor_dependency.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_contributor_dependency_count" diff --git a/services/libs/tinybird/pipes/repo_health_score_copy.pipe b/services/libs/tinybird/pipes/repo_health_score_copy.pipe new file mode 100644 index 0000000000..1fdb8d1e86 --- /dev/null +++ 
b/services/libs/tinybird/pipes/repo_health_score_copy.pipe @@ -0,0 +1,131 @@ +NODE repo_health_score_copy_repos +DESCRIPTION > + Returns enabled, non-excluded repository URLs linked to insights projects + +SQL > + SELECT r.url AS channel + FROM repositories r FINAL + WHERE + r.enabled = true + AND r.excluded = false + AND isNull(r.deletedAt) + AND r.insightsProjectId != '' + +NODE repo_health_score_copy_data +DESCRIPTION > + Join all metrics per repository from separate health score pipes + +SQL > + SELECT + repos.channel AS channel, + COALESCE(ac.activeContributors, 0) AS activeContributors, + COALESCE(ac.activeContributorsBenchmark, 0) AS activeContributorsBenchmark, + COALESCE(cd.contributorDependencyCount, 0) AS contributorDependencyCount, + COALESCE(cd.contributorDependencyPercentage, 0) AS contributorDependencyPercentage, + COALESCE(cd.contributorDependencyBenchmark, 0) AS contributorDependencyBenchmark, + COALESCE(od.organizationDependencyCount, 0) AS organizationDependencyCount, + COALESCE(od.organizationDependencyPercentage, 0) AS organizationDependencyPercentage, + COALESCE(od.organizationDependencyBenchmark, 0) AS organizationDependencyBenchmark, + COALESCE(ret.retentionRate, 0) AS retentionRate, + COALESCE(ret.retentionBenchmark, 0) AS retentionBenchmark, + COALESCE(st.stars, 0) AS stars, + COALESCE(st.starsBenchmark, 0) AS starsBenchmark, + COALESCE(fk.forks, 0) AS forks, + COALESCE(fk.forksBenchmark, 0) AS forksBenchmark, + ir.issueResolution AS issueResolution, + ir.issueResolutionBenchmark AS issueResolutionBenchmark, + COALESCE(pr.pullRequests, 0) AS pullRequests, + COALESCE(pr.pullRequestsBenchmark, 0) AS pullRequestsBenchmark, + mlt.mergeLeadTime AS mergeLeadTime, + mlt.mergeLeadTimeBenchmark AS mergeLeadTimeBenchmark, + COALESCE(ad.activeDaysCount, 0) AS activeDaysCount, + COALESCE(ad.activeDaysBenchmark, 0) AS activeDaysBenchmark, + COALESCE(owh.contributionsOutsideWorkHours, 0) AS contributionsOutsideWorkHours, + 
COALESCE(owh.contributionsOutsideWorkHoursBenchmark, 0) AS contributionsOutsideWorkHoursBenchmark, + COALESCE(sec.securityPercentage, 0) AS securityPercentage + FROM repo_health_score_copy_repos AS repos + LEFT JOIN repo_health_score_active_contributors AS ac USING (channel) + LEFT JOIN repo_health_score_contributor_dependency AS cd USING (channel) + LEFT JOIN repo_health_score_organization_dependency AS od USING (channel) + LEFT JOIN repo_health_score_retention AS ret USING (channel) + LEFT JOIN repo_health_score_stars AS st USING (channel) + LEFT JOIN repo_health_score_forks AS fk USING (channel) + LEFT JOIN repo_health_score_issues_resolution AS ir USING (channel) + LEFT JOIN repo_health_score_pull_requests AS pr USING (channel) + LEFT JOIN repo_health_score_merge_lead_time AS mlt USING (channel) + LEFT JOIN repo_health_score_active_days AS ad USING (channel) + LEFT JOIN repo_health_score_contributions_outside_work_hours AS owh USING (channel) + LEFT JOIN repo_health_score_security AS sec USING (channel) + +NODE repo_health_score_copy_result +DESCRIPTION > + Compute final health score percentages per repository + +SQL > + WITH + arrayFilter(x -> x >= 0, [ + activeContributorsBenchmark, + contributorDependencyBenchmark, + organizationDependencyBenchmark, + retentionBenchmark + ]) AS contributorBenchmarks, + arrayFilter(x -> x >= 0, [ + starsBenchmark, + forksBenchmark + ]) AS popularityBenchmarks, + arrayFilter(x -> x >= 0, [ + COALESCE(issueResolutionBenchmark, -1), + pullRequestsBenchmark, + COALESCE(mergeLeadTimeBenchmark, -1), + activeDaysBenchmark, + contributionsOutsideWorkHoursBenchmark + ]) AS developmentBenchmarks + SELECT + channel, + activeContributors, + activeContributorsBenchmark, + contributorDependencyCount, + contributorDependencyPercentage, + contributorDependencyBenchmark, + organizationDependencyCount, + organizationDependencyPercentage, + organizationDependencyBenchmark, + retentionRate, + retentionBenchmark, + stars, + starsBenchmark, + 
forks, + forksBenchmark, + issueResolution, + COALESCE(issueResolutionBenchmark, 0) AS issueResolutionBenchmark, + pullRequests, + pullRequestsBenchmark, + mergeLeadTime, + COALESCE(mergeLeadTimeBenchmark, 0) AS mergeLeadTimeBenchmark, + activeDaysCount, + activeDaysBenchmark, + contributionsOutsideWorkHours, + contributionsOutsideWorkHoursBenchmark, + securityPercentage, + round( + 100.0 * arraySum(contributorBenchmarks) + / if(length(contributorBenchmarks) = 0, 1, 5 * length(contributorBenchmarks)) + ) AS contributorPercentage, + round( + 100.0 * arraySum(popularityBenchmarks) + / if(length(popularityBenchmarks) = 0, 1, 5 * length(popularityBenchmarks)) + ) AS popularityPercentage, + round( + 100.0 * arraySum(developmentBenchmarks) + / if(length(developmentBenchmarks) = 0, 1, 5 * length(developmentBenchmarks)) + ) AS developmentPercentage, + round( + (contributorPercentage + popularityPercentage + developmentPercentage + securityPercentage) + / 4 + ) AS overallScore + FROM repo_health_score_copy_data + +TYPE COPY +TARGET_DATASOURCE repo_health_score_copy_ds +COPY_MODE replace +COPY_SCHEDULE 50 1 * * * diff --git a/services/libs/tinybird/pipes/repo_health_score_forks.pipe b/services/libs/tinybird/pipes/repo_health_score_forks.pipe new file mode 100644 index 0000000000..07c15b40db --- /dev/null +++ b/services/libs/tinybird/pipes/repo_health_score_forks.pipe @@ -0,0 +1,37 @@ +DESCRIPTION > + - Calculates repository-level forks health score. + - Returns fork count and benchmark (0-5) per repository. 
+ - Parameters: + - `repoUrl`: Optional repository URL for single-repo mode + - `startDate`: Optional DateTime filter + - `endDate`: Optional DateTime filter + +TAGS "Repository health", "Metrics" + +NODE repo_health_score_forks_score +SQL > + % + {% if defined(repoUrl) %} + SELECT channel, count() AS forks + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + type = 'fork' + AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + {% if defined(startDate) %} + AND timestamp + > {{ DateTime(startDate, description="Filter after date", required=False) }} + {% end %} + {% if defined(endDate) %} + AND timestamp + < {{ DateTime(endDate, description="Filter before date", required=False) }} + {% end %} + GROUP BY channel + {% else %} + SELECT channel, count() AS forks + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE type = 'fork' AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + GROUP BY channel + {% end %} + +INCLUDE "../includes/health_score_forks.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_forks_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_issues_resolution.pipe b/services/libs/tinybird/pipes/repo_health_score_issues_resolution.pipe new file mode 100644 index 0000000000..6c972feccb --- /dev/null +++ b/services/libs/tinybird/pipes/repo_health_score_issues_resolution.pipe @@ -0,0 +1,42 @@ +DESCRIPTION > + - Calculates repository-level issue resolution health score. + - Returns average days to close issues and benchmark (0-5) per repository. + - Queries `issues_analyzed` directly for optimal performance. 
+ - Parameters: + - `repoUrl`: Optional repository URL for single-repo mode + - `startDate`: Optional DateTime filter for issues opened after timestamp + - `endDate`: Optional DateTime filter for issues opened before timestamp + +TAGS "Repository health", "Metrics" + +NODE repo_health_score_issues_resolution_score +SQL > + % + {% if defined(repoUrl) %} + SELECT channel, round(avg(closedInSeconds) / (60 * 60 * 24)) AS issueResolution + FROM issues_analyzed + WHERE + channel = {{ String(repoUrl, description="Repository URL", required=False) }} + AND closedAt IS NOT NULL + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + {% if defined(startDate) %} + AND openedAt + > {{ DateTime(startDate, description="Filter after date", required=False) }} + {% end %} + {% if defined(endDate) %} + AND openedAt + < {{ DateTime(endDate, description="Filter before date", required=False) }} + {% end %} + GROUP BY channel + {% else %} + SELECT channel, round(avg(closedInSeconds) / (60 * 60 * 24)) AS issueResolution + FROM issues_analyzed + WHERE + openedAt >= toStartOfDay(now()) - INTERVAL 365 DAY + AND openedAt < toStartOfDay(now()) + INTERVAL 1 DAY + AND closedAt IS NOT NULL + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + GROUP BY channel + {% end %} + +INCLUDE "../includes/health_score_issues_resolution.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_issues_resolution_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_merge_lead_time.pipe b/services/libs/tinybird/pipes/repo_health_score_merge_lead_time.pipe new file mode 100644 index 0000000000..b073e39908 --- /dev/null +++ b/services/libs/tinybird/pipes/repo_health_score_merge_lead_time.pipe @@ -0,0 +1,40 @@ +DESCRIPTION > + - Calculates repository-level merge lead time health score. + - Returns average days to merge PRs and benchmark (0-5) per repository. + - Queries `pull_requests_analyzed` for optimal performance. 
+ - Parameters: + - `repoUrl`: Optional repository URL for single-repo mode + - `startDate`: Optional DateTime filter for PRs opened after timestamp + - `endDate`: Optional DateTime filter for PRs opened before timestamp + +TAGS "Repository health", "Metrics" + +NODE repo_health_score_merge_lead_time_score +SQL > + % + {% if defined(repoUrl) %} + SELECT channel, round(avg(dateDiff('day', openedAt, mergedAt))) AS mergeLeadTime + FROM pull_requests_analyzed + WHERE + channel = {{ String(repoUrl, description="Repository URL", required=False) }} + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + {% if defined(startDate) %} + AND openedAt + > {{ DateTime(startDate, description="Filter after date", required=False) }} + {% end %} + {% if defined(endDate) %} + AND openedAt + < {{ DateTime(endDate, description="Filter before date", required=False) }} + {% end %} + GROUP BY channel + {% else %} + SELECT channel, round(avg(dateDiff('day', openedAt, mergedAt))) AS mergeLeadTime + FROM pull_requests_analyzed + WHERE + openedAt >= toStartOfDay(now() - toIntervalDay(365)) + AND openedAt < toStartOfDay(now() + toIntervalDay(1)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + GROUP BY channel + {% end %} + +INCLUDE "../includes/health_score_merge_lead_time.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_merge_lead_time_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_organization_dependency.pipe b/services/libs/tinybird/pipes/repo_health_score_organization_dependency.pipe new file mode 100644 index 0000000000..e6f04a73d5 --- /dev/null +++ b/services/libs/tinybird/pipes/repo_health_score_organization_dependency.pipe @@ -0,0 +1,43 @@ +DESCRIPTION > + - Calculates repository-level organization dependency health score. + - Returns the number of organizations making up 51% of contributions, their combined percentage, and benchmark (0-5). 
+ - Parameters: + - `repoUrl`: Optional repository URL for single-repo mode + - `startDate`: Optional DateTime filter + - `endDate`: Optional DateTime filter + +TAGS "Repository health", "Metrics" + +NODE repo_health_score_organization_dependency_count +SQL > + % + {% if defined(repoUrl) %} + SELECT channel, organizationId, count() AS contributionCount + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + organizationId != '' + AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) + AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + {% if defined(startDate) %} + AND timestamp + > {{ DateTime(startDate, description="Filter after date", required=False) }} + {% end %} + {% if defined(endDate) %} + AND timestamp + < {{ DateTime(endDate, description="Filter before date", required=False) }} + {% end %} + GROUP BY channel, organizationId + {% else %} + SELECT channel, organizationId, count() AS contributionCount + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + organizationId != '' + AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) + AND timestamp >= toStartOfDay(now() - INTERVAL 365 DAY) + AND timestamp < toStartOfDay(now() + INTERVAL 1 DAY) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + GROUP BY channel, organizationId + {% end %} + +INCLUDE "../includes/health_score_organization_dependency.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_organization_dependency_count" diff --git a/services/libs/tinybird/pipes/repo_health_score_pull_requests.pipe b/services/libs/tinybird/pipes/repo_health_score_pull_requests.pipe new file mode 100644 index 0000000000..10e8c9abc4 --- /dev/null +++ b/services/libs/tinybird/pipes/repo_health_score_pull_requests.pipe @@ -0,0 +1,41 @@ +DESCRIPTION > + - Calculates repository-level pull request activity health 
score. + - Returns PR count and benchmark (0-5) per repository for the last 365 days. + - Parameters: + - `repoUrl`: Optional repository URL for single-repo mode + - `startDate`: Optional DateTime filter + - `endDate`: Optional DateTime filter + +TAGS "Repository health", "Metrics" + +NODE repo_health_score_pull_requests_score +SQL > + % + {% if defined(repoUrl) %} + SELECT channel, count() AS pullRequests + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + (type = 'pull_request-opened' OR type = 'merge_request-opened' OR type = 'changeset-created') + AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + {% if defined(startDate) %} + AND timestamp + > {{ DateTime(startDate, description="Filter after date", required=False) }} + {% end %} + {% if defined(endDate) %} + AND timestamp + < {{ DateTime(endDate, description="Filter before date", required=False) }} + {% end %} + GROUP BY channel + {% else %} + SELECT channel, count() AS pullRequests + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + (type = 'pull_request-opened' OR type = 'merge_request-opened' OR type = 'changeset-created') + AND timestamp >= toStartOfDay(now() - toIntervalDay(365)) + AND timestamp < toStartOfDay(now() + toIntervalDay(1)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + GROUP BY channel + {% end %} + +INCLUDE "../includes/health_score_pull_requests.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_pull_requests_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_retention.pipe b/services/libs/tinybird/pipes/repo_health_score_retention.pipe new file mode 100644 index 0000000000..976d0d10b7 --- /dev/null +++ b/services/libs/tinybird/pipes/repo_health_score_retention.pipe @@ -0,0 +1,91 @@ +DESCRIPTION > + - Calculates repository-level contributor retention health score. 
+ - Returns retention rate (percentage of previous quarter contributors active in current quarter) and benchmark (0-5). + - Parameters: + - `repoUrl`: Optional repository URL for single-repo mode + - `endDate`: Optional DateTime filter to define the quarter boundary + +TAGS "Repository health", "Metrics" + +NODE repo_health_score_retention_current_quarter +SQL > + % + {% if defined(repoUrl) %} + SELECT channel, groupUniqArray(memberId) AS currentQuarterMembers + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + memberId != '' + AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) + {% if defined(endDate) %} + AND timestamp >= toStartOfQuarter( + parseDateTimeBestEffort( + {{ DateTime(endDate, description="Filter before date", required=False) }} + ) + - INTERVAL 1 QUARTER + ) + AND timestamp < toStartOfQuarter( + parseDateTimeBestEffort( + {{ DateTime(endDate, description="Filter before date", required=False) }} + ) + ) + {% else %} + AND timestamp >= toStartOfQuarter(now() - INTERVAL 1 QUARTER) + AND timestamp < toStartOfQuarter(now()) + {% end %} + GROUP BY channel + {% else %} + SELECT channel, groupUniqArray(memberId) AS currentQuarterMembers + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + memberId != '' + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) + AND timestamp >= toStartOfQuarter(now() - INTERVAL 1 QUARTER) + AND timestamp < toStartOfQuarter(now()) + GROUP BY channel + {% end %} + +NODE repo_health_score_retention_previous_quarter +SQL > + % + {% if defined(repoUrl) %} + SELECT channel, groupUniqArray(memberId) AS previousQuarterMembers + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + memberId != '' + AND 
channel = {{ String(repoUrl, description="Repository URL", required=False) }} + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) + {% if defined(endDate) %} + AND timestamp >= toStartOfQuarter( + parseDateTimeBestEffort( + {{ DateTime(endDate, description="Filter before date", required=False) }} + ) + - INTERVAL 2 QUARTER + ) + AND timestamp < toStartOfQuarter( + parseDateTimeBestEffort( + {{ DateTime(endDate, description="Filter before date", required=False) }} + ) + - INTERVAL 1 QUARTER + ) + {% else %} + AND timestamp >= toStartOfQuarter(now() - INTERVAL 2 QUARTER) + AND timestamp < toStartOfQuarter(now() - INTERVAL 1 QUARTER) + {% end %} + GROUP BY channel + {% else %} + SELECT channel, groupUniqArray(memberId) AS previousQuarterMembers + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + memberId != '' + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) + AND timestamp >= toStartOfQuarter(now() - INTERVAL 2 QUARTER) + AND timestamp < toStartOfQuarter(now() - INTERVAL 1 QUARTER) + GROUP BY channel + {% end %} + +INCLUDE "../includes/health_score_retention.incl" "GROUP_COL=channel" "SOURCE_CURRENT=repo_health_score_retention_current_quarter" "SOURCE_PREVIOUS=repo_health_score_retention_previous_quarter" diff --git a/services/libs/tinybird/pipes/repo_health_score_security.pipe b/services/libs/tinybird/pipes/repo_health_score_security.pipe new file mode 100644 index 0000000000..9bf0da591d --- /dev/null +++ b/services/libs/tinybird/pipes/repo_health_score_security.pipe @@ -0,0 +1,48 @@ +DESCRIPTION > + - Calculates repository-level security health score. + - Returns security assessment pass rate percentage per repository. + - Uses `security_deduplicated_merged_ds` for security evaluation data. 
+ - Parameters: + - `repoUrl`: Optional repository URL for single-repo mode + +TAGS "Repository health", "Metrics" + +NODE repo_health_score_security_check_pass_rate +SQL > + % + SELECT + repo, + category, + arrayFilter( + x -> x['requirementId'] not in ('OSPS-AC-01.01'), assessments + ) as filteredAssessments, + length(arrayFilter(x -> x['result'] = 'Failed', filteredAssessments)) AS failedAssessments, + length(arrayFilter(x -> x['result'] = 'Passed', filteredAssessments)) AS passedAssessments + FROM security_deduplicated_merged_ds + WHERE + category NOT IN ('Documentation', 'Vulnerability Management') + AND repo != '' + {% if defined(repoUrl) %} + AND repo = {{ String(repoUrl, description="Repository URL", required=False) }} + {% end %} + +NODE repo_health_score_security_category +SQL > + SELECT + repo, + category, + sum(failedAssessments) AS failed, + sum(passedAssessments) AS passed, + sum(failedAssessments + passedAssessments) AS total, + round(100 * (passed / total)) AS percentage + FROM repo_health_score_security_check_pass_rate + WHERE repo != '' + GROUP BY repo, category + +NODE repo_health_score_security_score +SQL > + SELECT + repo AS channel, + round(avg(percentage)) AS securityPercentage + FROM repo_health_score_security_category + GROUP BY repo diff --git a/services/libs/tinybird/pipes/repo_health_score_stars.pipe b/services/libs/tinybird/pipes/repo_health_score_stars.pipe new file mode 100644 index 0000000000..dafdd89aff --- /dev/null +++ b/services/libs/tinybird/pipes/repo_health_score_stars.pipe @@ -0,0 +1,37 @@ +DESCRIPTION > + - Calculates repository-level stars health score. + - Returns star count and benchmark (0-5) per repository. 
+ - Parameters: + - `repoUrl`: Optional repository URL for single-repo mode + - `startDate`: Optional DateTime filter + - `endDate`: Optional DateTime filter + +TAGS "Repository health", "Metrics" + +NODE repo_health_score_stars_score +SQL > + % + {% if defined(repoUrl) %} + SELECT channel, count() AS stars + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE + type = 'star' + AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + {% if defined(startDate) %} + AND timestamp + > {{ DateTime(startDate, description="Filter after date", required=False) }} + {% end %} + {% if defined(endDate) %} + AND timestamp + < {{ DateTime(endDate, description="Filter before date", required=False) }} + {% end %} + GROUP BY channel + {% else %} + SELECT channel, count() AS stars + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE type = 'star' AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) + GROUP BY channel + {% end %} + +INCLUDE "../includes/health_score_stars.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_stars_score" diff --git a/services/libs/tinybird/pipes/repositories_populated_copy.pipe b/services/libs/tinybird/pipes/repositories_populated_copy.pipe new file mode 100644 index 0000000000..c60415ec81 --- /dev/null +++ b/services/libs/tinybird/pipes/repositories_populated_copy.pipe @@ -0,0 +1,75 @@ +NODE repositories_populated_copy_base +DESCRIPTION > + Returns base repository information from enabled, non-excluded repositories linked to insights projects + +SQL > + SELECT + r.id AS id, + r.url AS url, + r.segmentId AS segmentId, + r.insightsProjectId AS insightsProjectId + FROM repositories r FINAL + WHERE + r.enabled = true + AND r.excluded = false + AND isNull(r.deletedAt) + AND r.insightsProjectId != '' + +NODE repositories_populated_copy_contributor_org_counts +DESCRIPTION > + Calculate contributor and organization 
counts per repository (channel) + +SQL > + SELECT + channel, + uniq(CASE WHEN memberId != '' THEN memberId ELSE NULL END) AS contributorCount, + uniq(CASE WHEN organizationId != '' THEN organizationId ELSE NULL END) AS organizationCount + FROM activityRelations_deduplicated_cleaned_bucket_union + GROUP BY channel + +NODE repositories_populated_copy_first_commit +DESCRIPTION > + Returns timestamp of first commit per repository (channel) + +SQL > + SELECT + channel, + minOrNull(timestamp) AS firstCommit + FROM activityRelations_deduplicated_cleaned_bucket_union + WHERE type = 'authored-commit' AND platform = 'git' AND timestamp > toDateTime('1971-01-01') + GROUP BY channel + +NODE repositories_populated_copy_software_value +DESCRIPTION > + Returns software value per repository from softwareValueProjectCosts + +SQL > + SELECT + repoUrl, + sum(estimatedCost) AS softwareValue + FROM softwareValueProjectCosts FINAL + GROUP BY repoUrl + +NODE repositories_populated_copy_results +DESCRIPTION > + Join all repository metrics together + +SQL > + SELECT + base.id AS id, + base.url AS url, + base.segmentId AS segmentId, + base.insightsProjectId AS insightsProjectId, + COALESCE(counts.contributorCount, 0) AS contributorCount, + COALESCE(counts.organizationCount, 0) AS organizationCount, + COALESCE(sv.softwareValue, 0) AS softwareValue, + fc.firstCommit AS firstCommit + FROM repositories_populated_copy_base AS base + LEFT JOIN repositories_populated_copy_contributor_org_counts AS counts ON base.url = counts.channel + LEFT JOIN repositories_populated_copy_first_commit AS fc ON base.url = fc.channel + LEFT JOIN repositories_populated_copy_software_value AS sv ON base.url = sv.repoUrl + +TYPE COPY +TARGET_DATASOURCE repositories_populated_ds +COPY_MODE replace +COPY_SCHEDULE 0 * * * * From e72807bd1eee511ebefe425e5477bfc3ae83811b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Grom?= Date: Tue, 31 Mar 2026 23:02:27 +0100 Subject: [PATCH 2/6] feat: add health score to repo 
list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gašper Grom --- .../tinybird/pipes/project_insights_copy.pipe | 131 +++++++----------- 1 file changed, 50 insertions(+), 81 deletions(-) diff --git a/services/libs/tinybird/pipes/project_insights_copy.pipe b/services/libs/tinybird/pipes/project_insights_copy.pipe index 92ab0891c6..f8522fdea2 100644 --- a/services/libs/tinybird/pipes/project_insights_copy.pipe +++ b/services/libs/tinybird/pipes/project_insights_copy.pipe @@ -58,38 +58,23 @@ SQL > WHERE snapshotId = (SELECT max(snapshotId) FROM leaderboards_copy_ds) GROUP BY slug -NODE project_insights_copy_last_365_days_metrics +NODE project_insights_copy_period_metrics DESCRIPTION > - Calculate metrics for last 365 days: stars, forks, active contributors, active organizations + Calculate metrics for last 365 days and previous 365 days in a single scan SQL > SELECT segmentId, - countIf(type = 'star') AS starsLast365Days, - countIf(type = 'fork') AS forksLast365Days, - uniq(CASE WHEN memberId != '' THEN memberId ELSE NULL END) AS activeContributorsLast365Days, - uniq( - CASE WHEN organizationId != '' THEN organizationId ELSE NULL END - ) AS activeOrganizationsLast365Days + countIf(type = 'star' AND timestamp >= now() - INTERVAL 365 DAY) AS starsLast365Days, + countIf(type = 'fork' AND timestamp >= now() - INTERVAL 365 DAY) AS forksLast365Days, + uniqIf(memberId, memberId != '' AND timestamp >= now() - INTERVAL 365 DAY) AS activeContributorsLast365Days, + uniqIf(organizationId, organizationId != '' AND timestamp >= now() - INTERVAL 365 DAY) AS activeOrganizationsLast365Days, + countIf(type = 'star' AND timestamp < now() - INTERVAL 365 DAY) AS starsPrevious365Days, + countIf(type = 'fork' AND timestamp < now() - INTERVAL 365 DAY) AS forksPrevious365Days, + uniqIf(memberId, memberId != '' AND timestamp < now() - INTERVAL 365 DAY) AS activeContributorsPrevious365Days, + uniqIf(organizationId, organizationId != '' 
AND timestamp < now() - INTERVAL 365 DAY) AS activeOrganizationsPrevious365Days FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE timestamp <= now() - GROUP BY segmentId - -NODE project_insights_copy_previous_365_days_metrics -DESCRIPTION > - Calculate metrics for previous 365 days (365-730 days ago): stars, forks, active contributors, active organizations - -SQL > - SELECT - segmentId, - countIf(type = 'star') AS starsPrevious365Days, - countIf(type = 'fork') AS forksPrevious365Days, - uniq(CASE WHEN memberId != '' THEN memberId ELSE NULL END) AS activeContributorsPrevious365Days, - uniq( - CASE WHEN organizationId != '' THEN organizationId ELSE NULL END - ) AS activeOrganizationsPrevious365Days - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE timestamp < now() - INTERVAL 365 DAY + WHERE timestamp >= now() - INTERVAL 730 DAY AND timestamp <= now() GROUP BY segmentId NODE project_insights_copy_project_results @@ -120,19 +105,18 @@ SQL > dep.developmentPercentage AS developmentHealthScore, dep.securityPercentage AS securityHealthScore, base.firstCommit AS firstCommit, - l365.starsLast365Days AS starsLast365Days, - l365.forksLast365Days AS forksLast365Days, - l365.activeContributorsLast365Days AS activeContributorsLast365Days, - l365.activeOrganizationsLast365Days AS activeOrganizationsLast365Days, - p365.starsPrevious365Days AS starsPrevious365Days, - p365.forksPrevious365Days AS forksPrevious365Days, - p365.activeContributorsPrevious365Days AS activeContributorsPrevious365Days, - p365.activeOrganizationsPrevious365Days AS activeOrganizationsPrevious365Days + pm.starsLast365Days AS starsLast365Days, + pm.forksLast365Days AS forksLast365Days, + pm.activeContributorsLast365Days AS activeContributorsLast365Days, + pm.activeOrganizationsLast365Days AS activeOrganizationsLast365Days, + pm.starsPrevious365Days AS starsPrevious365Days, + pm.forksPrevious365Days AS forksPrevious365Days, + pm.activeContributorsPrevious365Days AS 
activeContributorsPrevious365Days, + pm.activeOrganizationsPrevious365Days AS activeOrganizationsPrevious365Days FROM project_insights_copy_base_projects AS base LEFT JOIN project_insights_copy_dependency_metrics AS dep ON base.slug = dep.slug LEFT JOIN project_insights_copy_achievements AS ach ON base.slug = ach.slug - LEFT JOIN project_insights_copy_last_365_days_metrics AS l365 USING (segmentId) - LEFT JOIN project_insights_copy_previous_365_days_metrics AS p365 USING (segmentId) + LEFT JOIN project_insights_copy_period_metrics AS pm USING (segmentId) NODE project_insights_copy_repo_base DESCRIPTION > @@ -154,38 +138,23 @@ SQL > FROM repositories_populated_ds AS rp JOIN repositories r FINAL ON r.id = rp.id -NODE project_insights_copy_repo_last_365_days_metrics -DESCRIPTION > - Calculate repository-level metrics for last 365 days grouped by channel (repo URL) - -SQL > - SELECT - channel, - countIf(type = 'star') AS starsLast365Days, - countIf(type = 'fork') AS forksLast365Days, - uniq(CASE WHEN memberId != '' THEN memberId ELSE NULL END) AS activeContributorsLast365Days, - uniq( - CASE WHEN organizationId != '' THEN organizationId ELSE NULL END - ) AS activeOrganizationsLast365Days - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE timestamp <= now() - GROUP BY channel - -NODE project_insights_copy_repo_previous_365_days_metrics +NODE project_insights_copy_repo_period_metrics DESCRIPTION > - Calculate repository-level metrics for previous 365 days grouped by channel (repo URL) + Calculate repository-level metrics for last 365 days and previous 365 days in a single scan SQL > SELECT channel, - countIf(type = 'star') AS starsPrevious365Days, - countIf(type = 'fork') AS forksPrevious365Days, - uniq(CASE WHEN memberId != '' THEN memberId ELSE NULL END) AS activeContributorsPrevious365Days, - uniq( - CASE WHEN organizationId != '' THEN organizationId ELSE NULL END - ) AS activeOrganizationsPrevious365Days + countIf(type = 'star' AND timestamp >= now() 
- INTERVAL 365 DAY) AS starsLast365Days, + countIf(type = 'fork' AND timestamp >= now() - INTERVAL 365 DAY) AS forksLast365Days, + uniqIf(memberId, memberId != '' AND timestamp >= now() - INTERVAL 365 DAY) AS activeContributorsLast365Days, + uniqIf(organizationId, organizationId != '' AND timestamp >= now() - INTERVAL 365 DAY) AS activeOrganizationsLast365Days, + countIf(type = 'star' AND timestamp < now() - INTERVAL 365 DAY) AS starsPrevious365Days, + countIf(type = 'fork' AND timestamp < now() - INTERVAL 365 DAY) AS forksPrevious365Days, + uniqIf(memberId, memberId != '' AND timestamp < now() - INTERVAL 365 DAY) AS activeContributorsPrevious365Days, + uniqIf(organizationId, organizationId != '' AND timestamp < now() - INTERVAL 365 DAY) AS activeOrganizationsPrevious365Days FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE timestamp < now() - INTERVAL 365 DAY + WHERE timestamp >= now() - INTERVAL 730 DAY AND timestamp <= now() GROUP BY channel NODE project_insights_copy_repo_results @@ -205,28 +174,28 @@ SQL > base.contributorCount AS contributorCount, base.organizationCount AS organizationCount, base.softwareValue AS softwareValue, - toUInt64(0) AS contributorDependencyCount, - toFloat64(0) AS contributorDependencyPercentage, - toUInt64(0) AS organizationDependencyCount, - toFloat64(0) AS organizationDependencyPercentage, + COALESCE(hs.contributorDependencyCount, 0) AS contributorDependencyCount, + COALESCE(hs.contributorDependencyPercentage, 0) AS contributorDependencyPercentage, + COALESCE(hs.organizationDependencyCount, 0) AS organizationDependencyCount, + COALESCE(hs.organizationDependencyPercentage, 0) AS organizationDependencyPercentage, CAST([] AS Array(Tuple(String, UInt64, UInt64))) AS achievements, - CAST(NULL AS Nullable(Float64)) AS healthScore, - CAST(NULL AS Nullable(Float64)) AS contributorHealthScore, - CAST(NULL AS Nullable(Float64)) AS popularityHealthScore, - CAST(NULL AS Nullable(Float64)) AS developmentHealthScore, - CAST(NULL 
AS Nullable(Float64)) AS securityHealthScore, + hs.overallScore AS healthScore, + hs.contributorPercentage AS contributorHealthScore, + hs.popularityPercentage AS popularityHealthScore, + hs.developmentPercentage AS developmentHealthScore, + hs.securityPercentage AS securityHealthScore, base.firstCommit AS firstCommit, - COALESCE(l365.starsLast365Days, 0) AS starsLast365Days, - COALESCE(l365.forksLast365Days, 0) AS forksLast365Days, - COALESCE(l365.activeContributorsLast365Days, 0) AS activeContributorsLast365Days, - COALESCE(l365.activeOrganizationsLast365Days, 0) AS activeOrganizationsLast365Days, - COALESCE(p365.starsPrevious365Days, 0) AS starsPrevious365Days, - COALESCE(p365.forksPrevious365Days, 0) AS forksPrevious365Days, - COALESCE(p365.activeContributorsPrevious365Days, 0) AS activeContributorsPrevious365Days, - COALESCE(p365.activeOrganizationsPrevious365Days, 0) AS activeOrganizationsPrevious365Days + COALESCE(rm.starsLast365Days, 0) AS starsLast365Days, + COALESCE(rm.forksLast365Days, 0) AS forksLast365Days, + COALESCE(rm.activeContributorsLast365Days, 0) AS activeContributorsLast365Days, + COALESCE(rm.activeOrganizationsLast365Days, 0) AS activeOrganizationsLast365Days, + COALESCE(rm.starsPrevious365Days, 0) AS starsPrevious365Days, + COALESCE(rm.forksPrevious365Days, 0) AS forksPrevious365Days, + COALESCE(rm.activeContributorsPrevious365Days, 0) AS activeContributorsPrevious365Days, + COALESCE(rm.activeOrganizationsPrevious365Days, 0) AS activeOrganizationsPrevious365Days FROM project_insights_copy_repo_base AS base - LEFT JOIN project_insights_copy_repo_last_365_days_metrics AS l365 ON base.repoUrl = l365.channel - LEFT JOIN project_insights_copy_repo_previous_365_days_metrics AS p365 ON base.repoUrl = p365.channel + LEFT JOIN repo_health_score_copy_ds AS hs ON base.repoUrl = hs.channel + LEFT JOIN project_insights_copy_repo_period_metrics AS rm ON base.repoUrl = rm.channel NODE project_insights_copy_results DESCRIPTION > From 
887e9d38f1ffb363dd207dd1865c723923736619 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Grom?= Date: Mon, 6 Apr 2026 09:10:35 +0100 Subject: [PATCH 3/6] feat: add project info to repositories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gašper Grom --- services/libs/tinybird/pipes/project_insights_copy.pipe | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/services/libs/tinybird/pipes/project_insights_copy.pipe b/services/libs/tinybird/pipes/project_insights_copy.pipe index f8522fdea2..4c3e79821b 100644 --- a/services/libs/tinybird/pipes/project_insights_copy.pipe +++ b/services/libs/tinybird/pipes/project_insights_copy.pipe @@ -120,16 +120,16 @@ SQL > NODE project_insights_copy_repo_base DESCRIPTION > - Returns base repository information with populated metrics from repositories_populated_ds + Returns base repository information with populated metrics from repositories_populated_ds, enriched with project data SQL > SELECT rp.id AS id, rp.url AS repoUrl, rp.url AS name, - '' AS slug, - '' AS logoUrl, - toUInt8(0) AS isLF, + COALESCE(ip.slug, '') AS slug, + COALESCE(ip.logoUrl, '') AS logoUrl, + COALESCE(ip.isLF, toUInt8(0)) AS isLF, if(r.archived = true, 'archived', 'active') AS status, rp.contributorCount AS contributorCount, rp.organizationCount AS organizationCount, @@ -137,6 +137,7 @@ SQL > rp.firstCommit AS firstCommit FROM repositories_populated_ds AS rp JOIN repositories r FINAL ON r.id = rp.id + LEFT JOIN insightsProjects ip FINAL ON r.insightsProjectId = ip.id NODE project_insights_copy_repo_period_metrics DESCRIPTION > From ee11f8c9a3f6a678cd54b83d97461fdde25757fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Grom?= Date: Mon, 6 Apr 2026 09:15:03 +0100 Subject: [PATCH 4/6] feat: add project name as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gašper Grom --- 
services/libs/tinybird/pipes/project_insights_copy.pipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/libs/tinybird/pipes/project_insights_copy.pipe b/services/libs/tinybird/pipes/project_insights_copy.pipe index 4c3e79821b..61de51a2ea 100644 --- a/services/libs/tinybird/pipes/project_insights_copy.pipe +++ b/services/libs/tinybird/pipes/project_insights_copy.pipe @@ -126,7 +126,7 @@ SQL > SELECT rp.id AS id, rp.url AS repoUrl, - rp.url AS name, + COALESCE(ip.name, '') AS name, COALESCE(ip.slug, '') AS slug, COALESCE(ip.logoUrl, '') AS logoUrl, COALESCE(ip.isLF, toUInt8(0)) AS isLF, From 8d3febcba10ead0e9c06898a3690f94c9284b1f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Grom?= Date: Mon, 6 Apr 2026 13:38:49 +0100 Subject: [PATCH 5/6] fix: format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gašper Grom --- .../health_score_active_contributors.pipe | 36 ------ .../pipes/health_score_active_days.pipe | 29 ----- ...core_contributions_outside_work_hours.pipe | 31 ----- .../health_score_contributor_dependency.pipe | 35 ------ .../tinybird/pipes/health_score_forks.pipe | 30 ----- .../pipes/health_score_issues_resolution.pipe | 30 ----- .../pipes/health_score_merge_lead_time.pipe | 28 ----- .../health_score_organization_dependency.pipe | 33 ------ .../pipes/health_score_pull_requests.pipe | 42 ------- .../pipes/health_score_retention.pipe | 111 ------------------ .../tinybird/pipes/health_score_stars.pipe | 30 ----- .../tinybird/pipes/project_insights_copy.pipe | 38 ++++-- .../tinybird/pipes/project_repo_insights.pipe | 26 +++- ...repo_health_score_active_contributors.pipe | 32 ----- .../pipes/repo_health_score_active_days.pipe | 28 ----- ...core_contributions_outside_work_hours.pipe | 38 ------ ...o_health_score_contributor_dependency.pipe | 34 ------ .../pipes/repo_health_score_copy.pipe | 56 +++++---- .../pipes/repo_health_score_forks.pipe | 26 ---- 
.../repo_health_score_issues_resolution.pipe | 30 ----- .../repo_health_score_merge_lead_time.pipe | 28 ----- ..._health_score_organization_dependency.pipe | 32 ----- .../repo_health_score_pull_requests.pipe | 30 ----- .../pipes/repo_health_score_retention.pipe | 81 ------------- .../pipes/repo_health_score_security.pipe | 7 +- .../pipes/repo_health_score_stars.pipe | 26 ---- .../pipes/repositories_populated_copy.pipe | 20 +--- 27 files changed, 88 insertions(+), 879 deletions(-) diff --git a/services/libs/tinybird/pipes/health_score_active_contributors.pipe b/services/libs/tinybird/pipes/health_score_active_contributors.pipe index 75e2e9ec79..23aad58678 100644 --- a/services/libs/tinybird/pipes/health_score_active_contributors.pipe +++ b/services/libs/tinybird/pipes/health_score_active_contributors.pipe @@ -1,37 +1 @@ -NODE health_score_active_contributors_score -DESCRIPTION > - Returns activeContributors for previous quarter per project - -SQL > - % - {% if defined(project) %} - SELECT segmentId, COALESCE(uniq(memberId), 0) AS activeContributors - FROM activityRelations_bucket_routing - WHERE - memberId != '' - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY segmentId - {% else %} - SELECT segmentId, COALESCE(uniq(memberId), 0) AS activeContributors - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - memberId != '' - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND timestamp >= 
toStartOfQuarter(now() - toIntervalQuarter(1)) - AND timestamp < toStartOfQuarter(now()) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY segmentId - {% end %} - INCLUDE "../includes/health_score_active_contributors.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_active_contributors_score" diff --git a/services/libs/tinybird/pipes/health_score_active_days.pipe b/services/libs/tinybird/pipes/health_score_active_days.pipe index bb3ef13fc1..ad91fae0a6 100644 --- a/services/libs/tinybird/pipes/health_score_active_days.pipe +++ b/services/libs/tinybird/pipes/health_score_active_days.pipe @@ -1,30 +1 @@ -NODE health_score_active_days_score -SQL > - % - {% if defined(project) %} - SELECT segmentId, countDistinct(DATE(timestamp)) AS activeDaysCount - FROM activityRelations_bucket_routing - WHERE - segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY segmentId - {% else %} - SELECT segmentId, countDistinct(DATE(timestamp)) AS activeDaysCount - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - timestamp >= toStartOfDay(now() - toIntervalDay(365)) - AND timestamp < toStartOfDay(now()) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY segmentId - {% end %} - INCLUDE "../includes/health_score_active_days.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_active_days_score" diff --git a/services/libs/tinybird/pipes/health_score_contributions_outside_work_hours.pipe b/services/libs/tinybird/pipes/health_score_contributions_outside_work_hours.pipe index 
71169a46eb..39944e3997 100644 --- a/services/libs/tinybird/pipes/health_score_contributions_outside_work_hours.pipe +++ b/services/libs/tinybird/pipes/health_score_contributions_outside_work_hours.pipe @@ -1,32 +1 @@ -NODE health_score_contributions_outside_work_hours_score -SQL > - % - SELECT - segmentId, - round( - (100. * countIf((weekday >= 6) OR (two_hours_block >= 18) OR (two_hours_block < 8))) - / count(id) - ) AS contributionsOutsideWorkHours - FROM contributions_with_local_time_ds AS a - WHERE - 1 = 1 - {% if defined(project) %} - AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - {% else %} - AND timestamp >= toStartOfDay(now() - toIntervalDay(365)) - AND timestamp < toStartOfDay(now() + toIntervalDay(1)) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% end %} - GROUP BY segmentId - INCLUDE "../includes/health_score_contributions_outside_work_hours.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_contributions_outside_work_hours_score" diff --git a/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe b/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe index 76a47a7e94..9d84cea48c 100644 --- a/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe +++ b/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe @@ -1,36 +1 @@ -NODE health_score_contributor_dependency_contribution_count -SQL > - % - {% if defined(project) %} - SELECT segmentId, memberId, count() AS contributionCount, MIN(timestamp), MAX(timestamp) - FROM 
activityRelations_bucket_routing - WHERE - memberId != '' - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY segmentId, memberId - ORDER by contributionCount DESC - {% else %} - SELECT segmentId, memberId, count() AS contributionCount, MIN(timestamp), MAX(timestamp) - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - memberId != '' - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND timestamp >= toStartOfDay(now() - INTERVAL 365 DAY) - AND timestamp < toStartOfDay(now() + INTERVAL 1 DAY) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY segmentId, memberId - ORDER by contributionCount DESC - {% end %} - INCLUDE "../includes/health_score_contributor_dependency.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_contributor_dependency_contribution_count" diff --git a/services/libs/tinybird/pipes/health_score_forks.pipe b/services/libs/tinybird/pipes/health_score_forks.pipe index 244a82b995..7d1076d38b 100644 --- a/services/libs/tinybird/pipes/health_score_forks.pipe +++ b/services/libs/tinybird/pipes/health_score_forks.pipe @@ -1,31 +1 @@ -NODE health_score_forks_score -DESCRIPTION > - Returns activeContributors for previous quarter per project - -SQL > - % - {% if defined(project) %} - SELECT segmentId, count() AS forks - FROM activityRelations_bucket_routing - WHERE - type = 'fork' - AND segmentId = (SELECT segmentId FROM segments_filtered) - AND 
channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY segmentId - {% else %} - SELECT segmentId, count() AS forks - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE type = 'fork' AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY segmentId - {% end %} - INCLUDE "../includes/health_score_forks.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_forks_score" diff --git a/services/libs/tinybird/pipes/health_score_issues_resolution.pipe b/services/libs/tinybird/pipes/health_score_issues_resolution.pipe index cf7cc77511..cfcf2876f7 100644 --- a/services/libs/tinybird/pipes/health_score_issues_resolution.pipe +++ b/services/libs/tinybird/pipes/health_score_issues_resolution.pipe @@ -8,34 +8,4 @@ DESCRIPTION > - `startDate`: Optional DateTime filter for issues opened after timestamp - `endDate`: Optional DateTime filter for issues opened before timestamp -NODE health_score_issues_resolution_score -SQL > - % - {% if defined(project) %} - SELECT segmentId, round(avg(closedInSeconds) / (60 * 60 * 24)) AS issueResolution - FROM issues_analyzed - WHERE - segmentId = (SELECT segmentId FROM segments_filtered) - AND closedAt IS NOT NULL - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} - {% if defined(startDate) %} - AND openedAt - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND openedAt < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - 
GROUP BY segmentId - {% else %} - SELECT segmentId, round(avg(closedInSeconds) / (60 * 60 * 24)) AS issueResolution - FROM issues_analyzed - WHERE - openedAt >= toStartOfDay(now()) - INTERVAL 365 DAY - AND openedAt < toStartOfDay(now()) + INTERVAL 1 DAY - AND closedAt IS NOT NULL - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY segmentId - {% end %} - INCLUDE "../includes/health_score_issues_resolution.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_issues_resolution_score" diff --git a/services/libs/tinybird/pipes/health_score_merge_lead_time.pipe b/services/libs/tinybird/pipes/health_score_merge_lead_time.pipe index 037560cf4c..e88362ef09 100644 --- a/services/libs/tinybird/pipes/health_score_merge_lead_time.pipe +++ b/services/libs/tinybird/pipes/health_score_merge_lead_time.pipe @@ -1,29 +1 @@ -NODE health_score_merge_lead_time_score -DESCRIPTION > - Returns activeContributors for previous quarter per project - -SQL > - % - SELECT segmentId, round(avg(dateDiff('day', openedAt, mergedAt))) AS mergeLeadTime - FROM pull_requests_analyzed - WHERE - 1 = 1 - {% if defined(project) %} - AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} - {% if defined(startDate) %} - AND openedAt - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND openedAt < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - {% else %} - AND openedAt >= toStartOfDay(now() - toIntervalDay(365)) - AND openedAt < toStartOfDay(now() + toIntervalDay(1)) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% end %} - GROUP BY segmentId - INCLUDE "../includes/health_score_merge_lead_time.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_merge_lead_time_score" diff --git 
a/services/libs/tinybird/pipes/health_score_organization_dependency.pipe b/services/libs/tinybird/pipes/health_score_organization_dependency.pipe index 6a44308b77..dc2f3459fc 100644 --- a/services/libs/tinybird/pipes/health_score_organization_dependency.pipe +++ b/services/libs/tinybird/pipes/health_score_organization_dependency.pipe @@ -1,34 +1 @@ -NODE health_score_organization_dependency_contribution_count -SQL > - % - {% if defined(project) %} - SELECT segmentId, organizationId, count() AS contributionCount - FROM activityRelations_bucket_routing - WHERE - organizationId != '' - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY segmentId, organizationId - {% else %} - SELECT segmentId, organizationId, count() AS contributionCount - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - organizationId != '' - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND timestamp >= toStartOfDay(now() - INTERVAL 365 DAY) - AND timestamp < toStartOfDay(now() + INTERVAL 1 DAY) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY segmentId, organizationId - {% end %} - INCLUDE "../includes/health_score_organization_dependency.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_organization_dependency_contribution_count" diff --git a/services/libs/tinybird/pipes/health_score_pull_requests.pipe b/services/libs/tinybird/pipes/health_score_pull_requests.pipe index 
2045003a04..9c3f46cc96 100644 --- a/services/libs/tinybird/pipes/health_score_pull_requests.pipe +++ b/services/libs/tinybird/pipes/health_score_pull_requests.pipe @@ -1,43 +1 @@ -NODE health_score_pull_requests_score -DESCRIPTION > - Returns activeContributors for previous quarter per project - -SQL > - % - {% if defined(project) %} - SELECT segmentId, count() AS pullRequests - FROM activityRelations_bucket_routing - WHERE - ( - type = 'pull_request-opened' - OR type = 'merge_request-opened' - OR type = 'changeset-created' - ) - AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY segmentId - {% else %} - SELECT segmentId, count() AS pullRequests - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - ( - type = 'pull_request-opened' - OR type = 'merge_request-opened' - OR type = 'changeset-created' - ) - AND timestamp >= toStartOfDay(now() - toIntervalDay(365)) - AND timestamp < toStartOfDay(now() + toIntervalDay(1)) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY segmentId - {% end %} - INCLUDE "../includes/health_score_pull_requests.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_pull_requests_score" diff --git a/services/libs/tinybird/pipes/health_score_retention.pipe b/services/libs/tinybird/pipes/health_score_retention.pipe index b4999e2225..91ddc37ecc 100644 --- a/services/libs/tinybird/pipes/health_score_retention.pipe +++ b/services/libs/tinybird/pipes/health_score_retention.pipe @@ -1,112 +1 @@ -NODE health_score_retention_current_quarter -SQL > - % - {% if 
defined(project) %} - SELECT segmentId, groupUniqArray(memberId) AS currentQuarterMembers - FROM activityRelations_bucket_routing - WHERE - memberId != '' - AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} - {% if defined(endDate) %} - AND timestamp >= toStartOfQuarter( - parseDateTimeBestEffort( - {{ DateTime(endDate, description="Filter before date", required=False) }} - ) - - INTERVAL 1 QUARTER - ) - AND timestamp < toStartOfQuarter( - parseDateTimeBestEffort( - {{ DateTime(endDate, description="Filter before date", required=False) }} - ) - ) - {% else %} - AND timestamp >= toStartOfQuarter(now() - INTERVAL 1 QUARTER) - AND timestamp < toStartOfQuarter(now()) - {% end %} - GROUP BY segmentId - {% else %} - SELECT segmentId, groupUniqArray(memberId) AS currentQuarterMembers - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - memberId != '' - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - {% if defined(endDate) %} - AND timestamp >= toStartOfQuarter( - parseDateTimeBestEffort( - {{ DateTime(endDate, description="Filter before date", required=False) }} - ) - - INTERVAL 1 QUARTER - ) - AND timestamp < toStartOfQuarter( - parseDateTimeBestEffort( - {{ DateTime(endDate, description="Filter before date", required=False) }} - ) - ) - {% else %} - AND timestamp >= toStartOfQuarter(now() - INTERVAL 1 QUARTER) - AND timestamp < toStartOfQuarter(now()) - {% end %} - GROUP BY segmentId - {% end %} - -NODE health_score_retention_previous_quarter -SQL > - % - {% if defined(project) %} - SELECT segmentId, groupUniqArray(memberId) AS previousQuarterMembers - FROM activityRelations_bucket_routing - WHERE 
- memberId != '' - AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - {% if defined(endDate) %} - AND timestamp >= toStartOfQuarter( - parseDateTimeBestEffort( - {{ DateTime(endDate, description="Filter before date", required=False) }} - ) - - INTERVAL 2 QUARTER - ) - AND timestamp < toStartOfQuarter( - parseDateTimeBestEffort( - {{ DateTime(endDate, description="Filter before date", required=False) }} - ) - - INTERVAL 1 QUARTER - ) - {% else %} - AND timestamp >= toStartOfQuarter(now() - INTERVAL 2 QUARTER) - AND timestamp < toStartOfQuarter(now() - INTERVAL 1 QUARTER) - {% end %} - GROUP BY segmentId - {% else %} - SELECT segmentId, groupUniqArray(memberId) AS previousQuarterMembers - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - memberId != '' - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - {% if defined(endDate) %} - AND timestamp >= toStartOfQuarter( - parseDateTimeBestEffort( - {{ DateTime(endDate, description="Filter before date", required=False) }} - ) - - INTERVAL 2 QUARTER - ) - AND timestamp < toStartOfQuarter( - parseDateTimeBestEffort( - {{ DateTime(endDate, description="Filter before date", required=False) }} - ) - - INTERVAL 1 QUARTER - ) - {% else %} - AND timestamp >= toStartOfQuarter(now() - INTERVAL 2 QUARTER) - AND timestamp < toStartOfQuarter(now() - INTERVAL 1 QUARTER) - {% end %} - GROUP BY segmentId - {% end %} - INCLUDE "../includes/health_score_retention.incl" "GROUP_COL=segmentId" "SOURCE_CURRENT=health_score_retention_current_quarter" "SOURCE_PREVIOUS=health_score_retention_previous_quarter" diff --git a/services/libs/tinybird/pipes/health_score_stars.pipe b/services/libs/tinybird/pipes/health_score_stars.pipe index 24396983bf..57c41d9483 100644 
--- a/services/libs/tinybird/pipes/health_score_stars.pipe +++ b/services/libs/tinybird/pipes/health_score_stars.pipe @@ -1,31 +1 @@ -NODE health_score_stars_score -DESCRIPTION > - Returns activeContributors for previous quarter per project - -SQL > - % - {% if defined(project) %} - SELECT segmentId, count() AS stars - FROM activityRelations_bucket_routing - WHERE - type = 'star' - AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY segmentId - {% else %} - SELECT segmentId, count() AS stars - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE type = 'star' AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY segmentId - {% end %} - INCLUDE "../includes/health_score_stars.incl" "GROUP_COL=segmentId" "SOURCE_NODE=health_score_stars_score" diff --git a/services/libs/tinybird/pipes/project_insights_copy.pipe b/services/libs/tinybird/pipes/project_insights_copy.pipe index 61de51a2ea..c5d8708202 100644 --- a/services/libs/tinybird/pipes/project_insights_copy.pipe +++ b/services/libs/tinybird/pipes/project_insights_copy.pipe @@ -67,12 +67,20 @@ SQL > segmentId, countIf(type = 'star' AND timestamp >= now() - INTERVAL 365 DAY) AS starsLast365Days, countIf(type = 'fork' AND timestamp >= now() - INTERVAL 365 DAY) AS forksLast365Days, - uniqIf(memberId, memberId != '' AND timestamp >= now() - INTERVAL 365 DAY) AS activeContributorsLast365Days, - uniqIf(organizationId, organizationId != '' AND timestamp >= now() - INTERVAL 365 DAY) AS activeOrganizationsLast365Days, + uniqIf( + memberId, memberId 
!= '' AND timestamp >= now() - INTERVAL 365 DAY + ) AS activeContributorsLast365Days, + uniqIf( + organizationId, organizationId != '' AND timestamp >= now() - INTERVAL 365 DAY + ) AS activeOrganizationsLast365Days, countIf(type = 'star' AND timestamp < now() - INTERVAL 365 DAY) AS starsPrevious365Days, countIf(type = 'fork' AND timestamp < now() - INTERVAL 365 DAY) AS forksPrevious365Days, - uniqIf(memberId, memberId != '' AND timestamp < now() - INTERVAL 365 DAY) AS activeContributorsPrevious365Days, - uniqIf(organizationId, organizationId != '' AND timestamp < now() - INTERVAL 365 DAY) AS activeOrganizationsPrevious365Days + uniqIf( + memberId, memberId != '' AND timestamp < now() - INTERVAL 365 DAY + ) AS activeContributorsPrevious365Days, + uniqIf( + organizationId, organizationId != '' AND timestamp < now() - INTERVAL 365 DAY + ) AS activeOrganizationsPrevious365Days FROM activityRelations_deduplicated_cleaned_bucket_union WHERE timestamp >= now() - INTERVAL 730 DAY AND timestamp <= now() GROUP BY segmentId @@ -148,12 +156,20 @@ SQL > channel, countIf(type = 'star' AND timestamp >= now() - INTERVAL 365 DAY) AS starsLast365Days, countIf(type = 'fork' AND timestamp >= now() - INTERVAL 365 DAY) AS forksLast365Days, - uniqIf(memberId, memberId != '' AND timestamp >= now() - INTERVAL 365 DAY) AS activeContributorsLast365Days, - uniqIf(organizationId, organizationId != '' AND timestamp >= now() - INTERVAL 365 DAY) AS activeOrganizationsLast365Days, + uniqIf( + memberId, memberId != '' AND timestamp >= now() - INTERVAL 365 DAY + ) AS activeContributorsLast365Days, + uniqIf( + organizationId, organizationId != '' AND timestamp >= now() - INTERVAL 365 DAY + ) AS activeOrganizationsLast365Days, countIf(type = 'star' AND timestamp < now() - INTERVAL 365 DAY) AS starsPrevious365Days, countIf(type = 'fork' AND timestamp < now() - INTERVAL 365 DAY) AS forksPrevious365Days, - uniqIf(memberId, memberId != '' AND timestamp < now() - INTERVAL 365 DAY) AS 
activeContributorsPrevious365Days, - uniqIf(organizationId, organizationId != '' AND timestamp < now() - INTERVAL 365 DAY) AS activeOrganizationsPrevious365Days + uniqIf( + memberId, memberId != '' AND timestamp < now() - INTERVAL 365 DAY + ) AS activeContributorsPrevious365Days, + uniqIf( + organizationId, organizationId != '' AND timestamp < now() - INTERVAL 365 DAY + ) AS activeOrganizationsPrevious365Days FROM activityRelations_deduplicated_cleaned_bucket_union WHERE timestamp >= now() - INTERVAL 730 DAY AND timestamp <= now() GROUP BY channel @@ -203,9 +219,11 @@ DESCRIPTION > Union of project and repository insights SQL > - SELECT * FROM project_insights_copy_project_results + SELECT * + FROM project_insights_copy_project_results UNION ALL - SELECT * FROM project_insights_copy_repo_results + SELECT * + FROM project_insights_copy_repo_results TYPE COPY TARGET_DATASOURCE project_insights_copy_ds diff --git a/services/libs/tinybird/pipes/project_repo_insights.pipe b/services/libs/tinybird/pipes/project_repo_insights.pipe index 3e7f5dd93c..1ea2553e0c 100644 --- a/services/libs/tinybird/pipes/project_repo_insights.pipe +++ b/services/libs/tinybird/pipes/project_repo_insights.pipe @@ -55,10 +55,32 @@ SQL > AND ( 1 = 0 {% if defined(ids) %} - OR (type = 'project' AND id IN {{ Array(ids, 'String', description="Filter by project id list", required=False) }}) + OR ( + type = 'project' + AND id + IN {{ + Array( + ids, + 'String', + description="Filter by project id list", + required=False, + ) + }} + ) {% end %} {% if defined(repoUrls) %} - OR (type = 'repo' AND repoUrl IN {{ Array(repoUrls, 'String', description="Filter by repository URL list", required=False) }}) + OR ( + type = 'repo' + AND repoUrl + IN {{ + Array( + repoUrls, + 'String', + description="Filter by repository URL list", + required=False, + ) + }} + ) {% end %} ) {% end %} diff --git a/services/libs/tinybird/pipes/repo_health_score_active_contributors.pipe 
b/services/libs/tinybird/pipes/repo_health_score_active_contributors.pipe index 8a3d97594e..9af145d578 100644 --- a/services/libs/tinybird/pipes/repo_health_score_active_contributors.pipe +++ b/services/libs/tinybird/pipes/repo_health_score_active_contributors.pipe @@ -8,36 +8,4 @@ DESCRIPTION > TAGS "Repository health", "Metrics" -NODE repo_health_score_active_contributors_score -SQL > - % - {% if defined(repoUrl) %} - SELECT channel, COALESCE(uniq(memberId), 0) AS activeContributors - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - memberId != '' - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY channel - {% else %} - SELECT channel, COALESCE(uniq(memberId), 0) AS activeContributors - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - memberId != '' - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND timestamp >= toStartOfQuarter(now() - toIntervalQuarter(1)) - AND timestamp < toStartOfQuarter(now()) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY channel - {% end %} - INCLUDE "../includes/health_score_active_contributors.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_active_contributors_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_active_days.pipe b/services/libs/tinybird/pipes/repo_health_score_active_days.pipe index 998855dfc0..121251baec 100644 --- a/services/libs/tinybird/pipes/repo_health_score_active_days.pipe +++ 
b/services/libs/tinybird/pipes/repo_health_score_active_days.pipe @@ -8,32 +8,4 @@ DESCRIPTION > TAGS "Repository health", "Metrics" -NODE repo_health_score_active_days_score -SQL > - % - {% if defined(repoUrl) %} - SELECT channel, countDistinct(DATE(timestamp)) AS activeDaysCount - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - channel = {{ String(repoUrl, description="Repository URL", required=False) }} - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY channel - {% else %} - SELECT channel, countDistinct(DATE(timestamp)) AS activeDaysCount - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - timestamp >= toStartOfDay(now() - toIntervalDay(365)) - AND timestamp < toStartOfDay(now()) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY channel - {% end %} - INCLUDE "../includes/health_score_active_days.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_active_days_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_contributions_outside_work_hours.pipe b/services/libs/tinybird/pipes/repo_health_score_contributions_outside_work_hours.pipe index 0f9fe47017..4545c489c5 100644 --- a/services/libs/tinybird/pipes/repo_health_score_contributions_outside_work_hours.pipe +++ b/services/libs/tinybird/pipes/repo_health_score_contributions_outside_work_hours.pipe @@ -9,42 +9,4 @@ DESCRIPTION > TAGS "Repository health", "Metrics" -NODE repo_health_score_contributions_outside_work_hours_score -SQL > - % - {% if defined(repoUrl) %} - SELECT - channel, - round( - (100. 
* countIf((weekday >= 6) OR (two_hours_block >= 18) OR (two_hours_block < 8))) - / count(id) - ) AS contributionsOutsideWorkHours - FROM contributions_with_local_time_ds - WHERE - channel = {{ String(repoUrl, description="Repository URL", required=False) }} - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY channel - {% else %} - SELECT - channel, - round( - (100. * countIf((weekday >= 6) OR (two_hours_block >= 18) OR (two_hours_block < 8))) - / count(id) - ) AS contributionsOutsideWorkHours - FROM contributions_with_local_time_ds - WHERE - timestamp >= toStartOfDay(now() - toIntervalDay(365)) - AND timestamp < toStartOfDay(now() + toIntervalDay(1)) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY channel - {% end %} - INCLUDE "../includes/health_score_contributions_outside_work_hours.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_contributions_outside_work_hours_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_contributor_dependency.pipe b/services/libs/tinybird/pipes/repo_health_score_contributor_dependency.pipe index abab682e39..c71fc58761 100644 --- a/services/libs/tinybird/pipes/repo_health_score_contributor_dependency.pipe +++ b/services/libs/tinybird/pipes/repo_health_score_contributor_dependency.pipe @@ -8,38 +8,4 @@ DESCRIPTION > TAGS "Repository health", "Metrics" -NODE repo_health_score_contributor_dependency_count -SQL > - % - {% if defined(repoUrl) %} - SELECT channel, memberId, count() AS contributionCount - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - memberId != '' - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND channel = {{ 
String(repoUrl, description="Repository URL", required=False) }} - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY channel, memberId - ORDER BY contributionCount DESC - {% else %} - SELECT channel, memberId, count() AS contributionCount - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - memberId != '' - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND timestamp >= toStartOfDay(now() - INTERVAL 365 DAY) - AND timestamp < toStartOfDay(now() + INTERVAL 1 DAY) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY channel, memberId - ORDER BY contributionCount DESC - {% end %} - INCLUDE "../includes/health_score_contributor_dependency.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_contributor_dependency_count" diff --git a/services/libs/tinybird/pipes/repo_health_score_copy.pipe b/services/libs/tinybird/pipes/repo_health_score_copy.pipe index 1fdb8d1e86..2b612818d8 100644 --- a/services/libs/tinybird/pipes/repo_health_score_copy.pipe +++ b/services/libs/tinybird/pipes/repo_health_score_copy.pipe @@ -5,11 +5,7 @@ DESCRIPTION > SQL > SELECT r.url AS channel FROM repositories r FINAL - WHERE - r.enabled = true - AND r.excluded = false - AND isNull(r.deletedAt) - AND r.insightsProjectId != '' + WHERE r.enabled = true AND r.excluded = false AND isNull (r.deletedAt) AND r.insightsProjectId != '' NODE repo_health_score_copy_data DESCRIPTION > @@ -41,7 +37,9 @@ SQL > COALESCE(ad.activeDaysCount, 0) AS activeDaysCount, COALESCE(ad.activeDaysBenchmark, 0) AS activeDaysBenchmark, COALESCE(owh.contributionsOutsideWorkHours, 0) AS contributionsOutsideWorkHours, - 
COALESCE(owh.contributionsOutsideWorkHoursBenchmark, 0) AS contributionsOutsideWorkHoursBenchmark, + COALESCE( + owh.contributionsOutsideWorkHoursBenchmark, 0 + ) AS contributionsOutsideWorkHoursBenchmark, COALESCE(sec.securityPercentage, 0) AS securityPercentage FROM repo_health_score_copy_repos AS repos LEFT JOIN repo_health_score_active_contributors AS ac USING (channel) @@ -63,23 +61,26 @@ DESCRIPTION > SQL > WITH - arrayFilter(x -> x >= 0, [ - activeContributorsBenchmark, - contributorDependencyBenchmark, - organizationDependencyBenchmark, - retentionBenchmark - ]) AS contributorBenchmarks, - arrayFilter(x -> x >= 0, [ - starsBenchmark, - forksBenchmark - ]) AS popularityBenchmarks, - arrayFilter(x -> x >= 0, [ - COALESCE(issueResolutionBenchmark, -1), - pullRequestsBenchmark, - COALESCE(mergeLeadTimeBenchmark, -1), - activeDaysBenchmark, - contributionsOutsideWorkHoursBenchmark - ]) AS developmentBenchmarks + arrayFilter( + x -> x >= 0, + [ + activeContributorsBenchmark, + contributorDependencyBenchmark, + organizationDependencyBenchmark, + retentionBenchmark + ] + ) AS contributorBenchmarks, + arrayFilter(x -> x >= 0, [starsBenchmark, forksBenchmark]) AS popularityBenchmarks, + arrayFilter( + x -> x >= 0, + [ + COALESCE(issueResolutionBenchmark, -1), + pullRequestsBenchmark, + COALESCE(mergeLeadTimeBenchmark, -1), + activeDaysBenchmark, + contributionsOutsideWorkHoursBenchmark + ] + ) AS developmentBenchmarks SELECT channel, activeContributors, @@ -108,15 +109,18 @@ SQL > contributionsOutsideWorkHoursBenchmark, securityPercentage, round( - 100.0 * arraySum(contributorBenchmarks) + 100.0 + * arraySum(contributorBenchmarks) / if(length(contributorBenchmarks) = 0, 1, 5 * length(contributorBenchmarks)) ) AS contributorPercentage, round( - 100.0 * arraySum(popularityBenchmarks) + 100.0 + * arraySum(popularityBenchmarks) / if(length(popularityBenchmarks) = 0, 1, 5 * length(popularityBenchmarks)) ) AS popularityPercentage, round( - 100.0 * 
arraySum(developmentBenchmarks) + 100.0 + * arraySum(developmentBenchmarks) / if(length(developmentBenchmarks) = 0, 1, 5 * length(developmentBenchmarks)) ) AS developmentPercentage, round( diff --git a/services/libs/tinybird/pipes/repo_health_score_forks.pipe b/services/libs/tinybird/pipes/repo_health_score_forks.pipe index 07c15b40db..0dcb10b3dc 100644 --- a/services/libs/tinybird/pipes/repo_health_score_forks.pipe +++ b/services/libs/tinybird/pipes/repo_health_score_forks.pipe @@ -8,30 +8,4 @@ DESCRIPTION > TAGS "Repository health", "Metrics" -NODE repo_health_score_forks_score -SQL > - % - {% if defined(repoUrl) %} - SELECT channel, count() AS forks - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - type = 'fork' - AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY channel - {% else %} - SELECT channel, count() AS forks - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE type = 'fork' AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY channel - {% end %} - INCLUDE "../includes/health_score_forks.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_forks_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_issues_resolution.pipe b/services/libs/tinybird/pipes/repo_health_score_issues_resolution.pipe index 6c972feccb..22ef18210a 100644 --- a/services/libs/tinybird/pipes/repo_health_score_issues_resolution.pipe +++ b/services/libs/tinybird/pipes/repo_health_score_issues_resolution.pipe @@ -9,34 +9,4 @@ DESCRIPTION > TAGS "Repository health", "Metrics" -NODE repo_health_score_issues_resolution_score -SQL > - % - 
{% if defined(repoUrl) %} - SELECT channel, round(avg(closedInSeconds) / (60 * 60 * 24)) AS issueResolution - FROM issues_analyzed - WHERE - channel = {{ String(repoUrl, description="Repository URL", required=False) }} - AND closedAt IS NOT NULL - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(startDate) %} - AND openedAt - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND openedAt - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY channel - {% else %} - SELECT channel, round(avg(closedInSeconds) / (60 * 60 * 24)) AS issueResolution - FROM issues_analyzed - WHERE - openedAt >= toStartOfDay(now()) - INTERVAL 365 DAY - AND openedAt < toStartOfDay(now()) + INTERVAL 1 DAY - AND closedAt IS NOT NULL - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY channel - {% end %} - INCLUDE "../includes/health_score_issues_resolution.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_issues_resolution_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_merge_lead_time.pipe b/services/libs/tinybird/pipes/repo_health_score_merge_lead_time.pipe index b073e39908..ff7c33156e 100644 --- a/services/libs/tinybird/pipes/repo_health_score_merge_lead_time.pipe +++ b/services/libs/tinybird/pipes/repo_health_score_merge_lead_time.pipe @@ -9,32 +9,4 @@ DESCRIPTION > TAGS "Repository health", "Metrics" -NODE repo_health_score_merge_lead_time_score -SQL > - % - {% if defined(repoUrl) %} - SELECT channel, round(avg(dateDiff('day', openedAt, mergedAt))) AS mergeLeadTime - FROM pull_requests_analyzed - WHERE - channel = {{ String(repoUrl, description="Repository URL", required=False) }} - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(startDate) %} - AND openedAt - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% 
if defined(endDate) %} - AND openedAt - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY channel - {% else %} - SELECT channel, round(avg(dateDiff('day', openedAt, mergedAt))) AS mergeLeadTime - FROM pull_requests_analyzed - WHERE - openedAt >= toStartOfDay(now() - toIntervalDay(365)) - AND openedAt < toStartOfDay(now() + toIntervalDay(1)) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY channel - {% end %} - INCLUDE "../includes/health_score_merge_lead_time.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_merge_lead_time_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_organization_dependency.pipe b/services/libs/tinybird/pipes/repo_health_score_organization_dependency.pipe index e6f04a73d5..72c033f336 100644 --- a/services/libs/tinybird/pipes/repo_health_score_organization_dependency.pipe +++ b/services/libs/tinybird/pipes/repo_health_score_organization_dependency.pipe @@ -8,36 +8,4 @@ DESCRIPTION > TAGS "Repository health", "Metrics" -NODE repo_health_score_organization_dependency_count -SQL > - % - {% if defined(repoUrl) %} - SELECT channel, organizationId, count() AS contributionCount - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - organizationId != '' - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY channel, organizationId - {% else %} - SELECT channel, organizationId, count() AS contributionCount - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - 
organizationId != '' - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND timestamp >= toStartOfDay(now() - INTERVAL 365 DAY) - AND timestamp < toStartOfDay(now() + INTERVAL 1 DAY) - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY channel, organizationId - {% end %} - INCLUDE "../includes/health_score_organization_dependency.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_organization_dependency_count" diff --git a/services/libs/tinybird/pipes/repo_health_score_pull_requests.pipe b/services/libs/tinybird/pipes/repo_health_score_pull_requests.pipe index 10e8c9abc4..f4d8d78809 100644 --- a/services/libs/tinybird/pipes/repo_health_score_pull_requests.pipe +++ b/services/libs/tinybird/pipes/repo_health_score_pull_requests.pipe @@ -8,34 +8,4 @@ DESCRIPTION > TAGS "Repository health", "Metrics" -NODE repo_health_score_pull_requests_score -SQL > - % - {% if defined(repoUrl) %} - SELECT channel, count() AS pullRequests - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - (type = 'pull_request-opened' OR type = 'merge_request-opened' OR type = 'changeset-created') - AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY channel - {% else %} - SELECT channel, count() AS pullRequests - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - (type = 'pull_request-opened' OR type = 'merge_request-opened' OR type = 'changeset-created') - AND timestamp >= toStartOfDay(now() - toIntervalDay(365)) - AND timestamp < toStartOfDay(now() + toIntervalDay(1)) - AND channel NOT IN (SELECT channel FROM 
repos_to_channels_excluded) - GROUP BY channel - {% end %} - INCLUDE "../includes/health_score_pull_requests.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_pull_requests_score" diff --git a/services/libs/tinybird/pipes/repo_health_score_retention.pipe b/services/libs/tinybird/pipes/repo_health_score_retention.pipe index 976d0d10b7..feb3d8253f 100644 --- a/services/libs/tinybird/pipes/repo_health_score_retention.pipe +++ b/services/libs/tinybird/pipes/repo_health_score_retention.pipe @@ -7,85 +7,4 @@ DESCRIPTION > TAGS "Repository health", "Metrics" -NODE repo_health_score_retention_current_quarter -SQL > - % - {% if defined(repoUrl) %} - SELECT channel, groupUniqArray(memberId) AS currentQuarterMembers - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - memberId != '' - AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - {% if defined(endDate) %} - AND timestamp >= toStartOfQuarter( - parseDateTimeBestEffort( - {{ DateTime(endDate, description="Filter before date", required=False) }} - ) - - INTERVAL 1 QUARTER - ) - AND timestamp < toStartOfQuarter( - parseDateTimeBestEffort( - {{ DateTime(endDate, description="Filter before date", required=False) }} - ) - ) - {% else %} - AND timestamp >= toStartOfQuarter(now() - INTERVAL 1 QUARTER) - AND timestamp < toStartOfQuarter(now()) - {% end %} - GROUP BY channel - {% else %} - SELECT channel, groupUniqArray(memberId) AS currentQuarterMembers - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - memberId != '' - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND timestamp >= toStartOfQuarter(now() - INTERVAL 1 QUARTER) - AND timestamp < toStartOfQuarter(now()) - GROUP BY channel - {% end %} 
- -NODE repo_health_score_retention_previous_quarter -SQL > - % - {% if defined(repoUrl) %} - SELECT channel, groupUniqArray(memberId) AS previousQuarterMembers - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - memberId != '' - AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - {% if defined(endDate) %} - AND timestamp >= toStartOfQuarter( - parseDateTimeBestEffort( - {{ DateTime(endDate, description="Filter before date", required=False) }} - ) - - INTERVAL 2 QUARTER - ) - AND timestamp < toStartOfQuarter( - parseDateTimeBestEffort( - {{ DateTime(endDate, description="Filter before date", required=False) }} - ) - - INTERVAL 1 QUARTER - ) - {% else %} - AND timestamp >= toStartOfQuarter(now() - INTERVAL 2 QUARTER) - AND timestamp < toStartOfQuarter(now() - INTERVAL 1 QUARTER) - {% end %} - GROUP BY channel - {% else %} - SELECT channel, groupUniqArray(memberId) AS previousQuarterMembers - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - memberId != '' - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) - AND timestamp >= toStartOfQuarter(now() - INTERVAL 2 QUARTER) - AND timestamp < toStartOfQuarter(now() - INTERVAL 1 QUARTER) - GROUP BY channel - {% end %} - INCLUDE "../includes/health_score_retention.incl" "GROUP_COL=channel" "SOURCE_CURRENT=repo_health_score_retention_current_quarter" "SOURCE_PREVIOUS=repo_health_score_retention_previous_quarter" diff --git a/services/libs/tinybird/pipes/repo_health_score_security.pipe b/services/libs/tinybird/pipes/repo_health_score_security.pipe index 9bf0da591d..1ad336d4e8 100644 --- a/services/libs/tinybird/pipes/repo_health_score_security.pipe +++ 
b/services/libs/tinybird/pipes/repo_health_score_security.pipe @@ -20,8 +20,7 @@ SQL > length(arrayFilter(x -> x['result'] = 'Passed', filteredAssessments)) AS passedAssessments FROM security_deduplicated_merged_ds WHERE - category NOT IN ('Documentation', 'Vulnerability Management') - AND repo != '' + category NOT IN ('Documentation', 'Vulnerability Management') AND repo != '' {% if defined(repoUrl) %} AND repo = {{ String(repoUrl, description="Repository URL", required=False) }} {% end %} @@ -41,8 +40,6 @@ SQL > NODE repo_health_score_security_score SQL > - SELECT - repo AS channel, - round(avg(percentage)) AS securityPercentage + SELECT repo AS channel, round(avg(percentage)) AS securityPercentage FROM repo_health_score_security_category GROUP BY repo diff --git a/services/libs/tinybird/pipes/repo_health_score_stars.pipe b/services/libs/tinybird/pipes/repo_health_score_stars.pipe index dafdd89aff..ee35715de2 100644 --- a/services/libs/tinybird/pipes/repo_health_score_stars.pipe +++ b/services/libs/tinybird/pipes/repo_health_score_stars.pipe @@ -8,30 +8,4 @@ DESCRIPTION > TAGS "Repository health", "Metrics" -NODE repo_health_score_stars_score -SQL > - % - {% if defined(repoUrl) %} - SELECT channel, count() AS stars - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE - type = 'star' - AND channel = {{ String(repoUrl, description="Repository URL", required=False) }} - AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - {% if defined(startDate) %} - AND timestamp - > {{ DateTime(startDate, description="Filter after date", required=False) }} - {% end %} - {% if defined(endDate) %} - AND timestamp - < {{ DateTime(endDate, description="Filter before date", required=False) }} - {% end %} - GROUP BY channel - {% else %} - SELECT channel, count() AS stars - FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE type = 'star' AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) - GROUP BY channel - {% end %} - 
INCLUDE "../includes/health_score_stars.incl" "GROUP_COL=channel" "SOURCE_NODE=repo_health_score_stars_score" diff --git a/services/libs/tinybird/pipes/repositories_populated_copy.pipe b/services/libs/tinybird/pipes/repositories_populated_copy.pipe index c60415ec81..263eb455ee 100644 --- a/services/libs/tinybird/pipes/repositories_populated_copy.pipe +++ b/services/libs/tinybird/pipes/repositories_populated_copy.pipe @@ -3,17 +3,9 @@ DESCRIPTION > Returns base repository information from enabled, non-excluded repositories linked to insights projects SQL > - SELECT - r.id AS id, - r.url AS url, - r.segmentId AS segmentId, - r.insightsProjectId AS insightsProjectId + SELECT r.id AS id, r.url AS url, r.segmentId AS segmentId, r.insightsProjectId AS insightsProjectId FROM repositories r FINAL - WHERE - r.enabled = true - AND r.excluded = false - AND isNull(r.deletedAt) - AND r.insightsProjectId != '' + WHERE r.enabled = true AND r.excluded = false AND isNull (r.deletedAt) AND r.insightsProjectId != '' NODE repositories_populated_copy_contributor_org_counts DESCRIPTION > @@ -32,9 +24,7 @@ DESCRIPTION > Returns timestamp of first commit per repository (channel) SQL > - SELECT - channel, - minOrNull(timestamp) AS firstCommit + SELECT channel, minOrNull(timestamp) AS firstCommit FROM activityRelations_deduplicated_cleaned_bucket_union WHERE type = 'authored-commit' AND platform = 'git' AND timestamp > toDateTime('1971-01-01') GROUP BY channel @@ -44,9 +34,7 @@ DESCRIPTION > Returns software value per repository from softwareValueProjectCosts SQL > - SELECT - repoUrl, - sum(estimatedCost) AS softwareValue + SELECT repoUrl, sum(estimatedCost) AS softwareValue FROM softwareValueProjectCosts FINAL GROUP BY repoUrl From 50a13b2839b1d776ffcab4a4aa88fde9592b8ca4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Grom?= Date: Mon, 6 Apr 2026 13:43:36 +0100 Subject: [PATCH 6/6] fix: format script and format includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Signed-off-by: Gašper Grom --- .../health_score_active_contributors.incl | 18 ++++++++---- .../includes/health_score_active_days.incl | 18 ++++++++---- ...core_contributions_outside_work_hours.incl | 18 ++++++++---- .../health_score_contributor_dependency.incl | 22 +++++++++----- .../tinybird/includes/health_score_forks.incl | 18 ++++++++---- .../health_score_issues_resolution.incl | 18 ++++++++---- .../health_score_merge_lead_time.incl | 18 ++++++++---- .../health_score_organization_dependency.incl | 22 +++++++++----- .../includes/health_score_pull_requests.incl | 18 ++++++++---- .../includes/health_score_retention.incl | 29 ++++++++++++------- .../tinybird/includes/health_score_stars.incl | 18 ++++++++---- services/libs/tinybird/scripts/format.sh | 2 ++ 12 files changed, 147 insertions(+), 72 deletions(-) diff --git a/services/libs/tinybird/includes/health_score_active_contributors.incl b/services/libs/tinybird/includes/health_score_active_contributors.incl index 7ef04b49b4..1a6b0a6aa0 100644 --- a/services/libs/tinybird/includes/health_score_active_contributors.incl +++ b/services/libs/tinybird/includes/health_score_active_contributors.incl @@ -5,12 +5,18 @@ SQL > $GROUP_COL, activeContributors, CASE - WHEN activeContributors BETWEEN 0 AND 1 THEN 0 - WHEN activeContributors BETWEEN 2 AND 3 THEN 1 - WHEN activeContributors BETWEEN 4 AND 6 THEN 2 - WHEN activeContributors BETWEEN 7 AND 10 THEN 3 - WHEN activeContributors BETWEEN 11 AND 20 THEN 4 - WHEN activeContributors > 20 THEN 5 + WHEN activeContributors BETWEEN 0 AND 1 + THEN 0 + WHEN activeContributors BETWEEN 2 AND 3 + THEN 1 + WHEN activeContributors BETWEEN 4 AND 6 + THEN 2 + WHEN activeContributors BETWEEN 7 AND 10 + THEN 3 + WHEN activeContributors BETWEEN 11 AND 20 + THEN 4 + WHEN activeContributors > 20 + THEN 5 ELSE 0 END AS activeContributorsBenchmark FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_active_days.incl 
b/services/libs/tinybird/includes/health_score_active_days.incl index 3e1500dd0a..713542f2ce 100644 --- a/services/libs/tinybird/includes/health_score_active_days.incl +++ b/services/libs/tinybird/includes/health_score_active_days.incl @@ -5,12 +5,18 @@ SQL > $GROUP_COL, activeDaysCount, CASE - WHEN activeDaysCount BETWEEN 0 AND 5 THEN 0 - WHEN activeDaysCount BETWEEN 6 AND 10 THEN 1 - WHEN activeDaysCount BETWEEN 11 AND 15 THEN 2 - WHEN activeDaysCount BETWEEN 16 AND 20 THEN 3 - WHEN activeDaysCount BETWEEN 21 AND 26 THEN 4 - WHEN activeDaysCount > 26 THEN 5 + WHEN activeDaysCount BETWEEN 0 AND 5 + THEN 0 + WHEN activeDaysCount BETWEEN 6 AND 10 + THEN 1 + WHEN activeDaysCount BETWEEN 11 AND 15 + THEN 2 + WHEN activeDaysCount BETWEEN 16 AND 20 + THEN 3 + WHEN activeDaysCount BETWEEN 21 AND 26 + THEN 4 + WHEN activeDaysCount > 26 + THEN 5 ELSE 0 END AS activeDaysBenchmark FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_contributions_outside_work_hours.incl b/services/libs/tinybird/includes/health_score_contributions_outside_work_hours.incl index c52cee5a41..cb6e5a2d05 100644 --- a/services/libs/tinybird/includes/health_score_contributions_outside_work_hours.incl +++ b/services/libs/tinybird/includes/health_score_contributions_outside_work_hours.incl @@ -5,12 +5,18 @@ SQL > $GROUP_COL, contributionsOutsideWorkHours, CASE - WHEN contributionsOutsideWorkHours >= 75 THEN 0 - WHEN contributionsOutsideWorkHours BETWEEN 50 AND 74 THEN 1 - WHEN contributionsOutsideWorkHours BETWEEN 40 AND 49 THEN 2 - WHEN contributionsOutsideWorkHours BETWEEN 30 AND 39 THEN 3 - WHEN contributionsOutsideWorkHours BETWEEN 20 AND 29 THEN 4 - WHEN contributionsOutsideWorkHours BETWEEN 0 AND 19 THEN 5 + WHEN contributionsOutsideWorkHours >= 75 + THEN 0 + WHEN contributionsOutsideWorkHours BETWEEN 50 AND 74 + THEN 1 + WHEN contributionsOutsideWorkHours BETWEEN 40 AND 49 + THEN 2 + WHEN contributionsOutsideWorkHours BETWEEN 30 AND 39 + THEN 3 + WHEN 
contributionsOutsideWorkHours BETWEEN 20 AND 29 + THEN 4 + WHEN contributionsOutsideWorkHours BETWEEN 0 AND 19 + THEN 5 ELSE 0 END AS contributionsOutsideWorkHoursBenchmark FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_contributor_dependency.incl b/services/libs/tinybird/includes/health_score_contributor_dependency.incl index 67c4a73be9..c77a7e72de 100644 --- a/services/libs/tinybird/includes/health_score_contributor_dependency.incl +++ b/services/libs/tinybird/includes/health_score_contributor_dependency.incl @@ -5,7 +5,9 @@ SQL > $GROUP_COL, memberId, contributionCount, - ROUND(contributionCount * 100.0 / SUM(contributionCount) OVER (PARTITION BY $GROUP_COL), 2) AS contributionPercentage + ROUND( + contributionCount * 100.0 / SUM(contributionCount) OVER (PARTITION BY $GROUP_COL), 2 + ) AS contributionPercentage FROM $SOURCE_NODE ORDER BY contributionPercentage DESC @@ -42,12 +44,18 @@ SQL > contributorDependencyCount, contributorDependencyPercentage, CASE - WHEN contributorDependencyCount BETWEEN 0 AND 1 THEN 0 - WHEN contributorDependencyCount = 2 THEN 1 - WHEN contributorDependencyCount BETWEEN 3 AND 4 THEN 2 - WHEN contributorDependencyCount BETWEEN 5 AND 6 THEN 3 - WHEN contributorDependencyCount BETWEEN 7 AND 9 THEN 4 - WHEN contributorDependencyCount > 9 THEN 5 + WHEN contributorDependencyCount BETWEEN 0 AND 1 + THEN 0 + WHEN contributorDependencyCount = 2 + THEN 1 + WHEN contributorDependencyCount BETWEEN 3 AND 4 + THEN 2 + WHEN contributorDependencyCount BETWEEN 5 AND 6 + THEN 3 + WHEN contributorDependencyCount BETWEEN 7 AND 9 + THEN 4 + WHEN contributorDependencyCount > 9 + THEN 5 ELSE 0 END AS contributorDependencyBenchmark FROM health_score_contributor_dependency_score diff --git a/services/libs/tinybird/includes/health_score_forks.incl b/services/libs/tinybird/includes/health_score_forks.incl index a487d13d35..d348ffa35b 100644 --- a/services/libs/tinybird/includes/health_score_forks.incl +++ 
b/services/libs/tinybird/includes/health_score_forks.incl @@ -5,12 +5,18 @@ SQL > $GROUP_COL, forks, CASE - WHEN forks BETWEEN 0 AND 4 THEN 0 - WHEN forks BETWEEN 5 AND 9 THEN 1 - WHEN forks BETWEEN 10 AND 19 THEN 2 - WHEN forks BETWEEN 20 AND 39 THEN 3 - WHEN forks BETWEEN 40 AND 79 THEN 4 - WHEN forks >= 80 THEN 5 + WHEN forks BETWEEN 0 AND 4 + THEN 0 + WHEN forks BETWEEN 5 AND 9 + THEN 1 + WHEN forks BETWEEN 10 AND 19 + THEN 2 + WHEN forks BETWEEN 20 AND 39 + THEN 3 + WHEN forks BETWEEN 40 AND 79 + THEN 4 + WHEN forks >= 80 + THEN 5 ELSE 0 END AS forksBenchmark FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_issues_resolution.incl b/services/libs/tinybird/includes/health_score_issues_resolution.incl index 098cfea164..ffb68d0089 100644 --- a/services/libs/tinybird/includes/health_score_issues_resolution.incl +++ b/services/libs/tinybird/includes/health_score_issues_resolution.incl @@ -5,12 +5,18 @@ SQL > $GROUP_COL, issueResolution, CASE - WHEN issueResolution >= 61 THEN 0 - WHEN issueResolution BETWEEN 51 AND 60 THEN 1 - WHEN issueResolution BETWEEN 36 AND 50 THEN 2 - WHEN issueResolution BETWEEN 22 AND 35 THEN 3 - WHEN issueResolution BETWEEN 8 AND 21 THEN 4 - WHEN issueResolution BETWEEN 0 AND 7 THEN 5 + WHEN issueResolution >= 61 + THEN 0 + WHEN issueResolution BETWEEN 51 AND 60 + THEN 1 + WHEN issueResolution BETWEEN 36 AND 50 + THEN 2 + WHEN issueResolution BETWEEN 22 AND 35 + THEN 3 + WHEN issueResolution BETWEEN 8 AND 21 + THEN 4 + WHEN issueResolution BETWEEN 0 AND 7 + THEN 5 ELSE 0 END AS issueResolutionBenchmark FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_merge_lead_time.incl b/services/libs/tinybird/includes/health_score_merge_lead_time.incl index 66ba4eb496..6cd489624e 100644 --- a/services/libs/tinybird/includes/health_score_merge_lead_time.incl +++ b/services/libs/tinybird/includes/health_score_merge_lead_time.incl @@ -5,12 +5,18 @@ SQL > $GROUP_COL, mergeLeadTime, CASE - WHEN 
mergeLeadTime >= 30 THEN 0 - WHEN mergeLeadTime BETWEEN 21 AND 30 THEN 1 - WHEN mergeLeadTime BETWEEN 15 AND 20 THEN 2 - WHEN mergeLeadTime BETWEEN 7 AND 14 THEN 3 - WHEN mergeLeadTime BETWEEN 3 AND 6 THEN 4 - WHEN mergeLeadTime BETWEEN 0 AND 2 THEN 5 + WHEN mergeLeadTime >= 30 + THEN 0 + WHEN mergeLeadTime BETWEEN 21 AND 30 + THEN 1 + WHEN mergeLeadTime BETWEEN 15 AND 20 + THEN 2 + WHEN mergeLeadTime BETWEEN 7 AND 14 + THEN 3 + WHEN mergeLeadTime BETWEEN 3 AND 6 + THEN 4 + WHEN mergeLeadTime BETWEEN 0 AND 2 + THEN 5 ELSE 0 END AS mergeLeadTimeBenchmark FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_organization_dependency.incl b/services/libs/tinybird/includes/health_score_organization_dependency.incl index 03c5dc960d..c33b22a04a 100644 --- a/services/libs/tinybird/includes/health_score_organization_dependency.incl +++ b/services/libs/tinybird/includes/health_score_organization_dependency.incl @@ -5,7 +5,9 @@ SQL > $GROUP_COL, organizationId, contributionCount, - (contributionCount * 100.0 / SUM(contributionCount) OVER (PARTITION BY $GROUP_COL)) AS contributionPercentage + ( + contributionCount * 100.0 / SUM(contributionCount) OVER (PARTITION BY $GROUP_COL) + ) AS contributionPercentage FROM $SOURCE_NODE ORDER BY contributionPercentage DESC @@ -42,12 +44,18 @@ SQL > organizationDependencyCount, organizationDependencyPercentage, CASE - WHEN organizationDependencyCount BETWEEN 0 AND 1 THEN 0 - WHEN organizationDependencyCount = 2 THEN 1 - WHEN organizationDependencyCount = 3 THEN 2 - WHEN organizationDependencyCount BETWEEN 4 AND 5 THEN 3 - WHEN organizationDependencyCount BETWEEN 6 AND 7 THEN 4 - WHEN organizationDependencyCount >= 8 THEN 5 + WHEN organizationDependencyCount BETWEEN 0 AND 1 + THEN 0 + WHEN organizationDependencyCount = 2 + THEN 1 + WHEN organizationDependencyCount = 3 + THEN 2 + WHEN organizationDependencyCount BETWEEN 4 AND 5 + THEN 3 + WHEN organizationDependencyCount BETWEEN 6 AND 7 + THEN 4 + WHEN 
organizationDependencyCount >= 8 + THEN 5 ELSE 0 END AS organizationDependencyBenchmark FROM health_score_organization_dependency_score diff --git a/services/libs/tinybird/includes/health_score_pull_requests.incl b/services/libs/tinybird/includes/health_score_pull_requests.incl index 70567d3613..e0ad38d74c 100644 --- a/services/libs/tinybird/includes/health_score_pull_requests.incl +++ b/services/libs/tinybird/includes/health_score_pull_requests.incl @@ -5,12 +5,18 @@ SQL > $GROUP_COL, pullRequests, CASE - WHEN pullRequests BETWEEN 0 AND 1 THEN 0 - WHEN pullRequests BETWEEN 2 AND 3 THEN 1 - WHEN pullRequests BETWEEN 4 AND 7 THEN 2 - WHEN pullRequests BETWEEN 8 AND 15 THEN 3 - WHEN pullRequests BETWEEN 16 AND 30 THEN 4 - WHEN pullRequests >= 31 THEN 5 + WHEN pullRequests BETWEEN 0 AND 1 + THEN 0 + WHEN pullRequests BETWEEN 2 AND 3 + THEN 1 + WHEN pullRequests BETWEEN 4 AND 7 + THEN 2 + WHEN pullRequests BETWEEN 8 AND 15 + THEN 3 + WHEN pullRequests BETWEEN 16 AND 30 + THEN 4 + WHEN pullRequests >= 31 + THEN 5 ELSE 0 END AS pullRequestsBenchmark FROM $SOURCE_NODE diff --git a/services/libs/tinybird/includes/health_score_retention.incl b/services/libs/tinybird/includes/health_score_retention.incl index a44f848ad3..acf13549e1 100644 --- a/services/libs/tinybird/includes/health_score_retention.incl +++ b/services/libs/tinybird/includes/health_score_retention.incl @@ -6,10 +6,13 @@ SQL > if( length(coalesce(prev.previousQuarterMembers, [])) > 0, round( - 100 * length(arrayIntersect( - coalesce(cur.currentQuarterMembers, []), - coalesce(prev.previousQuarterMembers, []) - )) / length(coalesce(prev.previousQuarterMembers, [])) + 100 * length( + arrayIntersect( + coalesce(cur.currentQuarterMembers, []), + coalesce(prev.previousQuarterMembers, []) + ) + ) + / length(coalesce(prev.previousQuarterMembers, [])) ), 0 ) AS retentionRate @@ -23,12 +26,18 @@ SQL > $GROUP_COL, retentionRate, CASE - WHEN retentionRate BETWEEN 0 AND 2 THEN 0 - WHEN retentionRate BETWEEN 3 AND 5 THEN 1 
- WHEN retentionRate BETWEEN 6 AND 9 THEN 2 - WHEN retentionRate BETWEEN 10 AND 14 THEN 3 - WHEN retentionRate BETWEEN 15 AND 19 THEN 4 - WHEN retentionRate >= 20 THEN 5 + WHEN retentionRate BETWEEN 0 AND 2 + THEN 0 + WHEN retentionRate BETWEEN 3 AND 5 + THEN 1 + WHEN retentionRate BETWEEN 6 AND 9 + THEN 2 + WHEN retentionRate BETWEEN 10 AND 14 + THEN 3 + WHEN retentionRate BETWEEN 15 AND 19 + THEN 4 + WHEN retentionRate >= 20 + THEN 5 ELSE 0 END AS retentionBenchmark FROM health_score_retention_counts diff --git a/services/libs/tinybird/includes/health_score_stars.incl b/services/libs/tinybird/includes/health_score_stars.incl index 1a87169ae0..2b379c7a13 100644 --- a/services/libs/tinybird/includes/health_score_stars.incl +++ b/services/libs/tinybird/includes/health_score_stars.incl @@ -5,12 +5,18 @@ SQL > $GROUP_COL, stars, CASE - WHEN stars BETWEEN 0 AND 9 THEN 0 - WHEN stars BETWEEN 10 AND 49 THEN 1 - WHEN stars BETWEEN 50 AND 199 THEN 2 - WHEN stars BETWEEN 200 AND 499 THEN 3 - WHEN stars BETWEEN 500 AND 999 THEN 4 - WHEN stars >= 1000 THEN 5 + WHEN stars BETWEEN 0 AND 9 + THEN 0 + WHEN stars BETWEEN 10 AND 49 + THEN 1 + WHEN stars BETWEEN 50 AND 199 + THEN 2 + WHEN stars BETWEEN 200 AND 499 + THEN 3 + WHEN stars BETWEEN 500 AND 999 + THEN 4 + WHEN stars >= 1000 + THEN 5 ELSE 0 END AS starsBenchmark FROM $SOURCE_NODE diff --git a/services/libs/tinybird/scripts/format.sh b/services/libs/tinybird/scripts/format.sh index cac577f95d..89fa182ec7 100755 --- a/services/libs/tinybird/scripts/format.sh +++ b/services/libs/tinybird/scripts/format.sh @@ -2,6 +2,7 @@ PIPES_FOLDER="../pipes" DATA_SOURCES_FOLDER="../datasources" +INCLUDES_FOLDER="../includes" show_help() { cat << EOF @@ -80,6 +81,7 @@ format_files_in_folder() { format_files_in_folder "$PIPES_FOLDER" format_files_in_folder "$DATA_SOURCES_FOLDER" +format_files_in_folder "$INCLUDES_FOLDER" # Only wait for background processes in parallel mode [ "$SEQUENTIAL" = false ] && wait