Skip to content

Commit

Permalink
fix: include max 1 video per channel in homepage videos (#313)
Browse files Browse the repository at this point in the history
* fix: include max 1 video per channel in homepage videos

* update setOrionLanguage Migration script

* format updateVideoRelevanceValue SQL query

* fix: use UTC midnight epoch instead of current epoch to calculate video relevance score

* bump package version and update CHANGELOG

* fix: lint bug
  • Loading branch information
zeeshanakram3 committed Mar 12, 2024
1 parent 8e5b98b commit 08fd96c
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 92 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# 3.7.0

## Schema changes
- Added the `isShortDerived` field to the `Video` entity, indicating whether or not a video is a short-format, vertical video. This field is computed in the mappings, based on the video dimensions and duration, when `isShort` is not set in the metadata.

## Misc

- Updated the `setOrionLanguage` custom migration script.

## Bug Fixes
- Improved the accuracy of the `Video.orionLanguage` field by reworking the `predictVideoLanguage` function in `src/utils/language.ts`.
- Use the UTC midnight epoch instead of the current epoch to calculate the video relevance score in `VideoRelevanceManager`.


# 3.6.0

## Schema changes
Expand Down
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "orion",
"version": "3.6.0",
"version": "3.7.0",
"engines": {
"node": ">=16"
},
Expand Down
108 changes: 64 additions & 44 deletions src/utils/VideoRelevanceManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ export class VideoRelevanceManager {
}: VideoRelevanceManagerLoops): Promise<void> {
const em = await globalEm

this.updateScheduledLoop(em, scheduledUpdateLoopTime)
this.updateLoop(em, scheduledUpdateLoopTime)
.then(() => {
/* Do nothing */
})
Expand All @@ -33,7 +33,7 @@ export class VideoRelevanceManager {
process.exit(-1)
})

this.updateFullUpdateLoop(em, fullUpdateLoopTime)
this.updateLoop(em, fullUpdateLoopTime)
.then(() => {
/* Do nothing */
})
Expand Down Expand Up @@ -74,52 +74,72 @@ export class VideoRelevanceManager {

await em.query(`
WITH videos_with_weight AS (
SELECT
video.id as videoId,
channel.id as channelId,
(ROUND((
(extract(epoch from now()) - ${wtEpoch})
/ ${NEWNESS_SECONDS_DIVIDER} * ${newnessWeight * -1}
+ (views_num * ${viewsWeight})
+ (comments_count * ${commentsWeight})
+ (reactions_count * ${reactionsWeight}))
* COALESCE(channel.channel_weight, ${channelWeight}),2)) as videoRelevance
FROM video
INNER JOIN channel ON video.channel_id = channel.id
${
forceUpdateAll
? ''
: `WHERE video.channel_id in (${[...this.channelsToUpdate.values()]
.map((id) => `'${id}'`)
.join(', ')})`
}
ORDER BY video.id),
SELECT
video.id as videoId,
channel.id as channelId,
(ROUND((
(extract(epoch FROM date_trunc('day', now() at time zone 'UTC')) - ${wtEpoch})
/ ${NEWNESS_SECONDS_DIVIDER} * ${newnessWeight * -1}
+ (views_num * ${viewsWeight})
+ (comments_count * ${commentsWeight})
+ (reactions_count * ${reactionsWeight}))
* COALESCE(channel.channel_weight, ${channelWeight}), 2)) as videoRelevance
FROM
video
INNER JOIN channel ON video.channel_id = channel.id
${
forceUpdateAll
? ''
: `WHERE video.channel_id in (${[...this.channelsToUpdate.values()]
.map((id) => `'${id}'`)
.join(', ')})`
}
ORDER BY
video.id
),
top_channel_score as (
SELECT
channel.id as channelId,
MAX(videoCte.videoRelevance) as maxChannelRelevance
FROM channel
INNER JOIN videos_with_weight as videoCte on videoCte.channelId = channel.id
GROUP BY channel.id)
UPDATE video
SET video_relevance = COALESCE(topChannelVideo.maxChannelRelevance, 1)
FROM videos_with_weight as videoCte
LEFT JOIN top_channel_score as topChannelVideo on topChannelVideo.channelId = videoCte.channelId and topChannelVideo.maxChannelRelevance = videoCte.videoRelevance
WHERE video.id = videoCte.videoId;
`)
this.channelsToUpdate.clear()
}
SELECT
channel.id as channelId,
MAX(videos_with_weight.videoRelevance) as maxChannelRelevance
FROM
channel
INNER JOIN videos_with_weight on videos_with_weight.channelId = channel.id
GROUP BY
channel.id
),
private async updateScheduledLoop(em: EntityManager, intervalMs: number): Promise<void> {
while (true) {
await this.updateVideoRelevanceValue(em)
await new Promise((resolve) => setTimeout(resolve, intervalMs))
}
ranked_videos AS (
SELECT
videos_with_weight.videoId,
topChannelVideo.maxChannelRelevance,
ROW_NUMBER() OVER (
PARTITION BY videos_with_weight.channelId
ORDER BY
videos_with_weight.videoRelevance DESC,
videos_with_weight.videoId
) as rank
FROM
videos_with_weight
LEFT JOIN top_channel_score as topChannelVideo ON videos_with_weight.channelId = topChannelVideo.channelId
)
UPDATE
video
SET
video_relevance = CASE
WHEN ranked_videos.rank = 1 THEN ranked_videos.maxChannelRelevance
ELSE 1
END
FROM
ranked_videos
WHERE
video.id = ranked_videos.videoId;
`)
this.channelsToUpdate.clear()
}

private async updateFullUpdateLoop(em: EntityManager, intervalMs: number): Promise<void> {
private async updateLoop(em: EntityManager, intervalMs: number): Promise<void> {
while (true) {
await this.updateVideoRelevanceValue(em)
await new Promise((resolve) => setTimeout(resolve, intervalMs))
Expand Down
12 changes: 6 additions & 6 deletions src/utils/auth.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
import { createLogger } from '@subsquid/logger'
import { Request, Response } from 'express'
import { EntityManager, FindOptionsWhere, IsNull, MoreThan } from 'typeorm'
import { Account, Session } from '../model'
import {
CachedSessionData,
sessionCache,
SESSION_CACHE_MINIMUM_TTL,
SESSION_CACHE_EXPIRY_TTL_MARGIN,
SESSION_CACHE_MINIMUM_TTL,
sessionCache,
} from './cache'
import { config, ConfigVariable } from './config'
import { getUserAgentData } from './http'
import { createLogger } from '@subsquid/logger'
import { globalEm } from './globalEm'
import { ConfigVariable, config } from './config'
import { uniqueId } from './crypto'
import { globalEm } from './globalEm'
import { getUserAgentData } from './http'

const authLogger = createLogger('authentication')

Expand Down
82 changes: 43 additions & 39 deletions src/utils/customMigrations/setOrionLanguage.ts
Original file line number Diff line number Diff line change
@@ -1,52 +1,56 @@
import { createLogger } from '@subsquid/logger'
import { IsNull } from 'typeorm'
import { Video } from '../../model'
import { EntityManager } from 'typeorm'
import { globalEm } from '../globalEm'
import { predictVideoLanguage } from '../language'

const logger = createLogger('setOrionLanguage')
async function detectVideoLanguage() {
const em: EntityManager = await globalEm
const videos: any[] = await em.query(`
SELECT id, title, description
FROM admin.video
`)

async function setOrionLanguage() {
const em = await globalEm
// Temporary storage for batch update data
const updates: any[] = []

const batchSize = 10000
let offset = 0
let hasMore = true

while (hasMore) {
const videos = await em.find(Video, {
where: { orionLanguage: IsNull() },
order: { id: 'ASC' },
take: batchSize,
skip: offset,
for (const [i, video] of videos.entries()) {
const orionLanguage = predictVideoLanguage({
title: video.title,
description: video.description,
})

if (videos.length === 0) {
hasMore = false
} else {
const updates = videos.map((video) => {
video.orionLanguage = predictVideoLanguage({
title: video.title ?? '',
description: video.description ?? '',
})
return video
})

// Save all updates in a single transaction
await em.transaction(async (transactionalEntityManager) => {
await transactionalEntityManager.save(updates)
})

logger.info(`Updated ${updates.length} videos.`)

offset += videos.length // Prepare the offset for the next batch
}
// Instead of updating immediately, push the update data into the array
updates.push({ orionLanguage, id: video.id })
console.log(i)
}

// Define batch size
const batchSize = 1000 // Adjust the batch size based on your database and network performance

for (let i = 0; i < updates.length; i += batchSize) {
const batch = updates.slice(i, i + batchSize)

// Prepare the query and parameters for batch update
const query = `
UPDATE admin.video AS v SET
orion_language = c.orion_language
FROM (VALUES ${batch
.map((_, idx) => `($${idx * 2 + 1}, $${idx * 2 + 2})`)
.join(',')}) AS c(orion_language, id)
WHERE c.id = v.id;
`

const queryParams = batch.flatMap((update) => [update.orionLanguage, update.id])

// Execute batch update
await em.query(query, queryParams)
}

console.log(`Updated languages for ${videos.length} videos`)
}

setOrionLanguage()
.then(() => logger.info('Update process completed.'))
detectVideoLanguage()
.then(() => console.log('Update process completed.'))
.catch(() => {
logger.error('process failed')
console.error('process failed')
process.exit(1)
})

0 comments on commit 08fd96c

Please sign in to comment.