Skip to content

Commit

Permalink
🦢 Switch language package (#296)
Browse files Browse the repository at this point in the history
* Replace ngram package

* Remove fallback to metadata language

* Add language prediction to the update mapping

* Changelog and version bump
  • Loading branch information
WRadoslaw committed Feb 9, 2024
1 parent 33df6e2 commit 61c0909
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 22 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# 3.3.0

## Schema
- `orionLanguage` property has been added.

## Mappings
- Language detection is used to populate the new `orionLanguage` property on video update and creation.

# 3.2.1

## Misc
Expand Down
35 changes: 21 additions & 14 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "orion",
"version": "3.2.1",
"version": "3.3.0",
"engines": {
"node": ">=16"
},
Expand Down Expand Up @@ -76,14 +76,14 @@
"graphql-tools": "^8.3.11",
"handlebars": "^4.7.7",
"haversine-distance": "^1.2.1",
"languagedetect": "^2.0.0",
"lodash": "^4.17.21",
"node-cache": "^5.1.2",
"node-schedule": "^2.1.1",
"p-limit": "3.1.0",
"patch-package": "^6.5.0",
"pg": "8.8.0",
"swagger-ui-express": "^4.6.2",
"tinyld": "^1.3.4",
"type-graphql": "^1.2.0-rc.1",
"typeorm": "^0.3.11",
"ua-parser-js": "^1.0.34",
Expand Down
5 changes: 4 additions & 1 deletion src/mappings/content/video.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ export async function processVideoCreatedEvent({
}

const languageText = [video.title ?? '', video.description ?? ''].join(' ')
video.orionLanguage = predictLanguage(languageText) ?? video.language
video.orionLanguage = predictLanguage(languageText)

channel.totalVideosCreated += 1

Expand Down Expand Up @@ -183,6 +183,9 @@ export async function processVideoUpdatedEvent({
)
}

const languageText = [video.title ?? '', video.description ?? ''].join(' ')
video.orionLanguage = predictLanguage(languageText)

if (autoIssueNft) {
await processNft(overlay, block, indexInBlock, extrinsicHash, video, contentActor, autoIssueNft)
}
Expand Down
7 changes: 2 additions & 5 deletions src/utils/language.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import LanguageDetect from 'languagedetect'
import { detect } from 'tinyld'

function cleanString(input: string): string {
// Remove symbols, numbers, and emojis
Expand All @@ -12,11 +12,8 @@ function cleanString(input: string): string {
return cleanedString.toLowerCase()
}

const lngDetector = new LanguageDetect()
lngDetector.setLanguageType('iso2')

// Example usage
export const predictLanguage = (text: string): string | undefined => {
const cleanedText = cleanString(text)
return lngDetector.detect(cleanedText, 1)[0]?.[0]
return detect(cleanedText) || undefined
}

0 comments on commit 61c0909

Please sign in to comment.