From 71163fc0f1ebc2f09f3a8c8c4c059cf07462e951 Mon Sep 17 00:00:00 2001 From: Mitch Downey Date: Thu, 11 Apr 2024 18:17:54 -0500 Subject: [PATCH 1/7] Increase intervals for long stats tracking --- src/services/stats.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/stats.ts b/src/services/stats.ts index ef068649..9f6fff6b 100644 --- a/src/services/stats.ts +++ b/src/services/stats.ts @@ -64,7 +64,7 @@ export const queryUniquePageviews = async (pagePath: string, timeRange) => { const startDate = new Date(timeRange === 'allTime' ? '2017-01-01' : offsetDate(startDateOffset)) const endDate = new Date(offsetDate()) - const numberOfIntervals = ['allTime'].includes(timeRange) ? 60 : ['year'].includes(timeRange) ? 12 : 1 + const numberOfIntervals = ['allTime'].includes(timeRange) ? 120 : ['year'].includes(timeRange) ? 24 : 1 const dateIntervals = splitDateIntoEqualIntervals(startDate, endDate, numberOfIntervals) let data: any[] = [] From d6efe7307a639274e0d5c936eb04e0e474fea30b Mon Sep 17 00:00:00 2001 From: Mitch Downey Date: Tue, 16 Apr 2024 14:50:20 -0500 Subject: [PATCH 2/7] Disable camelcase linting rule --- .eslintrc | 1 + 1 file changed, 1 insertion(+) diff --git a/.eslintrc b/.eslintrc index 8b04c236..16eb6fb0 100644 --- a/.eslintrc +++ b/.eslintrc @@ -7,6 +7,7 @@ "@typescript-eslint" ], "rules": { + "@typescript-eslint/camelcase": 0, "@typescript-eslint/explicit-function-return-type": 0, "@typescript-eslint/member-delimiter-style": 0, "@typescript-eslint/no-explicit-any": 0, From 3a43dcf72ac6ae4cc278a8753cb5548c9c1bffd1 Mon Sep 17 00:00:00 2001 From: Mitch Downey Date: Tue, 16 Apr 2024 15:05:53 -0500 Subject: [PATCH 3/7] Add abort parser logic for flag_status --- migrations/0055_podcasts_flag_status.sql | 4 +++ src/entities/podcast.ts | 10 +++++++ src/services/parser.ts | 36 ++++++++++++++++++------ 3 files changed, 41 insertions(+), 9 deletions(-) create mode 100644 migrations/0055_podcasts_flag_status.sql diff --git a/migrations/0055_podcasts_flag_status.sql b/migrations/0055_podcasts_flag_status.sql new file mode 100644 index 00000000..fcec5f19 --- /dev/null +++ b/migrations/0055_podcasts_flag_status.sql @@ -0,0 +1,4 @@ +CREATE TYPE flag_status AS ENUM ('none', 'spam', 'takedown', 'other'); + +ALTER TABLE podcasts +ADD COLUMN flag_status flag_status DEFAULT 'none'; diff --git a/src/entities/podcast.ts b/src/entities/podcast.ts index e2d4dab1..fb048e68 100644 --- a/src/entities/podcast.ts +++ b/src/entities/podcast.ts @@ -24,6 +24,8 @@ type Funding = { value?: string } +type PodcastFlagStatus = 'none' | 'spam' | 'takedown' | 'other' + @Index(['hasVideo', 'pastAllTimeTotalUniquePageviews']) @Index(['hasVideo', 'pastHourTotalUniquePageviews']) @Index(['hasVideo', 'pastDayTotalUniquePageviews']) @@ -88,6 +90,14 @@ export class Podcast { @Column({ nullable: true }) feedLastUpdated?: Date + @Index() + @Column({ + type: 'enum', + enum: ['none', 'spam', 'takedown', 'other'], + default: 'none' + }) + flag_status: PodcastFlagStatus + @Column('simple-json', { nullable: true }) funding: Funding[] diff --git a/src/services/parser.ts b/src/services/parser.ts index 0b19ce8d..82d66214 100644 --- a/src/services/parser.ts +++ b/src/services/parser.ts @@ -86,6 +86,28 @@ export const parseFeedUrl = async (feedUrl, forceReparsing = false, cacheBust = }, abortTimeLimit) try { + let podcast = new Podcast() + if (feedUrl.podcast) { + logPerformance('feedUrl.podcast getPodcast', _logStart) + const savedPodcast = await getPodcast(feedUrl.podcast.id, false, allowNonPublic) + logPerformance('feedUrl.podcast getPodcast', _logEnd) + if (!savedPodcast) throw Error('Invalid podcast id provided.') + podcast = savedPodcast + } + + if (podcast.flag_status === 'spam') { + console.log(`Aborting parser: podcast id ${podcast.id} marked as flag_status = spam`) + return + } else if (podcast.flag_status === 'takedown') { + console.log(`Aborting parser: podcast id ${podcast.id} marked as flag_status = takedown`) + return + } else if (podcast.flag_status === 'other') { + console.log(`Aborting parser: podcast id ${podcast.id} marked as flag_status = other`) + return + } + + const podcastRepo = getRepository(Podcast) + /* Temporary: Stop parsing papi.qingting.fm domain until mediaUrl/guid switch is completed */ const isQingTing = feedUrl.url.indexOf('qingting.fm') > -1 if (isQingTing) { @@ -294,13 +316,11 @@ export const parseFeedUrl = async (feedUrl, forceReparsing = false, cacheBust = const parsedEpisodes = parsedFeed.items.map(itemCompat) const parsedLiveItemEpisodes = meta.liveItems.map(liveItemCompatToParsedEpisode) - let podcast = new Podcast() - if (feedUrl.podcast) { - logPerformance('feedUrl.podcast getPodcast', _logStart) - const savedPodcast = await getPodcast(feedUrl.podcast.id, false, allowNonPublic) - logPerformance('feedUrl.podcast getPodcast', _logEnd) - if (!savedPodcast) throw Error('Invalid podcast id provided.') - podcast = savedPodcast + if (parsedEpisodes?.length >= 1 || parsedLiveItemEpisodes?.length >= 1) { + console.log('Aborting parser: too many episodes. Marking podcast as spam.') + podcast.flag_status = 'spam' + await podcastRepo.save(podcast) + return } logPerformance('podcast id', podcast.id) @@ -484,8 +504,6 @@ export const parseFeedUrl = async (feedUrl, forceReparsing = false, cacheBust = } } - const podcastRepo = getRepository(Podcast) - logPerformance('podcastRepo.save', _logStart) await podcastRepo.save(podcast) logPerformance('podcastRepo.save', _logEnd) From df31e8ac1137e2fa9ca9d05451830a1d490012dd Mon Sep 17 00:00:00 2001 From: Mitch Downey Date: Tue, 16 Apr 2024 15:06:13 -0500 Subject: [PATCH 4/7] Bump to version 4.16.6 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index d4c276a0..6898b0c4 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "podverse-api", - "version": "4.16.5", + "version": "4.16.6", "description": "Data API, database migration scripts, and backend services for all Podverse models.", "contributors": [ "Mitch Downey" From 968577bb7015b9c70b68519b30060074319c6091 Mon Sep 17 00:00:00 2001 From: Mitch Downey Date: Wed, 17 Apr 2024 00:15:32 -0500 Subject: [PATCH 5/7] Add create int_id column note --- migrations/0028_episodes_ids.sql | 6 ++++++ migrations/0030_int_id_indexes.sql | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/migrations/0028_episodes_ids.sql b/migrations/0028_episodes_ids.sql index d1529459..a49ce317 100644 --- a/migrations/0028_episodes_ids.sql +++ b/migrations/0028_episodes_ids.sql @@ -7,6 +7,12 @@ ALTER TABLE episodes ALTER SEQUENCE episodes_int_id_seq OWNED BY episodes.int_id; COMMIT; +/* + NOTE: the int_id columns exist mainly so that the Manticore index jobs have + a reliable numeric identifier for selecting ~10000 rows at a time as part + of the index operation. +*/ + CREATE UNIQUE INDEX CONCURRENTLY episodes_int_id_key ON episodes (int_id); -- Use a script to report the UPDATE and VACUUM commands until not episodes with int_ids are left diff --git a/migrations/0030_int_id_indexes.sql b/migrations/0030_int_id_indexes.sql index 97810d62..8168f8b4 100644 --- a/migrations/0030_int_id_indexes.sql +++ b/migrations/0030_int_id_indexes.sql @@ -1,3 +1,9 @@ +/* + NOTE: the int_id columns exist mainly so that the Manticore index jobs have + a reliable numeric identifier for selecting ~10000 rows at a time as part + of the index operation. +*/ + CREATE INDEX CONCURRENTLY "authors_int_id_index" ON "authors" ("int_id"); CREATE INDEX CONCURRENTLY "categories_int_id_index" ON "categories" ("int_id"); CREATE INDEX CONCURRENTLY "feedUrls_int_id_index" ON "feedUrls" ("int_id"); From 217f49128faa3538fdcb8eed817f3807aecaae43 Mon Sep 17 00:00:00 2001 From: Mitch Downey Date: Wed, 17 Apr 2024 00:19:34 -0500 Subject: [PATCH 6/7] Add podcast.flag_status always-allow to enum --- migrations/0055_podcasts_flag_status.sql | 2 +- src/entities/podcast.ts | 4 ++-- src/services/parser.ts | 9 +++++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/migrations/0055_podcasts_flag_status.sql b/migrations/0055_podcasts_flag_status.sql index fcec5f19..09857a99 100644 --- a/migrations/0055_podcasts_flag_status.sql +++ b/migrations/0055_podcasts_flag_status.sql @@ -1,4 +1,4 @@ -CREATE TYPE flag_status AS ENUM ('none', 'spam', 'takedown', 'other'); +CREATE TYPE flag_status AS ENUM ('none', 'spam', 'takedown', 'other', 'always-allow'); ALTER TABLE podcasts ADD COLUMN flag_status flag_status DEFAULT 'none'; diff --git a/src/entities/podcast.ts b/src/entities/podcast.ts index fb048e68..0cf2a840 100644 --- a/src/entities/podcast.ts +++ b/src/entities/podcast.ts @@ -24,7 +24,7 @@ type Funding = { value?: string } -type PodcastFlagStatus = 'none' | 'spam' | 'takedown' | 'other' +type PodcastFlagStatus = 'none' | 'spam' | 'takedown' | 'other' | 'always-allow' @Index(['hasVideo', 'pastAllTimeTotalUniquePageviews']) @Index(['hasVideo', 'pastHourTotalUniquePageviews']) @@ -93,7 +93,7 @@ export class Podcast { @Index() @Column({ type: 'enum', - enum: ['none', 'spam', 'takedown', 'other'], + enum: ['none', 'spam', 'takedown', 'other', 'always-allow'], default: 'none' }) flag_status: PodcastFlagStatus diff --git a/src/services/parser.ts b/src/services/parser.ts index 82d66214..48428f40 100644 --- a/src/services/parser.ts +++ b/src/services/parser.ts @@ -95,7 +95,9 @@ export const parseFeedUrl = async (feedUrl, forceReparsing = false, cacheBust = podcast = savedPodcast } - if (podcast.flag_status === 'spam') { + if (podcast.flag_status === 'always-allow') { + // do nothing + } else if (podcast.flag_status === 'spam') { console.log(`Aborting parser: podcast id ${podcast.id} marked as flag_status = spam`) return } else if (podcast.flag_status === 'takedown') { @@ -316,7 +318,10 @@ export const parseFeedUrl = async (feedUrl, forceReparsing = false, cacheBust = const parsedEpisodes = parsedFeed.items.map(itemCompat) const parsedLiveItemEpisodes = meta.liveItems.map(liveItemCompatToParsedEpisode) - if (parsedEpisodes?.length >= 1 || parsedLiveItemEpisodes?.length >= 1) { + if ( + podcast.flag_status !== 'always-allow' && + (parsedEpisodes?.length >= 1 || parsedLiveItemEpisodes?.length >= 1) + ) { console.log('Aborting parser: too many episodes. Marking podcast as spam.') podcast.flag_status = 'spam' await podcastRepo.save(podcast) From 402092a58c2fc1754df9f2da68c7dc449026383d Mon Sep 17 00:00:00 2001 From: Mitch Downey Date: Wed, 17 Apr 2024 00:23:44 -0500 Subject: [PATCH 7/7] Update 0055_podcasts_flag_status.sql --- migrations/0055_podcasts_flag_status.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/migrations/0055_podcasts_flag_status.sql b/migrations/0055_podcasts_flag_status.sql index 09857a99..7c594f74 100644 --- a/migrations/0055_podcasts_flag_status.sql +++ b/migrations/0055_podcasts_flag_status.sql @@ -1,4 +1,4 @@ -CREATE TYPE flag_status AS ENUM ('none', 'spam', 'takedown', 'other', 'always-allow'); +CREATE TYPE flag_status_enum AS ENUM ('none', 'spam', 'takedown', 'other', 'always-allow'); ALTER TABLE podcasts -ADD COLUMN flag_status flag_status DEFAULT 'none'; +ADD COLUMN flag_status flag_status_enum DEFAULT 'none';