From 00466f7ce06f12063e42286592fc66cae90b09d8 Mon Sep 17 00:00:00 2001
From: Edward Hibbert
Date: Mon, 9 Dec 2024 13:10:05 +0000
Subject: [PATCH] Experimenting with toxicity.

---
 composer/composer.json       |  3 +-
 composer/composer.lock       | 42 ++++++++++++++++++++++++-
 install/iznik.conf.php       |  1 +
 scripts/fix/fix_toxicity.php | 59 ++++++++++++++++++++++++++++++++++++
 4 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100644 scripts/fix/fix_toxicity.php

Review notes (ignored by git am, which skips text between "---" and the first diff):
- scripts/fix/fix_toxicity.php scores recent chat messages with the Perspective API
  and prints those scoring above 0.3, most toxic first.
- The sort comparator now uses <=>.  usort() casts the callback result to int, so
  the previous float subtraction of two 0..1 scores always compared as "equal".
- NOTE(review): this copy of the patch appears to have lost the opening lines of
  fix_toxicity.php - the hunk declares 59 added lines but the added text begins
  part-way through a statement ("preQuery(...)").  That text is left as found;
  restore it from the original commit before applying.

diff --git a/composer/composer.json b/composer/composer.json
index cb356c04d..58a1eb291 100644
--- a/composer/composer.json
+++ b/composer/composer.json
@@ -46,7 +46,8 @@
         "prewk/xml-string-streamer": "^1.2",
         "sentry/sdk": "^3.1",
         "cweagans/composer-patches": "^1.7",
-        "stripe/stripe-php": "^16.2"
+        "stripe/stripe-php": "^16.2",
+        "stajor/perspectiveapi": "^2.0"
     },
     "repositories": [
         {
diff --git a/composer/composer.lock b/composer/composer.lock
index f70a3d1e7..1e9bb6d97 100644
--- a/composer/composer.lock
+++ b/composer/composer.lock
@@ -4,7 +4,7 @@
         "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
         "This file is @generated automatically"
     ],
-    "content-hash": "a5cf5607f1c333efc61655eccd212c93",
+    "content-hash": "cc0cfa9999b860aef8169327663bb6f6",
     "packages": [
         {
             "name": "beste/clock",
@@ -6818,6 +6818,46 @@
             ],
             "time": "2024-03-30T18:03:49+00:00"
         },
+        {
+            "name": "stajor/perspectiveapi",
+            "version": "2.0.0",
+            "source": {
+                "type": "git",
+                "url": "https://github.com/Stajor/perspectiveapi.git",
+                "reference": "40ab172c1c153b38abf881a41e6c5811b652655d"
+            },
+            "dist": {
+                "type": "zip",
+                "url": "https://api.github.com/repos/Stajor/perspectiveapi/zipball/40ab172c1c153b38abf881a41e6c5811b652655d",
+                "reference": "40ab172c1c153b38abf881a41e6c5811b652655d",
+                "shasum": ""
+            },
+            "require": {
+                "ext-json": "*",
+                "guzzlehttp/guzzle": "^7.0",
+                "php": ">=8.0"
+            },
+            "require-dev": {
+                "phpunit/phpunit": "^11.0",
+                "vlucas/phpdotenv": "^5.0"
+            },
+            "type": "library",
+            "autoload": {
+                "psr-4": {
+                    "PerspectiveApi\\": "src/"
+                }
+            },
+            "notification-url": "https://packagist.org/downloads/",
+            "license": [
+                "MIT"
+            ],
+            "description": "What if technology could help improve conversations online",
+            "support": {
+                "issues": "https://github.com/Stajor/perspectiveapi/issues",
+                "source": "https://github.com/Stajor/perspectiveapi/tree/2.0.0"
+            },
+            "time": "2024-02-14T15:15:18+00:00"
+        },
         {
             "name": "stella-maris/clock",
             "version": "0.1.7",
diff --git a/install/iznik.conf.php b/install/iznik.conf.php
index e40850a02..a243bf627 100644
--- a/install/iznik.conf.php
+++ b/install/iznik.conf.php
@@ -38,6 +38,7 @@
 define('GOOGLE_CLIENT_SECRET', 'zzz');
 define('GOOGLE_APP_NAME', 'zzz');
 define('GOOGLE_SITE_VERIFICATION', 'zzz');
+define('GOOGLE_PERSPECTIVE_KEY', 'zzz');
 
 # Yahoo App keys
 define('YAHOO_APPID', 'zzz');
diff --git a/scripts/fix/fix_toxicity.php b/scripts/fix/fix_toxicity.php
new file mode 100644
index 000000000..24f80117d
--- /dev/null
+++ b/scripts/fix/fix_toxicity.php
@@ -0,0 +1,59 @@
+preQuery("SELECT * FROM newsfeed WHERE message IS NOT NULL ORDER BY id DESC LIMIT 100;");
+$newsfeeds = $dbhr->preQuery("SELECT * FROM chat_messages WHERE message IS NOT NULL ORDER BY id DESC LIMIT 100;");
+
+foreach ($newsfeeds as $newsfeed) {
+    do {
+        $sleep = FALSE;
+
+        try {
+            $commentsClient = new \PerspectiveApi\CommentsClient(GOOGLE_PERSPECTIVE_KEY);
+            $commentsClient->comment(['text' => $newsfeed['message']]);
+            $commentsClient->languages(['en']);
+            $commentsClient->context(['entries' => ['text' => 'off-topic', 'type' => 'PLAIN_TEXT']]);
+            $commentsClient->requestedAttributes(['TOXICITY' => ['scoreType' => 'PROBABILITY', 'scoreThreshold' => 0]]);
+            $response = $commentsClient->analyze();
+            $tox = $response->attributeScores()['TOXICITY']['summaryScore']['value'];
+        } catch (\Exception $e) {
+            if (strpos($e->getMessage(), 'Quota exceeded') !== FALSE) {
+                #error_log("...quota exceeded");
+                $sleep = TRUE;
+            } else {
+                throw $e;
+            }
+        }
+
+        if ($sleep) {
+            sleep(1);
+        }
+    } while ($sleep);
+
+    if ($tox > 0.3) {
+        #error_log("$tox for " . $newsfeed['id'] . " " . $newsfeed['message']);
+        $results[] = [
+            'id' => $newsfeed['id'],
+            'message' => $newsfeed['message'],
+            'tox' => $tox
+        ];
+    }
+}
+
+# Sort descending by tox (most toxic first); <=> because usort() casts float results to int.
+usort($results, function($a, $b) {
+    return $b['tox'] <=> $a['tox'];
+});
+
+foreach ($results as $result) {
+    echo ($result['tox'] * 100) . " " . $result['id'] . " " . $result['message'] . "\n";
+}