Skip to content

Commit

Permalink
Experimenting with toxicity.
Browse files Browse the repository at this point in the history
  • Loading branch information
edwh committed Dec 9, 2024
1 parent 421e0b2 commit 00466f7
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 2 deletions.
3 changes: 2 additions & 1 deletion composer/composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@
"prewk/xml-string-streamer": "^1.2",
"sentry/sdk": "^3.1",
"cweagans/composer-patches": "^1.7",
"stripe/stripe-php": "^16.2"
"stripe/stripe-php": "^16.2",
"stajor/perspectiveapi": "^2.0"
},
"repositories": [
{
Expand Down
42 changes: 41 additions & 1 deletion composer/composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions install/iznik.conf.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
define('GOOGLE_CLIENT_SECRET', 'zzz');
define('GOOGLE_APP_NAME', 'zzz');
define('GOOGLE_SITE_VERIFICATION', 'zzz');
# Key handed to \PerspectiveApi\CommentsClient by scripts/fix/fix_toxicity.php when requesting TOXICITY scores.
define('GOOGLE_PERSPECTIVE_KEY', 'zzz');

# Yahoo App keys
define('YAHOO_APPID', 'zzz');
Expand Down
59 changes: 59 additions & 0 deletions scripts/fix/fix_toxicity.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
<?php

namespace Freegle\Iznik;

# Bootstrap: BASE_DIR is the directory two levels above this script.  IZNIK_BASE is not defined in this
# script - presumably it comes from include/config.php.
define('BASE_DIR', __DIR__ . '/../..');
require_once BASE_DIR . '/include/config.php';
require_once IZNIK_BASE . '/include/db.php';

# Database handles - presumably set up by include/db.php.  Only $dbhr is used below.
global $dbhr, $dbhm;

# Rows whose toxicity score is above the reporting threshold are collected here.
$results = [];

# Pick up the 100 most recent rows which have a message.  We are currently sampling chat messages; the
# newsfeed variant of the query is kept for switching the experiment back.
#$newsfeeds = $dbhr->preQuery("SELECT * FROM newsfeed WHERE message IS NOT NULL ORDER BY id DESC LIMIT 100;");
$sql = "SELECT * FROM chat_messages WHERE message IS NOT NULL ORDER BY id DESC LIMIT 100;";
$newsfeeds = $dbhr->preQuery($sql);

# Ask the Perspective API for a TOXICITY score for each row, and remember the rows which score above 0.3.
foreach ($newsfeeds as $row) {
    # Keep trying this row until we get a score.  A quota error makes us wait a second and go round
    # again; any other exception is passed on and ends the script.
    while (TRUE) {
        try {
            $client = new \PerspectiveApi\CommentsClient(GOOGLE_PERSPECTIVE_KEY);
            $client->comment(['text' => $row['message']]);
            $client->languages(['en']);

            # NOTE(review): the Perspective API documents context.entries as a list of entries, whereas a
            # single entry is passed here - confirm the service accepts this shape.
            $client->context(['entries' => ['text' => 'off-topic', 'type' => 'PLAIN_TEXT']]);
            $client->requestedAttributes(['TOXICITY' => ['scoreType' => 'PROBABILITY', 'scoreThreshold' => 0]]);

            $analysis = $client->analyze();
            $tox = $analysis->attributeScores()['TOXICITY']['summaryScore']['value'];

            # Got a score, so no need to retry.
            break;
        } catch (\Exception $e) {
            if (strpos($e->getMessage(), 'Quota exceeded') === FALSE) {
                # Not a quota problem, so we can't sensibly retry.
                throw $e;
            }

            #error_log("...quota exceeded");
        }

        # Quota exceeded - back off briefly before the next attempt.
        sleep(1);
    }

    if ($tox > 0.3) {
        #error_log("$tox for " . $row['id'] . " " . $row['message']);
        $results[] = [
            'id' => $row['id'],
            'message' => $row['message'],
            'tox' => $tox
        ];
    }
}

# Sort descending by tox, so that the highest scores are printed first.
#
# Compare with <=> rather than subtracting.  usort() converts the callback's return value to an integer, so
# a fractional difference between two scores would be truncated to 0, the entries would be treated as
# equal, and the list would not be put in order.
usort($results, function($a, $b) {
    return $b['tox'] <=> $a['tox'];
});

# Print one line per collected row: the score scaled by 100, then the row id, then the message text.
foreach ($results as $entry) {
    $fields = [
        $entry['tox'] * 100,
        $entry['id'],
        $entry['message']
    ];

    echo implode(" ", $fields) . "\n";
}

0 comments on commit 00466f7

Please sign in to comment.