Skip to content

Commit

Permalink
fix: fix bugs in 'create_github_emoji_list' script
Browse files Browse the repository at this point in the history
As pointed out in
#74 a lot of the
GitHub emojis were not parsed correctly by the
'create_github_emoji_list' script. This commit fixes the bugs that were
present in this script so that all emojis are parsed.
  • Loading branch information
rickstaa committed Feb 24, 2023
1 parent 9471d1e commit 18aefa7
Show file tree
Hide file tree
Showing 5 changed files with 693 additions and 53 deletions.
232 changes: 181 additions & 51 deletions scripts/create_github_emoji_list.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
* `github_custom_emojis.json` data files.
*/
const { mkdir, writeFile } = require("fs");
const inflection = require("inflection");
const emojiLib = require("emojilib");
const emojiData = require("emoji-datasource");
const unicodeEmoji = require("unicode-emoji-json");
const inflection = require("inflection"); // Keyword support library.
const emojiLib = require("emojilib"); // Emoji data search library.
const emojiData = require("emoji-datasource"); // Multi-OS emoji data.
const unicodeEmoji = require("unicode-emoji-json"); // Unicode emoji data.
const { Octokit } = require("@octokit/core");
const CustomKeyWords = require("./keywords.json");

// Script variables
const DRY_RUN = process.argv.indexOf("--dry") !== -1;
Expand All @@ -26,33 +27,168 @@ const CATEGORIES = [
];
const KEYWORD_SUBSTITUTES = {
highfive: "highfive high-five",
}; // Extend the keyword list with custom keywords.

/**
 * Build a new array in which every entry strictly equal to the query comes first.
 * The remaining entries keep their relative order; the input array is not modified.
 * @param {array} arr Input array.
 * @param {string} queryStr The item to move to the front of the array.
 * @returns {array} The array with the item moved to the front.
 */
const moveToFront = (arr, queryStr) => {
  const leading = [];
  const trailing = [];
  arr.forEach((entry) => {
    if (entry === queryStr) {
      // Each further match is placed ahead of the earlier ones.
      leading.unshift(entry);
    } else {
      trailing.push(entry);
    }
  });
  return leading.concat(trailing);
};

/**
 * Normalise a unicode sequence coming from the 'emoji-datasource' package by
 * lower-casing it and dropping the "-200d" and "-fe0f" padding segments.
 * @param {string} emojiDataUnicode The hyphen-separated unicode string.
 * @returns {string} The lower-cased unicode string without padding characters.
 */
const parseEmojiDataUnicode = (emojiDataUnicode) => {
  // Removed one after the other, in this order, on the lower-cased input.
  const paddingSegments = ["-200d", "-fe0f"];
  return paddingSegments.reduce(
    (stripped, segment) => stripped.replaceAll(segment, ""),
    emojiDataUnicode.toLowerCase()
  );
};

/**
 * Translate unified format to native unicode format.
 * @param {string} unified The unified format: hexadecimal code points separated by "-".
 * @returns {string} The native unicode format.
 * @throws {RangeError} Thrown by `String.fromCodePoint` when a segment is not a
 * valid code point.
 */
const unifiedToNative = (unified) => {
  // Prefix each segment with "0x" so it is read as a hexadecimal number.
  const codePoints = unified.split("-").map((hex) => Number(`0x${hex}`));
  return String.fromCodePoint(...codePoints);
};

/**
 * Split the GitHub emoji API response into unicode emojis and custom emojis.
 *
 * An entry whose URL contains "unicode/<code>.png" is grouped under the
 * lower-cased <code>; every other entry is kept as a custom emoji.
 * @param {Object} githubEmojisData Object containing the GitHub emoji data retrieved from
 * the GitHub emoji API (short name -> image URL).
 * @returns {Object} Object with `unicodeEmojis` (unicode -> array of short names) and
 * `customEmojis` (short name -> image URL).
 * @throws {Error} Throws an error if not all GitHub emojis were parsed.
 */
const parseGitHubEmojiData = (githubEmojisData) => {
  const unicodePattern = /(?<=unicode\/).*(?=\.png)/;
  const unicodeEmojis = {};
  const customEmojis = {};

  Object.entries(githubEmojisData).forEach(([shortName, url]) => {
    const found = url.match(unicodePattern);

    // No unicode in the URL: store as custom emoji.
    if (!found) {
      if (Object.hasOwn(customEmojis, shortName)) {
        throw new Error("Duplicate custom GitHub emoji's found.");
      }
      customEmojis[shortName] = url;
      return;
    }

    // Several short names can share one unicode, so collect them in a list.
    const unicode = found[0].toLowerCase();
    if (!unicodeEmojis[unicode]) {
      unicodeEmojis[unicode] = [];
    }
    unicodeEmojis[unicode].push(shortName);
  });

  // Sanity check: every input entry must have ended up in one of the two results.
  const customCount = Object.keys(customEmojis).length;
  const unicodeCount = Object.values(unicodeEmojis).reduce(
    (total, shortNames) => total + shortNames.length,
    0
  );
  const expectedCount = Object.keys(githubEmojisData).length;
  if (customCount + unicodeCount !== expectedCount) {
    throw new Error("Not all emoji unicodes were successfully parsed.");
  }

  return { unicodeEmojis, customEmojis };
};

/**
 * Attach the GitHub short names to an 'emoji-datasource' object.
 *
 * The object is modified in place: the GitHub names are merged into
 * `short_names` without duplicates, and the first GitHub name is stored as
 * `github_short_name`.
 * @param {Object} emojiObject The 'emoji-datasource' object.
 * @param {array} githubShortNames The GitHub short names.
 * @returns {Object} The same 'emoji-datasource' object with the GitHub short names added.
 */
const addGitHubShortName = (emojiObject, githubShortNames) => {
  // A Set keeps first-seen order, so existing names stay ahead of new ones.
  const mergedNames = new Set(emojiObject.short_names || []);
  for (const shortName of githubShortNames) {
    mergedNames.add(shortName);
  }

  emojiObject.short_names = Array.from(mergedNames);
  emojiObject.github_short_name = githubShortNames[0];
  return emojiObject;
};

/**
* Filter the 'emoji-datasource' package data to only include the GitHub emojis.
* @param {Object} githubUnicodeEmojis Object containing the GitHub emoji unicodes.
* @returns {array} Array containing the filtered 'emoji-datasource' package data.
* @throws {Error} Throws an error if not all GitHub emojis have a match.
*/
const getFilteredEmojiData = (githubUnicodeEmojis) => {
let filteredEmojis = [];
let notFound = [];

// Loop through GitHub unicodes and try to find a match in the 'emoji-datasource'.
for (const [key, value] of Object.entries(githubUnicodeEmojis)) {
// Try to find match by using unicode.
const unicodeObject = emojiData.find(
(item) => item.unified.toLowerCase() === key
);
if (unicodeObject) {
filteredEmojis.push(addGitHubShortName(unicodeObject, value));
continue;
}

// Try to find match by using non-qualified unicode.
const nonQualifiedObject = emojiData.find(
(item) =>
(item.non_qualified ? item.non_qualified.toLowerCase() : null) === key
);
if (nonQualifiedObject) {
filteredEmojis.push(addGitHubShortName(nonQualifiedObject, value));
continue;
}

// Try to find match by using parsed unicode.
const unicodeObjectParsed = emojiData.find(
(item) => parseEmojiDataUnicode(item.unified.toLowerCase()) === key
);
if (unicodeObjectParsed) {
filteredEmojis.push(addGitHubShortName(unicodeObjectParsed, value));
continue;
}

notFound.push(value);
}

// Throw error if not all GitHub Emojis have a match.
if (notFound.length) {
throw new Error(
`Some GitHub Emojis could not be found in the 'emoji-datasource' package: ${notFound
.flat()
.join(", ")}.`
);
}

return filteredEmojis;
};

/**
* Build the emoji data files.
* @param {*} githubEmojisData Object containing the GitHub emoji data.
* @param {Object} githubEmojisData Object containing the GitHub emoji data retrieved from
* the GitHub emoji API.
*/
const buildData = (githubEmojisData) => {
const categoriesIndex = {};
const data = {
let categoriesIndex = {};
let data = {
categories: [],
emojis: {},
aliases: {},
sheet: { cols: 61, rows: 61 },
};
const gitHubEmojis = {};
const emojiDataUnified = {};

// Add categories.
CATEGORIES.forEach((category, i) => {
Expand All @@ -69,56 +205,44 @@ const buildData = (githubEmojisData) => {
return aTest - bTest;
});

// Retrieve emoji unicodes and unique GitHub emojis.
for (const [key, value] of Object.entries(githubEmojisData.data)) {
const match = value.match(/(?<=unicode\/).*(?=\.png)/);
if (match) {
emojiDataUnified[match[0].toUpperCase()] = key;
} else {
gitHubEmojis[key] = value;
}
}
// Parse the GitHub emoji unicodes and separate out the custom GitHub emojis.
const {
unicodeEmojis: githubUnicodeEmojis,
customEmojis: githubCustomEmojis,
} = parseGitHubEmojiData(githubEmojisData);

// Retrieve GitHub emojis that exist in the 'emoji-datasource' package.
emojiData.forEach((datum) => {
if (!githubEmojisData.data.hasOwnProperty(datum.short_name)) {
// Filter out emojis that don't exist in the GitHub API.
if (!emojiDataUnified.hasOwnProperty(datum.unified)) {
return;
}
// Retrieve filtered emoji data from 'emoji-datasource'.
const filteredEmojis = getFilteredEmojiData(githubUnicodeEmojis);

datum.short_name = emojiDataUnified[datum.unified];
}

// Throw warning if emoji doesn't have a category.
if (!datum.category) {
throw new Error(`“${datum.short_name}” doesn’t have a category`);
}
// Make GitHub emojis searchable and create the EmojiMart data source.
filteredEmojis.forEach((datum) => {
if (!datum.category)
throw new Error(`“${datum.short_name}” doesn’t have a category.`);

// Retrieve emoji information.
let unified = datum.unified.toLowerCase();
let native = unifiedToNative(unified);
let name = inflection.titleize(
datum.name || datum.short_name.replace(/-/g, " ") || ""
);
let unicodeEmojiName = inflection.titleize(unicodeEmoji[native].name || "");
let unicodeEmojiName = inflection.titleize(
unicodeEmoji[native]?.name || ""
);
if (
name.indexOf(":") === -1 &&
unicodeEmojiName.length &&
unicodeEmojiName.length < name.length
) {
name = unicodeEmojiName;
}
if (!name) throw new Error(`“${datum.short_name}” doesn’t have a name.`);

// Throw warning if emoji name could not be retrieved.
if (!name) {
throw new Error(`“${datum.short_name}” doesn’t have a name`);
}

// Ensure short_name is first id element.
// Ensure short_name is first id.
let ids = datum.short_names || [];
if (ids.indexOf(datum.short_name) === -1) {
ids.unshift(datum.short_name);
} else if (ids[0] !== datum.short_name) {
ids = moveToFront(ids, datum.short_name);
}

// Add other ids as aliases.
Expand All @@ -132,6 +256,8 @@ const buildData = (githubEmojisData) => {
let emoticons = datum.texts || [];
if (datum.text && emoticons.indexOf(datum.text) === -1) {
emoticons.unshift(datum.text);
} else if (emoticons[0] !== datum.text) {
emoticons = moveToFront(emoticons, datum.text);
}

// Make sure the expressionless emoticon has an emoji text.
Expand Down Expand Up @@ -185,8 +311,10 @@ const buildData = (githubEmojisData) => {
// Add version information to emoji.
let addedIn = parseFloat(datum.added_in);
if (addedIn < 1) addedIn = 1;

// Create emoji object.
const emoji = {
id,
id: datum.github_short_name,
name,
emoticons,
keywords,
Expand All @@ -199,15 +327,15 @@ const buildData = (githubEmojisData) => {
delete emoji.emoticons;
}

// Handle Component category emoji variants.
// Don't add Component category emojis; these are already included as skins.
if (datum.category !== "Component") {
let categoryIndex = categoriesIndex[datum.category];
data.categories[categoryIndex].emojis.push(emoji.id);
data.emojis[emoji.id] = emoji;
}
});

// Reorder flags category.
// Sort flags category.
let flags = data.categories[categoriesIndex["Flags"]];
flags.emojis = flags.emojis.sort();

Expand All @@ -222,13 +350,15 @@ const buildData = (githubEmojisData) => {
data.categories.unshift(smileysAndPeople);
data.categories.splice(1, 2);

// Retrieve unique GitHub emojis.
// Create EmojiMart data object for the unique GitHub emojis.
let githubEmojis = { id: "github", name: "GitHub", emojis: [] };
for (const [key, value] of Object.entries(gitHubEmojis)) {
for (const [key, value] of Object.entries(githubCustomEmojis)) {
if (CustomKeyWords[key] === undefined)
throw new Error(`“${key}” doesn’t have a keyword.`);
const emoji = {
id: key,
name: key[0].toUpperCase() + key.slice(1),
keywords: [key],
name: inflection.capitalize(key),
keywords: CustomKeyWords[key].keywords,
skins: [{ src: value }],
};
githubEmojis.emojis.push(emoji);
Expand Down Expand Up @@ -268,7 +398,7 @@ const run = async () => {

// Compare with
console.log("Create emoji data files...");
buildData(githubEmojis);
buildData(githubEmojis.data);
console.log("Emoji data files created.");
};

Expand Down
Loading

1 comment on commit 18aefa7

@vercel
Copy link

@vercel vercel bot commented on 18aefa7 Feb 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.