diff --git a/docs/no_toc/404.html b/docs/no_toc/404.html index 71a87d09..1d8d4078 100644 --- a/docs/no_toc/404.html +++ b/docs/no_toc/404.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/About.md b/docs/no_toc/About.md index cb270eb6..a7a1b386 100644 --- a/docs/no_toc/About.md +++ b/docs/no_toc/About.md @@ -46,7 +46,7 @@ These credits are based on our [course contributors table guidelines](https://ww ## collate en_US.UTF-8 ## ctype en_US.UTF-8 ## tz Etc/UTC -## date 2024-02-21 +## date 2024-03-04 ## ## ─ Packages ─────────────────────────────────────────────────────────────────── ## package * version date lib source diff --git a/docs/no_toc/about-the-authors.html b/docs/no_toc/about-the-authors.html index 6d5c581a..00af431b 100644 --- a/docs/no_toc/about-the-authors.html +++ b/docs/no_toc/about-the-authors.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ @@ -615,7 +616,7 @@

About the Authors

## collate en_US.UTF-8 ## ctype en_US.UTF-8 ## tz Etc/UTC -## date 2024-02-21 +## date 2024-03-04 ## ## ─ Packages ─────────────────────────────────────────────────────────────────── ## package * version date lib source diff --git a/docs/no_toc/adherence-practices.html b/docs/no_toc/adherence-practices.html index 353e7883..618a4a76 100644 --- a/docs/no_toc/adherence-practices.html +++ b/docs/no_toc/adherence-practices.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/ai-acts-orders-and-regulations.html b/docs/no_toc/ai-acts-orders-and-regulations.html index 6ff11cca..80c5fdd0 100644 --- a/docs/no_toc/ai-acts-orders-and-regulations.html +++ b/docs/no_toc/ai-acts-orders-and-regulations.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/ai-possibilities-case-studies.html b/docs/no_toc/ai-possibilities-case-studies.html index 64ccd933..12b620e0 100644 --- a/docs/no_toc/ai-possibilities-case-studies.html +++ b/docs/no_toc/ai-possibilities-case-studies.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/algorithm-considerations.html b/docs/no_toc/algorithm-considerations.html index 2347ff19..c948ca67 100644 --- a/docs/no_toc/algorithm-considerations.html +++ b/docs/no_toc/algorithm-considerations.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/assets/favicon.ico b/docs/no_toc/assets/favicon.ico new file mode 100644 index 00000000..f41ff9b0 Binary files /dev/null and b/docs/no_toc/assets/favicon.ico differ diff --git a/docs/no_toc/building-an-ai-advisory-team.html b/docs/no_toc/building-an-ai-advisory-team.html index a3eda193..f5d31236 100644 --- a/docs/no_toc/building-an-ai-advisory-team.html +++ b/docs/no_toc/building-an-ai-advisory-team.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/case-studies.html b/docs/no_toc/case-studies.html index a39343e0..ce62bad9 100644 --- a/docs/no_toc/case-studies.html +++ b/docs/no_toc/case-studies.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/consent-and-ai.html b/docs/no_toc/consent-and-ai.html index ba08a177..5331767e 100644 --- a/docs/no_toc/consent-and-ai.html +++ b/docs/no_toc/consent-and-ai.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/considerations-for-creating-an-ai-policy.html b/docs/no_toc/considerations-for-creating-an-ai-policy.html index 6186e0c6..aac00e2a 100644 --- a/docs/no_toc/considerations-for-creating-an-ai-policy.html +++ b/docs/no_toc/considerations-for-creating-an-ai-policy.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/customized-interfaces-for-ai.html b/docs/no_toc/customized-interfaces-for-ai.html index d5e8ecf0..37b87b1d 100644 --- a/docs/no_toc/customized-interfaces-for-ai.html +++ b/docs/no_toc/customized-interfaces-for-ai.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/customized-knowledge-for-ai.html b/docs/no_toc/customized-knowledge-for-ai.html index 4bd2f609..78e1d247 100644 --- a/docs/no_toc/customized-knowledge-for-ai.html +++ b/docs/no_toc/customized-knowledge-for-ai.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/customized-security-for-ai.html b/docs/no_toc/customized-security-for-ai.html index 13283cc8..883bc397 100644 --- a/docs/no_toc/customized-security-for-ai.html +++ b/docs/no_toc/customized-security-for-ai.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/demystifying-types-of-ai.html b/docs/no_toc/demystifying-types-of-ai.html index a1c9eaa8..c0bf4609 100644 --- a/docs/no_toc/demystifying-types-of-ai.html +++ b/docs/no_toc/demystifying-types-of-ai.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/determining-your-ai-needs.html b/docs/no_toc/determining-your-ai-needs.html index 2034ecea..3975c755 100644 --- a/docs/no_toc/determining-your-ai-needs.html +++ b/docs/no_toc/determining-your-ai-needs.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/discussion-ai-possibilities.html b/docs/no_toc/discussion-ai-possibilities.html index 54eef562..446c272c 100644 --- a/docs/no_toc/discussion-ai-possibilities.html +++ b/docs/no_toc/discussion-ai-possibilities.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/discussion-ai-types.html b/docs/no_toc/discussion-ai-types.html index b257bf39..09515d24 100644 --- a/docs/no_toc/discussion-ai-types.html +++ b/docs/no_toc/discussion-ai-types.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/discussion-how-ai-works.html b/docs/no_toc/discussion-how-ai-works.html index 36c7a95e..6408dee9 100644 --- a/docs/no_toc/discussion-how-ai-works.html +++ b/docs/no_toc/discussion-how-ai-works.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/discussion-is-it-ai.html b/docs/no_toc/discussion-is-it-ai.html index 841a75c3..84525a94 100644 --- a/docs/no_toc/discussion-is-it-ai.html +++ b/docs/no_toc/discussion-is-it-ai.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/elements-of-an-ai-policy.html b/docs/no_toc/elements-of-an-ai-policy.html index 3d142e8e..55bd9d98 100644 --- a/docs/no_toc/elements-of-an-ai-policy.html +++ b/docs/no_toc/elements-of-an-ai-policy.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/ethical-process.html b/docs/no_toc/ethical-process.html index 05698ab9..cb064367 100644 --- a/docs/no_toc/ethical-process.html +++ b/docs/no_toc/ethical-process.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/evaluating-your-customized-ai-tool.html b/docs/no_toc/evaluating-your-customized-ai-tool.html index ae93e52b..6bbb454d 100644 --- a/docs/no_toc/evaluating-your-customized-ai-tool.html +++ b/docs/no_toc/evaluating-your-customized-ai-tool.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/ground-rules-for-ai.html b/docs/no_toc/ground-rules-for-ai.html index 660521d2..11ed685b 100644 --- a/docs/no_toc/ground-rules-for-ai.html +++ b/docs/no_toc/ground-rules-for-ai.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/how-ai-works.html b/docs/no_toc/how-ai-works.html index b1fd176c..2c40ef1b 100644 --- a/docs/no_toc/how-ai-works.html +++ b/docs/no_toc/how-ai-works.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/idare-and-ai.html b/docs/no_toc/idare-and-ai.html index fae35cac..386d3915 100644 --- a/docs/no_toc/idare-and-ai.html +++ b/docs/no_toc/idare-and-ai.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ diff --git a/docs/no_toc/index.html b/docs/no_toc/index.html index 4703d1eb..83b92de4 100644 --- a/docs/no_toc/index.html +++ b/docs/no_toc/index.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

Click here to provide feedback

+ @@ -502,7 +503,7 @@

About this Course

diff --git a/docs/no_toc/index.md b/docs/no_toc/index.md index 98be3b5c..71e64701 100644 --- a/docs/no_toc/index.md +++ b/docs/no_toc/index.md @@ -1,14 +1,14 @@ --- title: "AI for Decision Makers" subtitle: "" -date: "February, 2024" +date: "March, 2024" site: bookdown::bookdown_site documentclass: book bibliography: [book.bib] biblio-style: apalike link-citations: yes description: "Description about Course/Book." -favicon: assets/dasl_favicon.ico +favicon: assets/favicon.ico always_allow_html: true output: bookdown::html_document2: diff --git a/docs/no_toc/introduction-to-avoiding-ai-harm.html b/docs/no_toc/introduction-to-avoiding-ai-harm.html index 19798ec6..f304fc2a 100644 --- a/docs/no_toc/introduction-to-avoiding-ai-harm.html +++ b/docs/no_toc/introduction-to-avoiding-ai-harm.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

+ diff --git a/docs/no_toc/introduction-to-determining-ai-needs.html b/docs/no_toc/introduction-to-determining-ai-needs.html index ad2dc4c0..a1176322 100644 --- a/docs/no_toc/introduction-to-determining-ai-needs.html +++ b/docs/no_toc/introduction-to-determining-ai-needs.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

+ diff --git a/docs/no_toc/introduction-to-developing-ai-policy.html b/docs/no_toc/introduction-to-developing-ai-policy.html index 340af5ae..57481f15 100644 --- a/docs/no_toc/introduction-to-developing-ai-policy.html +++ b/docs/no_toc/introduction-to-developing-ai-policy.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

+ diff --git a/docs/no_toc/introduction-to-exploring-ai-possibilities.html b/docs/no_toc/introduction-to-exploring-ai-possibilities.html index 71f6a255..74e220c3 100644 --- a/docs/no_toc/introduction-to-exploring-ai-possibilities.html +++ b/docs/no_toc/introduction-to-exploring-ai-possibilities.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

+ diff --git a/docs/no_toc/introduction.html b/docs/no_toc/introduction.html index efb9ae86..762639ca 100644 --- a/docs/no_toc/introduction.html +++ b/docs/no_toc/introduction.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

+ diff --git a/docs/no_toc/other-laws-to-consider.html b/docs/no_toc/other-laws-to-consider.html index a2fa38eb..c265445e 100644 --- a/docs/no_toc/other-laws-to-consider.html +++ b/docs/no_toc/other-laws-to-consider.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

+ diff --git a/docs/no_toc/references.html b/docs/no_toc/references.html index c146c959..1bf62e37 100644 --- a/docs/no_toc/references.html +++ b/docs/no_toc/references.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

+ diff --git a/docs/no_toc/search_index.json b/docs/no_toc/search_index.json index f40ee020..be0c9596 100644 --- a/docs/no_toc/search_index.json +++ b/docs/no_toc/search_index.json @@ -1 +1 @@ -[["index.html", "AI for Decision Makers About this Course Specialization Sections Available course formats", " AI for Decision Makers February, 2024 About this Course This is the series of courses in Fred Hutch DaSL’s “AI for Decision Makers” specialization on Coursera. Specialization Sections Introduction Course 1: Exploring AI Possibilities Course 2: Avoiding AI Harm Course 3: Determining AI Needs Course 4: Developing AI Policy Available course formats This course is available in multiple formats which allows you to take it in the way that best suits your needs. You can take it for certificate which can be for free or fee. The material for this course can be viewed without login requirement on this Bookdown website. This format might be most appropriate for you if you rely on screen-reader technology. This course can be taken on Coursera for certification here (but it is not available for free on Coursera). Our courses are open source, you can find the source material for this course on GitHub. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["introduction.html", "Introduction Motivation Target Audience", " Introduction Motivation How can understanding AI help you be a better leader? We think understanding AI is essential for executives. It helps today’s leaders make strategic decisions, drive innovation, enhance efficiency, and foster a culture that embraces the transformative power of these technologies. 
Specifically, AI proficiency can help leaders in the following ways: Strategic Decision-Making: Understanding AI and machine learning equips leaders to make informed decisions about integrating these technologies into business strategies, setting their teams up for success when working with AI. Risk Mitigation: Familiarity with AI helps leaders assess risks associated with implementing these technologies, ensuring that ethical considerations, data privacy, and potential biases are addressed to mitigate negative consequences. Leaders can also implement more informed policies for their teams. Efficiency and Experience: Leaders can explore how AI applications enhance operational efficiency, automate repetitive tasks, and assist employee learning and development, leading to increased productivity and breakthroughs. These improvements can also improve the experience of users or customers your organization serves. Resource Allocation: AI resources can be expensive, including in terms of computing resources, subscription services, and/or personnel time. Understanding AI enables leaders to allocate resources effectively, whether in building in-house AI capabilities, partnering with external experts, or investing in AI-driven solutions that align with the organization’s mission. Innovation Leadership: Leaders can foster a culture of innovation by understanding the transformative potential of AI. Awareness and knowledge can also enable leaders to identify opportunities for innovation, helping their teams match the rapidly evolving technological landscape. Data-Driven Decision Culture: Leaders can promote a data-driven decision-making culture within their organizations, using AI insights to inform strategic planning, understand their teams better, and improve other key business functions. Communication with Tech Teams: Executives and managers benefit from understanding AI even if they aren’t building tech, as it helps them effectively communicate with their technical teams. 
This can mean more effective collaboration and improved alignment between teams or departments. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Target Audience This specialization is intended for executives, decision-makers, and business leaders across industries, including executives in C-suite positions, managers, and directors. Our goal is for these learners to understand the strategic applications of AI and machine learning in driving innovation, improving operations, creating supportive working environments, and gaining an innovative edge. We also believe that learning is a life-long process. This specialization is targeted toward those who value continuous learning and want to stay ahead in today’s fast-paced technology landscape. "],["introduction-to-exploring-ai-possibilities.html", "Introduction to Exploring AI Possibilities Introduction Motivation Target Audience Curriculum Learning Objectives", " Introduction to Exploring AI Possibilities Introduction This course aims to help decision makers and leaders understand artificial intelligence (AI) at a strategic level. Not everyone will write an AI algorithm, and that is okay! Our rapidly evolving AI landscape means that we need executives and managers who know the essential information to make informed decisions and use AI for good. This course specifically focuses on the essentials of what AI is and what it makes possible, to better harmonize expectations and reality in the workplace. Motivation This course will help you with your understanding of AI, helping you make strategic decisions and cultivate a business environment that embraces the benefits of AI, while understanding its limitations and risks. Target Audience This course is targeted toward industry and non-profit leaders and decision makers. 
Curriculum In this course, we’ll learn about what Artificial intelligence is, and what it isn’t. We’ll also learn the basics of how it works, and learn about different types of AI. This course will cover: Framework, or definition, of AI Essential AI examples and case studies The take-home of how AI works Key definitions of types of AI and related technologies What is possible with AI Learning Objectives We will learn how to: Determine what AI is and isn’t using our three part framework: the data, algorithm, and interface Identify common technologies and whether or not they are AI Explain the essential “behind the scenes” technology of how AI works Identify possibilities for using AI while understanding its limitations Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["what-is-artificial-intelligence.html", "What Is Artificial Intelligence Specific and General Intelligence Shifting Goalposts Our AI Definition What Is and Is Not AI Summary", " What Is Artificial Intelligence The term “artificial intelligence”, or AI, often makes people envision humanoid robots. For some of us, this prompts concerns about their ability to outsmart us. The notion of robots passing tests that blur the line between human and machine, often depicted in science fiction, adds to these worries, particularly when considering the potential for AI systems to act in self-interest and make decisions independently. Specific and General Intelligence Currently, no AI system can perform all the intellectual tasks that a human can. This is an active area of research, specifically into what’s called artificial general intelligence. We aren’t there yet. Currently, artificial intelligence systems are optimized to perform a specific task well, but not for general, multi-purpose tasks. 
For example, the AI application for recognizing voices can not be directly applied to drive cars, and vice versa. Similarly, a language translation app could not recognize images, and vice versa. Shifting Goalposts Defining what AI is can be tricky because what experts consider to be AI changes frequently. John McCarthy, one of the leading early figures in AI once said, “As soon as it works, no one calls it artificial intelligence anymore”. For instance, 20 years ago, the idea of an email spam checker was new. People were surprised that an algorithm could identify junk email accurately, and called it “artificial intelligence”. Since this type of algorithm has become so common, it is no longer called “artificial intelligence”. This transition happened because we no longer think it is surprising that computers can filter spam messages. Because it is not learning something new and surprising, it is no longer considered intelligent. We often look at human intelligence the same way. For example, many years ago, only a few people knew how to use the internet. These people might have been considered extremely talented and intelligent. Now, the massive growth of online resources and social media mean that fluent internet use is almost required! Artificial General Intelligence (AGI): A type of artificial intelligence that can understand, learn, and apply knowledge across a wide range of tasks, similar to the broad cognitive abilities of a human being. It represents the aspiration for machines to have versatile intelligence rather than focusing on specific, narrow domains. Check out the following lessons to learn more. Our AI Definition At its core, AI is about problem solving (Fogel 2022). But how does it do this? How hard does the problem have to be? There are no clear answers to these questions. 
Going forward in this course, we define AI as having the following features: Dataset: AI needs data examples that can be used to train a statistical or machine learning model to make predictions. Algorithm: AI needs an algorithm, or a set of procedures, that can be trained based on the data examples. That way, it can take a new example and execute a human-like task. For instance, the algorithm learns which images feature a cat from pre-labeled images. When given a new image, it decides whether the image has a cat in it. Interface: AI needs a physical interface or software for the trained algorithm to receive a data input and execute the human-like task in the real world. For example, you might interface with a chatbot in your web browser. As an example, consider Amazon Echo’s voice control device (Wikipedia 2023a). The data set consists of customer voices talking to Amazon Echo or other devices. The algorithm predicts what a new customer voice is asking it to do. Given a human voice request, it may set a kitchen timer. Lastly, the interface is a physical device with a microphone, speaker, and computer software running the algorithm and accessing the data. It is the part that will interact with humans. What Is and Is Not AI Let’s look at a few examples. We can then decide whether or not the examples constitute AI. Smartphones The name “smartphone” implies these devices are making decisions and are powered by AI. Let’s consider our three criteria: Dataset: Smartphones do collect a lot of data. For example, they retain your text messages and collect motion tracking information. Algorithm: The smartphone as a whole does not usually get trained with this data. However, some features like virtual voice assistants and facial recognition do adapt given your data. Interface: Again, some features like voice assistants can be interacted with through the smartphone. 
While there are some features on smartphones that are powered by AI models, like virtual voice assistants and facial recognition, the device as a whole isn’t considered AI. Calculators Many of us use basic calculators, as you might find in Microsoft Excel, every day. AI also makes many calculations. Is it just a scaled-up calculator? Dataset: Calculators and spreadsheets can store data. Algorithm: Calculators do not generally use this data to train algorithms. The procedures that are performed (addition, subtraction, etc.) are almost always predefined. However, some AI-powered assistants are starting to be integrated into software like Excel and Google Sheets. Interface: Calculators do meet the criteria for an interface, whether through a physical device or software application. Traditional calculators are not considered AI, because everything they can do is predefined by people. Computer Programs Like calculators, computers follow set procedures for problem solving and computation. Everyday computers use these procedures to help automate repetitive tasks and save time. However, this isn’t generally considered AI, because the computer’s algorithms aren’t being trained with new data you supply. AI systems exhibit the ability to adapt and handle new inputs for tasks that might be more complicated. Examples of AI In the Real World As we can see from the examples above, there are many instances of technology that are incredibly useful but are not considered AI. Without getting too into the details of how they work yet, let’s list some examples of AI in the real world and their applications. Meta’s Advantage suite of tools helps advertisers produce content and target specific social media users. Google Search is using generative AI to summarize search results in an “AI-powered snapshot of key information”. Financial institutions use AI to detect fraud. For example, AI can detect the one audit risk in a database of 10 million entries. 
In medicine, AI can help predict Alzheimer’s risk from MRI scans. Global Plastic Watch uses satellite imagery and artificial intelligence to detect and monitor plastic waste sites globally. Summary The definition of artificial intelligence (AI) has shifted over time. We use the three part framework of data, algorithms, and interfaces to describe AI applications. You will need to consider specific technologies and whether they meet the criteria for being classified as AI using this framework. Adaptability and training with new data are key factors to keep in mind as we move further in the course. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["discussion-is-it-ai.html", "DISCUSSION Is It AI", " DISCUSSION Is It AI Consider the following examples. Are they examples of AI? Why or why not? Click to expand and see the answer. A smartfridge that lets you know when replacement parts are needed This is not AI. The computer in the fridge is typically programmed to look for specific signs of wear or time passing. It is not typically trained with new data. Speed cameras on the highway Speed cameras on highways typically use specialized technology and are not explicitly powered by AI. These cameras are often equipped with radar sensors for measuring vehicle speed between checkpoints. While the core functionality of speed cameras relies on sensor technology and predetermined speed thresholds, AI elements may be incorporated in some advanced systems. For example, AI could be used to enhance image recognition accuracy for reading license plates. However, the fundamental operation of speed cameras is rooted in sensor-based speed detection, not AI. Suggested accounts on Instagram This is considered AI. 
Social media algorithms, like Instagram’s, make recommendations based on user behavior. For example, if you spend a lot of time viewing a page that was recommended, the system interprets that as positive feedback and will make similar recommendations. Typically, these recommendations get better over time as the user generates more user-specific data. You supply data through your behaviors, the algorithm gets trained, and you interact with the suggestions via the app. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["how-ai-works.html", "How AI Works Early Warning for Skin Cancer Collecting Datapoints Understanding the Algorithm Interfacing with AI Understanding the AI Spring Summary", " How AI Works Let’s briefly revisit our definition of AI. It must have some data, an algorithm, and an interface. Let’s break these down in more detail below. Early Warning for Skin Cancer Each year in the United States, 6.1 million adults are treated for skin cancer (basal cell and squamous cell carcinomas), totaling nearly $10 billion in costs (CDC 2023). It is one of the most common forms of cancer in the United States, and mortality from skin cancer is a real concern. Fortunately, early detection through regular screening can increase survival rates to over 95% (Melarkode et al. 2023). Cost and accessibility of screening providers, however, mean that many people aren’t getting the preventative care they need. Increasingly, AI is being used to flag potential skin cancer. AI focused on skin cancer detection could be used by would-be patients to motivate them to seek a professional opinion, or by clinicians to validate their findings or help with continuous learning. 
Data: Images with and without skin cancer present Algorithm: Detection of possible skin cancer Interface: Web portal or app where you can submit a new picture Collecting Datapoints Let’s say a clinician, Dr. Derma, is learning how to screen for skin cancer. Dr. D goes to their first day at the clinic and sees their first instance of skin cancer. Dr. D now has one data point. Dr. D could make future diagnoses based on this single data point, but these diagnoses probably won’t be very accurate. Over time, as Dr. D does more screenings of skin with and without cancer, they will get a better and better idea of what skin cancer looks like. This is part of what we do best. Human beings are powerhouses when it comes to pattern recognition and processing (Mattson 2014). Like Dr. D, AI will get better at finding the right patterns with more data. In order to train an AI algorithm to detect possible skin cancer, we’ll first want to gather as many pictures of normal and cancerous skin as we can. This is the raw data (Leek and Narayanan 2017). What Is Data In our skin cancer screening example, our data is all of the information stored in an image. However, data comes in many shapes and forms. Data can be structured, such as a spreadsheet of the time of day plus traffic volume or counts of viral particles in different patients. Data can also be unstructured, such as might be found in social media text or genome sequence data. Other kinds of data can be collected and used to train algorithms. These might include survey data collected directly from consumers, medical data collected in a healthcare setting, purchase or transaction tracking, and online tracking of your time on certain web pages (Cote 2022). Quantity and quality of data are very important. More data makes it easier to detect and account for minor differences among observations. However, that shouldn’t come at the cost of quality. 
It is sometimes better to have fewer, high resolution or high quality images in our dataset than many images that are blurry, discolored, or in other ways questionable. Representative diversity of datasets is crucial for the effectiveness of AI. For instance, if an AI used for skin cancer screening only encounters instances of skin cancer on lighter skin tones, it might fail to alert individuals with darker skin tones. The tech industry’s lack of diversity contributes to these issues, often leading to the discovery of failures only after harm has occurred. Large Language Models (LLMs), which we will cover later, are great examples of using high quantity and quality of data. Think about how much text information is freely available on the internet! Throughout the internet, we’re much more likely to see the phrase “cancer is a disease” than “cancer is a computer program”. Many LLMs are trained on sources like Wikipedia, which are typically grammatically sound and informative, leading to higher quality output. It is essential that you and your team think critically about data sources. Many companies releasing generative AI systems have come under fire for training these systems on data that doesn’t belong to them (Walsh 2023). Individual people also have a right to data privacy. No personal data should be used without permission, even if that data could be interesting or useful. Preparing the Data It’s important to remember that AI systems need specific instructions to start detecting patterns. We’ll need to take our raw data and indicate which pictures are positive for skin cancer and which aren’t. This process is called labeling and has to be done by humans. Once data is labeled, either “cancer” or “not cancer”, we can use it to train the algorithm in the next step. This data is aptly called training data. Understanding the Algorithm Our goal is “detection of possible skin cancer”, but how does a computer do that? 
First, we’ll need to break down the image into attributes called features. This could be the presence of certain color pixels, percentage of certain shades, spot perimeter regularity, or other features. Features can be determined by computers or by data scientists who know what kind of features are important. It’s not uncommon for an AI looking at image data to have thousands of features. Because we’ve supplied a bunch of images with labels, AI can look for patterns that are present in cancerous images that are not present in others. As an example, here is a very simple algorithm with one feature (spot perimeter): Calculate the perimeter of a darker spot in the image. If the perimeter of the spot is exactly circular, label the image “not cancer”. If the perimeter of the spot is not circular, label the image “cancer”. Testing the Algorithm After setting up and quantifying the features, we want to make sure the AI is actually doing a good job. We’ll take some images the AI hasn’t seen before, called test data. We know the correct answers, but the AI does not. The AI will measure the features within each of the images to provide an educated guess of the proper label. Every time AI gets a label wrong, it will reassess parts of the algorithm. For example, it might make the tweak below: Calculate the perimeter of a darker spot in the image. If the perimeter of the spot is close to circular, label the image “not cancer”. If the perimeter of the spot is not close to circular, label the image “cancer”. Humans play a big part in what kind of scores are acceptable when producing outputs. With cancer screening, we might be very worried about missing a real instance of cancer. Therefore, we might tell the AI to score false negatives more harshly than false positives. Interfacing with AI Finally, AI would not work without an interface. This is where we can get creative. 
In our skin cancer screening, we might create a website where providers or patients could upload a picture of an area that needs screening. Because skin images could be considered medical data, we would need to think critically about what happens to images after they are uploaded. Are images deleted after a screening prognosis is made? Will images be used to update the training data? Telling people they might have cancer could be very upsetting for them. Our interface should provide supporting resources and clear disclaimers about its abilities. Understanding the AI Spring The “AI Spring” is the period of rapid growth and progress in artificial intelligence starting in the early 2020s. A huge component of the AI Spring is Generative AI, which includes text generation, image creation, natural speech generation, computer code production, biological molecule discovery, and more. In the example above, the AI learns to distinguish between skin conditions based on features and patterns it identifies. Its main goal is to make decisions about someone’s skin condition rather than generating new examples. This is called discriminative AI. Other examples of discriminative AI include: Classifying emails as spam Facial recognition Converting speech-to-text However, let’s imagine we wanted AI to generate examples of skin cancer. If the AI was creating new, realistic images of skin cancer, trying to generate what cancerous lesions might look like, it would be considered generative AI. Examples of generative AI include: Text generated by a chat bot Images created from a text prompt Human sounding voices from an audio clip Generative AI: Creates new, creative things that look like what it has learned. Discriminative AI: Tells things apart or makes decisions based on what it has learned. We’ll talk next about some generative AI models which have made recent breakthroughs possible. Transformer Models Transformers have been especially helpful for text generation. 
They work like smart readers that can understand context and relationships in language very well. Imagine you’re reading a sentence, and at each word, you want to pay attention to other words to understand the context better. The self-attention mechanism does this very efficiently. It allows the model to focus on different parts of the input (like words in a sentence) simultaneously, capturing long-range dependencies. The model then uses this training to generate new text. Take for example this paragraph from the Wikipedia entry for skin cancer. A transformer model would be able to synthesize the information to understand the relationship between UV exposure, risk factors, and the development of different types of skin cancers for different groups of individuals. It can easily distill the information into themes and topics. More than 90% of cases are caused by exposure to ultraviolet radiation from the Sun.[4] This exposure increases the risk of all three main types of skin cancer.[4] Exposure has increased, partly due to a thinner ozone layer. Tanning beds are another common source of ultraviolet radiation. For melanomas and basal-cell cancers, exposure during childhood is particularly harmful. For squamous-cell skin cancers, total exposure, irrespective of when it occurs, is more important. Between 20% and 30% of melanomas develop from moles.[6] People with lighter skin are at higher risk as are those with poor immune function such as from medications or HIV/AIDS. Diagnosis is by biopsy. Diffusion Models Like transformers, diffusion models are useful for generative AI, particularly image generation. The key to diffusion models is that they have a lot of training in how to fill in the blanks. The model starts with many “noisy” images (imagine a photo with lots of holes or black spots) and tries to reproduce the original image. This process is called “denoising score matching”. It then uses this training to generate entirely new content. 
Summary In our skin cancer detection example, an AI system required lots of data labeled with information (“cancer” or “not cancer”). An algorithm looked for patterns between these two groups and then provided the results via an interface. This AI is an example of discriminative AI. Since the early 2020s, generative AI has exploded in popularity, assisted by transformer and diffusion models, among other advancements. These technologies have allowed AI to excel at creating new content, by recognizing deeper context and patterns. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["discussion-how-ai-works.html", "DISCUSSION How AI Works", " DISCUSSION How AI Works Compare and contrast discriminative vs generative AI. When might each approach be most useful? What are some benefits and limitations of each? Early detection of diseases like skin cancer using AI could help save lives. What challenges or limitations might exist in real-world applications of such a system? How could the interface be designed with care, transparency and privacy in mind? Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["demystifying-types-of-ai.html", "Demystifying Types of AI Machine Learning Generative AI Natural Language Processing Strengths and Weaknesses Summary", " Demystifying Types of AI We’ve learned a bit about how AI works. However there are many different types of AI with different combinations of data, algorithms, and interfaces. There are also general terms that are important to know. Let’s explore some of these below. 
Machine Learning Machine learning is a broad concept describing how computers learn from looking at lots of examples. Imagine you are learning to tell the difference between apples and oranges. Someone first has to show you examples and say, “This is an apple, and this is an orange.” Similarly, machine learning approaches need examples of input data that is “labeled” with the correct output. The goal of machine learning is making useful or accurate predictions. Machine learning includes simpler approaches like regression, and more complicated approaches like deep learning. Below are a few examples of machine learning methods. Neural Networks Neural networks are a specific class of algorithms within machine learning. Neural networks mimic the way data is transferred between neurons in the brain. Neural networks organize data into layers, starting with an “input layer” of raw data. Data is then transferred to the next layer, called a “hidden” layer. The hidden layer combines the raw data in many ways to create levels of abstraction. You can think of an image that is very pixelated becoming more clear. Finally, results are produced in an “output layer”. Neural networks often require large amounts of labeled data for training, and their performance may continue to improve with more data. Google uses a neural network to power its search algorithm (AI Team 2023). Neural networks also do a pretty good job of recognizing human handwritten digits. Deep Learning Deep learning refers to neural networks with multiple intermediate “hidden” layers. A neural network with 2+ hidden layers could be considered a deep learning system (AI Team 2023). The advantage of deep learning is that these approaches cluster data automatically, and can detect abstraction or patterns that we might not know ahead of time. This is especially useful for complicated data, like unstructured text or images. Google Translate has used deep learning to accurately translate text since 2016 (Turner 2016). 
However, generative AI methods started being incorporated in the 2020s (Gu 2023). Many of the machine learning approaches we’re discussing here are supervised learning approaches. This means that data is labeled in predefined categories. An example could be “spam” or “not spam” labels attached to a data set of emails. Sometimes, we are more interested in discovering variation, regardless of how we describe, or label, the data. This is called unsupervised learning. An example of this approach could be clustering human cells based on what kind of genes they have turned on. We don’t know what type of cells they are necessarily, but can group them based on their behavior. Generative AI Artificial Intelligence exploded in the early 2020s due to advancements in Generative AI, which includes text generation, image creation, natural speech generation, computer code production, biological molecule discovery, simulated data, and more. Let’s break down some of the following terms related to generative AI. Transformer Models and Architecture In 2017, Google engineers published a paper, “Attention is all you need”, describing a type of neural network they called a transformer (Vaswani et al. 2017). Transformer architecture has revolutionized the field of natural language processing and led to an explosion in what was possible with AI. Transformers are a key feature of what drives generative AI models today, and have allowed huge leaps forward in language understanding and image processing (Tay et al. 2022). The transformer architecture uses something called self-attention to figure out how important different parts of a sentence are when making predictions. This helps the model understand how words relate to each other in a sentence, regardless of their order in the sentence. Do we say transformer model or transformer architecture? Transformer architecture refers to the overall design, or “transformers” generally speaking. 
We use the term “transformer model” when dealing with a specific example, such as the GPT (Generative Pre-trained Transformer) model. Large Language Model Large Language Models (LLMs) are a specific type of generative AI model, often built using the transformer architecture, that leverage a huge volume of language data. Examples include models like OpenAI’s GPT (Generative Pre-trained Transformer) series. LLMs are trained on extensive text datasets and can generate coherent and contextually relevant text passages. You might be very familiar with LLMs, as they include super popular tools like ChatGPT, Bard, Claude Instant, and Llama. The process of interpreting a user prompt for a GPT model might go as follows: A user provides a prompt, such as “Describe a nice vacation for winter time.” The encoder translates words into machine-relevant values such as numerical vectors. It also captures semantic relationships. The transformer weighs different parts of the input for better understanding. The hidden layers of the neural network further decipher complex patterns and representations. Decoders generate the output that the user sees. Diffusion Model Diffusion models are a type of deep generative model. They are particularly powerful when it comes to image generation, but can also be used for other generative AI applications, like video generation and molecule design (Yang et al. 2023). The approach behind diffusion models is that they add more and more random noise to images (the “diffusion” process). Noise is then removed to generate the most “likely” novel outputs. The key feature of these models is the denoising process. A very popular diffusion model is used by Stable Diffusion for real-time text-to-image generation. Variational Autoencoders (VAEs) Variational autoencoders are a type of deep generative model. Variational autoencoders emerged slightly earlier than diffusion models (Kingma and Welling 2013). 
Like diffusion models, they work with data that is noisy and not perfect. Variational autoencoders are trained and generate outputs differently, however. They detect essential features or patterns within inputs and condense them in a more concise and abstract form. This differs from diffusion models, which focus on the process of cleaning up noisy data to make it look like new images, text, etc. Generative Adversarial Networks (GANs) Generative adversarial networks are a type of deep generative model. While the end goals are similar (new generated content), GANs differ in their training and objective. Generative Adversarial Networks work like two computers competing with each other. The first component, the “generator”, creates data, while the second, the “discriminator”, determines if the sample is realistic. Imagine two professionals, one artist specializing in artwork forgery, and one a detective specializing in forgery detection. If they are constantly competing, they will both get better at their respective specialty! The website This Person Does Not Exist (https://thispersondoesnotexist.com/) creates photorealistic headshots of imaginary people using a GAN called StyleGAN2 (Karras et al. 2020). Natural Language Processing Natural language processing, or NLP, deals with interpreting text and extracting relevant information and insights. It is a field of study rather than a type of algorithm. Typically, these systems look at huge volumes of text data to understand the relationship among words, parts of words, or sentences. Natural language processing can also categorize and organize the documents themselves. For example, NLP could help read the contents of documents online and decide whether they are patents or journal articles. These documents could then be indexed in Google Scholar. Initially, NLP was accelerated by techniques such as word vectorization (ODSC 2023). 
In short, this makes it easier for computers to understand that the words “apple” and “orange” (both fruits) are more closely related than “apple” and “planet” (perhaps both round, but that’s less important). Many NLP approaches also use deep learning (Wikipedia 2023b). Increasingly, generative AI is part of natural language processing (ODSC 2023). Natural language processing has been used to summarize the abundance of text information available in electronic health records. For example, healthcare practitioners showed that detecting evidence and information in records could improve treatment and quality of care for patients with diabetes (Turchin and Florez Builes 2021). Strengths and Weaknesses Here is a summary of some strengths and weaknesses of different concepts in AI. These are handy to keep in mind as you are making decisions about what kind of AI to use in your workplace. Strengths Challenges Machine Learning Makes accurate predictions based on learning from labeled examples; includes a large variety of approaches, including computationally cheap ones Extensive volumes of labeled data might be needed Neural Networks Great for recognizing intricate patterns in data; automatically discovers important features in data Require large datasets; can be computationally intensive Deep Learning Captures complex representations of data, enhancing performance in tasks like image and speech recognition Require large datasets; computationally intensive; difficult for humans to interpret reasoning behind outputs (“black box”), which raises ethical concerns Transformer and Large Language Models Self-attention mechanism enables understanding of context efficiently, pivotal for language understanding and generation Very computationally intensive; difficult for humans to interpret reasoning behind outputs (“black box”) Diffusion Models Great for image generation due to the denoising score matching approach. 
Very computationally intensive; might only work well for a specific task (e.g., image generation) Summary Neural Networks and Deep Neural Network Learning are both key components of today’s AI. They function like human brains for advanced pattern recognition. Deep Neural Networks are a critical component of generative AI. Transformer architecture is central to many of today’s Large Language Models and allows for rapid processing of context in text. Diffusion models adjust noise to generate new content, such as images. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["discussion-ai-types.html", "DISCUSSION AI Types", " DISCUSSION AI Types Discuss some of the strengths and weaknesses of different AI techniques covered in the chapter. When might certain approaches be preferable over others given the available data or task? Reflect on the different definitions and terminology covered regarding AI types. What stood out to you? What questions do you still have? Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["what-ai-makes-possible.html", "What AI Makes Possible Advancements in Text Mining Modifying and Generating Text Automating Tedious Processes Idea Generation Planning and Organizing Synthetic Data Generation Text to Speech to Text Interactive Help", " What AI Makes Possible Artificial Intelligence is opening up many possible pathways in many different fields. 
It has allowed: Advancements in text mining More accurate text modification and generation Automation of tedious tasks Idea generation Planning and organizing Synthetic data generation Text-to-speech and back Interactive help and debugging Let’s explore several broad ways in which AI can be used today. Advancements in Text Mining Text mining is the process of extracting meaningful insights, patterns, and knowledge from unstructured textual data. This data could include articles, documents, emails, social media posts, business records, policy records, and more. This data is digested by a computer into a structured format for analysis, allowing for the discovery of hidden patterns, relationships, and / or summary information. Historically, mining for relevant text had to follow rule-based or statistical methods that required a lot of human oversight. Generative AI has led to many advancements in text mining. Some of these include: Contextual Understanding: Generative AI, especially transformer models, has improved contextual understanding in text mining. AI can consider relationships between words in a sentence more effectively using the self-attention mechanisms available as part of transformers. This results in more accurate extraction of context-dependent information. Text Completion and Generation: Generative AI allows for the completion of partial or missing text. In text mining, this capability is useful for handling incomplete or noisy data, improving the overall quality of mined information. Domain-Specific Language Generation: Generative AI can be fine-tuned for domain-specific language generation. This is particularly beneficial in industries where specialized terminology or jargon is prevalent. By training generative models on domain-specific data, text mining models can better adapt to the nuances of the industry or discipline in general. 
Examples Text mining can be used for: Mining clinical patient notes to identify patients with similar symptoms Mining extensive technical or financial documents to identify relevant sections more easily Extracting Population Statistics A simplified example of text mining can be seen here with ChatGPT. We will provide some information from the Wikipedia article for the city of Seattle and ask the large language model to extract only relevant statistics related to population growth. Can you extract relevant statistics from this Wikipedia passage related to population growth? Seattle is a seaport city on the West Coast of the United States. It is the seat of King County, Washington. With a 2022 population of 749,256[10] it is the most populous city in both the state of Washington and the Pacific Northwest region of North America, and the 18th most populous city in the United States. The Seattle metropolitan area’s population is 4.02 million, making it the 15th-largest in the United States.[11] Its growth rate of 21.1% between 2010 and 2020 made it one of the country’s fastest-growing large cities.[12] Seattle is situated on an isthmus between Puget Sound (an inlet of the Pacific Ocean) and Lake Washington. It is the northernmost major city in the United States, located about 100 miles (160 km) south of the Canadian border. A major gateway for trade with East Asia, the Port of Seattle is the fourth-largest port in North America in terms of container handling as of 2021.[13] The Seattle area has been inhabited by Native Americans (such as the Duwamish, who had at least 17 villages around Elliott Bay) for at least 4,000 years before the first permanent European settlers.[14] Arthur A. 
Denny and his group of travelers, subsequently known as the Denny Party, arrived from Illinois via Portland, Oregon, on the schooner Exact at Alki Point on November 13, 1851.[15] The settlement was moved to the eastern shore of Elliott Bay in 1852 and named “Seattle” in honor of Chief Seattle, a prominent 19th-century leader of the local Duwamish and Suquamish tribes. Seattle currently has high populations of Native Americans alongside Americans with strong Asian, African, European, and Scandinavian ancestry, and hosts the fifth-largest LGBT community in the U.S.[16] Here is the response: Certainly! Here are the relevant statistics extracted from the provided passage related to population growth: Population Growth Rate (2010-2020): The growth rate of Seattle between 2010 and 2020 was 21.1%, making it one of the country’s fastest-growing large cities.[12] Current Population (2022): The population of Seattle in 2022 is reported to be 749,256.[10] Metropolitan Area Population: The population of the Seattle metropolitan area is 4.02 million, making it the 15th-largest in the United States.[11] These statistics highlight the significant population growth of Seattle over the specified time period, positioning it as one of the fastest-growing large cities in the United States. In reality, you might be providing a much larger volume of data or be looking for more nuanced information. Modifying and Generating Text Generative AI in the form of Large Language Models like ChatGPT and Claude can be especially useful for modifying and generating text. This could mean changing the tone of a piece of writing, creating summaries based on text data input, or making writing more understandable to different audiences. 
Examples Here are some examples of using AI to modify or generate text: Altering an email to be more empathetic Translating text into another language, otherwise known as machine translation Summarizing multiple files within a project to make a project summary - For example, a README file across many code scripts for software development Summarizing legal documents in a way that is readable to broader audiences with less familiarity with legal jargon Transforming an Angry Email Let’s see this in practice. Take for example this email to a colleague that is really frustrating you right now. You might ask Bard to help you make it more constructive and empathetic. Can you help me transform this email into a more understanding and constructive one? Subject: RE: Proposal Revisions (ARE YOU KIDDING ME?!) Hi Ben, I just read your revisions to the proposal and frankly, it’s an insult to my intelligence and hard work. Did you even bother to skim the original document? Half your changes are nonsensical and the other half seem like you were just trying to sound fancy. This project is riding on my expertise. If you want your name anywhere near this proposal, you’d better come up with something actually useful, not just reword my sentences. And don’t even get me started on the formatting… I expect a complete rewrite by tomorrow morning, and this time, make sure it’s actually worthy of presenting to the client. Don’t waste my time or theirs. Sincerely (not really), Alex Here is the response: Subject: Proposal Revisions - Discussion Needed Hi Ben, Thanks for sending over your revisions to the proposal. I appreciate your taking the time to add your perspective. I do see some areas where our ideas diverge, and I think it would be helpful to discuss them further to ensure we present a unified front to the client. For example, I noticed your revisions to [mention specific section(s)]. 
While I understand your point about [mention specific reason behind Ben’s revision], I was originally thinking of [explain your rationale briefly]. Similarly, there are a few formatting choices in [mention specific sections] that might benefit from further refinement. Perhaps we could brainstorm some ways to improve visual clarity while maintaining consistency with the client’s branding guidelines. Would you be free for a quick chat sometime this afternoon to discuss these points and find a solution that incorporates both our strengths? I believe that collaborating on this will lead to a stronger proposal overall. Best, Alex We all have our moments of frustration, but AI can help us be better colleagues. By asking Bard to transform this email, we can prevent an uncomfortable and hurtful work situation. Automating Tedious Processes Generative AI can help tackle repetitive processes. You can think of it like an automation assistant. It can be especially useful for automating work that requires conversion of content from one form to another, for example text-to-speech, image-to-text, and so on. Examples Here are some examples of using AI to automate tasks: Writing a draft of code annotations to help you annotate code Generating meeting notes automatically Getting suggestions for ways to deidentify clinical data Generating audio from text for a video voiceover Converting physician dictations from a patient visit to text format Meeting Note Takers No one likes to attend a meeting, only to forget what was being discussed. Fortunately, many AI note takers have entered our world recently. The online meeting platform Zoom has an “AI assistant” that can summarize meetings (Zoom 2023). Companies like fireflies.ai and Otter.ai also offer similar functionality, recording meeting notes, offering summaries, capturing action items, and more. Idea Generation Generative AI can provide templates or scaffolds. 
In other words, AI can get the creative juices flowing and make it easier to start new content. AI can also help synthesize data to suggest possible new relationships. Diabetes Gene Hypotheses The Diabetes Data and Hypothesis Hub (D2H2, https://d2h2.maayanlab.cloud/) was created to make it easier for biomedical researchers to access and use data related to diabetes (Marino et al. 2023). The Hub incorporated a GPT-style chatbot that allows users to ask questions in plain terms about their data analysis needs. D2H2 also has the ability to generate hypotheses. It randomly selects gene sets related to existing gene activation signatures and compares them with gene sets found in published articles. The AI provides ideas about possible explanations for the similarities between these gene sets. Planning and Organizing You can use generative AI to suggest and break down ways to accomplish a task based on prior task planning. In other words, it can generate a template for events or projects. For example, it can: Plan the agenda for a conference or meeting Plan and assign roles for a new project task list. Meeting Agendas Let’s imagine we are in a leadership role at a research institution. Soon, the institution will be unveiling a new genomic research coordinating center, but we need to make sure the whole executive suite is on board. Let’s see how a large language model might give us a starting point for the meeting agenda: Can you provide a sample agenda for an executive meeting, where the goal is to reveal a new genomic research coordinating center at our institution? 
Here is the response: Executive Meeting Agenda - Unveiling the New Genomic Research Coordinating Center Meeting Date: [Date] Meeting Time: [Time] Meeting Location: [Location] Attendees: [List executive team members and other key stakeholders] Introduction & Welcome (5 minutes) Welcome by [Host/Chair] Brief overview of meeting goals and agenda Genomic Research Landscape & Challenges (10 minutes) Presentation by [Speaker]: State of the art in genomic research Current challenges in coordination and collaboration Economic and societal impact of advancements in genomics Introducing the New Genomic Research Coordinating Center (20 minutes) Presentation by [Center Director]: Center’s mission, vision, and core values Key services and capabilities offered by the center How the center addresses existing challenges and fosters collaboration Examples of ongoing or planned research initiatives Center Operations & Budget (15 minutes) Presentation by [Center Administrator]: Funding sources and budget allocation Staffing plan and key personnel introductions Technology infrastructure and resources Partnerships and collaborations with external institutions Open Discussion & Q&A (20 minutes) Encourage executives to ask questions and seek clarification Address any concerns or potential roadblocks Gather feedback and suggestions for the center’s future Next Steps & Action Items (10 minutes) Summarize key takeaways and action items Assign clear responsibilities and timelines for next steps Outline communication plan for broader institutional announcement Closing Remarks (5 minutes) Express gratitude for executive support and engagement Reiterate the center’s potential impact and contributions Positive outlook on the future of genomic research at the institution Optional Items - Depending on time constraints, you may consider adding a brief tour of the center facilities or including short demos of key technologies. 
- Prepare handouts with key information about the center, including contact details and resources. - Consider a light breakfast or refreshments to encourage engagement and networking. Remember to adjust the agenda based on your specific needs and priorities. The key is to ensure a clear, concise, and informative presentation of the new center, while also addressing any potential concerns and fostering enthusiasm among the executive team. I hope this sample agenda helps you prepare for a successful meeting! The above gives us a good place to start, so we aren’t staring at a blank page. Of course, it’s important to tailor responses to meet your needs. However, this could get us started, ensuring that we use time in the meeting effectively. As long as the information is not sensitive or proprietary, you could also give the large language model more context. This will help it tailor the response better. Synthetic Data Generation For more technical needs, AI can help generate synthetic data to experiment with. For example, it can help with sparse data, propose/test models for data that does not yet exist, or generate toy data for development and testing. We might be interested in who drinks the most coffee in the office, but we don’t have data yet. Before interviewing all of our colleagues, we might want to test that we can actually do the analysis. Can you create a toy time series dataset where we track individuals, time points, and coffee consumption? Here is the response: Here is a toy time series dataset tracking individuals, time points, and coffee consumption: ID Time_point Coffee_cups 1 1 2 1 2 3 1 3 1 2 1 4 2 2 2 2 3 3 3 1 1 3 2 0 3 3 2 This tracks 3 individuals over 3 time points (days) and their daily coffee consumption in cups. Individual 1 drank 2 cups on day 1, 3 cups on day 2, and 1 cup on day 3. Individual 2 drank 4 cups on day 1, 2 cups on day 2, and 3 cups on day 3. Individual 3 drank 1 cup on day 1, 0 cups on day 2, and 2 cups on day 3. 
Text to Speech to Text Generative AI is great at converting text to realistic sounding speech. It can also listen to you dictate and convert that speech into text. Many of the AI tools that do this are commercial and cost money. For example, companies like Eleven Labs, Natural Reader, and Speechify will read text to you, but typically cost money to use. Both Text-to-Speech, and Speech-to-Text models benefit from specific training. This is because there are likely to be acronyms and jargon specific to your industry or institution. Interactive Help Finally, we think AI is a great tool for interactive help and debugging, especially for programming. If you want to learn more about how this works, check out our other course AI for Efficient Programming on Coursera and on the web. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["discussion-ai-possibilities.html", "DISCUSSION AI Possibilities", " DISCUSSION AI Possibilities How could the various applications of AI discussed help improve life in your industry/field of work or personal life? Are there any specific problems they could help address? Do you see any risks or downsides to organizations and individuals becoming overly reliant on automated tools and AI assistants? How should we strive for a healthy human-AI relationship? Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["ground-rules-for-ai.html", "Ground Rules for AI", " Ground Rules for AI The rapidly changing AI landscape has brought unexpected ethical challenges. To promote benefit over harm, we suggest following these AI use and development guidelines: Recognize guidelines. 
Today, there are some guidelines for ethical use. More will be developed. It is advised that you stay up-to-date on industry-specific guidelines. Consider consequences. Think about possible downstream unintended consequences for using AI. This could be in the context of creating content or developing new AI tools. Acknowledge shortcomings. AI is not perfect. It makes mistakes, is not necessarily superior to humans, and should be used as intended and trained. It is also only as up-to-date as its training data. While humans are very good at generalizing knowledge for different contexts, AI systems can sometimes struggle with this. Human oversight is needed for important and consequential uses. Understand bias. Realize that AI often perpetuates bias. AI is created using data generated by humans, and that data can be biased. It is important to use inclusive datasets and seek expert advice. Promote access. Promote equitable access to AI. Differences in access could worsen existing disparities, or create new ones. Think securely. AI poses security and privacy threats. AI needs to be used and developed carefully with these aspects in mind. Do not use proprietary or private information as prompts for consumer AI tools unless it was specifically designed for private data. Understand costs. AI could exacerbate global climate change and human welfare disparities. Developers should be considerate about their computation needs and not use larger than necessary datasets. Workers who label and curate datasets should be compensated appropriately. Be transparent. Users should be transparent about their use of AI tools. It makes it easier to locate the source of issues. It also helps to uplift human contributions to work and art. Credit sources. When developing tools, be transparent about what data you used to create your AI systems. Be careful not to use work or data from individuals who did not consent to it being used in such ways. Work thoughtfully. 
Ramp up AI projects gradually to identify unexpected behaviors or impacts before full deployment. Starting slowly enables recognition and resolution of issues. Acknowledge complexity. Recognize that if AI systems use overly complex models, it can be difficult to trace how decisions are made using them. Diversify usage. Check the consistency of results using multiple AI tools and timepoints, where possible. Keep learning. Educate yourself and others. To comply with ethical standards, users must be educated about best use practices. If you help set standards for an institution or group, it is strongly advised that you carefully consider how to educate individuals about those standards of use. To learn more about how to responsibly use and develop AI, check out the following minicourse about Avoiding AI harm. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["ai-possibilities-case-studies.html", "AI Possibilities Case Studies Financial Forecasting", " AI Possibilities Case Studies The following are case studies that can help us conceptualize AI in the real world. Financial Forecasting In this case study, we will look at how artificial intelligence has been utilized in governmental financial services. National banks, such as the Federal Reserve of the United States and the European Central Bank of the European Union, have started to explore how Artificial Intelligence can be used for data mining and economic forecast prediction. There are many uses of AI for improving financial institutions, each with potential benefits and risks. Most financial institutions weigh the benefits and risks carefully before implementation. 
For instance, if a financial institution takes a high-risk prediction seriously, such as predicting a financial crisis or a large recession, then it would have a huge impact on a bank’s policy and allow the bank to act early. However, many financial institutions are hesitant to take action based on artificial intelligence predictions because the prediction is for a high-risk situation. If the prediction is not accurate then there can be severe consequences. Additionally, data on rare events such as financial crises are not abundant, so researchers worry that there is not enough data to train accurate models (Nelson 2023). Many banks prefer to pilot AI for low-risk, repeated predictions, in which the events are common and there is a lot of data to train the model on. Let’s look at a few examples that illustrate the potential benefits and risks of artificial intelligence for improving financial institutions. Categorizing Businesses An important task in the analysis of economic data is to classify businesses by institutional sector. For instance, given 10 million legal entities in the European Union, they need to be classified by financial sector to conduct downstream analysis. In the past, classifying legal entities was curated by expert knowledge (Moufakkir 2023). Text-based analysis and machine learning classifiers, which are all considered AI models, help reduce this manual curation time. An AI model would extract important keywords and classify the entity into an appropriate financial sector, such as “non-profits”, “small business”, or “government”. This would be a low-risk use of AI, as one could easily validate the result against the true financial sector. Incorporating new predictors for forecasting Banks are considering expanding upon existing traditional economic models to bring in a wider range of data sources, such as pulling in social media feeds as an indicator of public sentiment. 
The National bank of France has started to use social media information to estimate the public perception of inflation. The Malaysian national bank has started to incorporate new articles into its financial model of gross domestic product estimation. However, the use of these new data sources may raise questions about government oversight of social media and public domain information (OMFIF 2023). Using Large Language Models to predict inflation The US Federal Reserve has researched the idea of using pre-trained large language models from Google to make inflation predictions. Usually, inflation is predicted from the Survey of Professional Forecasters, which pools forecasts from a range of financial forecasts and experts. When compared to the true inflation rate, the researchers found that the large language models performed slightly better than the Survey of Professional Forecasters (Federal Reserve Bank of St. Louis 2023). A concern of using pre-trained large language models is that the data sources used for model training are not known, so the financial institution may be using data that is not in line with its policy. Also, a potential risk of using large language models that perform similarly is the convergence of predictions. If large language models make very similar predictions, banks would act similarly and make similar policies, which may lead to financial instability (OMFIF 2023). Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["introduction-to-avoiding-ai-harm.html", "Introduction to Avoiding AI Harm Motivation Target Audience Curriculum Learning Objectives", " Introduction to Avoiding AI Harm This course aims to help you recognize some of the potential consequences of using or developing AI tools. 
Some of this content was adapted from our course on AI for Efficient Programming. If you intend to use AI for writing code, we recommend that you review this content for a deeper dive into ethics specifically for writing code with generative AI. Motivation The use of artificial intelligence (AI) and in particular, generative AI, has raised a number of ethical concerns. We will highlight several current concerns; however, please be aware that this is a dynamic field and the possible implications of this technology are continuing to develop. It is critical that society continue to evaluate and predict what the consequences of the use of AI will be, so that we can try to mitigate harmful effects. Target Audience This course is intended for leaders who might make decisions about AI at nonprofits, in industry, or academia. They may have an interest to use or develop AI tools. Curriculum This course provides a brief introduction about ethical concepts to be aware of when making decisions about AI, as well as real-world examples of situations that involved ethical challenges. The course is largely focused on generative AI considerations, although some of the content will also be applicable to other types of AI applications. 
The course will cover: Possible societal impacts of AI Guidelines for using AI training and testing data (optional) Concerns to be aware of for AI algorithms Strategies to adhere to AI codes of ethics Concepts for consent with AI IDARE principles (Inclusion, Diversity, Anti-Racism, and Equity) with AI A proposed process for ethical AI use and development Learning Objectives We will demonstrate how to: Describe key ethical concerns for using AI tools Discuss why human evaluation and monitoring is important and necessary Explain why AI should be thought of as a better computer, not a human replacement Discuss the potential benefits of being transparent about the use of AI tools Recognize real-world examples of AI usage that has resulted in ethical debate Identify possible mitigation strategies for major ethical concerns with regard to the algorithms underlying AI tools Describe practices that can help you to adhere to more responsible AI use and development Identify concepts and strategies for promoting social justice in AI use and development Discuss nuances involved with consent in the use of AI Describe a possible process for reflecting on ethical AI use and development Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["societal-impact.html", "Societal Impact Guidelines for Responsible Development and Use of AI. Major Ethical Considerations Intentional and Inadvertent Harm Replacing Humans Inappropriate Use and Lack of Oversight Bias Perpetuation and Disparities Security and Privacy Issues Climate Impact Tips for reducing climate impact Transparency Summary", " Societal Impact There is the potential for AI to dramatically influence society. 
It is our responsibility to proactively think about what uses and impacts we consider to be useful and appropriate and those we consider harmful and inappropriate. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Guidelines for Responsible Development and Use of AI. There are currently several guidelines for the responsible use and development of AI: United States Blueprint for an AI Bill of Rights United States Executive Order on the Safe, Secure, and Trustworthy Development and Use of Artificial Intelligence United States National Institute of Standards and Technology (NIST): AI Risk Management Framework European Commission Ethics Guidelines for trustworthy AI European Union AI Act United Kingdom National AI Strategy The Institute of Electrical and Electronics Engineers (IEEE) Ethically Aligned Design Version 2 As this is an emerging technology, more guidelines will be developed and updated as the technology evolves. When you read this, more guidelines and updates are likely to be available. It is important to be aware of the current ethical guidelines and regulations for your respective field. Major Ethical Considerations In this chapter we will discuss some of the major ethical considerations in terms of possible societal consequences for the use or development of AI tools: Intentional and Inadvertent Harm - Data and technology intended to serve one purpose may be reused by others for unintended purposes. How do we prevent intentional harm? Replacing Humans - AI tools can help humans, but they are not a replacement. Humans are still much better at generalizing their knowledge to other contexts (Sinz et al. (2019)). 
Also, studies suggest that humans value content and objects created by humans more than that of AI when it relates to abstract thought or unique work (Bellaiche et al. (2023), Granulo, Fuchs, and Puntoni (2021)). Inappropriate Use and Lack of Oversight - There are situations in which using AI might not be appropriate now or in the future. A lack of human monitoring and oversight can result in harm. Bias Perpetuation and Disparities - AI models are built on data and code that were created by biased humans, thus bias can be further perpetuated by using AI tools. In some cases bias can even be exaggerated. This combined with differences in access may exacerbate disparities. Security and Privacy Issues - Data for AI systems should be collected in an ethical manner that is mindful of the rights of the individuals the data comes from. Data around usage of those tools should also be collected in an ethical manner. Commercial tool usage with proprietary or private data, code, text, images or other files may result in leaked data not only to the developers of the commercial tool, but potentially also to other users. Climate Impact - As we continue to use more and more data and computing power, we need to be ever more mindful of how we generate the electricity to store and perform our computations. Transparency - Being transparent about what AI tools you use where possible, helps others to better understand how you made decisions or created any content that was derived by AI, as well as the possible sources that the AI tools might have used when helping you. It may also help with future unknown issues related to the use of these tools. Keep in mind that some fields, organizations, and societies have guidelines or requirements for using AI, like for example the policy for the use of large language models for the International Society for Computational Biology. Be aware of the requirements/guidelines for your field. 
Note that this is an incomplete list; additional ethical concerns will become apparent as we continue to use these new technologies. We highly suggest that users of these tools be careful to learn more about the specific tools they are interested in and to be transparent about the use of these tools, so that as new ethical issues emerge, we will be better prepared to understand the implications. Intentional and Inadvertent Harm AI tools need to be developed with safeguards and continually audited to ensure that the AI system is not responsive to harmful requests by users. With additional usage and updates, AI tools can adapt and thus continual auditing is required. Of course using AI to help you perform a harmful action would result in intentional harm. This may sound like an obvious and easy issue to avoid, at least by those with good intent. However, the consequences may be much further reaching than might be first anticipated. Perhaps you or your company develop an AI tool that helps to identify individuals that might especially benefit from a product or service that you offer. This in and of itself is likely not harmful. However, the data you have used, the data that you may have collected, and the tool that you have created, all could be used for other malicious reasons, such as targeting specific groups of people for advertisements when they are vulnerable. Therefore it is critical that we be considerate of the downstream consequences of what we create and what might happen if that technology or data was used for other purposes. Tips for avoiding inadvertent harm For decision makers about AI use: Consider how the content or decisions generated by an AI tool might be used by others. Continually audit how AI tools that you are using are performing. Do not implement changes to systems or make important decisions using AI tools without human oversight. For decision makers about AI development: Consider how newly developed AI tools might be used by others. 
Continually audit AI tools to look for unexpected and potentially harmful or biased behavior. Be transparent with users about the limitations of the tool and the data used to train the tool. Caution potential users about any potential negative consequences of use. Replacing Humans While AI systems are useful, they do not replace human strengths. While AI systems are good at synthesizing lots of data, humans remain far superior at generalizing concepts to new contexts (Sinz et al. (2019)). AI systems should be thought of as better computers as opposed to replacements for humans. While there are some contexts in which human labor has already been replaced by robotics and AI, studies show that humans tend to prefer human-made goods when those goods are not strictly functional (Bellaiche et al. (2023), Granulo, Fuchs, and Puntoni (2021)). It has been proposed that there will be radical shifts in the way that humans work in many fields including health care, banking, retail, security, and more (Selenko et al. (2022)). Yet we need to implement changes gradually to allow for time to better understand the consequences and mindfully consider how such changes impact human employment and well-being. Selenko et al. (2022) have proposed a framework for considering the impact of AI usage on human workers to promote benefit and avoid harm. It suggests considering usage in a few different ways: AI for complementing work, AI for replacing tasks, and AI for generating new tasks. It suggests considering how such usages might reduce tedious or dangerous work, while also preserving work-related benefits such as self-esteem, belonging, and perceived meaningfulness. See here for the article. Example 1 AI might become much more prominent in the field of journalism and may help deliver news more rapidly, deliver news from dangerous locations, and possibly even create content less biased politically or otherwise if the models are specifically trained to be objective (Latar (2015)). 
Yet, larger usage of AI in journalism also poses additional risks of misinformation, infiltration by outsiders, and a lack of human values if the usage lacks appropriate and sufficient human oversight. “robot journalist story writers will have instant access to new insights and information, and their new ability to compose the story and publish it in seconds may cause human journalists to become obsolete. This is alarming, as no robot journalists can replace human journalists as the guardians of democracy and human rights.” (Latar (2015)) “This potential threat to the profession of human journalism is viewed by some optimistic journalists merely as another tool that will free them of the necessity to conduct costly and, at times, dangerous investigations. The robot journalists will provide them, so the optimists hope, with an automated draft for a story that they will edit and enrich with their in-depth analysis, their perspectives and their narrative talents. The more pessimistic journalists view the new robot journalists as a real threat to their livelihood and style of working and living.” Computer science is a field that has historically lacked diversity. It is also critical that we support diverse new learners of computer science, as we will continue to need human involvement in the development and use of AI tools. This can help to ensure that more diverse perspectives are accounted for in our understanding of how these tools should be used responsibly. Tips for supporting human contributions For decision makers about AI use: Avoid thinking that content by AI tools must be better than that created by humans, as this is not true (Sinz et al. (2019)). Recall that humans wrote the code to create these AI tools and that the data used to train these AI tools also came from humans. Many of the large commercial AI tools were trained on websites and other content from the internet. 
Be transparent where possible about when you do or do not use AI tools, give credit to the humans involved as much as possible. Make decisions about using AI tools based on ethical frameworks in terms of considering the impact on human workers. For decision makers about AI development: Be transparent about the data used to generate tools as much as possible and provide information about what humans may have been involved in the creation of the data. Make decisions about creating AI tools based on ethical frameworks in terms of considering the impact on human workers. A new term in the medical field called AI paternalism describes the concept that doctors (and others) may trust AI over their own judgment or the experiences of the patients they treat. This has already been shown to be a problem with earlier AI systems intended to help distinguish patient groups. Not all humans will necessarily fit the expectations of the AI model if it is not very good at predicting edge cases (Hamzelou n.d.). Therefore, in all fields it is important for us to not forget our value as humans in our understanding of the world. Inappropriate Use and Lack of Oversight There are situations in which we may, as a society, not want an automated response. There may even be situations in which we do not want to bias our own human judgment by that of an AI system. There may be other situations where the efficiency of AI may also be considered inappropriate. While many of these topics are still under debate and AI technology continues to improve, we challenge the readers to consider such cases given what is currently possible and what may be possible in the future. Some reasons why AI may not be appropriate for certain situations include: Despite the common misconception that AI systems have clearer judgment than humans, they are in fact typically just as prone to bias and sometimes even exacerbate bias (Pethig and Kroenung (2023)). 
There are some very mindful researchers working on these issues in specific contexts and making progress where AI may actually improve on human judgment, but generally speaking AI systems are currently typically biased and reflective of human judgment but in a more limited manner based on the context in which they have been trained. AI systems can behave in unexpected ways (Gichoya et al. (2022)). Humans are still better than AI at generalizing what they learn for new contexts (Sinz et al. (2019)). Humans can better understand the consequences of discussions from a humanity standpoint. Some examples where it may be considered inappropriate for AI systems to be used (even with human involvement) include: In the justice system to determine if someone is guilty of a crime or to determine the punishment of someone found guilty of a crime. It may be considered inappropriate for AI systems to be used in certain warfare circumstances. Additionally there are many contexts in which using AI without human intervention could be very problematic including: Diagnosis of disease for patients - Delivering this news should likely come from a human. Secondly, the stakes for errors in the AI system could be very high. What if the system works poorly occasionally for certain individuals? What if the system starts behaving strangely? What if a patient with an unusual situation comes in that the AI system can’t work well for? Even for seemingly benign uses, if humans do not intervene, it is possible that negative consequences could occur if the system starts working poorly or unusually. Example 2 Real-World Example Uber drivers in India experienced issues with the facial recognition technology for logging into the App. This caused many drivers to get locked out of their accounts temporarily or permanently resulting in a reduction in their capacity to work and earn a living (Bansal (2022)). Read more about this in this article. 
Tips for avoiding inappropriate uses and lack of oversight For decision makers about AI use: Stay up-to-date on current laws, practices, and standards for your field, especially for high-risk uses. Stay up-to-date on the news for how others have experienced their use of AI. Stay involved in discussions about appropriate uses for AI, particularly for policy. Begin using AI slowly and iteratively to allow time to determine the appropriateness of the use. Some issues will only be discovered after some experience. Involve a diverse group of individuals in discussions of intended uses to better account for a variety of perspectives. Seek outside expert opinion whenever you are unsure about your AI use plans. Consider AI alternatives if something doesn’t feel right. For decision makers about AI development: Be transparent with users about the potential risks that usage may cause. Stay up-to-date on current laws, practices, and standards for your field, especially for high-risk uses. Stay up-to-date on the news for how others may have experienced problems using AI. Stay involved in discussions about appropriate uses for AI, particularly for policy. Involve a diverse group of individuals in development to better account for a variety of perspectives. Seek outside expert opinion whenever you are unsure about your AI development plans. Consider AI alternatives if something doesn’t feel right. Design tools with safeguards to stop users from requesting harmful or irresponsible uses. Design tools with responses that may ask users to be more considerate in the usage of the tool. Bias Perpetuation and Disparities One of the biggest concerns is the potential for AI to further perpetuate bias. AI systems are trained on data created by humans. If this data used to train the system is biased (and this includes existing code that may be written in a biased manner), the resulting content from the AI tools could also be biased. 
This could lead to discrimination, abuse, or neglect for certain groups of people, such as those with certain ethnic or cultural backgrounds, genders, ages, sexuality, capabilities, religions or other group affiliations. It is well known that data and code are often biased (Belenguer 2022). The resulting output of AI tools should be evaluated for bias and modified where needed. Please be aware that because bias is intrinsic, it may be difficult to identify issues. Therefore, people with specialized training to recognize bias should be consulted. It is also vital that evaluations be made throughout the software development process of new AI tools to check for and consider potential perpetuation of bias. Because of differences in access to technology, disparities may be further exacerbated by the usage of AI tools. Consideration and support for under-served populations will be even more necessary. For example, tools that only work well on individuals with light skin will lead to further challenges for some individuals. Developing and scaling-up artificial intelligence-based innovations for use in low- and middle-income countries will thus require deliberate efforts to generate locally representative training data (Paul and Schaefer (2020)). On the flip side, AI has the potential, if used wisely, to reduce health inequities by potentially enabling the scaling and access to expertise not yet available in some locations. Tips for avoiding bias For decision makers about AI use: Be aware of the biases in the data that is used to train AI systems. Check what data was used to train the AI tools that you use where possible. Tools that are more transparent are likely more ethically developed. Check if the developers of the AI tools you are using were/are considerate of bias issues in their development where possible. Tools that are more transparent are likely more ethically developed. Consider the possible outcomes of the use of content created by AI tools. 
Consider if the content could possibly be used in a manner that will result in discrimination. For decision makers about AI development: Check for possible biases within data used to train new AI tools. Are there harmful data values? Examples could include discriminatory and false associations. Are the data adequately inclusive? Examples could include a lack of data about certain ethnic or gender groups or disabled individuals, which could result in code that does not adequately consider these groups, ignores them all together, or makes false associations. Are the data of high enough quality? Examples could include data that is false about certain individuals. Evaluate the code for new AI tools for biases as it is developed. Check if any of the criteria for weighting certain data values over others are rooted in bias. Continually audit the code for potentially biased responses. Potentially seek expert help. Be transparent with users about potential bias risks. Consider the possible outcomes of the use of content created by newly developed AI tools. Consider if the content could possibly be used in a manner that will result in discrimination. See Belenguer (2022) for more guidance. We also encourage you to check out the following video for a classic example of bias in AI: For further details check out this course on Coursera about building fair algorithms. We will also describe more in the next section. Security and Privacy Issues Security and privacy are a major concern for AI usage. Here we discuss a few aspects related to this. 
Use the right tool for the job There are three kinds of commercial AI tools (Nigro (2023)): Consumer tools (likely not private/secure) Enterprise tools (can be secure with the right legal agreements in place) Open source tools (depends on where you use them and whether you control the computers they run on) Public commercial AI tools are often not designed to protect users from unknowingly submitting prompts that include proprietary or private information. Different AI tools have different practices in terms of how they do or do not collect data about the prompts that people submit. They also have different practices in terms of if they reuse information from prompts to other users. Note that the AI system itself may not be trained on responses for how prompt data is collected or not. So asking the AI system may not give accurate answers. Thus if users of public AI tools, such as ChatGPT, submit prompts that include proprietary or private information, they run the risk of that information being viewable not only by the developers/maintainers of the AI tool used, but also by other users who use that same AI tool. AI can have security blind spots Furthermore, AI tools are not always trained in a way that is particularly conscious of data security. If, for example, code is written using these tools by users who are less familiar with coding security concerns, protected data or important passwords may be leaked within the code itself. AI systems may also utilize data that was actually intended to be private. Data source issues It is also important to consider what data the responses that you get from a commercial AI tool might actually be using. Are these datasets from people who consented to their data being used in this manner? If you are generating your own tools, did people consent for their data to be used as you intend? 
Data privacy is a major issue all on its own: 98% of Americans still feel they should have more control over the sharing of their data (Pearce (2021)) It is important to follow legal and ethical guidance around the collection of data and to use tools that also abide by these guidelines. Tips for reducing security and privacy issues For decision makers about AI use: Check that no sensitive data, such as Personally Identifiable Information (PII) or proprietary information becomes public through prompts to consumer AI systems or systems not designed or set up with the right legal agreements in place for sensitive data. Consider purchasing a license for a private AI system if needed or create your own if you wish to work with sensitive data (seek expert guidance to determine if the AI systems are secure enough). Ask AI tools for help with security when using consumer tools, but do not rely on them alone. In some cases, consumer AI tools will even provide little guidance about who developed the tool and what data it was trained on, regardless of what happens to the prompts and if they are collected and maintained in a secure way. Promote regulation of AI tools by voting for standards where possible. Possible Generative AI Prompt: Are there any methods that could be implemented to make this code more secure? For decision makers about AI development: Consult with an expert about data security if you want to design or use an AI tool that will regularly use private or proprietary data. Be clear with users about the limitations and security risks associated with tools that you develop. Promote regulation of AI tools by voting for standards where possible. Possible Generative AI Prompt: Are there any possible data security or privacy issues associated with the plan you proposed? Climate Impact AI can help humans to innovate ways to improve efficiency and to devise strategies to help mitigate climate issues (Jansen et al. (2023); Cowls et al. (2023)). 
Importantly this needs to be done in a manner with social justice in mind, as often those that have the least resources to deal with climate issues are also the most likely to be impacted (Jansen et al. (2023); Bender et al. (2021)). A few organizations are working on supporting the use of AI for climate crises mitigation uses such as: AI for the Planet: https://www.aifortheplanet.org/en Climate Change AI (CCAI): https://www.climatechange.ai/about However, AI also poses a number of climate risks (Bender et al. (2021); Hulick (2021); Jansen et al. (2023); Cowls et al. (2023)). The data storage and computing resources needed for the development of AI tools could exacerbate climate challenges (Bender et al. (2021)) If not designed carefully, AI could also spread false solutions for climate crises or promote inefficient practices (Jansen et al. (2023)). Differences in access to AI technologies may exacerbate social inequities related to climate (Hulick (2021)) Tips for reducing climate impact For decision makers about AI use: Where possible use tools that are transparent about resource usage and that identify how they have attempted to improve efficiency For decision makers about AI development: Modify existing models as opposed to unnecessarily creating new models from scratch where possible. Avoid using models with datasets that are unnecessarily large (Bender et al. (2021)) Solutions such as federated learning, where AI models are iteratively trained in multiple locations using data at those locations, instead of collectively sharing the data to create more massive datasets can help reduce the required resources and also help preserve data privacy and security. Use emerging tools and guidelines to estimate and monitor the resource usage involved in training models (Castaño Fernández (2023)). Be transparent about resources used to train models (Castaño Fernández (2023)). 
Utilize data storage and computing options that are designed to be more environmentally conscious options, such as solar or wind power generated electricity. Transparency In the United States Blueprint for the AI Bill of Rights, it states: You should know that an automated system is being used and understand how and why it contributes to outcomes that impact you. This transparency is important for people to understand how decisions are made using AI, which can be especially vital to allow people to contest decisions. It also better helps us to understand what AI systems may need to be fixed or adapted if there are issues. Tips for being transparent For decision makers about AI use: Where possible include the AI tool and version that you may be using and why so people can trace back where decisions or content came from Use tools that are transparent about what data was used where possible For decision makers about AI development: Providing information about what training data or methods were used to develop new AI models can help people to better understand why it is working in a particular way. Summary Here is a summary of all the tips we suggested: Be mindful of how content created with AI or AI tools may be used for unintended purposes. Be aware that humans are still better at generalizing concepts to other contexts (Sinz et al. (2019)). Always have expert humans review content created by AI and value human contributions and thoughts. Carefully consider if an AI solution is appropriate for your context. Be aware that AI systems are biased and their responses are likely biased. Any content generated by an AI system should be evaluated for potential bias. Be aware that AI systems may behave in unexpected ways. Implement new AI solutions slowly to account for the unexpected. Test those systems and try to better understand how they work in different contexts. 
Be aware of the security and privacy concerns for AI, be sure to use the right tool for the job and train those at your institute appropriately. Consider the climate impact of your AI usage and proceed in a manner that makes efficient use of resources. Be transparent about your use of AI. Overall, we hope that awareness of these concerns and the tips we shared will help us all use AI tools more responsibly. We recognize, however, that as this is emerging technology, more ethical issues will emerge as we continue to use these tools in new ways. Staying up-to-date on the current ethical considerations will also help us all continue to use AI responsibly. References "],["algorithm-considerations.html", "Algorithm considerations Harmful or Toxic Responses Lack of Interpretability Misinformation and Faulty Responses Summary", " Algorithm considerations In this chapter we will discuss some of the major ethical considerations regarding the algorithms underlying AI tools. We will provide some tips for how to deal with these issues that may be useful for creating AI guidelines at your institution. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Toxic Responses - Currently it is not clear how well generative AI models restrict harmful responses in terms of ideas, code, text, etc. Lack of Interpretability - When complicated algorithms are used within AI systems, it can be unclear how it came up with a decision. In many circumstances it is necessary to understand how the AI system works to know how to proceed. Misinformation and Faulty Responses - Fake or manipulated data used to help design algorithms could be believed to be correct and this could be further propagated. Text, code, etc. 
provided to users may not be correct or optimal for a given situation, and may have at times severe downstream consequences. Note that this is an incomplete list; additional ethical concerns will become apparent as we continue to use these new technologies. We highly suggest that users of these tools are careful to learn more about the specific tools they are interested in and to be transparent about the use of these tools, so that as new ethical issues emerge, we will be better prepared to understand the implications. Harmful or Toxic Responses One major concern is the use of AI to generate malicious content. Secondly the AI itself may accidentally create harmful responses or suggestions. For instance, AI could start suggesting the creation of code that spreads malware or hacks into computer systems. Another issue is what is called “toxicity”, which refers to disrespectful, rude, or hateful responses (Nikulski (2021)). These responses can have very negative consequences for users. Ultimately these issues could cause severe damage to individuals and organizations, including data breaches and financial losses. AI systems need to be designed with safeguards to avoid harmful responses, to test for such responses, and to ensure that the system is not infiltrated by additional possibly harmful parties. Tips for avoiding the creation of harmful content For decision makers about AI use: Be careful about what commercial tools you employ, they should be transparent about what they do to avoid harm. Be careful about the context in which you might have people use AI - will they know how to use it responsibly? Be careful about what content you share publicly, as it could be used for malicious purposes. Consider how the content might be used by others for unintended purposes. Ask the AI tools to help you, but do not rely on them alone. Possible Generative AI Prompt: What are the possible downstream uses of this content? 
Possible Generative AI Prompt: What are some possible negative consequences of using this content? For decision makers about AI development: If designing a system, ensure that best practices are employed to avoid harmful responses. This should be done during the design process and the system should also be regularly evaluated. Some development systems such as Amazon Bedrock have tools for evaluating toxicity to test for harmful responses. Although such systems can be helpful to automatically test, evaluation should also be done directly by humans. Consider how the content from AI tools that you design might be used by others for unintended purposes. Monitor your tools for unusual and harmful responses. Lack of Interpretability There is risk in using AI tools, that we may encounter situations where it is unclear why the AI system came to a particular result. AI systems that use more complicated algorithms can make it difficult to trace back the decision process of the algorithm. Using content created or modified by AI, could make it difficult for others to understand if the content is adequate or appropriate, or to identify and fix any issues that may arise. This could result in negative consequences, such as for example reliance on a system that distinguishes consumers or patients based on an arbitrary factor that is actually not consequential. Decisions based on AI responses therefore need to be made extra carefully and with clarity about why the AI system may be indicating various trends or predictions. Tips for avoiding a lack of interpretability For decision makers about AI use: Content should be reviewed by those experienced in the given field. Ask AI tools to help you understand how it got to the response that it did, but get expert assistance where needed. Always consider how an AI system derived a decision if the decision is being used for something that impacts humans Possible Generative AI Prompt: Can you explain how you generated this response? 
For decision makers about AI development: New AI tools should be designed with interpretability in mind, simpler models may make it easier to interpret results. Responses from new tools should be reviewed by those experienced in the given field. Provide transparency to users about how new AI tools generally create responses. Misinformation and Faulty Responses AI tools use data that may contain false or incorrect information and may therefore respond with content that is also false or incorrect. This is due to a number of reasons: AI tools may “hallucinate” fake responses based on artifacts of the algorithm AI tools may be trained on data that is out-of-date AI tools may be trained on data that has fake or incorrect information AI tools are not necessarily trained for every intended use and may therefore not reflect best practices for a given task or field AI tools may also report that fake data is real, when it is in fact not real. For example, currently at the time of the writing of this course, older versions of ChatGPT will report citations with links that are not always correct and it doesn’t seem to be able to correct itself very well when challenged. Furthermore, AI models can “hallucinate” incorrect responses based on artifacts of the algorithm underneath the tool. These responses are essentially made up by the tool. It is difficult to know when a tool is hallucinating especially if it is a tool that you did not create, therefore it is important to review and check responses from AI tools. There is also a risk that content written with AI tools, may be incorrect or inappropriate for the given context of intended use, or they may not reflect best practices for a given context or field. The tools are limited to the data they were trained on, which may not reflect your intended use. It is also important to remember that content generated by AI tools is not necessarily better than content written by humans. 
Additionally, review and auditing of AI-generated content by humans is needed to ensure that they are working properly and giving expected results. Tips for reducing misinformation & faulty responses For decision makers about AI use: Be aware that some AI tools currently make up false information based on artifacts of the algorithm called hallucinations or based on false information in the training data. Do not assume that the content generated by AI is real or correct. Realize that AI is only as good or up-to-date as what it was trained on, the content may be generated using out-of-date data. Look up responses to ensure it is up-to-date. In many cases utilizing multiple AI tools can help you to cross-check the responses (however be careful about the privacy of each tool if you use any private or proprietary data in your prompts!). Ask the AI tools for extra information about if there are any potential limitations or weaknesses in the responses, but keep in mind that the tool may not be aware of issues and therefore human review is required. The information provided by the tool can however be a helpful starting point. Possible Generative AI Prompt: Are there any limitations associated with this response? Possible Generative AI Prompt: What assumptions were made in creating this content? For decision makers about AI development: Monitor newly developed tools for accuracy Be transparent with users about the limitations of the tool Consider training generative AI tools to have responses that are transparent about limitations of the tool. Example 3 Real World Example Stack Overflow, a popular community-based website where programmers help one another, has (at the time of writing this) temporarily banned users from answering questions with AI-generated code. This is because users were posting incorrect answers to questions. It is important to follow policies like this (as you may face removal from the community). 
This policy goes to show that you really need to check the code that you get from AI models. While they are currently helpful tools, they do not know everything. Summary Here is a summary of all the tips we suggested: Design new AI systems with interpretability in mind Don’t assume AI-generated content is real, accurate, consistent, current, or better than that of a human. Ask the AI tools to help you understand: Sources for the content that you can cite Any decision processes in how the content was created Potential limitations Potential security or privacy issues Potential downstream consequences of the use of the content Always have expert humans review/audit content and value your own contributions and thoughts. Overall, we hope that these guidelines and tips will help us all use AI tools more responsibly. We recognize, however, that as this is emerging technology, more ethical issues will emerge as we continue to use these tools in new ways. AI tools can even help us to use them more responsibly when we ask the right additional questions, but remember that human review is always necessary. Staying up-to-date on the current ethical considerations will also help us all continue to use AI responsibly. References "],["adherence-practices.html", "Adherence practices Start Slow Check for Allowed Use Use Multiple AI Tools Educate Yourself and Others Summary", " Adherence practices Here we suggest some simple practices that can help you and others at your institution to better adhere to current proposed ethical guidelines. Start Slow - Starting slow can allow for time to better understand how AI systems work and any possible unexpected consequences. Check for Allowed Use - AI model responses are often not transparent about using code, text, images and other data types that may violate copyright. They are currently not necessarily trained to adequately credit those who contributed to the data that may help generate content. 
Use Multiple AI Tools - Using a variety of tools can help reduce the potential for ethical issues that may be specific to one tool, such as bias, misinformation, and security or privacy issues. Educate Yourself and Others - To actually comply with ethical standards, it is vital that users be educated about best practices for use. If you help set standards for an institution or group, it is strongly advised that you carefully consider how to educate individuals about those standards of use. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Start Slow Launching large projects using AI before you get a chance to test them could lead to disastrous consequences. Unforeseen challenges have already come up with many uses of AI, so it is wise to start small and do evaluation first before you roll out a system to more users. This also gives you time to correspond with legal, equity, security, etc. experts about the risks of your AI use. Tips for starting slow For decision makers about AI use: Consider an early adopters program to evaluate usage. Educate early users about the limitations of AI. Consider using AI first for more specific purposes. Consult with experts about potential unforeseen challenges. Continue to assess and evaluate AI systems over time. For decision makers about AI development: Consider developing tools for more simple specific tasks, rather than broad difficult tasks. Consider giving potential users guidance about using the tool for simpler tasks at first. Continue to assess and evaluate AI systems over time. Example 4 Real-World Example IBM created Watson, an AI system that participated and won on the game show Jeopardy! and showed promise for advancing healthcare. 
However IBM had lofty goals for Watson to revolutionize cancer diagnosis, yet unexpected challenges resulted in unsafe and incorrect responses. IBM poured many millions of dollars in the next few years into promoting Watson as a benevolent digital assistant that would help hospitals and farms as well as offices and factories. The challenges turned out to be far more difficult and time-consuming than anticipated. IBM insists that its revised A.I. strategy — a pared-down, less world-changing ambition — is working ((lohr_what_2021?)). See here for additional info: https://ieeexplore.ieee.org/abstract/document/8678513 Check for Allowed Use When AI systems are trained on data, they may also learn and incorporate copyrighted information or protected intellectual property. This means that AI-generated content could potentially infringe on the copyright or protection of trademarks or patents of the original author. For more extreme examples, if an AI system is trained on an essay or art or in some cases even code written by a human, the AI system could generate responses that are identical to or very similar to that of the original author, which some AI tools have done. Regardless, even training AI tools on copyrighted information where the responses are still relatively different, if the AI system uses this content without permission from the original author, this could constitute copyright or trademark infringement (Brittain and Brittain (2023)). Example 5 Open AI is facing lawsuits about using writing from several authors to train ChatGPT without permission from the authors. While this poses legal questions, it also poses ethical questions about the use of these tools and what it means for the people who created content that helped train AI tools. How can we properly give credit to such individuals? 
The lawsuits are summarized by Brittain and Brittain (2023): The lawsuit is at least the third proposed copyright-infringement class action filed by authors against Microsoft-backed OpenAI. Companies, including Microsoft (MSFT.O), Meta Platforms (META.O) and Stability AI, have also been sued by copyright owners over the use of their work in AI training. The new San Francisco lawsuit said that works like books, plays and articles are particularly valuable for ChatGPT’s training as the “best examples of high-quality, long form writing.” OpenAI and other companies have argued that AI training makes fair use of copyrighted material scraped from the internet. The lawsuit requested an unspecified amount of money damages and an order blocking OpenAI’s “unlawful and unfair business practices.” AI poses questions about how we define art and if AI will reduce the opportunities for employment for human artists. See here for an interesting discussion, in which it is argued that AI may enhance our capacity to create art. This will be an important topic for society to consider. Tips for checking for allowed use For decision makers about AI use: Be transparent about what AI tools you use to create content. Ask the AI tools if the content it helped generate used any content that you can cite. Possible Generative AI Prompt: Did this content use any content from others that I can cite? For decision makers about AI development: Obtain permission from the copyright holders of any content that you use to train an AI system. Only use content that has been licensed for use. Cite all content that you can. Use Multiple AI Tools Only using one AI tool can increase the risk of the ethical issues discussed. For example, it may be easier to determine if a tool is incorrect about a response if we see that a variety of tools have different answers to the same prompt. Secondly, as our technology evolves, some tools may perform better than others at specific tasks. 
It is also necessary to check responses over time with the same tool, to verify that a result is even consistent from the same tool. Tips for using multiple AI tools For decision makers about AI use: Check that each tool you are using meets the privacy and security restrictions that you need. Utilize platforms that make it easier to use multiple AI tools, such as https://poe.com/, which has access to many tools, or Amazon Bedrock, which actually has a feature to send the same prompt to multiple tools automatically, including for more advanced usage in the development of models based on modifying existing foundation models. Evaluate the results of the same prompt multiple times with the same tool to see how consistent it is over time. Use slightly different prompts to see how the response may change with the same tool. Consider if using tools that work with different types of data may be helpful for answering the same question. For decision makers about AI development: Consider if using different types of data may be helpful for answering the same question. Consider promoting your tool on platforms that allow users to work with multiple AI tools. Educate Yourself and Others There are many studies indicating that individuals typically want to comply with ethical standards, but it becomes difficult when they do not know how (Giorgini et al. (2015)). Furthermore, individuals who receive training are much more likely to adhere to standards (Kowaleski, Sutherland, and Vetter (2019)). Properly educating those you wish to comply with standards, can better ensure that compliance actually happens. It is especially helpful if training materials are developed to be especially relevant to the actual potential uses by the individuals receiving training and if the training includes enough fundamentals so that individuals understand why policies are in place. 
Example 6 Real-World Example A lack of proper training at Samsung led to a leak of proprietary data due to unauthorized use of ChatGPT by employees – see https://cybernews.com/news/chatgpt-samsung-data-leak for more details: “The information employees shared with the chatbot supposedly included the source code of software responsible for measuring semiconductor equipment. A Samsung worker allegedly discovered an error in the code and queried ChatGPT for a solution. OpenAI explicitly tells users not to share “any sensitive information in your conversations” in the company’s frequently asked questions (FAQ) section. Information that users directly provide to the chatbot is used to train the AI behind the bot. Samsung supposedly discovered three attempts during which confidential data was revealed. Workers revealed restricted equipment data to the chatbot on two separate occasions and once sent the chatbot an excerpt from a corporate meeting. Privacy concerns over ChatGPT’s security have been ramping up since OpenAI revealed that a flaw in its bot exposed parts of conversations users had with it, as well as their payment details in some cases. As a result, the Italian Data Protection Authority has banned ChatGPT, while German lawmakers have said they could follow in Italy’s footsteps.” Tips to educate yourself and others For decision makers about AI use: Emphasize the importance of training and education. Recognize that general AI literacy to better understand how AI works, can help individuals use AI more responsibly. Seek existing education content made by experts that can possibly be modified for your use case. Consider how often people will need to be reminded about best practices. Should training be required regularly? Should individuals receive reminders about best practices especially in contexts in which they might use AI tools? Make your best practices easily findable and help point people to the right individuals to ask for guidance. 
Recognize that best practices for AI will likely change frequently in the near future as the technology evolves, education content should be updated accordingly. For decision makers about AI development: Emphasize the importance of training and education. Recognize that more AI literacy to better understand security, privacy, bias, climate impact and more can help individuals develop AI more responsibly. Seek existing education content made by experts that can possibly be modified for your use case. Consider how often people will need to be reminded about best practices. Should training be required regularly? Should individuals receive reminders about best practices especially in contexts in which they might develop AI tools. Make your best practices easily findable and help point people to the right individuals to ask for guidance. Recognize that best practices for AI will likely change frequently in the near future as the technology evolves, education content should be updated accordingly. We have also included an optional section for new developers about considerations for testing and training data to ensure accurate assessment of performance. Effective use of Training and Testing data In the previous chapters, we started to think about the ethics of using representative data for building our AI model. In this chapter we will see that even if our data is inclusive and represents our population of interest, issues can still happen if the data is mishandled during the AI model building process. Let’s take a look at how that can happen. Population and sample The data we collect to train our model is typically a limited representation of what we want to study, and as we explored in the previous chapter, bias can arise through our choice of selection. Let us define two terms commonly used in artificial intelligence and statistics: the population is the entire group of entities we want to get information from, study, and describe. 
If we were building an artificial intelligence system to classify dog photographs based on their breeds, then the population is every dog photograph in the world. That’s prohibitively expensive and not easy data to acquire, so we use a sample, which is a subset of the population, to represent our desired population. Even if we are sure that the sample is representative of the population, a different type of bias, in this case statistical bias, can arise. It has to do with how we use the sample data for training and evaluating the model. If we do this poorly, it can result in a model that gives skewed or inaccurate results at times, and/or we may overestimate the performance of the model. This statistical bias can also result in the other type of bias we have already described, in which a model unfairly impacts different people, often called unfairness. There are many other sources of unfairness in model development - see Baker and Hawn (2022). Training data The above image depicts some of our samples for building an artificial intelligence model to classify dog photographs based on their breeds. Each dog photograph has a corresponding label that gives the correct dog breed, and the goal of the model training process is to have the artificial intelligence model learn the association between photograph and dog breed label. For now, we will use all of our samples for training the model. The data we use for model training is called the training data. Then, once the model is trained and has learned the association between photograph and dog breed, the model will be able to make new predictions: given a new dog image without its breed label, the model will make a prediction of what its breed label is. Testing data To evaluate how good this model is at predicting dog breeds from dog images, we need to use some of our samples to evaluate our model. The samples being used for model evaluation are called the testing data. 
For instance, suppose we used these four images to score our model. We give it to our trained model without the true breed label, and then the model makes a prediction on the breed. Then we compare the predicted breed label with the true label to compute the model accuracy. Evaluation Suppose we get 3 out of 4 breed predictions correct. That would be an accuracy of 75 percent. Proper separation of Training and Testing data However, we have inflated our model evaluation accuracy. The samples we used for model evaluation were also used for model training! Our training and testing data are not independent of each other. Why is this a problem? When we train a model, the model will naturally perform well on the training data, because the model has seen it before. This is called Overfitting. In real life, when the dog breed image labeling system is deployed, the model would not be seeing any dog images it has seen in the training data. Our model evaluation accuracy is likely too high because it is evaluated on data it was trained on. Let’s fix this. Given a sample, we split it into two independent groups for training and testing. We use the training data for training the model, and we use the testing data for evaluating the model. They don’t overlap. When we evaluate our model under this division of training and testing data, our accuracy will look a bit lower compared to our first scenario, but that is more realistic of what happens in the real world. Our model evaluation needs to be a simulation of what happens when we deploy our model into the real world! Validation Note that there should actually be an intermediate phase called validation, where we fine tune the model to be better at performing, in other words to improve the accuracy of predicting dog breeds, this should also ideally use a dataset that is independent from the training and testing set. 
You may also hear people use these two terms in a different order, where testing refers to the improvement phase and validation refers to the evaluation of the general performance of the model in other contexts. Sometimes the validation set for fine tuning is also called the development set. There are clever ways of taking advantage of more of the data for validation data, such as a method called “K-Fold cross validation”, in which many training and validation data subsets are used to train and evaluate the model, providing more validation and helping to determine if performance is consistent across more of the data. This is especially beneficial if there is diversity within the dataset, to better ensure that the model performs well on some of the rarer data points (for example, a more rare dog breed) (“Training, Validation, and Test Data Sets” (2023)). Conclusions This seemingly small tweak in how data is partitioned during model training and evaluation can have a large impact on how artificial intelligence systems are evaluated. We always should have independence between training and testing data so that our model accuracy is not inflated. If we don’t have this independence of training and testing data, many real-life promotions of artificial intelligence efficacy may be exaggerated. Imagine that someone claimed that their cancer diagnostic model from a blood draw is 90% accurate. But their testing data is a subset of their training data. That would over-inflate their model accuracy, and it will be less accurate than advertised when used on new patient data. Doctors would make clinical decisions on inaccurate information and potentially create harm. Summary Here is a summary of all the tips we suggested: Disclose when you use AI tools to create content. Be aware that AI systems may behave in unexpected ways. Implement new AI solutions slowly to account for the unexpected. Test those systems and try to better understand how they work in different contexts. 
Adhere to restrictions for use of data and content created by AI systems where possible, citing the AI system itself and learning how the tool obtained permission for use can help reduce risk. Cross-check content from AI tools by using multiple AI tools and checking for consistent results over time. Check that each tool meets the privacy and security restrictions that you need. Emphasize training and education about AI and recognize that best practices will evolve as the technology evolves. Overall, we hope that these suggestions will help us all use AI tools more responsibly. We recognize, however, that this is emerging technology and more ethical issues will emerge as we continue to use these tools in new ways. AI tools can even help us to use them more responsibly when we ask the right additional questions, but remember that human review is always necessary. Staying up-to-date on the current ethical considerations will also help us all continue to use AI responsibly. References "],["consent-and-ai.html", "Consent and AI Summary", " Consent and AI Much of the world is developing data privacy regulations, as many individuals value their right to better control how others can collect and store data about them (Chaaya (2021)). While data collection concerns have been increasing for years, AI systems present new challenges (Pearce (2021); Tucker (2018)): Accountability - It is more difficult to determine who is accountable at times when separate parties may collect versus redistribute data, versus use data (Hao (2021)). Data Persistence - Since data can rapidly be redistributed, it can be difficult to remove data (Gangarapu (2022); Hao (2021)). Data reuse - Data collected for one purpose is getting reused for other purposes that may dramatically change over time. For example, data collected on food purchases for food companies could get reused by insurance companies to determine health risk based on dietary behavior. 
Data spillover - Accidental data collection due to collection for a different purpose, for example a photo of someone with other individuals in the background Trickier Consent - Consent to allow data collection is trickier as it is less clear that users understand the potential risks (Andreotta, Kirkham, and Rizzi (2022)) Easier Data Collection/Translation - AI makes it really easy to collect and record new forms of data about individuals such as transcriptions of meetings. This is making it easier for people to record people without their consent and poses privacy risks (Elefant (2023)). Consent is especially a concern for healthcare research, where potential participants need to understand the potential risks of participation. Yet, the risks of data collection continue to evolve. See https://link.springer.com/article/10.1007/s00146-021-01262-5 for deeper discussions on the topic. Example 7 Real-World Example Facial Recognition technology has been an especially debated topic. There have been many instances of unethical practices including collecting and reusing data without consent, collecting data on particularly vulnerable populations that could easily be misused, and creating tools that perpetuate bias and harm, such as a tool that was aimed at predicting if someone was likely to be a criminal. A Berlin-based artist Adam Harvey created a project and website that flags questionable datasets and discusses ethical issues around facial recognition. I wanted to uncover the uncomfortable truth that many of the photos people posted online have an afterlife as training data (Van Noorden (2020)) See these articles for more information: https://www.nature.com/articles/d41586-020-03187-3 https://learn.g2.com/ethics-of-facial-recognition https://www.technologyreview.com/2021/08/13/1031836/ai-ethics-responsible-data-stewardship/ Another important consideration is consent or awareness that you are viewing AI generated content that may be fake. 
The EU AI act has many regulations regarding this, as well as notifying and consenting individuals about when AI tools are being used on them. Example 8 The EU AI act includes regulations for many things including banning predictive policing technology and requiring consent for emotional recognition technology in work and at school. Despite many likely very useful restrictions including around facial images, a major debate has been about a lack of restriction for live facial recognition. The potential for harm across different data types and advances in technology will continue to create new ethical challenges. It has been argued that there are possible uses that should be exempt, such as live facial recognition to locate human trafficking victims. See here to learn more about this debate. Tips to encourage responsible consent practices For decision makers about AI use: Emphasize education about consent practices Stay up-to-date on current issues related to consent. Encourage usage of tools that are transparent about using responsible consent practices. Encourage users to be careful what data they upload or allow AI tools to use. For decision makers about AI development: Emphasize education of AI developers about consent considerations, guidelines, and regulations. Stay up-to-date on current issues related to consent. Be considerate of the data that you use for AI tools and how it was collected and if individuals consented to the collection and distribution of the data Be transparent with users about what consent practices were used for the data utilized by the tool. Be transparent with users about what may happen with their responses if they are being collected. Summary Overall the consent process is particularly challenging and consideration should especially be centered on the rights of the individuals who may have data collected about them. 
We hope that awareness of some of the major challenges can help you to more responsibly implement any consenting processes that may be needed for AI tools that you employ or develop. We advise that you speak with ethical and legal experts. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["idare-and-ai.html", "IDARE and AI AI is biased Examples of AI Bias Be extremely careful using AI for decisions More inclusive teams means better models Access Summary", " IDARE and AI IDARE stands for Inclusion, Diversity, Anti-Racism, and Equity. It is an acronym used by some institutions (such as the Johns Hopkins Bloomberg School of Public Health, the University of California, Davis, and the University of Pennsylvania Perelman School of Medicine) to remind people about practices to improve social justice. As we strive to use AI responsibly, keeping the major principles of IDARE in mind will be helpful to better ensure that individuals of all backgrounds and life experiences more equally benefit from advances in technology and that technology is not used to perpetuate harm. AI is biased Humans are biased, therefore data from text written by humans is often also biased, which means AI systems built on human text are trained to be biased, even those created with the best intentions (Pethig and Kroenung (2023)). To better understand your own personal bias, consider taking a test at https://implicit.harvard.edu/. It is nearly impossible to create a training dataset that is free from all possible bias and includes all possible example data, so by necessity the data used to train AI systems are generally biased in some way and lack data about people across the full spectrum of backgrounds and life experiences. 
This can lead to AI-created products that cause discrimination, abuse, or neglect for certain groups of people, such as those with certain ethnic or cultural backgrounds, genders, ages, sexuality, capabilities, religions or other group affiliations. Our goal is to create and use AI systems that are as inclusive and unbiased as possible while also keeping in mind that the system is not perfect. To learn more about how AI algorithms become biased, see https://www.criticalracedigitalstudies.com/peoplesguide. Algorithmic Fairness - The field of algorithmic fairness aims to mitigate the effects of bias in models or algorithms in AI. Importantly, issues with bias can occur in all steps of model development (J. Huang et al. (2022), Baker and Hawn (2022)). There are experts in fairness that can help you to avoid the potential harm caused by bias in your AI development. Examples of AI Bias There are many examples in which biased AI systems were used in a context with negative consequences. Amazon’s resume system was biased against women Amazon used an AI system to help filter candidates for jobs. They started using the system in 2014. In 2015, it was discovered that the system penalized resumes that included words like “women’s”, and also for graduates of two all-women’s colleges (Dastin (2018)). How did this happen? The model was trained on resumes of existing Amazon employees and most of their employees were male. Thus the training data for this system was not gender inclusive, which led to bias in the model. X-ray studies show AI surprises Algorithms used to evaluate medical images seem to be predicting the self-reported race of the individuals in the images from the images alone (Gichoya et al. (2022)). This is despite the fact that the radiologists examining those same images were not able to identify what aspect about the images helped the AI systems identify the race of the individuals. Why is this a problem? 
That information from models that evaluate medical images are being used to help suggest care. It is recognized that health disparities exist in the treatment of different racial groups. Therefore bias related to these disparities may be perpetuated by algorithms even when the AI system is trained in a manner that is “blind” to the self-reported race of the individuals. This example shows that AI systems can possibly amplify existing biases even when humans are unaware of the AI systems using those biases to make decisions. This is especially a problem, as some populations are under-diagnosed and therefore denied care or they receive poorer care because an AI system does not work as well for their population (Ricci Lara, Echeveste, and Ferrante (2022)). As an example, a study evaluating diagnosis of various diseases from chest X-ray images, found that certain groups of patients, such as females, those under 20, those who self report as Black or Hispanic, were more likely to be falsely flagged by AI system as healthy when they in fact had an issue (Seyyed-Kalantari et al. (2021)). Another example shows that processing of cardiac images from specific patient populations is much poorer using models where the training set was not diverse enough (Puyol-Anton et al. (2021)). However, there is promise for good AI systems to mitigate bias. For example, a team studying pain levels in osteoarthritis (a disease where under-served populations often have higher than expected levels of pain) found that using predictions of pain based on AI system examining images were much more accurate than predictions from radiologists examining those same images (Pierson et al. (2021)). A magazine article describing this work stated: In this case, researchers were training the models based on physician reports of pain, and since doctors are less likely to believe marginalized people when they report pain, this algorithm replicated this bias. 
When a team of computer scientists at the University of California, Berkeley, tweaked the algorithm to factor in patient pain reports rather than a physician’s, however, they eliminated that racial bias, paving the way for more equitable treatment of osteoarthritis.” (Arnold (2022)) Tips for Mitigating Bias AI tools with training data that lacks data about certain ethnic or gender groups or disabled individuals could result in responses that do not adequately consider these groups, ignore them altogether, or make false associations. For decision makers about AI use: Where possible, use tools that are transparent about what training data was used and limitations of this data and actively evaluate the data for bias including: if the dataset includes any harmful data, such as discriminatory and false associations if training data is adequately inclusive for the given needs Where possible, users of commercial AI tools should ask prompts in a manner that includes concern for equity and inclusion Always question the responses from the tool for possible bias. Obtain expert review where possible. Start slowly if rolling out the usage of new AI tools and continue to monitor used AI tools for bias. Possible Generative AI Prompt: Why did you assume that the individual was male? For decision makers about AI development: Be careful to use datasets that do not contain harmful data, such as discriminatory and false associations. Use datasets that are adequately inclusive for the given needs. Evaluate the training data and the model for biases and false associations as it is being developed instead of waiting to test the product after creation is finished. Verify that the product works properly for potential use cases from a variety of ethnic, gender, ability, socioeconomic, language, and educational backgrounds. When possible, the developers should also augment the training dataset with data from groups that are missing or underrepresented in the original training dataset. 
Potentially consider creating different models for different populations to obtain better performance for different groups of people. However, be careful to be inclusive in the creation of such models. Seek expert evaluation of your tools for bias. Be transparent about possible bias or dataset limitations to users. Be extremely careful using AI for decisions There is a common misconception that AI tools might make better decisions for humans because they are believed to not be biased like humans (Pethig and Kroenung (2023)). However, since they are built by humans and trained on human data, they are also biased. It is possible that AI systems specifically trained to avoid bias, to be inclusive, to be anti-racist, and for specific contexts may be helpful to enable a more neutral party, but that is generally not currently possible. We highly suggest caution with using AI to make or help make employment decisions about applicants or employees at this time. This includes recruitment, hiring, retention, promotions, transfers, performance monitoring, discipline, demotion, terminations, or other similar decisions. At a minimum, humans should be involved in testing the AI system, evaluating the results of the AI system, and monitoring the system’s behavior over time. Experts in algorithmic fairness should be consulted. More inclusive teams means better models It is vital that teams hired for the development, auditing or testing of AI tools be as inclusive as possible and should follow the current best IDARE practices for hiring standards. This will help to ensure that different perspectives and concerns are considered. Access Improving access for all individuals holds the power to make the benefits of AI and other technology a reality to everyone. However, expanding access should be done mindfully to empower others, rather than to exploit or create further vulnerability. 
The Bill and Melinda Gates Foundation has suggested principles (“The First Principles Guiding Our Work with AI” (n.d.)) for their work to expand AI access responsibly, including the following summarized here: Adhering to core values of helping all people reach their full potential Promoting co-design and inclusivity by including individuals in low-income settings to be collaborators and partners and acknowledging infrastructure limitations. Proceeding responsibly with continuous improvement in a step-wise fashion Addressing Privacy and security concerns by regularly performing assessments and ensuring compliance with relevant regulations and laws, as well as careful consent practices Building equitable access - focusing not just on access distribution but on equitable ownership and maintenance and development Committing to transparency - Sharing information for public good Summary In summary we suggest you consider the following to better promote the well-being of all individuals when approaching AI: Recognize that humans are biased and AI systems created by humans are therefore biased. They typically currently enhance bias, unless mindfully engineered for specific contexts with appropriate training data. Recognize that sometimes AI works in unexpected ways and systems can be biased in ways that are not fully understood Testing, auditing, and questioning AI systems about bias can help mitigate harm Using AI for decisions at this point in time could be very harmful towards vulnerable populations. AI should not be used for any important decisions without human oversight. 
More inclusive AI teams can help us build more responsible and more useful models Enhancing access to AI tools has the potential to improve the well-being of individuals in places with other limited technology or healthcare access, however this needs to be done in a collaborative manner to avoid harm and exploitation Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["ethical-process.html", "Ethical process Ethical Use Process Ethical Development Process Summary", " Ethical process The concepts for ethical AI use are still highly debated as this is a rapidly evolving field. However, it is becoming apparent based on real-world situations that ethical consideration should occur in every stage of the process of use and development. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Ethical Use Process Here is a proposed framework for using AI more ethically. This should involve active consideration at three stages: the inception of an idea, during usage, and after usage. Reflection during inception of the idea Consider if AI is actually needed and appropriate for the potential use. Consider the possible downstream consequences of use. Consider the following questions: What could happen if the AI system worked poorly? Are tools mature enough for your specific use? Should you start smaller? Do you need a tool that is designed for sensitive data? Are the tools you are considering well-made for the job with transparency about how the tool works? Are the training sets for the tools appropriate for your use to avoid issues like bias and faulty responses? 
Reflection during use While using AI tools consider the following: Ask the tool how it is making decisions Evaluate the validity of the results Test for bias by asking bias related prompts Test if the results are consistent across time Test if the results are consistent across tools Reflection after use After using AI tools consider the following questions: How can you be transparent about how you used the tool so that others can better understand how you created content or made decisions? What might the downstream consequences be of your use, should you actually use the responses or were they not accurate enough, are there remaining concerns of bias? Ethical Development Process Here is a proposed framework for developing AI tools more responsibly. This should involve active consideration at four stages: the inception of an idea, pre-development planning, during development, and after development. Reflection during inception of the idea Consider if AI is actually needed and appropriate for the potential use. Consider the possible downstream consequences of development. Consider the following questions: What could happen if the AI system worked poorly? How might people use the tool for other unintended uses? Can you start smaller and build on your idea over time? Planning Reflections While you are planning to develop consider the following: Do you have appropriate training data to avoid issues like bias and faulty responses? Are the rights of any individuals violated by you using that data? Do you need to develop a tool that is designed for sensitive data? How might you protect that data? How large does your data really need to be - how can you avoid using unnecessary resources to train your model? 
Development Reflection While actively developing an AI tool, consider the following: Make design decisions based on best practices for avoiding bias Make design decisions based on best practices for protecting data and securing the system Consider how interpretable the results might be given the methods you are trying Test the tool as you develop for bias, toxic or harmful responses, inaccuracy, or inconsistency Can you design your tool in a way that supports transparency, perhaps generate logs about usage for users Post-development Reflection Consider the following after developing an AI tool: Continual auditing is needed to make sure no unexpected behavior occurs, that the responses are adequately interpretable, accurate, and not harmful especially with new data, new uses, or updates Consider how others might use or be using the tool for alternative usage Deploy your tool with adequate transparency about how the tool works, how it was made, and who to contact if there are issues Summary In summary, to use and develop AI ethically, consideration for impact should occur across the entire process from the stage of forming an idea, to planning, to active use or development, and afterwards. We hope these frameworks help you to consider your AI use and development more responsibly. –> "],["introduction-to-determining-ai-needs.html", "Introduction to Determining AI Needs Motivation Target Audience Curriculum", " Introduction to Determining AI Needs Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Motivation There are an ever increasing number of options, strategies and solutions for integrating AI solutions into a project. It can feel overwhelming to understand what these options entail let alone understand how to decide what solution best fits a use case. 
In this course we aim to give individuals the basic info they need to make basic plans for integrating AI tools into their project. Target Audience The course is intended for individuals who have an AI related project in mind or think that they might need to incorporate AI into their project. They are likely the leader who is guiding others in an AI related project and not necessarily the person who will write code or carry out the technical aspects of the project. Curriculum What this course covers: What are the practical aspects of AI that need to be understood before endeavoring on an AI project? What makes an AI model good? How do you determine what kinds of custom AI solutions your project needs if any? What aspects of your resources and your project should you consider when evaluating AI strategies? What would better suit your needs: an “out of the box” AI product or building an AI model solution “from scratch”? Examples of currently existing AI solutions that may suit an individual’s AI needs. What this course does NOT cover: This is NOT a comprehensive survey of the AI tools and products in existence. Even if it was comprehensive at this time, there are new tools and developments constantly arriving. We merely give examples of solutions that show a possible AI solution. There may be competitors or similar solutions out there that would even better fit a project’s needs. This does NOT cover in depth aspects of algorithms, statistics or mathematics behind AI algorithms – these are numerous and not always necessary to understand in fine detail for making decisions about projects. This does NOT cover how to complete or write code for an AI project. This is not a tutorial for building an AI tool. Instead we merely give strategies you could employ but we do not give details on how you might employ them. There are too many ways that AI tools may be built – this is outside the scope of this course. 
–> "],["what-are-the-components-of-ai.html", "What are the components of AI? Learning objectives: Intro What makes an AI model accurate? What makes an AI model efficient? Putting it together", " What are the components of AI? Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Learning objectives: Understand what makes a good AI model Describe what makes a model accurate Understand fundamentals about what makes AI models computationally efficient Describe components of LLMs and other AI models and how training data is critical to their accuracy Intro What makes the AI chatbots’ performance today so vastly improved from previous chatbots? Like those that resembled office supplies and helped us write documents? In this chapter we’ll discuss some generalities of how AI works and what makes an AI tool good. A good AI model is accurate – you need it to give answers that are correct or at least useful. They are also computationally efficient because we need them to give the answer back in a reasonable amount of time. We also don’t want to spend tons of money on the computation it takes for the chatbot to work. What makes an AI model accurate? Let’s talk about the basics of what makes an AI model accurate. In order to understand this, we need to discuss some principles behind Machine learning. Picture you were teaching someone (like an AI model) to identify apples from bananas. The training data you might give them would be a series of apples and bananas and you would label which were bananas versus apples. You could then test the model’s ability to identify apples and bananas based on this training by giving them a fruit to identify. Assuming the fruit you gave them is reasonably identifiable from their training, they should accurately identify an apple. 
However, if the test you give the model is outside the kind of data they were trained on, they might not do well with it. For example, if you didn’t provide any green apples and then you test the model with a green apple, it may or may not succeed. To address this gap in the model’s knowledge, you might add supplementary training data and retrain it so that it understands that apples could also be green. This added training data may help with the identification of green apples, but if the model is given something similar to an apple that is not one – say a pear – it may incorrectly identify the pear as an apple if it hasn’t ALSO been trained on pears. This may feel silly to you – why couldn’t it identify a pear – but this is because you are a really well trained AI. (Actually just the I, you presumably aren’t artificial). You’ve seen lots of fruit in your life – you’ve collected a lot of training data on this task and have no problem identifying a pear from an apple. But we could throw you off too. When you look at this image of a hybrid apple-banana, if AI models could feel, this is how they would feel. What makes an AI model efficient? Let’s talk about the basics of what makes an AI model efficient. In order to understand this, we need to discuss some more principles behind machine learning. Let’s return to apples. With the above image, you don’t need much time to look at that picture and know that that is an apple. You don’t have to think about this for very long. With the above image, you don’t need much time to look at that picture and know that that is an apple. You don’t have to think about this for very long. You didn’t take in one piece of information at a time. This type of information processing is what neural networks are based on. Neural networks are when computers mimic how brains work to process information. Think about how you’d read the following paragraph: Did you read each word, in order from start to end? OR Did you pick out keywords by skimming and getting the gist? 
Maybe later going back to pick up context you missed? The old way AI models worked is that they would read sequentially – from start to finish. And as you may sense, that is a slower way to read. Alternatively, the new algorithms often use Attention mechanisms. These algorithms work analogously to skimming the input text. However, you could also probably sense that just because the new attention mechanisms are faster doesn’t mean that they are more accurate for all uses – by skimming you sometimes can miss important information. Regardless of that, let’s walk through more about this analogy to get a sense of how attention mechanisms can work. First we might highlight keywords in this paragraph. Meanwhile, the words and phrases that are processed would be chunked into units called “tokens”; the most important tokens are the ones we would focus on first with the attention mechanisms we referred to. When we connect the relationships between these words we might already start pulling out some of the meaning of this paragraph. Grabbing these relational words will help us piece together more meaning. Lastly we might pull out some contextual information from the other words we left behind. Let’s hear it straight from an AI model. We asked Bard to tell us what phrases it would pull out as keywords with attention mechanisms if we gave it this paragraph. Without these recent advancements in attention mechanism algorithms, the large language models that we see today would not be possible. It’s these computationally efficient mechanisms that have allowed large language models to be possible in addition to the physical hardware improvements in computing. Putting it together In summary, a good AI model is accurate – this is largely determined by its training data being high quality, relevant and properly processed. A good AI tool is also computationally efficient. We need to use algorithms that can efficiently and properly process data. 
Let’s talk about the process of an AI query in a general sense. If we give input like an image of an apple, the AI tool will observe that input. It will use its prior experience of training data to digest that input. It will then formulate a response to return to us to tell us its conclusion. If it was trained properly, its returned response to us will be that it is indeed an apple. We can then visualize a “machine learning machine” to describe AI. AI models can take a lot of different forms and functions and this visual is merely a tool to understand generalities about components of AI. It is not meant to be a detailed representation of any given AI model. But we can discuss AI tools in terms of their: input – what is the user of the tool providing? processing (including algorithms) – what are we going to do with that input? training data - how was the model trained? what information was it trained on? output - what are we returning to the user of this AI tool? Each of these components can get very complicated very quickly. Although we won’t go through the details of these in this course, we will discuss practical aspects of these in terms of customization for AI needs. Large language models are one popular type of AI tool. So we can talk about the components of these models in the context of this visual. Large language models are one popular type of AI tool. So we can talk about the components of these models in the context of this visual. Tokens are units of a language (these might be words or phrases). Transformers are what organize tokens to find the meaning/context. Meanwhile, to do this processing, tokens are coded as Embeddings; these are numerical representations of tokens. Encoders are what process input text from a user. Meanwhile Decoders generate output text that is sent back to the user. In summary: One more important point about AI models. Their training and training data are critical. 
You have likely seen and heard about many biased things that large language models have said. This is because the language they were trained on – the language of human beings in our society – was also very biased. To summarize, AI models can only be as good as their training. So garbage training data in means biased garbage as output. –> "],["determining-your-ai-needs.html", "Determining your AI needs Learning objectives: Intro Generalized Custom AI Use Cases Customized Security Customized Interface The Whole Picture Example project strategies Conclusion", " Determining your AI needs Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Learning objectives: Establish your AI project goals Detail how these goals are not currently accomplished by an existing AI tool Identify what kinds of customizations your AI project requires. Evaluate the resources and staffing needs you will have for this project. Intro A project that is ill defined is doomed to fail or, worse, be a chronic headache and source of stress before it fails. In this chapter we will describe the questions and considerations you should contemplate while planning for an AI project. The first of such considerations is basic: What are your goals and use cases and how are these use cases something that is not currently achievable with other existing products? Please take a moment now to jot down answers to the above questions for your project’s goals. Let’s return to our oversimplified machine learning machine to discuss our possible case categories. Recall that we’ve described AI tools as having the following: input – what is the user of the tool providing? processing (including algorithms) – what are we going to do with that input? training data - how was the model trained? what information was it trained on? 
output - what are we returning to the user of this AI tool? Please take a moment now to jot down the answers of what these items are for your impending AI tool project. Generalized Custom AI Use Cases Here we will introduce three bins of AI tool customization needs that we will discuss for the rest of the course. Keep in mind that these customization categories are for the purposes of discussion and not necessarily google-able terms. Note that these categories of AI customization needs are almost never mutually exclusive. It is possible (and probable) that your project may have multiple or all of these needs. The more needs you have the more complicated your project will likely be. So carefully consider what is truly needed for your project. However, these are increasingly doable needs to address. There are a growing number of helpful communities and developers who are experts in customizing interfaces, security, parameters, algorithms, data handling and more for AI tools. Customized Knowledge Perhaps the most common AI need is customized knowledge. This means that existing AI tools are not properly trained for the use case. Perhaps the input is domain specific and the training data or training methods have not adequately prepared existing models to provide useful output. Perhaps the output of existing AI models is incorrect, not useful or even harmful. This means some better training is needed in order to meet your AI needs. Customized Security Many fields have data that could benefit from AI tools but may be dealing with data that is private and needs protection. It is highly dangerous and probably illegal in many cases to share protected data with commercial AI platforms. So customized security solutions for AI tools are not an uncommon use case. This doesn’t mean protected data can’t be used with AI tools, but it does mean the AI solutions involved with projects with protected data need to be very carefully planned and constructed. 
And respective experts should be consulted about these solutions to make sure patients’ or customers’ data is being kept safe! Customized Interface Perhaps your project could benefit from the power of AI but you need to do this automatically or you need your users to access AI tools from a customized interface. This may be the most straightforward of the AI needs. An increasing number of AI tools have APIs available that can be used underneath the hood of your AI project. The upcoming chapters will discuss each of these customized AI needs and examples of existing options in more detail. Generalized strategies for these needs Take a moment to categorize which of these AI needs are the largest priorities for your AI project. Note that the more customized needs you have, the more work will be required of your team. And if you have a lot of custom AI needs but don’t have a large amount of technical expertise on your team, you will need to acquire that expertise. Lastly, if none of the above describe your customized needs, you may want to consider whether you truly need a custom solution! It could be that commercially available AI platforms will fit your needs OR you may NOT need AI as a part of your project at all! Don’t let the glitter of AI commit you and your team to a project that is ill defined! Carefully consider what the project truly needs. Which of these needs are non-negotiable versus “nice to have”? Note that if you are working with protected data, protecting this data is never negotiable, but other customized needs may be. Below is a very general breakdown of what types of solutions will likely be a part of your AI project based on what needs you’ve identified. For each type of need there is often a continuum of solutions that require less to more investment that we will discuss examples of in the upcoming chapters. For customized knowledge needs, you will likely be needing to train a model for your domain specific knowledge. 
For customized security needs, you may need to deploy an AI tool on a secure server or use some other type of security layering tool. For customized interface needs, you may need to use an API of an existing AI tool or use prepackaged AI tools that you can embed in your website/app. The Whole Picture As with most management decisions, it’s never as simple as deciding what the project needs; it’s also necessary to evaluate what expertise, resources, and time you have available to you and your team. You need to evaluate: 1. The technical expertise you have available to you. 2. Your funding situation. 3. The deadlines by which this AI tool needs to be operational. Technical expertise needs What technical expertise do you have available on your team? If you do not have the expertise needed for your strategy, will you be able to use funds to hire someone who does? Can you involve a collaborator who has a team with complementary technical expertise to what your team provides? You also need to consider possible staff turnover if you are in an academic institution or other system where this is expected. Staff turnovers will make software development projects take longer even if the knowledge transfer between staff is optimized. The more customization needs your project has, the more technical expertise support you will need on your team. Lone developer situations are not ideal; team work is better for development. In this table we describe what kinds of technical expertise you will likely need on your team based on what kinds of customization AI needs your project entails. Keep in mind you can likely minimize these staffing needs if you pay for products that are prepackaged. Prepackaged products (which we will discuss in future chapters) generally require less expertise but will not allow you the same freedom for more granular customization. For knowledge needs, you will likely require a team who is comfortable with data handling techniques. 
It will also be ideal if they have a certain knowledge of machine learning algorithms. For security needs, it’s likely you will need someone comfortable with back end development and secure computing. Depending on your strategies with this need, it would also be good if you have a front end developer’s help. For interface needs, you’ll likely need a front end developer, as well as someone who is comfortable with using APIs, which also means potentially a back end developer. Funding needs Funding needs for AI projects are not necessarily straightforward. There are a number of costs you will need to consider. Two major categories of costs include computing and staffing. Computing costs AI projects can be costly. And this is true whether you use a “prepackaged” AI solution or build one from scratch. It is a good idea to estimate your computing costs before you begin your project. How big are the data the users would be inputting? How much would your AI tool cost per query (on average)? How many queries might a user submit? Given the answers to the above, how many users would you be able to accommodate for a given day/month/year? – expect the best/worst case scenario of your tool being massively popular! Will users be paying for this service? Will the rate at which they pay cover your computing and staffing costs? Whether you build “from scratch” or borrow commercial AI tools, you will likely not be able to avoid computing costs. Keep in mind that for certain levels of usage it may not actually be more cost effective to run your own computing infrastructure. In this computing cost analysis graph from La Javaness R&D, they demonstrated how after a certain level of usage it is actually more cost effective to outsource infrastructure to ChatGPT’s API instead of building their own model and hosting it themselves. Staffing costs Custom deployments will require more technical expertise on hand as we discussed in the previous section – think salary costs. 
You will need to estimate whether it is more cost efficient for you to have in-house developers work on this or to borrow commercial computing infrastructure. It’s not just about developers. Ideally you would also have: A user experience designer to help you make sure the AI tool you build is actually useable by human beings! A project manager that will help everyone save time and meet deadlines Administration to actually help you hire the individuals you need, negotiate data use agreements, and all the other behind-the-scenes paperwork necessary to keep the ship sailing smoothly. Time needs Time is a resource. For the purposes of your AI tool project goals, you should assess how much time you have. A key factor in determining how you will meet your AI strategy needs is how quickly you need these AI needs to be met. How quickly does this need to be ready? And what is determining that deadline? Can these deadlines be pushed? How long does this AI tool need to be maintained? Note that more customized deployments will require more development time as compared to “prepackaged” AI tools. If you rush development, technical debt will be incurred. Technical debt will need to be paid at some point for this project to be sustainable. Example project strategies Up until this point we’ve been discussing strategies in very vague terms. To bring this discussion to specifics, we will discuss some example AI tool project strategies you may employ based on what combination of customization AI needs you have. These example project strategies are in the order of least to most resource and time investment. In the left most column is described what kinds of customizations are able to be made given the described strategy. In the example column we have links to resources and platforms that would be a central point or product for this strategy. The technical expertise column describes vaguely how much technical expertise in house you would need to deploy the example strategy. 
The funding column describes approximately the funding costs that would be associated with the strategy (but note this is highly variable given specifics of a project). The time column describes how long it would take to deploy this solution. Cogniflow example Cogniflow is an example of an AI tool that meets customized interface needs. It is a service that does not require code but has prepackaged AI tool solutions like chatbots and receipt digesters that can be readily deployed to a website. It is a subscription service but does not take much time to set up or maintain. This would not allow for much customization but it is a ready to go solution that would not necessitate hiring more staff. OctoML is a similar type of premade machine learning tool that is ready for usage in your own tool. But it does allow for more customization and model training than Cogniflow. They have premade training models that are appropriate for a lot of common use cases. PrivateAI PrivateAI is an example of an AI tool that meets customized security needs but not really other customizations. This service has security layers that allow you to use other commercial AI platforms with PII and PHI. It is HIPAA compliant and is a pay-by-use service. It also would not take much time to use and would not require additional time to use. Of course, due to the importance of keeping protected data protected, it should be confirmed that PrivateAI is an acceptable use based on any legal agreements. ChatGPT API ChatGPT’s API services are an example of an AI tool that meets customized interface and knowledge needs. Using an API would allow you to use the power of ChatGPT but from underneath the hood of a custom made app or website. Additionally ChatGPT’s API does allow for training models which means you could make it domain specific. Using an API would require more technical expertise than the previous two example strategies, but would not require building from scratch. 
This strategy can be very customizable without building entirely from the ground up. It does, of course, involve paying ChatGPT for computing costs, so that as well as the staffing needs should be considered when employing this strategy. Hugging Face Hugging Face is a community and repository of open source AI and machine learning models of all kinds. The resources available on Hugging Face would allow you to customize an AI tool to meet all the needs you might have. The open source nature of the AI models, datasets, and examples available on Hugging Face means that this is not completely from scratch either but would require more technical expertise to utilize the resources here. The tutorials and resources on Hugging Face would allow you to control and build an AI model that fits every need you might have. But remember computing and staffing are always costs, hence why we have not said that this is necessarily a less expensive strategy. Depending on the size and technical expertise of your team it will likely take more time than the other strategies. Conclusion In the upcoming chapters we will discuss the ins and outs of customized AI needs and propose other strategies and considerations you will need to grapple with. –> "],["customized-knowledge-for-ai.html", "Customized Knowledge for AI Learning objectives: Intro Summary of possible strategies Example strategies for Fine tuning", " Customized Knowledge for AI Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. 
Learning objectives: Understand the motivation behind customized knowledge needs for AI tools Discuss a variety of low to high investment strategies for meeting customized knowledge needs Define and be able to contrast the differences between prompt engineering, prompt tuning, fine tuning, and training a model from scratch. Intro Customized knowledge needs are perhaps the most common AI tool need. And intuitively many can understand this. Just as you wouldn’t ask your primary physician to fix your motorbike, neither should you depend on an AI model for something it isn’t trained for. When we are discussing customized knowledge needs, we are describing that existing AI models are not accurately addressing the needs we have. The output from the AI models is incorrect or not useful. Summary of possible strategies If the goal of customized knowledge is to get better output from an AI model, there are multiple strategies we can employ to achieve this goal. We will discuss them in order from lowest to highest investment. It’s generally best to see if the low investment strategies can meet your needs before turning to the higher investment strategies. In summary, we will cover 4 different strategies for obtaining better output from an AI model. prompt engineering is when the user asks a better question as opposed to retraining the model. prompt tuning is when we use an iterative prompt and feedback strategy to make the model work better. It is a lower investment attempt than fine tuning. fine tuning is when we give additional training to the model to have it perform better. So in our opening example, fine tuning is like sending the primary care physician to a fellowship to learn an additional skill set. training from scratch is when we quite literally build a whole new model from the beginning. For most use cases this is not necessary and it is prohibitively expensive. Prompt engineering Sometimes it’s not the model that needs training, it’s the user. 
AI models, just like humans, are not mind readers, and just as we all learned how to google, we also need to learn how to engineer prompts. Best practices for prompt engineering according to Google Know the model’s strengths and weaknesses Be as specific as possible Utilize contextual prompts Provide AI models with examples Experiment with prompts and personas Try chain-of-thought prompting In addition to prompt engineering it’s also vital to note that different AI models are trained on different things. So if one is not giving you the output you need, try another! For LLMs, you can use https://poe.com/ or https://gpt.h2o.ai/ to test out prompts on multiple AI platforms side by side. Prompt tuning or “P-tuning” Because prompt tuning sounds a lot like prompt engineering it would be easy to think these strategies are the same, but they are not. Prompt tuning is a lower stakes type of tuning where you use your prompts to help train an LLM. It’s more efficient than fine tuning. But it may or may not address your customization needs. Basically you can think of prompt tuning like giving the LLM more context and instructions around what you are trying to receive back from it. A good analogy from this IBM article is that prompt tuning is like crossword puzzle clues for the LLM. It guides the model toward the right answer. You can test out prompt tuning without doing software engineering by trying out the gpt.h2o platform Fine Tuning First some context around fine tuning. Let’s make a hiring analog. If you needed someone to fulfill a specialized education job, you wouldn’t train a baby who has almost no knowledge as a starting point. This would be unnecessarily time costly and inefficient. Instead, you find a person who has a lot of the training you need and then fine-tune their skills. ChatGPT cost ~$100 million to create Training models from scratch requires an insane amount of data and computing costs It’s almost never where you will want to start. 
So instead we will use the strategy of fine tuning. We aren’t going to create a model from scratch, instead we’re going to find one that has the training that most closely overlaps with our needs but we will provide it with additional training for our specific needs. But fine tuning also might cost money, so before we jump to this strategy we need to check one more time whether we’ve surveyed the available models for their fitness for our project’s needs. Are you sure no other model works? If you’ve only tried ChatGPT, go try other AI platforms. If an LLM is what you are looking for you can read our paper for a summary. Find a base model to start with For us to fine tune a model, we’ll first need to identify the base model that gets closest to what we are looking for. When looking for a base model we want to consider at least these items concurrently: Which is trained on data most similar to your application? Which models have performed the best based on your prior testing? No need to unnecessarily increase our computing costs, try to find the smallest one that performs the best. Note that bigger doesn’t mean it performs better – think jack of all trades master of none. You want a model that isn’t too general. Speaking of the size of models, here is a visual demonstrating the sizes of a lot of the LLMs in existence as of March 2023. It also might be worth considering how these models are related. Perhaps an earlier, smaller version would be easier for you to train than using the latest, biggest large language model that doesn’t contain better information for your purposes. Here are places you can learn about AI models that are out there: This repository has a nice summary of a lot of currently available open source LLMs. Practical Guide to LLMs HuggingFace has all of the AI models - multimodal and more that we could want Lastly, you should consider how you will evaluate the AI model’s performance. Where did the existing models you tried fall short? 
What information do you think would help close the knowledge gap of the existing models to meet your needs? Do you have the data you might be able to fine tune a model to help it perform better? How much cleaning will this data need? Is this data unprotected and freely able to be shared or submitted to an open source repository? We’ll discuss strategies for evaluation and data privacy in the upcoming chapters. But now is a good time to keep this in mind. Example strategies for Fine tuning Just because you may have identified you require fine tuning for an AI project doesn’t necessarily mean you will need a lot of technical expertise. There are some solutions like MonsterAPI and H2o that allow for fine tuning without code. These might be good platforms to explore either as a way to meet needs or to experiment to determine a larger strategy. As described in the previous chapter, Hugging Face also has many tutorials on how to fine tune. This strategy would involve more code, frameworks, and software development. If you do decide to build using open source models from Hugging Face or elsewhere you should consider these stages for your project timelines: In this course we have already discuss defining the use case and selecting an existing model and adapting that model. But in the upcoming chapters we will discuss deployment and evaluation of models. –> "],["customized-security-for-ai.html", "Customized Security for AI Learning objectives: Intro Data security basics Secure AI solutions for protected data Data obscuring techniques Example Security Customization strategies Always double, triple, quadruple, check", " Customized Security for AI Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. 
Learning objectives: Understand the motivation behind customized security needs for AI tools Discuss a variety of low to high investment strategies for meeting customized security needs Define and be able to contrast the differences between secure AI services, deidentifying data, and deploying existing models on secure computing resources. Intro Customized security needs are perhaps one of the largest barriers keeping individuals in certain fields from using AI tools to their full potential. There are many legitimate reasons commercial AI tools cannot and should not be used with protected data. Commercial AI products do not have data use agreements. They do not have to tell you what they do with your data. And if you work with protected data types that generally means you can’t use them. Not all data types are safe to share for a variety of reasons. To protect patients or customers, Personal Identifiable Information (PII) Protected Health Information (PHI) cannot be used with online AI services or shared with others. Controlled Unclassified Information is another type of protected data that may be related to national security matters. But protected data projects could highly benefit from AI! AI analysis tools as helpful diagnostic aids for use with health data, imaging data, genomic data. AI chatbots as an aid for financial or health guidance for patients or customers. AI to help detect when protected data has been leaked. Protected data might be useful as training data for a problem and/or patients or customers may want to input data into a secure AI tool. Protected data needs should always be taken seriously! Before employing any AI solutions that involve protected data consult your legal experts and IRB! Data security basics Before we dive into AI related solutions, it’s a good idea that we remind ourselves of some of the best practices for data privacy Fewest individuals have access to the data as possible Least privileges as needed to complete a task. 
This is known as the “principle of least privilege”. Individuals who do have access should have to provide authentication to make sure only authorized individuals can see the data. Data use agreements need to be used when individuals need to be added to the authorized list Secure AI solutions for protected data Solutions exist for AI tools for protected data – some require more careful planning, thought, and expertise. Here we have ordered these example strategies in order of least to most investment. Whenever possible consult data security and legal experts to be sure that you are not exposing patients or customers’ data and risking their privacy or finances. Use AI services that keep data private – some AI tools have specialized tools that allow you to keep data private. Be sure to carefully read their terms of use to gain an understanding of how they keep the data secure. Consult security experts. Obscure protected data types by hand and use AI models. In some cases this is not possible to do and still have meaningful data. And, care must be taken to make sure that data is thoroughly and properly secured. Deploy existing model on secure servers. This takes the most technical expertise to carry out. Data obscuring techniques Whether you have an AI service perform this or you do it yourself (or both) there are multiple strategies for obscuring data. It is often not a bad idea to employ multiple safety nets to keep data safe. In summary, here’s just a few of the techniques that can help make data sharing HIPAA compliant. Data Aggregation - summarize values to a higher level of grouping Data Masking - Replace data with symbols Data Anonymization - Replace data with randomized, fake data. Data Redaction - Remove the sensitive data To further understand what this looks like, here’s an example of how these techniques might look with a toy dataset. In this toy example we will illustrate roughly how a given technique may obscure the original data in the top row. 
This can give you a sense that some types of data are better for certain types of obscuring methods than others. But this also depends on what your goals for your AI project are. Keep in mind that data anonymization may be more difficult with smaller datasets because of a concept called k-anonymity. This principle means that you need to make sure that k number of individuals share the same attributes so that it is nearly impossible to identify any specific subject. The strategy you choose should definitely include these two questions: What protected items are included in your data Your goals with said data with AI – what is the minimum amount of information you could include in the AI model or input in order to achieve the desired goals? Example Security Customization strategies In the table below we show three examples illustrating example strategies for using AI tools with protected data. These examples are in the order of least to most amount of technical expertise needed to implement. PrivateAI PrivateAI is a platform that allows you to use various AI models with private data. It works by detecting and redacting information that is likely protected like PII and PHI. It also has containerization options that allow you to run AI but not on their or others’ servers. It requires the least technical expertise to implement, but care must be taken to make sure that it will properly deal with your project’s particular type of protected data. deidentify deidentify is a Python package that can assist with deidentifying medical records using natural language processing. This is illustrative of one way you might attempt to deidentify data before using it with AI tools. Care must be taken to make sure that the deidentification process is thorough. You may also want to couple this with other tools that can detect PII or PHI data before you submit to an AI tool. 
As always you will want to make sure that it properly handles your project’s data and that before you submit the data you’ve deidentified you have other reputable sources double or triple check that no protected information is being leaked. AWS servers + HuggingFace Amazon web services (and its competitors) generally offer HIPAA compliant computing solutions. Whether you use this service, an institutional cluster, or some other server is a decision you will have to make on a case by case basis. But regardless, this is the most technically involved solution. This is most likely the strategy you’d need to employ if security is not the only customization you need. In this instance you would borrow a model and set up from HuggingFace and build your AI tool more or less from scratch (but don’t build the model from scratch) Always double, triple, quadruple, check As opposed to other types of AI customizations, the strategies we’ve discussed in this chapter are the most imperative that you get cleared through the proper channels before deploying (if you do this incorrectly it may be illegal). It is a matter of privacy and safety for patients/customers that you get this right. So it makes sense to check with your in-house experts like institutional review boards and data security experts! –> "],["customized-interfaces-for-ai.html", "Customized Interfaces for AI Learning objectives: Intro General strategies for custom interfaces Examples of AI customized interface strategies Premade AI tools AI tool APIs Custom builds", " Customized Interfaces for AI Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. 
Learning objectives: Understand the motivation behind customized interfaces needs for AI tools Discuss a variety of low to high investment strategies for meeting customized interface needs Define and be able to contrast the differences between graphical user interfaces and command line interfaces. Intro Sometimes you need the power of AI underneath the hood of your own website/app. There are multiple strategies you can employ to achieve this. In this scenario, you need to have your website or app that has its own interface. Interfaces are how people will use your app. There are two main interface types that are most common: GUIs and Command line - GUI - stands for “Graphical user interface” – it’s the most widely used. Here users point and click buttons to tell a computer and its software what they’d like to do. - Command line - generally for software that is for individuals with more technical expertise and comfort with programming. In this interface, users type commands in order to perform tasks In this scenario we could maintain all of the same machinery but merely have a different platform where our users arrive. General strategies for custom interfaces There are multiple strategies for empowering your website or app with an AI model or tool. These are arranged in order from lowest to highest investment. Embed premade AI tools in your app - There are prepackaged solutions for standard AI tools that you might want for your website. These are subscription services but require minimal expertise to employ. Use an API (Application programming interface) underneath the hood of your app – an increasing number of LLMs and other AI tools have APIs available. APIs allow one to access a website or tool programmatically. This means that you can build your tool in such a way that underneath the hood it is powered by an AI tool. 
Deploy existing model in your own app - this is the most technically intensive solution but would be necessary if you require other types of customizations for your AI tool. Usability experts are going to be really helpful for carrying out this kind of need. Interface designs can make or break a tool’s usability and hence popularity! Examples of AI customized interface strategies Premade AI tools Some services like Cogniflow and OctoML offer prepackaged AI services like chatbots that you can embed in your website or tool. This has the advantage of requiring minimal maintenance and software development knowledge. You generally don’t always have the ability to highly customize these options. OctoML also allows for customization and fine tuning of models should you need customized knowledge as well. OctoML is a pay for what you use service whereas Cogniflow is a subscription. AI tool APIs Pre-packaged tools may only have certain options… In contrast, APIs can be very powerful and allow you to incorporate all the power of an AI tool into your website/app. They also free your team from having to do as much back end development. Although not all AI tools have API access, an increasing number of them are developing this as an option. Currently ChatGPT’s API is the most well developed for LLM (it appears at the time of writing this) but of course it requires a higher cost subscription plan. Bard may have a beta version of their API being further developed and released. Other types of AI models (not LLMs) often have API access as well like Google Cloud’s speech to text API. Custom builds If you need more than a custom interface but also custom knowledge, security, or handling, you will likely need to build custom AI solutions – again this requires more staff expertise In the next chapter we will discuss custom AI builds. 
–> "],["evaluating-your-customized-ai-tool.html", "Evaluating your customized AI tool Learning objectives: Intro Evaluating Accuracy of an AI model Evaluating Computational Efficiency of an AI model Evaluating Usability of an AI model", " Evaluating your customized AI tool Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Learning objectives: Understand the motivation behind evaluating your customized AI tool Define your own goals for evaluating the accuracy, computational efficiency, and usability of your AI tool Recognize metrics that could be used for evaluation of accuracy, computational efficiency, and usability. Intro Evaluating a software tool is critical. This is for multiple reasons that feed into each other. Evaluating your AI tool will help identify areas for improvement Evaluating your AI tool will demonstrate value to funders so you can actually make those improvements It’s important to keep the pulse on your project as it is developing. Ideally, you should be monitoring your AI’s performance when it comes to bias and performance throughout the project. But once you have a stable AI tool, it is an especially good time to gather more evaluations. As a reminder, generally a good AI tool is accurate in that it gives output that is useful. It is also computationally efficient in that we won’t be able to actually deploy the tool if it is computationally costly or takes too long to run a query. But for the purposes of evaluation, we’re going to add one more point of evaluation which is a good AI tool is usable. Even if you do not have “users” in the traditional sense; you are designing your tool only for within your team or organization, you will still need it to be functionally usable by the individuals you have intended to use it. 
Otherwise the fact that it is accurate and computationally efficient will be irrelevant if no one can experience that accuracy and efficiency. Evaluating Accuracy of an AI model How you evaluate the accuracy of your AI model will be highly dependent on what kind of AI model you have – text to speech, text to image, large language model chatbot, a classifier, etc. This will determine what kinds of “ground truth” you have available. For a speech to text model for example, what was the speaker actually saying? What percentage of the words did the AI tool translate correctly to text? Secondly your evaluation strategies will depend on what your goals are – how do you define success for your AI tool? What were your original goals for this AI tool? Is it meeting those goals? LLM chatbots can be a bit tricky to evaluate accuracy – how do you know if the response it gave a user was what the user was looking for? But there are a number of options and groups who are working on establishing methods and standards for LLM evaluation. Some examples at this time: MOdel kNowledge relIabiliTy scORe (MONITOR) google/BIG-bench GLUE Benchmark Measuring Massive Multitask Language Understanding Evaluating Computational Efficiency of an AI model Evaluating computational efficiency is important not only for the amount of time it takes to get useful output from your AI tool, but also will influence your computing bills each month. As mentioned previously, you’ll want to strike a balance between having an efficient but also accurate AI tool. Besides being shocked by your computing bill each month, there’s more forward-thinking ways you can keep tabs on your computational efficiency. Examples of metrics you may consider collecting: Average time per job - How much time Capacity - Total jobs that can be run at once FLOPs (Floating Point Operations) - measure the computational cost or complexity of a model or calculation More about FLOPs. 
Evaluating Usability of an AI model Usability and user experience (UX) experts are highly valuable to have on staff. But whether or not you have the funds for an expert in UX to be on staff, more informal user testing is more helpful than no testing at all! Here’s a very quick overview of what a usability testing workflow might look like: Decide what features of your AI tool you’d like to get feedback on Recruit and compensate participants Write a script for usability testing - always need to emphasize that if the participant doesn’t know how to do something it is not their fault, it’s something that needs to be fixed with the tool! Watch 3 - 5 people try to do the task – often 3 is enough to illuminate a lot of problems to be fixed! Observe and take notes on what was tricky Ask participants questions! We recommend reading this great article about user testing or reading more from this Documentation and Usability course. There are many ways to obtain user feedback, including surveys and interface analytics. Some examples of metrics you may want to collect: Success rate - how many users were able to successfully complete the task? Task time - how long does it take them to do the task? Net Promoter Score (NPS) - scale of 0 - 10 summarized stat to understand what percentage of users would actively recommend your tool to others. Qualitative data and surveys - don’t underestimate the power of asking people their thoughts! "],["introduction-to-developing-ai-policy.html", "Introduction to Developing AI Policy Motivation Target Audience Curriculum Learning Objectives", " Introduction to Developing AI Policy This course is intended to equip you with the knowledge you need to develop an effective AI policy for your organization. Motivation AI tools are already changing how we work, and they will continue to do so for years. Over the next few years, we’re likely going to see AI used in ways we’ve never imagined and are not anticipating. 
This course empowers you to make informed decisions and confidently create an AI policy that matches your organizational goals. Target Audience This course is targeted toward industry and non-profit leaders and decision makers. Curriculum In this course, you’ll learn why you need an AI policy, what an AI policy might include, who can help you create and develop a policy, the state of existing AI laws, other laws and regulations that can apply to AI systems and products, and considerations for creating a strong AI policy for your organization. Learning Objectives During this course, learners will: Understand the reasons why organizations need an AI policy. Identify the key elements of a good AI policy. Describe the roles and responsibilities of different team members involved in guiding AI use. Identify key regulations outlined in the EU AI Act, including risk classifications, transparency requirements, and prohibited applications. Understand how existing industry-specific AI policies can inform your organization’s policy. Identify key legal categories relevant to AI use, including intellectual property, data privacy, and liability. Understand the limitations of relying solely on an AI policy without supporting infrastructure and training. Recognize the importance of involving diverse stakeholders from various departments in AI policy creation. Identify strategies for building flexible and adaptable AI policies, such as living documents and separate best practices guidelines. Appreciate the role of effective training in promoting policy compliance and ethical AI use. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["why-do-i-need-an-ai-policy.html", "Why do I need an AI policy?", " Why do I need an AI policy? 
Big technological shifts always trigger a period of explosive growth where the technology and what’s possible changes incredibly quickly. We’re in that stage right now with AI systems. Everyone is curious, scared, and interested in AI. Chat GPT accumulated 100 million users in 2 months, which is faster than many other major apps. The future workforce is already regularly using AI and bans will not be practical nor effective. Over 40% of university students use ChatGPT for coursework. Thirty-nine percent of prospective students say they wouldn’t consider going to a college that banned Chat GPT and other LLMs. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Each month regularly brings new opportunities and surprises, many of which we can’t anticipate and require organizations to adapt quickly. Everyone is using AI, but no one really knows how to use it properly. The rapid changes are making new capabilities feasible while also bringing to light new and unique concerns. However, adopting this new technology at the right time and in a way that minimizes mistakes and bad outcomes can make great things happen for your organization. It’s a bit like catching the tail of a rocket ship just being launched, but catching it in a way that doesn’t burn you to a crisp. New technology adoption is scary, and while caution is advisable, all-out bans are not practical. Thirty or so years ago, we had a similar technological shift with the advent of the Internet. At the time, using the Internet for common, everyday tasks was a big deal, and there was fear about how it would change how we work. Now, we have accepted the Internet as a way of life and it’s a normal experience to look things up on Google or shop on the internet. In 30 years, AI systems will be the same. 
Employees will use AI and this will make them more effective. Thoughtful AI policies can balance your employees’ use with safety and security measures. "],["elements-of-an-ai-policy.html", "Elements of an AI policy", " Elements of an AI policy A good AI policy should be a living document that evolves as your company adapts to AI use. As AI tools advance, so should the policy surrounding them. It should provide clear guidance and frameworks for developing, deploying, and using AI systems in a responsible and ethical manner. Having a policy in place that is well communicated can provide an extra level of security for your organization and employees. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. When writing an AI policy, you might consider whether asynchronous collaboration versus synchronous collaboration is right for you. With an asynchronous approach, people write their individual sections of a document by a deadline, after which the full policy is polished and edited. With a synchronous approach, an organization might convene a set of meetings with experts over a length or time to work on the document together. There are benefits and drawbacks to both approaches, and you will know which best fits your organization’s needs. In general, a policy might have sections devoted to the following topics: Purpose and Scope. In this section, you might define what your organization’s goals and plans for AI use, as well as what types of AI systems the policy will cover. This section might also contain definitions of specific terms, like what your organization considers AI or generative AI. A purpose and scope section can ensure everyone is aligned and avoid ambiguity. Values and Principles. 
This section states how your organization’s core values and principles will guide your use and development of AI tools. Some possible principles might be fairness, transparency, accountability, safety, or privacy. Governance and Oversight. You may want to establish a clear governance strategy for overseeing AI initiatives. This includes the roles of those involved in decision-making, as well as their responsibilities. Data Management and Privacy. This section outlines data governance practices that ensure data quality, security, and responsible use in AI systems. You should make sure your guidelines are compliant with relevant data privacy regulations like GDPR, CCPA, and other industry-specific regulations. Fairness and Non-discrimination. In this section, you can lay out how you might monitor and audit AI systems for possible bias. This section can also include guidelines for developing or deploying AI in ways to avoid perpetuating or exacerbating bias or discrimination based on protected characteristics. Risk Management, Safety, and Oversight. A section like this might lay out robust testing procedures to monitor, identify, and mitigate potential risks associated with AI systems, including security vulnerabilities, safety hazards, and unintended consequences. It can also identify ways to ensure oversight and accountability for AI systems, ensuring humans remain ultimately responsible for AI-driven decisions. Education and Training. This section describes how your organization will provide training and education programs on AI systems on responsible AI development, deployment, and use. You can also detail how these training modules will be created and what topics are necessary for different groups of employees. Feedback and Review. In this section, you can establish a mechanism for regularly reviewing and updating the AI policy as technology and best practices evolve. 
You may also want to implement procedures for employees to give feedback about AI issues or concerns within your organization. "],["building-an-ai-advisory-team.html", "Building an AI advisory team", " Building an AI advisory team AI Policy is a teamwork endeavor. Experts from many different fields need to come together to bring the latest updates. As someone in charge of making sure your organization is using AI wisely and properly, staying up-to-date on the laws, regulations, and computational resources is vital. It’s also something that is really difficult to do alone. Building a team of individuals that can help you confidently navigate the evolving landscape should be one of your top priorities as a leader. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. There are multiple possible roles that you could fill, depending on your organization’s AI needs and uses. Having representation from technical, policy and social science backgrounds helps ensure a multidisciplinary, holistic approach to building and overseeing responsible AI. This list is a starting point for you when deciding what sort of roles you need for your own team and is not written in any particular order. This is not an exhaustive list of all possible experts that you can gather. You should consult with your legal counsel, board members, and other oversight staff in order to properly address your own specialized needs. Legal counsel that understands AI and the nuances of the rapidly changing laws can advise on regulations relevant to your organization. Policy and governance analysts can research and draft internal policies on transparency, auditability, harm mitigation, and appropriate AI uses. They can also advise and assist with compliance. 
Data protection officers who can aid with implementing privacy-by-design principles and handling personally identifiable information legally and securely are especially important for organizations that deal with personal data. Ethicists are experts who can provide guidance on ethical issues and review systems for potential biases, risks, and policy compliance. Trainers and educators can create and run programs aimed at keeping all employees aware of responsibilities in developing and using AI respectfully and in compliance with AI policies and regulations. Oversight committee members are experts who review research studies (both before a study begins and while it is ongoing). Their job is to make sure researchers are protecting the welfare, rights, and privacy of research subjects. Oversight committees like institutional review boards are especially important for organizations involved in any human research that uses AI. Technical experts understand how to design, build, and deploy AI models. They can also offer advice on the algorithms, data, and computational infrastructure your organization might need. They might be AI or machine learning experts, data scientists, DevOps engineers, cloud architects, or systems engineers. Information security architects are vital for identifying and mitigating the security risks associated with AI systems. They can provide advice on data privacy measures, security weak points, and incident response plans. The specific roles and their required skillsets will vary depending on the size, industry, and AI maturity of the company. Having a balanced team with both technical and strategic expertise is key to successfully implementing AI policies in your organization. Remember, effective communication and collaboration between these roles is crucial for a successful AI implementation. 
"],["considerations-for-creating-an-ai-policy.html", "Considerations for creating an AI Policy An AI policy alone is not enough Get lots of voices weighing in from the beginning Consider how to keep your guidance agile Make it easy for people to follow your policy through effective training", " Considerations for creating an AI Policy How you create an AI policy, as well as what you cover in it, is going to be highly dependent on what your organization’s needs are. Unfortunately, there is no “one size fits all” approach to AI policies, governance, and training. However, we can offer some considerations based on our experiences creating and implementing policies. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. An AI policy alone is not enough It is probably not enough to build only an AI policy. Building an AI support system that makes it possible for the people in your organization to adopt AI in safe and ethical ways is also important. An AI policy support system might include a governance plan describing how new AI use cases will be reviewed and guidance for implementation provided, a clear understanding of the necessary components of your infrastructure to plan to use AI tools on, and training materials that include specific best practices for different types of AI tools and use cases. Thinking about your AI policy as just the beginning, not the entire thing, can be a way to protect your employees, your organization, and the people you serve. Get lots of voices weighing in from the beginning AI systems are being integrated into every aspect of the work environment. Sometimes these tools are very obvious such as when an AI tool is applied to your data to do predictive work. 
Other times they are less obvious such as when they are integrated into common desktop software as co-pilots or “auto-fill”. You likely need a lot of different people with different perspectives to weigh in to get even close to what you want in terms of a comprehensive AI policy. Limiting policy and governance plan creation to just the Chief Data Officer’s office or the IT department or the legal department might make things faster, but the trade-off is that you are likely only covering a fraction of what you need. At minimum, most organizations probably need representatives from legal, compliance and governance, IT, offices of diversity, equity, inclusion, ethical review, and training. Creating a meaningful policy and getting the necessary supports put in place is easier when you have people with varied and broad expertise creating the policy from the beginning. Consider how to keep your guidance agile The speed at which AI technology is changing is fast enough that creating useful guidelines around its use is difficult. An AI policy requires you to get a diverse set of opinions together and make it cohesive and coherent, and that takes time. The last thing you want to do is create a policy that no longer applies in 3 months when AI systems have changed again. One possible approach is to think of your AI Policy as an ongoing living document as opposed to a one time effort. Another way includes creating both an AI Policy and an AI Best Practices document, where the policy changes infrequently while the best practices evolve more quickly. For example, the policy document might say something like “you should use infrastructure that matches current best practices.” This allows you to create a policy that is still useful over time as your organization learns what AI practices and infrastructure are best for it. This still requires you to communicate frequently with your employees on the state of the best practices for AI use. 
However, the best practices can be tailored to fit specific departments and change as those departments need it to do so. This also allows an organization to communicate to specific departments and employees who might be affected by an update to their best practices guidelines. However you choose to do it, systematize the process of creating your policy so that you can easily update it when necessary Make it easy for people to follow your policy through effective training Good AI policies are most effective when they are easy for people to follow. This can be particularly challenging in periods of explosive technological growth like we’re experiencing now with AI. What is possible with AI, and how to safely and ethically use AI, is changing quickly, making it a challenge for people to always know how to comply with an AI policy. This is an opportunity to make your AI governance plan include specific points about communication, training and guidance so users have regular updates from the governance group that enable them to use AI tools ethically and securely. In situations like these, one way to approach training is to focus on major points people should consider, clearly outline the steps people can take to do the right thing, and identify who people can approach when they have questions. Many people may not solidly know the answers to all questions, but the right people can help you find the answer. Training people how to loop in the proper people, and to ask for help from the very beginning, might save them stress later. "],["ai-acts-orders-and-regulations.html", "AI acts, orders, and regulations The EU AI Act Industry-specific policies", " AI acts, orders, and regulations A good AI policy should guide an organization on AI uses that adhere to the necessary laws and regulations. With generative AI still new in many fields, from medicine to law, regulations are rapidly evolving. 
A landmark provisional deal on AI regulation was reached by the European Parliament and Council on December 8, 2023, with the EU AI Act. The guidelines laid out in this document apply to AI regulation and use within the 27-member EU bloc, as well as to foreign companies that operate within the EU. It is likely the EU AI Act will serve as a model for AI laws around the globe, for both individual countries and industries. Countries outside of the EU are drafting their own laws, orders, and standards surrounding AI use, so you and your employees will need to do some research on what is and is not allowed in your local area. Always consult your legal counsel about the AI regulations that apply to you. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. The EU AI Act According to EU policymakers involved in writing the AI Act, the goal of the Act is to regulate AI in order to limit its capacity to cause harm. The political agreement covers the use of AI in biometric surveillance (such as facial recognition), as well as guidance on regulations for LLMs. The EU AI Act divides AI-based products into levels based on how much risk each product might pose to things like data privacy and protection. Higher-risk products with a greater capacity to cause harm face more stringent rules and regulations. Most current AI uses (like systems that make recommendations) will not fall into this higher-risk category. Final details are still being worked out, but we do know several important aspects of this Act. All content generated by AI must be clearly identified. Companies must also make it clear when customers are interacting with chatbots, emotion recognition systems, and models using biometric categorization. 
Developers of foundational models like GPT as well as general purpose AI systems (GPAI) must create technical documentation and detailed summaries about the training data before they can be released on the market. High-risk AI systems must undergo mandatory rights impact and mitigation assessments. Developers will also have to conduct model evaluations, assess and track possible cybersecurity risks, and report serious incidents and breaches to the European Commission. They will also have to use high-quality datasets and have better accompanying documentation. Open-source software is excluded from regulations, with some exceptions for software that is considered a high-risk system or is a prohibited application. AI software for manipulative strategies like deepfakes and automated disinformation campaigns, systems exploiting vulnerabilities, and indiscriminate scraping of facial images from the internet or security footage to create facial recognition databases are banned. Additional prohibited applications may be added later. There are exceptions to the facial scraping ban that allow law enforcement and intelligence agencies to use AI applications for facial recognition purposes. The AI Act also lays out financial penalties for companies that violate these regulations, which can be between 1.5% and 7% of a company’s global revenue. More severe violations result in greater financial penalties. This is still a provisional agreement. It must be approved by both the European Parliament and European countries before becoming law. After approval, tech companies will have two years to implement the rules, though bans on AI uses start six months after the EU AI Act is ratified. More information about the EU’s AI Act can be found in these sources. MIT Technology Review Reuters EURACTIV.com CIO Dive Industry-specific policies Some individual industries have already begun adopting policies about generative AI. 
They may also have long-standing policies in place about the use of other forms of AI, like machine learning. Some countries have also begun creating policies for specific industries and fields. When in doubt, always check with the experts within your organization about what AI policies exist for your industry. We’ll discuss some specific examples of how different industries are approaching AI regulation in the next section. "],["case-studies.html", "Case Studies Education Healthcare", " Case Studies AI regulations and policies are continuing to evolve as people adapt to the use of AI. Let’s look at some real-life examples. Education For students and educators, generative AI’s capacity in writing, problem solving, and conducting research has upended the goals and evaluations of our education system. For instance, ChatGPT 4 has been able to generate college-level essays to earn passing grades at Harvard with minimal prompting for various subjects (Yglesias (2023)). Many educational institutions reacted with various policies and adaptations; first to protect the current educational environment, then to consider adapting to generative AI’s capacity. In the first few months after ChatGPT was released, many schools and universities restricted the use of AI in education. The two largest public school systems in the United States, New York City Public Schools and Los Angeles Public Schools, banned the use of ChatGPT in any school work, declaring that any use of ChatGPT counted as plagiarism Singer (2023b). Many universities also followed with similar policies. However, educators soon realized that most students embraced generative AI despite the ban for most assignments (Terry (2023), Roberts (2023)). Furthermore, enforcement to bar AI from students, such as using AI detection software or banning AI from school networks, created disparities in students. Teachers noticed that AI detection software biased against the writings of non-native English learners (Roberts (2023)). 
Children from wealthy families could also access AI through personal smartphones or computers (Singer (2023b)). With these lessons, some educational systems started to embrace the role of AI in students’ lives and are developing less-restrictive various policies. New York City Public School and Los Angeles Public Schools quietly rolled back their ban, as did many universities (Singer (2023b)). Groups of educators have come together to give guidelines and resources on how to teach with the use of AI, such as the Mississippi AI Institute, MIT’s Daily-AI curriculum, and Gettysburg College’s Center for Creative Teaching and Learning. Each educational institution and classroom is adapting to AI differently. The Mississippi AI Institute suggested that there are some common questions to consider (Donahue (2023)): How are we inviting students to demonstrate their knowledge, and is writing the only (or the best) way to do that? For instance, some universities have encouraged the use of in-class assignments, handwritten papers, group work and oral exams (K. Huang (2023)). What are our (new) assignment goals? And (how) might generative AI help or hinder students in reaching those goals? Some educators want to use AI to help students get over early brainstorming hurdles, and want students to focus on deeper critical thinking problems (Roberts (2023)). Many educators have started to develop AI literacy and “critical computing” curricula to teach students how to use AI effectively and critically (Singer (2023a)). If we’re asking students to do something that AI can do with equal facility, is it still worth asking students to do? And if so, why? Educators will need to think about what aspects of their lesson goals will be automated in the future, and what are critical and creative skills that students need to hone in on. If we think students will use AI to circumvent learning, why would they want to do that? 
How can we create conditions that motivate students to learn for themselves? Educators have started to teach young students the limits of AI creativity and what kind of bias is embedded in AI models, which has led students to think more critically about use of AI (Singer (2023a)). What structural conditions would need to change in order for AI to empower, rather than threaten, teachers and learners? How can we create those conditions? Some teachers have started to actively learn how their students use AI, and are using AI to assist with writing their teaching curriculum (Singer (2023b)). Healthcare The health care industry is an example of an industry where the speed of technology development has led to gaps in regulation, and the US recently released an Executive Order about creating healthcare-specific AI policies. The U.S. Food and Drug Administration (FDA) regulates AI-enabled medical devices and software used in disease prevention, diagnosis, and treatment. However, there are serious concerns about the adequacy of current regulation, and many other AI-enabled technologies that may have clinical applications fall out of the scope of FDA regulation (Habib and Gross (2023); Association (2023)). Other federal agencies, such as the Health and Human Services Office of Civil Rights, have important roles in the oversight of some aspects of AI use in health care, but their authority is limited. Additionally, there are existing federal and state laws and regulations, such as the Health Insurance Portability and Accountability Act (HIPAA), that impact the use and development of AI. This patchwork landscape of federal and state authority and existing laws has led the American Medical Association (AMA) to advocate for a “whole government” approach to implement a comprehensive set of policies to ensure that “the benefits of AI in health care are maximized while potential harms are minimized” (News (2023)). 
The AMA and health care leaders have highlighted the importance of specialized expertise in the oversight and adoption of AI products in health care delivery and operations. For example, Dr. Nigam Shah and colleagues call for the medical community to take the lead in defining how LLMs are trained and developed: By not asking how the intended medical use can shape the training of LLMs and the chatbots or other applications they power, technology companies are deciding what is right for medicine (Shah, Entwistle, and Pfeffer (2023)). The medical community should actively shape the development of AI-enabled technologies by advocating for clinically-informed standards for the training of AI, and for the evaluation of the value of AI in real-world health care settings. At an institutional level, specialized clinical expertise is required to create policies that align AI adoption with standards for health care delivery. And in-depth knowledge of the U.S. health insurance system is required to understand how complexity and lack of standardization in this landscape may impact AI adoption in clinical operations (schulman2023). In summary, health care leaders and the medical community need to play an active role in the development of new AI regulations and policy. References "],["other-laws-to-consider.html", "Other laws to consider Intellectual Property Data Privacy and Information Security Liability Who can tell you about your particular legal concerns", " Other laws to consider While countries and jurisdictions are developing and passing laws that specifically deal with AI, there are also existing laws around data that should be considered when creating an AI policy. Which ones you should consider will vary based on your organization and sector, but broadly include regulations about intellectual property, data privacy and protection, and liability. This is not an exhaustive list! 
This can give you a starting point of what sorts of laws and regulations you might need to consider, but you’ll have to apply your own domain knowledge to determine the specifics for your organization. Always confirm with your legal counsel whether a particular law or regulation applies to you. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Intellectual Property There are multiple concerns around generative AI and intellectual property rights, especially with regards to copyright and fair use or fair dealing laws. Copyright is “the exclusive legal right, given to an originator or an assignee to print, publish, perform, film, or record literary, artistic, or musical material, and to authorize others to do the same” (Oxford Languages). Fair use and fair dealing are legal doctrines that allow for limited use of copyrighted material without permission under certain circumstances. While fair use and fair dealing exceptions vary from country to country, they broadly allow for nonprofit, educational, commentary or criticism, satire, and highly creative works to sample copyrighted material. There are multiple concerns around generative AI and intellectual property rights, especially with regards to copyright and fair use or fair dealing laws. In order for generative AI models to work, they must be trained on vast amounts of data. This might include images, in the case of image generators like DALL-E, Stable Diffusion, and Midjourney. It might also include human writing and speech, in the case of LLMs like ChatGPT and Bard. Information about the training data sets for these tools is limited, but they likely include text and images scraped from the internet. 
There is concern that the text and images gathered for training data included copyrighted and trademarked books, articles, photographs, and artwork. In fact, the CEO of Midjourney has previously confirmed that copyrighted images were included in the Midjourney training data without the consent of the artists. Artists and authors have brought legal action against several AI companies, claiming their rights have been violated by the inclusion of their works in the training data. Some AI companies have argued this use case is covered by fair use agreements. As of November 30, the legal situation is still being decided. There is also ongoing debate as to whether AI-generated images and text can be copyrighted. While many current copyright laws do not protect works created by machines, how these laws might apply to work that is a collaboration between humans and machine (such as art that includes some AI-generated content) is an area of active discussion. Data Privacy and Information Security An estimated 4.2 billion individuals share some form of data about themselves online. This might be information like what they’re interested in or information that can be used for identification, like their birth date or where they live, or even financial information. With vast amounts of data about us out there, privacy laws protecting the digital information of internet users are becoming increasingly common. More than 100 countries have some sort of privacy laws in place. Initial concerns around AI and information security focused on bad actors using LLMs to generate malicious code that could be used for cyberattacks. While commercially available chatbots have guardrails in place that are meant to prevent them from being used to create such code, users were able to come up with workarounds to bypass these safety checks. More recently people have begun to worry about privacy concerns related to the AI systems themselves. 
AI systems are trained on vast amounts of data, including data that is covered by existing privacy laws, and many systems also collect and store data from their users, potentially for use as additional training data. Data privacy is especially important to consider when working in fields like healthcare, biomedical research, and education, where personally identifiable data and personal health information is afforded special protections. Special consideration should also be taken when dealing with biometric data, or data involving human characteristics gathered from physical or behavioral traits that can be used to identify a single person. This might include things like fingerprints, palm prints, iris scans, facial scans, and voice recognition. DNA can also be considered biometric data when used for forensics. Liability As AI systems become more and more common in everyday life, it is inevitable that some of these systems will fail at some point. Who is liable when AI fails, especially when it fails in a catastrophic manner? The issue of whose fault it is when an AI system fails (and thus who is responsible for the damage) depends greatly on how and why it failed. Blame might lie with the user (if the AI was not being used according to instructions, or if limitations were known but ignored), the software developer (if the AI product was distributed before being tested thoroughly or before the algorithm was properly tuned), or the designer or manufacturer (if the AI design or production was inherently flawed). Who can tell you about your particular legal concerns As a general rule of thumb: when in doubt, talk to your legal counsel! They can offer you the best advice for your organization and your situation. The information in this course is ONLY meant as a starting point for you as you create AI guidelines for your organization. You can also seek guidance from your governance and compliance experts. 
"],["about-the-authors.html", "About the Authors", " About the Authors These credits are based on our course contributors table guidelines.     Credits Names Pedagogy Lead Content Instructor(s) Ava Hoffman - Course 1: Exploring AI Possibilities Carrie Wright - Course 2: Avoiding AI Harm Candace Savonen - Course 3: Determining AI Needs Elizabeth Humphries - Course 4: Developing AI Policy Project Management Elizabeth Humphries, Shasta Nicholson Content Author Christopher Lo - Avoiding AI Harm - Effective Use of Training and Testing Data, Developing AI Policy - Education case study Monica Gerber - Developing AI Policy - Healthcare case study Content Editor(s)/Reviewer(s) Sitapriya Moorthi, Jeffrey Leek, Amy Paguirigan, Jennifer Weddle, Christopher Lo Content Director(s) Jeffrey Leek , Elizabeth Humphries Content Consultants Robert McDermott, Jennifer Weddle, Adina Mueller Production Content Publisher(s) Shasta Nicholson Content Publishing Reviewer(s) Ava Hoffman, Carrie Wright, Candace Savonen,Elizabeth Humphries Technical Template Publishing Engineers Candace Savonen, Carrie Wright, Ava Hoffman Publishing Maintenance Engineer Candace Savonen Technical Publishing Stylists Carrie Wright, Ava Hoffman, Candace Savonen Package Developers (ottrpal) Candace Savonen, John Muschelli, Carrie Wright Art and Design Illustrator(s) Ava Hoffman, Candace Savonen, Carrie Wright, Elizabeth Humphries, Sitapriya Moorthi Figure Artist(s) Ava Hoffman Candace Savonen, Carrie Wright, Elizabeth Humphries, Sitapriya Moorthi Funding Funder(s) The development of this course was supported by the National Cancer Institute (NCI) under Grant UE5CA254170. 
Funding Staff Shasta Nicholson, Maleah O’Conner, Sandy Ombrek   Tools used to create this course: ## ─ Session info ─────────────────────────────────────────────────────────────── ## setting value ## version R version 4.0.2 (2020-06-22) ## os Ubuntu 20.04.5 LTS ## system x86_64, linux-gnu ## ui X11 ## language (EN) ## collate en_US.UTF-8 ## ctype en_US.UTF-8 ## tz Etc/UTC ## date 2024-02-21 ## ## ─ Packages ─────────────────────────────────────────────────────────────────── ## package * version date lib source ## assertthat 0.2.1 2019-03-21 [1] RSPM (R 4.0.5) ## bookdown 0.24 2023-03-28 [1] Github (rstudio/bookdown@88bc4ea) ## bslib 0.4.2 2022-12-16 [1] CRAN (R 4.0.2) ## cachem 1.0.7 2023-02-24 [1] CRAN (R 4.0.2) ## callr 3.5.0 2020-10-08 [1] RSPM (R 4.0.2) ## cli 3.6.1 2023-03-23 [1] CRAN (R 4.0.2) ## crayon 1.3.4 2017-09-16 [1] RSPM (R 4.0.0) ## desc 1.2.0 2018-05-01 [1] RSPM (R 4.0.3) ## devtools 2.3.2 2020-09-18 [1] RSPM (R 4.0.3) ## digest 0.6.25 2020-02-23 [1] RSPM (R 4.0.0) ## ellipsis 0.3.1 2020-05-15 [1] RSPM (R 4.0.3) ## evaluate 0.20 2023-01-17 [1] CRAN (R 4.0.2) ## fansi 0.4.1 2020-01-08 [1] RSPM (R 4.0.0) ## fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.0.2) ## fs 1.5.0 2020-07-31 [1] RSPM (R 4.0.3) ## glue 1.4.2 2020-08-27 [1] RSPM (R 4.0.5) ## hms 0.5.3 2020-01-08 [1] RSPM (R 4.0.0) ## htmltools 0.5.5 2023-03-23 [1] CRAN (R 4.0.2) ## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.0.2) ## jsonlite 1.7.1 2020-09-07 [1] RSPM (R 4.0.2) ## knitr 1.33 2023-03-28 [1] Github (yihui/knitr@a1052d1) ## lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.0.2) ## magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.0.2) ## memoise 2.0.1 2021-11-26 [1] CRAN (R 4.0.2) ## ottrpal 1.0.1 2023-03-28 [1] Github (jhudsl/ottrpal@151e412) ## pillar 1.9.0 2023-03-22 [1] CRAN (R 4.0.2) ## pkgbuild 1.1.0 2020-07-13 [1] RSPM (R 4.0.2) ## pkgconfig 2.0.3 2019-09-22 [1] RSPM (R 4.0.3) ## pkgload 1.1.0 2020-05-29 [1] RSPM (R 4.0.3) ## prettyunits 1.1.1 2020-01-24 [1] RSPM (R 4.0.3) ## processx 3.4.4 2020-09-03 [1] 
RSPM (R 4.0.2) ## ps 1.4.0 2020-10-07 [1] RSPM (R 4.0.2) ## R6 2.4.1 2019-11-12 [1] RSPM (R 4.0.0) ## readr 1.4.0 2020-10-05 [1] RSPM (R 4.0.2) ## remotes 2.2.0 2020-07-21 [1] RSPM (R 4.0.3) ## rlang 1.1.0 2023-03-14 [1] CRAN (R 4.0.2) ## rmarkdown 2.10 2023-03-28 [1] Github (rstudio/rmarkdown@02d3c25) ## rprojroot 2.0.3 2022-04-02 [1] CRAN (R 4.0.2) ## sass 0.4.5 2023-01-24 [1] CRAN (R 4.0.2) ## sessioninfo 1.1.1 2018-11-05 [1] RSPM (R 4.0.3) ## stringi 1.5.3 2020-09-09 [1] RSPM (R 4.0.3) ## stringr 1.4.0 2019-02-10 [1] RSPM (R 4.0.3) ## testthat 3.0.1 2023-03-28 [1] Github (R-lib/testthat@e99155a) ## tibble 3.2.1 2023-03-20 [1] CRAN (R 4.0.2) ## usethis 1.6.3 2020-09-17 [1] RSPM (R 4.0.2) ## utf8 1.1.4 2018-05-24 [1] RSPM (R 4.0.3) ## vctrs 0.6.1 2023-03-22 [1] CRAN (R 4.0.2) ## withr 2.3.0 2020-09-22 [1] RSPM (R 4.0.2) ## xfun 0.26 2023-03-28 [1] Github (yihui/xfun@74c2a66) ## yaml 2.2.1 2020-02-01 [1] RSPM (R 4.0.3) ## ## [1] /usr/local/lib/R/site-library ## [2] /usr/local/lib/R/library "],["references.html", "References", " References "],["404.html", "Page not found", " Page not found The page you requested cannot be found (perhaps it was moved or renamed). You may want to try searching to find the page's new location, or use the table of contents to find the page you are looking for. "]] +[["index.html", "AI for Decision Makers About this Course Specialization Sections Available course formats", " AI for Decision Makers March, 2024 About this Course This is the series of courses in Fred Hutch DaSL’s “AI for Decision Makers” specialization on Coursera. Specialization Sections Introduction Course 1: Exploring AI Possibilities Course 2: Avoiding AI Harm Course 3: Determining AI Needs Course 4: Developing AI Policy Available course formats This course is available in multiple formats which allows you to take it in the way that best suits your needs. You can take it for a certificate, which can be free or for a fee. 
The material for this course can be viewed without login requirement on this Bookdown website. This format might be most appropriate for you if you rely on screen-reader technology. This course can be taken on Coursera for certification here (but it is not available for free on Coursera). Our courses are open source, you can find the source material for this course on GitHub. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["introduction.html", "Introduction Motivation Target Audience", " Introduction Motivation How can understanding AI help you be a better leader? We think understanding AI is essential for executives. It helps today’s leaders make strategic decisions, drive innovation, enhance efficiency, and foster a culture that embraces the transformative power of these technologies. Specifically, AI proficiency can help leaders in the following ways: Strategic Decision-Making: Understanding AI and machine learning equips leaders to make informed decisions about integrating these technologies into business strategies, setting their teams up for success when working with AI. Risk Mitigation: Familiarity with AI helps leaders assess risks associated with implementing these technologies, ensuring that ethical considerations, data privacy, and potential biases are addressed to mitigate negative consequences. Leaders can also implement more informed policies for their teams. Efficiency and Experience: Leaders can explore how AI applications enhance operational efficiency, automate repetitive tasks, and assist employee learning and development, leading to increased productivity and breakthroughs. These improvements can also improve the experience of users or customers your organization serves. 
Resource Allocation: AI resources can be expensive, including in terms of computing resources, subscription services, and/or personnel time. Understanding AI enables leaders to allocate resources effectively, whether in building in-house AI capabilities, partnering with external experts, or investing in AI-driven solutions that align with the organization’s mission. Innovation Leadership: Leaders can foster a culture of innovation by understanding the transformative potential of AI. Awareness and knowledge can also enable leaders to identify opportunities for innovation, helping their teams match the rapidly evolving technological landscape. Data-Driven Decision Culture: Leaders can promote a data-driven decision-making culture within their organizations, using AI insights to inform strategic planning, understand their teams better, and improve other key business functions. Communication with Tech Teams: Executives and managers benefit from understanding AI even if they aren’t building tech, as it helps them effectively communicate with their technical teams. This can mean more effective collaboration and improved alignment between teams or departments. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Target Audience This specialization is intended for executives, decision-makers, and business leaders across industries, including executives in C-suite positions, managers, and directors. Our goal is for these learners to understand the strategic applications of AI and machine learning in driving innovation, improving operations, creating supportive working environments, and gaining an innovative edge. We also believe that learning is a life-long process. 
This specialization is targeted toward those who value continuous learning and want to stay ahead in today’s fast-paced technology landscape. "],["introduction-to-exploring-ai-possibilities.html", "Introduction to Exploring AI Possibilities Introduction Motivation Target Audience Curriculum Learning Objectives", " Introduction to Exploring AI Possibilities Introduction This course aims to help decision makers and leaders understand artificial intelligence (AI) at a strategic level. Not everyone will write an AI algorithm, and that is okay! Our rapidly evolving AI landscape means that we need executives and managers who know the essential information to make informed decisions and use AI for good. This course specifically focuses on the essentials of what AI is and what it makes possible, to better harmonize expectations and reality in the workplace. Motivation This course will help you with your understanding of AI, helping you make strategic decisions and cultivate a business environment that embraces the benefits of AI, while understanding its limitations and risks. Target Audience This course is targeted toward industry and non-profit leaders and decision makers. Curriculum In this course, we’ll learn about what Artificial intelligence is, and what it isn’t. We’ll also learn the basics of how it works, and learn about different types of AI. 
This course will cover: Framework, or definition, of AI Essential AI examples and case studies The take-home of how AI works Key definitions of types of AI and related technologies What is possible with AI Learning Objectives We will learn how to: Determine what AI is and isn’t using our three part framework: the data, algorithm, and interface Identify common technologies and whether or not they are AI Explain the essential “behind the scenes” technology of how AI works Identify possibilities for using AI while understanding its limitations Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["what-is-artificial-intelligence.html", "What Is Artificial Intelligence Specific and General Intelligence Shifting Goalposts Our AI Definition What Is and Is Not AI Summary", " What Is Artificial Intelligence The term “artificial intelligence”, or AI, often makes people envision humanoid robots. For some of us, this prompts concerns about their ability to outsmart us. The notion of robots passing tests that blur the line between human and machine, often depicted in science fiction, adds to these worries, particularly when considering the potential for AI systems to act in self-interest and make decisions independently. Specific and General Intelligence Currently, no AI system can perform all the intellectual tasks that a human can. This is an active area of research, specifically into what’s called artificial general intelligence. We aren’t there yet. Currently, artificial intelligence systems are optimized to perform a specific task well, but not for general, multi-purpose tasks. For example, the AI application for recognizing voices can not be directly applied to drive cars, and vice versa. Similarly, a language translation app could not recognize images, and vice versa. 
Shifting Goalposts Defining what AI is can be tricky because what experts consider to be AI changes frequently. John McCarthy, one of the leading early figures in AI once said, “As soon as it works, no one calls it artificial intelligence anymore”. For instance, 20 years ago, the idea of an email spam checker was new. People were surprised that an algorithm could identify junk email accurately, and called it “artificial intelligence”. Since this type of algorithm has become so common, it is no longer called “artificial intelligence”. This transition happened because we no longer think it is surprising that computers can filter spam messages. Because it is not learning something new and surprising, it is no longer considered intelligent. We often look at human intelligence the same way. For example, many years ago, only a few people knew how to use the internet. These people might have been considered extremely talented and intelligent. Now, the massive growth of online resources and social media mean that fluent internet use is almost required! Artificial General Intelligence (AGI): A type of artificial intelligence that can understand, learn, and apply knowledge across a wide range of tasks, similar to the broad cognitive abilities of a human being. It represents the aspiration for machines to have versatile intelligence rather than focusing on specific, narrow domains. Check out the following lessons to learn more. Our AI Definition At its core, AI is about problem solving (Fogel 2022). But how does it do this? How hard does the problem have to be? There are no clear answers to these questions. Going forward in this course, we define AI as having the following features: Dataset: AI needs data examples that can be used to train a statistical or machine learning model to make predictions. Algorithm: AI needs an algorithm, or a set of procedures, that can be trained based on the data examples. That way, it can take a new example and execute a human-like task. 
For instance, the algorithm learns which images feature a cat from pre-labeled images. When given a new image, it decides whether the image has a cat in it. Interface: AI needs a physical interface or software for the trained algorithm to receive a data input and execute the human-like task in the real world. For example, you might interface with a chatbot in your web browser. As an example, consider Amazon Echo’s voice control device (Wikipedia 2023a). The data set consists of customer voices talking to Amazon Echo or other devices. The algorithm predicts what a new customer voice is asking it to do. Given a human voice request, it may set a kitchen timer. Lastly, the interface is a physical device with a microphone, speaker, and computer software running the algorithm and accessing the data. It is the part that will interact with humans. What Is and Is Not AI Let’s look at a few examples. We can then decide whether or not the examples constitute AI. Smartphones The name “smartphone” implies these devices are making decisions and are powered by AI. Let’s consider our three criteria: Dataset: Smartphones do collect a lot of data. For example, they retain your text messages and collect motion tracking information. Algorithm: The smartphone as a whole does not usually get trained with this data. However, some features like virtual voice assistants and facial recognition do adapt given your data. Interface: Again, some features like voice assistants can be interacted with through the smartphone. While there are some features on smartphones that are powered by AI models, like virtual voice assistants and facial recognition, the device as a whole isn’t considered AI. Calculators Many of us use basic calculators, as you might find in Microsoft Excel, every day. AI also makes many calculations. Is it just a scaled-up calculator? Dataset: Calculators and spreadsheets can store data. Algorithm: Calculators do not generally use this data to train algorithms. 
The procedures that are performed (addition, subtraction, etc.) are almost always predefined. However, some AI-powered assistants are starting to be integrated into software like Excel and Google Sheets. Interface: Calculators do meet the criteria for an interface, whether through a physical device or software application. Traditional calculators are not considered AI, because everything they can do is predefined by people. Computer Programs Like calculators, computers follow set procedures for problem solving and computation. Everyday computers use these procedures to help automate repetitive tasks and save time. However, this isn’t generally considered AI, because the computer’s algorithms aren’t being trained with new data you supply. AI systems exhibit the ability to adapt and handle new inputs for tasks that might be more complicated. Examples of AI In the Real World As we can see from the examples above, there are many instances of technology that are incredibly useful but are not considered AI. Without getting too into the details of how they work yet, let’s list some examples of AI in the real world and their applications. Meta’s Advantage suite of tools helps advertisers produce content and target specific social media users. Google Search is using generative AI to summarize search results in an “AI-powered snapshot of key information”. Financial institutions use AI to detect fraud. For example, detecting the 1 audit risk in a database of 10 million entries. In medicine, AI can help predict Alzheimer’s risk from MRI scans. Global Plastic Watch uses satellite imagery and artificial intelligence to detect and monitor plastic waste sites globally. Summary The definition of artificial intelligence (AI) has shifted over time. We use the three part framework of data, algorithms, and interfaces to describe AI applications. You will need to consider specific technologies and whether they meet the criteria for being classified as AI using this framework. 
Adaptability and training with new data are key factors to keep in mind as we move further in the course. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["discussion-is-it-ai.html", "DISCUSSION Is It AI", " DISCUSSION Is It AI Consider the following examples. Are they examples of AI? Why or why not? Click to expand and see the answer. A smartfridge that lets you know when replacement parts are needed This is not AI. The computer in the fridge is typically programmed to look for specific signs of wear or time passing. It is not typically trained with new data. Speed cameras on the highway Speed cameras on highways typically use specialized technology and are not explicitly powered by AI. These cameras are often equipped with radar sensors for measuring vehicle speed between checkpoints. While the core functionality of speed cameras relies on sensor technology and predetermined speed thresholds, AI elements may be incorporated in some advanced systems. For example, AI could be used to enhance image recognition accuracy for reading license plates. However, the fundamental operation of speed cameras is rooted in sensor-based speed detection, not AI. Suggested accounts on Instagram This is considered AI. Social media algorithms, like Instagram’s, make recommendations based on user behavior. For example, if you spend a lot of time viewing a page that was recommended, the system interprets that as positive feedback and will make similar recommendations. Typically, these recommendations get better over time as the user generates more user-specific data. You supply data through your behaviors, the algorithm gets trained, and you interact with the suggestions via the app. 
Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["how-ai-works.html", "How AI Works Early Warning for Skin Cancer Collecting Datapoints Understanding the Algorithm Interfacing with AI Understanding the AI Spring Summary", " How AI Works Let’s briefly revisit our definition of AI. It must have some data, an algorithm, and an interface. Let’s break these down in more detail below. Early Warning for Skin Cancer Each year in the United States, 6.1 million adults are treated for skin cancer (basal cell and squamous cell carcinomas), totaling nearly $10 billion in costs (CDC 2023). It is one of the most common forms of cancer in the United States, and mortality from skin cancer is a real concern. Fortunately, early detection through regular screening can increase survival rates to over 95% (Melarkode et al. 2023). Cost and accessibility of screening providers, however, mean that many people aren’t getting the preventative care they need. Increasingly, AI is being used to flag potential skin cancer. AI focused on skin cancer detection could be used by would-be patients to motivate them to seek a professional opinion, or by clinicians to validate their findings or help with continuous learning. Data: Images with and without skin cancer present Algorithm: Detection of possible skin cancer Interface: Web portal or app where you can submit a new picture Collecting Datapoints Let’s say a clinician, Dr. Derma, is learning how to screen for skin cancer. Dr. D goes to their first day at the clinic and sees their first instance of skin cancer. Dr. D now has one data point. Dr. D could make future diagnoses based on this single data point, but these diagnoses probably won’t be very accurate. Over time, as Dr. 
D does more screenings of skin with and without cancer, they will get a better and better idea of what skin cancer looks like. This is part of what we do best. Human beings are powerhouses when it comes to pattern recognition and processing (Mattson 2014). Like Dr. D, AI will get better at finding the right patterns with more data. In order to train an AI algorithm to detect possible skin cancer, we’ll first want to gather as many pictures of normal and cancerous skin as we can. This is the raw data (Leek and Narayanan 2017). What Is Data In our skin cancer screening example, our data is all of the information stored in an image. However, data comes in many shapes and forms. Data can be structured, such as a spreadsheet of the time of day plus traffic volume or counts of viral particles in different patients. Data can also be unstructured, such as might be found in social media text or genome sequence data. Other kinds of data can be collected and used to train algorithms. These might include survey data collected directly from consumers, medical data collected in a healthcare setting, purchase or transaction tracking, and online tracking of your time on certain web pages (Cote 2022). Quantity and quality of data are very important. More data makes it easier to detect and account for minor differences among observations. However, that shouldn’t come at the cost of quality. It is sometimes better to have fewer, high resolution or high quality images in our dataset than many images that are blurry, discolored, or in other ways questionable. Representative diversity of datasets is crucial for the effectiveness of AI. For instance, if an AI used for skin cancer screening only encounters instances of skin cancer on lighter skin tones, it might fail to alert individuals with darker skin tones. The tech industry’s lack of diversity contributes to these issues, often leading to the discovery of failures only after harm has occurred. 
Large Language Models (LLMs), which we will cover later, are great examples of using high quantity and quality of data. Think about how much text information is freely available on the internet! Throughout the internet, we’re much more likely to see the phrase “cancer is a disease” than “cancer is a computer program”. Many LLMs are trained on sources like Wikipedia, which are typically grammatically sound and informative, leading to higher quality output. It is essential that you and your team think critically about data sources. Many companies releasing generative AI systems have come under fire for training these systems on data that doesn’t belong to them (Walsh 2023). Individual people also have a right to data privacy. No personal data should be used without permission, even if that data could be interesting or useful. Preparing the Data It’s important to remember that AI systems need specific instructions to start detecting patterns. We’ll need to take our raw data and indicate which pictures are positive for skin cancer and which aren’t. This process is called labeling and has to be done by humans. Once data is labeled, either “cancer” or “not cancer”, we can use it to train the algorithm in the next step. This data is aptly called training data. Understanding the Algorithm Our goal is “detection of possible skin cancer”, but how does a computer do that? First, we’ll need to break down the image into attributes called features. This could be the presence of certain color pixels, percentage of certain shades, spot perimeter regularity, or other features. Features can be determined by computers or by data scientists who know what kind of features are important. It’s not uncommon for an AI looking at image data to have thousands of features. Because we’ve supplied a bunch of images with labels, AI can look for patterns that are present in cancerous images that are not present in others. 
As an example, here is a very simple algorithm with one feature (spot perimeter): Calculate the perimeter of a darker spot in the image. If the perimeter of the spot is exactly circular, label the image “not cancer”. If the perimeter of the spot is not circular, label the image “cancer”. Testing the Algorithm After setting up and quantifying the features, we want to make sure the AI is actually doing a good job. We’ll take some images the AI hasn’t seen before, called test data. We know the correct answers, but the AI does not. The AI will measure the features within each of the images to provide an educated guess of the proper label. Every time AI gets a label wrong, it will reassess parts of the algorithm. For example, it might make the tweak below: Calculate the perimeter of a darker spot in the image. If the perimeter of the spot is close to circular, label the image “not cancer”. If the perimeter of the spot is not close to circular, label the image “cancer”. Humans play a big part in what kind of scores are acceptable when producing outputs. With cancer screening, we might be very worried about missing a real instance of cancer. Therefore, we might tell the AI to score false negatives more harshly than false positives. Interfacing with AI Finally, AI would not work without an interface. This is where we can get creative. In our skin cancer screening, we might create a website where providers or patients could upload a picture of an area that needs screening. Because skin images could be considered medical data, we would need to think critically about what happens to images after they are uploaded. Are images deleted after a screening prognosis is made? Will images be used to update the training data? Telling people they might have cancer could be very upsetting for them. Our interface should provide supporting resources and clear disclaimers about its abilities. 
Understanding the AI Spring The “AI Spring” is the period of rapid growth and progress in artificial intelligence starting in the early 2020s. A huge component of the AI Spring is Generative AI, which includes text generation, image creation, natural speech generation, computer code production, biological molecule discovery, and more. In the example above, the AI learns to distinguish between skin conditions based on features and patterns it identifies. Its main goal is to make decisions about someone’s skin condition rather than generating new examples. This is called discriminative AI. Other examples of discriminative AI include: Classifying emails as spam Facial recognition Converting speech-to-text However, let’s imagine we wanted AI to generate examples of skin cancer. If the AI was creating new, realistic images of skin cancer, trying to generate what cancerous lesions might look like, it would be considered generative AI. Examples of generative AI include: Text generated by a chat bot Images created from a text prompt Human sounding voices from an audio clip Generative AI: Creates new, creative things that look like what it has learned. Discriminative AI: Tells things apart or makes decisions based on what it has learned. We’ll talk next about some generative AI models which have made recent breakthroughs possible. Transformer Models Transformers have been especially helpful for text generation. They work like smart readers that can understand context and relationships in language very well. Imagine you’re reading a sentence, and at each word, you want to pay attention to other words to understand the context better. The self-attention mechanism does this very efficiently. It allows the model to focus on different parts of the input (like words in a sentence) simultaneously, capturing long-range dependencies. The model then uses this training to generate new text. Take for example this paragraph from the Wikipedia entry for skin cancer. 
A transformer model would be able to synthesize the information to understand the relationship between UV exposure, risk factors, and the development of different types of skin cancers for different groups of individuals. It can easily distill the information into themes and topics. More than 90% of cases are caused by exposure to ultraviolet radiation from the Sun.[4] This exposure increases the risk of all three main types of skin cancer.[4] Exposure has increased, partly due to a thinner ozone layer. Tanning beds are another common source of ultraviolet radiation. For melanomas and basal-cell cancers, exposure during childhood is particularly harmful. For squamous-cell skin cancers, total exposure, irrespective of when it occurs, is more important. Between 20% and 30% of melanomas develop from moles.[6] People with lighter skin are at higher risk as are those with poor immune function such as from medications or HIV/AIDS. Diagnosis is by biopsy. Diffusion Models Like transformers, diffusion models are useful for generative AI, particularly image generation. The key to diffusion models is that they have a lot of training in how to fill in the blanks. The model starts with many “noisy” images (imagine a photo with lots of holes or black spots) and tries to reproduce the original image. This process is called “denoising score matching”. It then uses this training to generate entirely new content. Summary In our skin cancer detection example, an AI system required lots of data labeled with information (“cancer” or “not cancer”). An algorithm looked for patterns between these two groups and then provided the results via an interface. This AI is an example of discriminative AI. Since the early 2020s, generative AI has exploded in popularity, assisted by transformer and diffusion models, among other advancements. These technologies have allowed AI to excel at creating new content, by recognizing deeper context and patterns. 
Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["discussion-how-ai-works.html", "DISCUSSION How AI Works", " DISCUSSION How AI Works Compare and contrast discriminative vs generative AI. When might each approach be most useful? What are some benefits and limitations of each? Early detection of diseases like skin cancer using AI could help save lives. What challenges or limitations might exist in real-world applications of such a system? How could the interface be designed with care, transparency and privacy in mind? Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["demystifying-types-of-ai.html", "Demystifying Types of AI Machine Learning Generative AI Natural Language Processing Strengths and Weaknesses Summary", " Demystifying Types of AI We’ve learned a bit about how AI works. However, there are many different types of AI with different combinations of data, algorithms, and interfaces. There are also general terms that are important to know. Let’s explore some of these below. Machine Learning Machine learning is a broad concept describing how computers learn from looking at lots of examples. Imagine you are learning to tell the difference between apples and oranges. Someone first has to show you examples and say, “This is an apple, and this is an orange.” Similarly, machine learning approaches need examples of input data that is “labeled” with the correct output. The goal of machine learning is making useful or accurate predictions. Machine learning includes simpler approaches like regression, and more complicated approaches like deep learning. 
Below are a few examples of machine learning methods. Neural Networks Neural networks are a specific class of algorithms within machine learning. Neural networks mimic the way data is transferred between neurons in the brain. Neural networks organize data into layers, starting with an “input layer” of raw data. Data is then transferred to the next layer, called a “hidden” layer. The hidden layer combines the raw data in many ways to create levels of abstraction. You can think of an image that is very pixelated becoming more clear. Finally, results are produced in an “output layer”. Neural networks often require large amounts of labeled data for training, and their performance may continue to improve with more data. Google uses a neural network to power its search algorithm (AI Team 2023). Neural networks also do a pretty good job of recognizing human handwritten digits. Deep Learning Deep learning refers to neural networks with multiple intermediate “hidden” layers. A neural network with 2+ hidden layers could be considered a deep learning system (AI Team 2023). The advantage of deep learning is that these approaches cluster data automatically, and can detect abstraction or patterns that we might not know ahead of time. This is especially useful for complicated data, like unstructured text or images. Google Translate has used deep learning to accurately translate text since 2016 (Turner 2016). However, generative AI methods started being incorporated in the 2020s (Gu 2023). Many of the machine learning approaches we’re discussing here are supervised learning approaches. This means that data is labeled in predefined categories. An example could be “spam” or “not spam” labels attached to a data set of emails. Sometimes, we are more interested in discovering variation, regardless of how we describe, or label, the data. This is called unsupervised learning. An example of this approach could be clustering human cells based on what kind of genes they have turned on. 
We don’t know what type of cells they are necessarily, but can group them based on their behavior. Generative AI Artificial Intelligence exploded in the early 2020s due to advancements in Generative AI, which includes text generation, image creation, natural speech generation, computer code production, biological molecule discovery, simulated data, and more. Let’s break down some of the following terms related to generative AI. Transformer Models and Architecture In 2017, Google engineers published a paper, “Attention is all you need”, describing a type of neural network they called a transformer (Vaswani et al. 2017). Transformer architecture has revolutionized the field of natural language processing and led to an explosion in what was possible with AI. Transformers are a key feature of what drives generative AI models today, and have allowed huge leaps forward in language understanding and image processing (Tay et al. 2022). The transformer architecture uses something called self-attention to figure out how important different parts of a sentence are when making predictions. This helps the model understand how words relate to each other in a sentence, regardless of their order in the sentence. Do we say transformer model or transformer architecture? Transformer architecture refers to the overall design, or “transformers” generally speaking. We use the term “transformer model” when dealing with a specific example, such as the GPT (Generative Pre-trained Transformer) model. Large Language Model Large Language Models (LLMs) are a specific type of generative AI model, often built using the transformer architecture, that leverage a huge volume of language data. Examples include models like OpenAI’s GPT (Generative Pre-trained Transformer) series. LLMs are trained on extensive text datasets and can generate coherent and contextually relevant text passages. 
You might be very familiar with LLMs, as they include super popular tools like ChatGPT, Bard, Claude Instant, and Llama. The process of interpreting a user prompt for a GPT model might go as follows: A user provides a prompt, such as “Describe a nice vacation for winter time.” The encoder translates words into machine-relevant values such as numerical vectors. It also captures semantic relationships. The transformer weighs different parts of the input for better understanding. The hidden layers of the neural network further decipher complex patterns and representations. Decoders generate the output that the user sees. Diffusion Model Diffusion models are a type of deep generative model. They are particularly powerful when it comes to image generation, but can also be used for other generative AI applications, like video generation and molecule design (Yang et al. 2023). The approach behind diffusion models is that they add more and more random noise to images (the “diffusion” process). Noise is then removed to generate the most “likely” novel outputs. The key feature of these models is the denoising process. A very popular diffusion model is used by Stable Diffusion for real-time text-to-image generation. Variational Autoencoders (VAEs) Variational autoencoders are a type of deep generative model. Variational autoencoders emerged slightly earlier than diffusion models (Kingma and Welling 2013). Like diffusion models, they work with data that is noisy and not perfect. Variational autoencoders are trained and generate outputs differently, however. They detect essential features or patterns within inputs and condense them in a more concise and abstract form. This differs from diffusion models, which focus on the process of cleaning up noisy data to make it look like new images, text, etc. Generative Adversarial Networks (GANs) Generative adversarial networks are a type of deep generative model. 
While the end goals are similar (new generated content), GANs differ in their training and objective. Generative Adversarial Networks work like two computers competing with each other. The first component, the “generator”, creates data, while the second, the “discriminator”, determines if the sample is realistic. Imagine two professionals, one artist specializing in artwork forgery, and one a detective specializing in forgery detection. If they are constantly competing, they will both get better at their respective specialty! The website This Person Does Not Exist (https://thispersondoesnotexist.com/) creates photorealistic headshots of imaginary people using a GAN called StyleGAN2 (Karras et al. 2020). Natural Language Processing Natural language processing, or NLP, deals with interpreting text and extracting relevant information and insights. It is a field of study rather than a type of algorithm. Typically, these systems look at huge volumes of text data to understand the relationship among words, parts of words, or sentences. Natural language processing can also categorize and organize the documents themselves. For example, NLP could help read the contents of documents online and decide whether they are patents or journal articles. These documents could then be indexed in Google Scholar. Initially, NLP was accelerated by techniques such as word vectorization (ODSC 2023). In short, this makes it easier for computers to understand that the words “apple” and “orange” (both fruits) are more closely related than “apple” and “planet” (perhaps both round, but that’s less important). Many NLP approaches also use deep learning (Wikipedia 2023b). Increasingly, generative AI is part of natural language processing (ODSC 2023). Natural language processing has been used to summarize the abundance of text information available in electronic health records. 
For example, healthcare practitioners showed that detecting evidence and information in records could improve treatment and quality of care for patients with diabetes (Turchin and Florez Builes 2021). Strengths and Weaknesses Here is a summary of some strengths and weaknesses of different concepts in AI. These are handy to keep in mind as you are making decisions about what kind of AI to use in your workplace. Strengths Challenges Machine Learning Makes accurate predictions based on learning from labeled examples; includes a large variety of approaches, including computationally cheap ones Extensive volumes of labeled data might be needed Neural Networks Great for recognizing intricate patterns in data; automatically discovers important features in data Require large datasets; can be computationally intensive Deep Learning Captures complex representations of data, enhancing performance in tasks like image and speech recognition Require large datasets; computationally intensive; difficult for humans to interpret reasoning behind outputs (“black box”), which raises ethical concerns Transformer and Large Language Models Self-attention mechanism enables understanding of context efficiently, pivotal for language understanding and generation Very computationally intensive; difficult for humans to interpret reasoning behind outputs (“black box”) Diffusion Models Great for image generation due to the denoising score matching approach. Very computationally intensive; might only work well for a specific task (e.g., image generation) Summary Neural Networks and Deep Neural Network Learning are both key components of today’s AI. They function like human brains for advanced pattern recognition. Deep Neural Networks are a critical component of generative AI. Transformer architecture is central to many of today’s Large Language Models and allows for rapid processing of context in text. Diffusion models adjust noise to generate new content, such as images. 
Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["discussion-ai-types.html", "DISCUSSION AI Types", " DISCUSSION AI Types Discuss some of the strengths and weaknesses of different AI techniques covered in the chapter. When might certain approaches be preferable over others given the available data or task? Reflect on the different definitions and terminology covered regarding AI types. What stood out to you? What questions do you still have? Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["what-ai-makes-possible.html", "What AI Makes Possible Advancements in Text Mining Modifying and Generating Text Automating Tedious Processes Idea Generation Planning and Organizing Synthetic Data Generation Text to Speech to Text Interactive Help", " What AI Makes Possible Artificial Intelligence is opening up many possible pathways in many different fields. It has allowed: Advancements in text mining More accurate text modification and generation Automation of tedious tasks Idea generation Planning and organizing Synthetic data generation Text-to-speech and back Interactive help and debugging Let’s explore several broad ways in which AI can be used today. Advancements in Text Mining Text mining is the process of extracting meaningful insights, patterns, and knowledge from unstructured textual data. This data could include articles, documents, emails, social media posts, business records, policy records, and more. This data is digested by a computer into a structured format for analysis, allowing for the discovery of hidden patterns, relationships, and / or summary information. 
Historically, mining for relevant text had to follow rule-based or statistical methods that required a lot of human oversight. Generative AI has led to many advancements in text mining. Some of these include: Contextual Understanding: Generative AI, especially transformer models, has improved contextual understanding in text mining. AI can consider relationships between words in a sentence more effectively using the self-attention mechanisms available as part of transformers. This results in more accurate extraction of context-dependent information. Text Completion and Generation: Generative AI allows for the completion of partial or missing text. In text mining, this capability is useful for handling incomplete or noisy data, improving the overall quality of mined information. Domain-Specific Language Generation: Generative AI can be fine-tuned for domain-specific language generation. This is particularly beneficial in industries where specialized terminology or jargon is prevalent. By training generative models on domain-specific data, text mining models can better adapt to the nuances of the industry or discipline in general. Examples Text mining can be used for: Mining clinical patient notes to identify patients with similar symptoms Mining extensive technical or financial documents to identify relevant sections more easily Extracting Population Statistics A simplified example of text mining can be seen here with ChatGPT. We will provide some information from the Wikipedia article for the city of Seattle and ask the large language model to extract only relevant statistics related to population growth. Can you extract relevant statistics from this Wikipedia passage related to population growth? Seattle is a seaport city on the West Coast of the United States. It is the seat of King County, Washington. 
With a 2022 population of 749,256[10] it is the most populous city in both the state of Washington and the Pacific Northwest region of North America, and the 18th most populous city in the United States. The Seattle metropolitan area’s population is 4.02 million, making it the 15th-largest in the United States.[11] Its growth rate of 21.1% between 2010 and 2020 made it one of the country’s fastest-growing large cities.[12] Seattle is situated on an isthmus between Puget Sound (an inlet of the Pacific Ocean) and Lake Washington. It is the northernmost major city in the United States, located about 100 miles (160 km) south of the Canadian border. A major gateway for trade with East Asia, the Port of Seattle is the fourth-largest port in North America in terms of container handling as of 2021.[13] The Seattle area has been inhabited by Native Americans (such as the Duwamish, who had at least 17 villages around Elliot Bay) for at least 4,000 years before the first permanent European settlers.[14] Arthur A. Denny and his group of travelers, subsequently known as the Denny Party, arrived from Illinois via Portland, Oregon, on the schooner Exact at Alki Point on November 13, 1851.[15] The settlement was moved to the eastern shore of Elliott Bay in 1852 and named “Seattle” in honor of Chief Seattle, a prominent 19th-century leader of the local Duwamish and Suquamish tribes. Seattle currently has high populations of Native Americans alongside Americans with strong Asian, African, European, and Scandinavian ancestry, and hosts the fifth-largest LGBT community in the U.S.[16] Here is the response: Certainly! 
Here are the relevant statistics extracted from the provided passage related to population growth: Population Growth Rate (2010-2020): The growth rate of Seattle between 2010 and 2020 was 21.1%, making it one of the country’s fastest-growing large cities.[12] Current Population (2022): The population of Seattle in 2022 is reported to be 749,256.[10] Metropolitan Area Population: The population of the Seattle metropolitan area is 4.02 million, making it the 15th-largest in the United States.[11] These statistics highlight the significant population growth of Seattle over the specified time period, positioning it as one of the fastest-growing large cities in the United States. In reality, you might be providing a much larger volume of data or be looking for more nuanced information. Modifying and Generating Text Generative AI in the form of Large language Models like ChatGPT and Claude can be especially useful for modifying and generating text. This could mean changing the tone of a piece of writing, creating summaries based on text data input, or making writing more understandable to different audiences. Examples Here are some examples of using AI to modify or generate text: Altering an email to be more empathetic Translating text into another language, otherwise known as machine translation Summarizing multiple files within a project to make a project summary - For example, a README file across many code scripts for software development Summarizing legal documents in a way that is readable to broader audiences with less familiarity with legal jargon Transforming an Angry Email Let’s see this in practice. Take for example this email to a colleague that is really frustrating you right now. You might ask Bard to help you make it more constructive and empathetic. Can you help me transform this email into a more understanding and constructive one? Subject: RE: Proposal Revisions (ARE YOU KIDDING ME?!) 
Hi Ben, I just read your revisions to the proposal and frankly, it’s an insult to my intelligence and hard work. Did you even bother to skim the original document? Half your changes are nonsensical and the other half seem like you were just trying to sound fancy. This project is riding on my expertise. If you want your name anywhere near this proposal, you’d better come up with something actually useful, not just reword my sentences. And don’t even get me started on the formatting… I expect a complete rewrite by tomorrow morning, and this time, make sure it’s actually worthy of presenting to the client. Don’t waste my time or theirs. Sincerely (not really), Alex Here is the response: Subject: Proposal Revisions - Discussion Needed Hi Ben, Thanks for sending over your revisions to the proposal. I appreciate your taking the time to add your perspective. I do see some areas where our ideas diverge, and I think it would be helpful to discuss them further to ensure we present a unified front to the client. For example, I noticed your revisions to [mention specific section(s)]. While I understand your point about [mention specific reason behind Ben’s revision], I was originally thinking of [explain your rationale briefly]. Similarly, there are a few formatting choices in [mention specific sections] that might benefit from further refinement. Perhaps we could brainstorm some ways to improve visual clarity while maintaining consistency with the client’s branding guidelines. Would you be free for a quick chat sometime this afternoon to discuss these points and find a solution that incorporates both our strengths? I believe that collaborating on this will lead to a stronger proposal overall. Best, Alex We all have our moments of frustration, but AI can help us be better colleagues. By asking Bard to transform this email, we can prevent an uncomfortable and hurtful work situation. Automating Tedious Processes Generative AI can help tackle repetitive processes. 
You can think of it like an automation assistant. It can be especially useful for automating work that requires conversion of content from one form to another, for example text-to-speech, image-to-text, and so on. Examples Here are some examples of using AI to automate tasks: Writing a draft of code annotations to help you annotate code Generating meeting notes automatically Getting suggestions for ways to deidentify clinical data Generating audio from text for a video voiceover Converting physician dictations from a patient visit to text format Meeting Note Takers No one likes to attend a meeting, only to forget what was being discussed. Fortunately, many AI note takers have entered our world recently. The online meeting platform Zoom has an “AI assistant” that can summarize meetings (Zoom 2023). Companies like fireflies.ai and Otter.ai also offer similar functionality, recording meeting notes, offering summaries, capturing action items, and more. Idea Generation Generative AI can provide templates or scaffolds. In other words, AI can get the creative juices flowing and make it easier to start new content. AI can also help synthesize data to suggest possible new relationships. Diabetes Gene Hypotheses The Diabetes Data and Hypothesis Hub (D2H2, https://d2h2.maayanlab.cloud/) was created to make it easier for biomedical researchers to access and use data related to diabetes (Marino et al. 2023). The Hub incorporated a GPT-style chatbot that allows users to ask questions in plain terms about their data analysis needs. D2H2 also has the ability to generate hypotheses. It randomly selects gene sets related to existing gene activation signatures and compares them with gene sets found in published articles. The AI provides ideas about possible explanations for the similarities between these gene sets. Planning and Organizing You can use generative AI to suggest and break down ways to accomplish a task based on prior task planning. 
In other words, it can generate a template for events or projects. For example, it can: Plan the agenda for a conference or meeting Plan and assign roles for a new project task list. Meeting Agendas Let’s imagine we are in a leadership role at a research institution. Soon, the institution will be unveiling a new genomic research coordinating center, but we need to make sure the whole executive suite is on board. Let’s see how a large language model might give us a starting point for the meeting agenda: Can you provide a sample agenda for an executive meeting, where the goal is to reveal a new genomic research coordinating center at our institution? Here is the response: Executive Meeting Agenda - Unveiling the New Genomic Research Coordinating Center Meeting Date: [Date] Meeting Time: [Time] Meeting Location: [Location] Attendees: [List executive team members and other key stakeholders] Introduction & Welcome (5 minutes) Welcome by [Host/Chair] Brief overview of meeting goals and agenda Genomic Research Landscape & Challenges (10 minutes) Presentation by [Speaker]: State of the art in genomic research Current challenges in coordination and collaboration Economic and societal impact of advancements in genomics Introducing the New Genomic Research Coordinating Center (20 minutes) Presentation by [Center Director]: Center’s mission, vision, and core values Key services and capabilities offered by the center How the center addresses existing challenges and fosters collaboration Examples of ongoing or planned research initiatives Center Operations & Budget (15 minutes) Presentation by [Center Administrator]: Funding sources and budget allocation Staffing plan and key personnel introductions Technology infrastructure and resources Partnerships and collaborations with external institutions Open Discussion & Q&A (20 minutes) Encourage executives to ask questions and seek clarification Address any concerns or potential roadblocks Gather feedback and suggestions for the 
center’s future Next Steps & Action Items (10 minutes) Summarize key takeaways and action items Assign clear responsibilities and timelines for next steps Outline communication plan for broader institutional announcement Closing Remarks (5 minutes) Express gratitude for executive support and engagement Reiterate the center’s potential impact and contributions Positive outlook on the future of genomic research at the institution Optional Items - Depending on time constraints, you may consider adding a brief tour of the center facilities or including short demos of key technologies. - Prepare handouts with key information about the center, including contact details and resources. - Consider a light breakfast or refreshments to encourage engagement and networking. Remember to adjust the agenda based on your specific needs and priorities. The key is to ensure a clear, concise, and informative presentation of the new center, while also addressing any potential concerns and fostering enthusiasm among the executive team. I hope this sample agenda helps you prepare for a successful meeting! The above gives us a good place to start, so we aren’t staring at a blank page. Of course, it’s important to tailor responses to meet your needs. However, this could get us started, ensuring that we use time in the meeting effectively. As long as the information is not sensitive or proprietary, you could also give the large language model more context. This will help it tailor the response better. Synthetic Data Generation For more technical needs, AI can help generate synthetic data to experiment with. For example, it can help with sparse data, propose/test models for data that does not yet exist, or generate toy data for development and testing. We might be interested in who drinks the most coffee in the office, but we don’t have data yet. Before interviewing all of our colleagues, we might want to test that we can actually do the analysis. 
Can you create a toy time series dataset where we track individuals, time points, and coffee consumption? Here is the response: Here is a toy time series dataset tracking individuals, time points, and coffee consumption: ID Time_point Coffee_cups 1 1 2 1 2 3 1 3 1 2 1 4 2 2 2 2 3 3 3 1 1 3 2 0 3 3 2 This tracks 3 individuals over 3 time points (days) and their daily coffee consumption in cups. Individual 1 drank 2 cups on day 1, 3 cups on day 2, and 1 cup on day 3. Individual 2 drank 4 cups on day 1, 2 cups on day 2, and 3 cups on day 3. Individual 3 drank 1 cup on day 1, 0 cups on day 2, and 2 cups on day 3. Text to Speech to Text Generative AI is great at converting text to realistic sounding speech. It can also listen to you dictate and convert that speech into text. Many of the AI tools that do this are commercial and cost money. For example, companies like Eleven Labs, Natural Reader, and Speechify will read text to you, but typically cost money to use. Both Text-to-Speech, and Speech-to-Text models benefit from specific training. This is because there are likely to be acronyms and jargon specific to your industry or institution. Interactive Help Finally, we think AI is a great tool for interactive help and debugging, especially for programming. If you want to learn more about how this works, check out our other course AI for Efficient Programming on Coursera and on the web. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["discussion-ai-possibilities.html", "DISCUSSION AI Possibilities", " DISCUSSION AI Possibilities How could the various applications of AI discussed help improve life in your industry/field of work or personal life? Are there any specific problems they could help address? 
Do you see any risks or downsides to organizations and individuals becoming overly reliant on automated tools and AI assistants? How should we strive for a healthy human-AI relationship? Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["ground-rules-for-ai.html", "Ground Rules for AI", " Ground Rules for AI The rapidly changing AI landscape has brought unexpected ethical challenges. To promote benefit over harm, we suggest following these AI use and development guidelines: Recognize guidelines. Today, there are some guidelines for ethical use. More will be developed. It is advised that you stay up-to-date on industry-specific guidelines. Consider consequences. Think about possible downstream unintended consequences for using AI. This could be in the context of creating content or developing new AI tools. Acknowledge shortcomings. AI is not perfect. It makes mistakes, is not necessarily superior to humans, and should be used as intended and trained. It is also only as up-to-date as its training data. While humans are very good at generalizing knowledge for different contexts, AI systems can sometimes struggle with this. Human oversight is needed for important and consequential uses. Understand bias. Realize that AI often perpetuates bias. AI is created using data generated by humans, and that data can be biased. It is important to use inclusive datasets and seek expert advice. Promote access. Promote equitable access to AI. Differences in access could worsen existing disparities, or create new ones. Think securely. AI poses security and privacy threats. AI needs to be used and developed carefully with these aspects in mind. Do not use proprietary or private information as prompts for consumer AI tools unless it was specifically designed for private data. Understand costs. 
AI could exacerbate global climate change and human welfare disparities. Developers should be considerate about their computation needs and not use larger than necessary datasets. Workers who label and curate datasets should be compensated appropriately. Be transparent. Users should be transparent about their use of AI tools. It makes it easier to locate the source of issues. It also helps to uplift human contributions to work and art. Credit sources. When developing tools, be transparent about what data you used to create your AI systems. Be careful not to use work or data from individuals who did not consent to it being used in such ways. Work thoughtfully. Ramp up AI projects gradually to identify unexpected behaviors or impacts before full deployment. Starting slowly enables recognition and resolution of issues. Acknowledge complexity. Recognize that if AI systems use overly complex models, it can be difficult to trace how decisions are made using them. Diversify usage. Check the consistency of results using multiple AI tools and timepoints, where possible. Keep learning. Educate yourself and others. To comply with ethical standards, users must be educated about best use practices. If you help set standards for an institution or group, it is strongly advised that you carefully consider how to educate individuals about those standards of use. To learn more about how to responsibly use and develop AI, check out the following minicourse about Avoiding AI harm. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["ai-possibilities-case-studies.html", "AI Possibilities Case Studies Financial Forecasting", " AI Possibilities Case Studies The following are case studies that can help us conceptualize AI in the real world. 
Financial Forecasting In this case study, we will look at how artificial intelligence has been utilized in governmental financial services. National banks, such as the Federal Reserve of the United States and the European Central Bank of the European Union, have started to explore how Artificial Intelligence can be used for data mining and economic forecast prediction. There are many uses of AI for improving financial institutions, each with potential benefits and risks. Most financial institutions weigh the benefits and risks carefully before implementation. For instance, if a financial institution takes a high-risk prediction seriously, such as predicting a financial crisis or a large recession, then it would have a huge impact on a bank’s policy and allow the bank to act early. However, many financial institutions are hesitant to take action based on artificial intelligence predictions because the prediction is for a high-risk situation. If the prediction is not accurate then there can be severe consequences. Additionally, data on rare events such as financial crises are not abundant, so researchers worry that there is not enough data to train accurate models (Nelson 2023). Many banks prefer to pilot AI for low-risk, repeated predictions, in which the events are common and there is a lot of data to train the model on. Let’s look at a few examples that illustrate the potential benefits and risks of artificial intelligence for improving financial institutions. Categorizing Businesses An important task in analysis of economic data is to classify businesses by institutional sector. For instance, given 10 million legal entities in the European Union, they need to be classified by financial sector to conduct downstream analysis. In the past, classifying legal entities was curated by expert knowledge (Moufakkir 2023). Text-based analysis and machine learning classifiers, which are all considered AI models, help reduce this manual curation time. 
An AI model would extract important keywords and classify into an appropriate financial sector, such as “non-profits”, “small business”, or “government”. This would be a low-risk use of AI, as one could easily validate the result against the true financial sector. Incorporating new predictors for forecasting Banks are considering expanding upon existing traditional economic models to bring in wider data sources, such as pulling in social media feeds as an indicator of public sentiment. The National bank of France has started to use social media information to estimate the public perception of inflation. The Malaysian national bank has started to incorporate new articles into its financial model of gross domestic product estimation. However, the use of these new data sources may raise questions about government oversight of social media and public domain information (OMFIF 2023). Using Large Language Models to predict inflation The US Federal Reserve has researched the idea of using pre-trained large language models from Google to make inflation predictions. Usually, inflation is predicted from the Survey of Professional Forecasters, which pools forecasts from a range of financial forecasts and experts. When compared to the true inflation rate, the researchers found that the large language models performed slightly better than the Survey of Professional Forecasters (Federal Reserve Bank of St. Louis 2023). A concern of using pre-trained large language models is that the data sources used for model training are not known, so the financial institution may be using data that is not in line with its policy. Also, a potential risk of using large language models that perform similarly is the convergence of predictions. If large language models make very similar predictions, banks would act similarly and make similar policies, which may lead to financial instability (OMFIF 2023). 
Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["introduction-to-avoiding-ai-harm.html", "Introduction to Avoiding AI Harm Motivation Target Audience Curriculum Learning Objectives", " Introduction to Avoiding AI Harm This course aims to help you recognize some of the potential consequences of using or developing AI tools. Some of this content was adapted from our course on AI for Efficient Programming. If you intend to use AI for writing code, we recommend that you review this content for a deeper dive into ethics specifically for writing code with generative AI. Motivation The use of artificial intelligence (AI) and in particular, generative AI, has raised a number of ethical concerns. We will highlight several current concerns; however, please be aware that this is a dynamic field and the possible implications of this technology are continuing to develop. It is critical that society continue to evaluate and predict what the consequences of the use of AI will be, so that we can try to mitigate harmful effects. Target Audience This course is intended for leaders who might make decisions about AI at nonprofits, in industry, or academia. They may have an interest in using or developing AI tools. Curriculum This course provides a brief introduction about ethical concepts to be aware of when making decisions about AI, as well as real-world examples of situations that involved ethical challenges. The course is largely focused on generative AI considerations, although some of the content will also be applicable to other types of AI applications. 
The course will cover: Possible societal impacts of AI Guidelines for using AI training and testing data (optional) Concerns to be aware of for AI algorithms Strategies to adhere to AI codes of ethics Concepts for consent with AI IDARE principles (Inclusion, Diversity, Anti-Racism, and Equity) with AI A proposed process for ethical AI use and development Learning Objectives We will demonstrate how to: Describe key ethical concerns for using AI tools Discuss why human evaluation and monitoring is important and necessary Explain why AI should be thought of as a better computer, not a human replacement Discuss the potential benefits of being transparent about the use of AI tools Recognize real-world examples of AI usage that has resulted in ethical debate Identify possible mitigation strategies for major ethical concerns with regard to the algorithms underlying AI tools Describe practices that can help you to adhere to more responsible AI use and development Identify concepts and strategies for promoting social justice in AI use and development Discuss nuances involved with consent in the use of AI Describe a possible process for reflecting on ethical AI use and development Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["societal-impact.html", "Societal Impact Guidelines for Responsible Development and Use of AI. Major Ethical Considerations Intentional and Inadvertent Harm Replacing Humans Inappropriate Use and Lack of Oversight Bias Perpetuation and Disparities Security and Privacy Issues Climate Impact Tips for reducing climate impact Transparency Summary", " Societal Impact There is the potential for AI to dramatically influence society. 
It is our responsibility to proactively think about what uses and impacts we consider to be useful and appropriate and those we consider harmful and inappropriate. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Guidelines for Responsible Development and Use of AI. There are currently several guidelines for the responsible use and development of AI: United States Blueprint for an AI Bill of Rights United States Executive Order on the Safe, Secure, and Trustworthy Development and Use of Artificial Intelligence United States National Institute of Standards and Technology (NIST): AI Risk Management Framework European Commission Ethics Guidelines for trustworthy AI European Union AI Act United Kingdom National AI Strategy The Institute of Electrical and Electronics Engineers (IEEE) Ethically Aligned Design Version 2 As this is an emerging technology, more guidelines will be developed and updated as the technology evolves. When you read this, more guidelines and updates are likely to be available. It is important to be aware of the current ethical guidelines and regulations for your respective field. Major Ethical Considerations In this chapter we will discuss some of the major ethical considerations in terms of possible societal consequences for the use or development of AI tools: Intentional and Inadvertent Harm - Data and technology intended to serve one purpose may be reused by others for unintended purposes. How do we prevent intentional harm? Replacing Humans - AI tools can help humans, but they are not a replacement. Humans are still much better at generalizing their knowledge to other contexts (Sinz et al. (2019)). 
Also, studies suggest that humans value content and objects created by humans more than that of AI when it relates to abstract thought or unique work (Bellaiche et al. (2023), Granulo, Fuchs, and Puntoni (2021)). Inappropriate Use and Lack of Oversight - There are situations in which using AI might not be appropriate now or in the future. A lack of human monitoring and oversight can result in harm. Bias Perpetuation and Disparities - AI models are built on data and code that were created by biased humans, thus bias can be further perpetuated by using AI tools. In some cases bias can even be exaggerated. This combined with differences in access may exacerbate disparities. Security and Privacy Issues - Data for AI systems should be collected in an ethical manner that is mindful of the rights of the individuals the data comes from. Data around usage of those tools should also be collected in an ethical manner. Commercial tool usage with proprietary or private data, code, text, images or other files may result in leaked data not only to the developers of the commercial tool, but potentially also to other users. Climate Impact - As we continue to use more and more data and computing power, we need to be ever more mindful of how we generate the electricity to store and perform our computations. Transparency - Being transparent about what AI tools you use where possible, helps others to better understand how you made decisions or created any content that was derived by AI, as well as the possible sources that the AI tools might have used when helping you. It may also help with future unknown issues related to the use of these tools. Keep in mind that some fields, organizations, and societies have guidelines or requirements for using AI, like for example the policy for the use of large language models for the International Society for Computational Biology. Be aware of the requirements/guidelines for your field. 
Note that this is an incomplete list; additional ethical concerns will become apparent as we continue to use these new technologies. We highly suggest that users of these tools be careful to learn more about the specific tools they are interested in and to be transparent about the use of these tools, so that as new ethical issues emerge, we will be better prepared to understand the implications. Intentional and Inadvertent Harm AI tools need to be developed with safeguards and continually audited to ensure that the AI system is not responsive to harmful requests by users. With additional usage and updates, AI tools can adapt and thus continual auditing is required. Of course using AI to help you perform a harmful action would result in intentional harm. This may sound like an obvious and easy issue to avoid, at least by those with good intent. However, the consequences may be much further reaching than might be first anticipated. Perhaps you or your company develop an AI tool that helps to identify individuals that might especially benefit from a product or service that you offer. This in and of itself is likely not harmful. However, the data you have used, the data that you may have collected, and the tool that you have created, all could be used for other malicious reasons, such as targeting specific groups of people for advertisements when they are vulnerable. Therefore it is critical that we be considerate of the downstream consequences of what we create and what might happen if that technology or data was used for other purposes. Tips for avoiding inadvertent harm For decision makers about AI use: Consider how the content or decisions generated by an AI tool might be used by others. Continually audit how AI tools that you are using are performing. Do not implement changes to systems or make important decisions using AI tools without human oversight. For decision makers about AI development: Consider how newly developed AI tools might be used by others. 
Continually audit AI tools to look for unexpected and potentially harmful or biased behavior. Be transparent with users about the limitations of the tool and the data used to train the tool. Caution potential users about any potential negative consequences of use Replacing Humans While AI systems are useful, they do not replace human strengths. While AI systems are good at synthesizing lots of data, humans remain far superior at generalizing concepts to new contexts (Sinz et al. (2019)). AI systems should be thought of as better computers as opposed to replacements for humans. While there are some contexts in which human labor has already been replaced by robotics and AI, studies show that humans tend to prefer human-made goods when those goods are not strictly functional (Bellaiche et al. (2023), Granulo, Fuchs, and Puntoni (2021)). It has been proposed that there will be radical shifts in the way that humans work in many fields including health care, banking, retail, security, and more (Selenko et al. (2022)). Yet we need to implement changes gradually to allow for time to better understand the consequences and mindfully consider how such changes impact human employment and well-being. Selenko et al. (2022) have proposed a framework for considering the impact of AI usage on human workers to promote benefit and avoid harm. It suggests considering usage in a few different ways: AI for complementing work, AI for replacing tasks, and AI for generating new tasks. It suggests considering how such usages might reduce tedious or dangerous work, while also preserving work-related benefits such as self-esteem, belonging, and perceived meaningfulness. See here for the article. Example 1 AI might become much more prominent in the field of journalism and may help deliver news more rapidly, deliver news from dangerous locations, and possibly even create content less biased politically or otherwise if the models are specifically trained to be objective (Latar (2015)). 
Yet, larger usage of AI in journalism also poses additional risks of misinformation, infiltration by outsiders, and a lack of human values if the usage lacks appropriate and sufficient human oversight. “robot journalist story writers will have instant access to new insights and information, and their new ability to compose the story and publish it in seconds may cause human journalists to become obsolete. This is alarming, as no robot journalists can replace human journalists as the guardians of democracy and human rights.” (Latar (2015)) “This potential threat to the profession of human journalism is viewed by some optimistic journalists merely as another tool that will free them of the necessity to conduct costly and, at times, dangerous investigations. The robot journalists will provide them, so the optimists hope, with an automated draft for a story that they will edit and enrich with their in-depth analysis, their perspectives and their narrative talents. The more pessimistic journalists view the new robot journalists as a real threat to their livelihood and style of working and living. Computer science is a field that has historically lacked diversity. It is also critical that we support diverse new learners of computer science, as we will continue to need human involvement in the development and use of AI tools. This can help to ensure that more diverse perspectives are accounted for in our understanding of how these tools should be used responsibly. Tips for supporting human contributions For decision makers about AI use: Avoid thinking that content by AI tools must be better than that created by humans, as this is not true (Sinz et al. (2019)). Recall that humans wrote the code to create these AI tools and that the data used to train these AI tools also came from humans. Many of the large commercial AI tools were trained on websites and other content from the internet. 
Be transparent where possible about when you do or do not use AI tools, give credit to the humans involved as much as possible. Make decisions about using AI tools based on ethical frameworks in terms of considering the impact on human workers. For decision makers about AI development: Be transparent about the data used to generate tools as much as possible and provide information about what humans may have been involved in the creation of the data. Make decisions about creating AI tools based on ethical frameworks in terms of considering the impact on human workers. A new term in the medical field called AI paternalism describes the concept that doctors (and others) may trust AI over their own judgment or the experiences of the patients they treat. This has already been shown to be a problem with earlier AI systems intended to help distinguish patient groups. Not all humans will necessarily fit the expectations of the AI model if it is not very good at predicting edge cases (Hamzelou n.d.). Therefore, in all fields it is important for us to not forget our value as humans in our understanding of the world. Inappropriate Use and Lack of Oversight There are situations in which we may, as a society, not want an automated response. There may even be situations in which we do not want to bias our own human judgment by that of an AI system. There may be other situations where the efficiency of AI may also be considered inappropriate. While many of these topics are still under debate and AI technology continues to improve, we challenge the readers to consider such cases given what is currently possible and what may be possible in the future. Some reasons why AI may not be appropriate for certain situations include: Despite the common misconception that AI systems have clearer judgment than humans, they are in fact typically just as prone to bias and sometimes even exacerbate bias (Pethig and Kroenung (2023)). 
There are some very mindful researchers working on these issues in specific contexts and making progress where AI may actually improve on human judgment, but generally speaking AI systems are currently typically biased and reflective of human judgment but in a more limited manner based on the context in which they have been trained. AI systems can behave in unexpected ways (Gichoya et al. (2022)). Humans are still better than AI at generalizing what they learn for new contexts (Sinz et al. (2019)). Humans can better understand the consequences of discussions from a humanity standpoint. Some examples where it may be considered inappropriate for AI systems to be used (even with human involvement) include: In the justice system to determine if someone is guilty of a crime or to determine the punishment of someone found guilty of a crime. It may be considered inappropriate for AI systems to be used in certain warfare circumstances. Additionally there are many contexts in which using AI without human intervention could be very problematic including: Diagnosis of disease for patients - Delivering this news should likely come from a human. Secondly, the stakes for errors in the AI system could be very high. What if the system works poorly occasionally for certain individuals? What if the system starts behaving strangely? What if a patient with an unusual situation comes in that the AI system can’t work well for? Even for seemingly benign uses, if humans do not intervene, it is possible that negative consequences could occur if the system starts working poorly or unusually. Example 2 Real-World Example Uber drivers in India experienced issues with the facial recognition technology for logging into the App. This caused many drivers to get locked out of their accounts temporarily or permanently resulting in a reduction in their capacity to work and earn a living (Bansal (2022)). Read more about this in this article. 
Tips for avoiding inappropriate uses and lack of oversight For decision makers about AI use: Stay up-to-date on current laws, practices, and standards for your field, especially for high-risk uses. Stay up-to-date on the news for how others have experienced their use of AI. Stay involved in discussions about appropriate uses for AI, particularly for policy. Begin using AI slowly and iteratively to allow time to determine the appropriateness of the use. Some issues will only be discovered after some experience. Involve a diverse group of individuals in discussions of intended uses to better account for a variety of perspectives. Seek outside expert opinion whenever you are unsure about your AI use plans. Consider AI alternatives if something doesn’t feel right. For decision makers about AI development: Be transparent with users about the potential risks that usage may cause. Stay up-to-date on current laws, practices, and standards for your field, especially for high-risk uses. Stay up-to-date on the news for how others may have experienced problems using AI. Stay involved in discussions about appropriate uses for AI, particularly for policy. Involve a diverse group of individuals in development to better account for a variety of perspectives. Seek outside expert opinion whenever you are unsure about your AI development plans. Consider AI alternatives if something doesn’t feel right. Design tools with safeguards to stop users from requesting harmful or irresponsible uses. Design tools with responses that may ask users to be more considerate in the usage of the tool. Bias Perpetuation and Disparities One of the biggest concerns is the potential for AI to further perpetuate bias. AI systems are trained on data created by humans. If this data used to train the system is biased (and this includes existing code that may be written in a biased manner), the resulting content from the AI tools could also be biased. 
This could lead to discrimination, abuse, or neglect for certain groups of people, such as those with certain ethnic or cultural backgrounds, genders, ages, sexuality, capabilities, religions or other group affiliations. It is well known that data and code are often biased (Belenguer 2022). The resulting output of AI tools should be evaluated for bias and modified where needed. Please be aware that because bias is intrinsic, it may be difficult to identify issues. Therefore, people with specialized training to recognize bias should be consulted. It is also vital that evaluations be made throughout the software development process of new AI tools to check for and consider potential perpetuation of bias. Because of differences in access to technology, disparities may be further exacerbated by the usage of AI tools. Consideration and support for under-served populations will be even more necessary. For example, tools that only work well on individuals with light skin will lead to further challenges for some individuals. Developing and scaling-up artificial intelligence-based innovations for use in low- and middle-income countries will thus require deliberate efforts to generate locally representative training data (Paul and Schaefer (2020)). On the flip side, AI has the potential, if used wisely, to reduce health inequities by potentially enabling the scaling and access to expertise not yet available in some locations. Tips for avoiding bias For decision makers about AI use: Be aware of the biases in the data that is used to train AI systems. Check what data was used to train the AI tools that you use where possible. Tools that are more transparent are likely more ethically developed. Check if the developers of the AI tools you are using were/are considerate of bias issues in their development where possible. Tools that are more transparent are likely more ethically developed. Consider the possible outcomes of the use of content created by AI tools. 
Consider if the content could possibly be used in a manner that will result in discrimination. For decision makers about AI development: Check for possible biases within data used to train new AI tools. Are there harmful data values? Examples could include discriminatory and false associations. Are the data adequately inclusive? Examples could include a lack of data about certain ethnic or gender groups or disabled individuals, which could result in code that does not adequately consider these groups, ignores them altogether, or makes false associations. Are the data of high enough quality? Examples could include data that is false about certain individuals. Evaluate the code for new AI tools for biases as it is developed. Check if any of the criteria for weighting certain data values over others are rooted in bias. Continually audit the code for potentially biased responses. Potentially seek expert help. Be transparent with users about potential bias risks. Consider the possible outcomes of the use of content created by newly developed AI tools. Consider if the content could possibly be used in a manner that will result in discrimination. See Belenguer (2022) for more guidance. We also encourage you to check out the following video for a classic example of bias in AI: For further details check out this course on Coursera about building fair algorithms. We will also describe more in the next section. Security and Privacy Issues Security and privacy are a major concern for AI usage. Here we discuss a few aspects related to this. 
Use the right tool for the job There are three kinds of commercial AI tools (Nigro (2023)): Consumer tools (likely not private/secure) Enterprise tools (can be secure with the right legal agreements in place) Open source tools (depends on where you use them and whether you control the computers they run on) Public commercial AI tools are often not designed to protect users from unknowingly submitting prompts that include proprietary or private information. Different AI tools have different practices in terms of how they do or do not collect data about the prompts that people submit. They also have different practices in terms of if they reuse information from prompts to other users. Note that the AI system itself may not be trained on responses for how prompt data is collected or not. So asking the AI system may not give accurate answers. Thus if users of public AI tools, such as ChatGPT, submit prompts that include proprietary or private information, they run the risk of that information being viewable not only by the developers/maintainers of the AI tool used, but also by other users who use that same AI tool. AI can have security blind spots Furthermore, AI tools are not always trained in a way that is particularly conscious of data security. If for example, code is written using these tools by users who are less familiar with coding security concerns, protected data or important passwords may be leaked within the code itself. AI systems may also utilize data that was actually intended to be private. Data source issues It is also important to consider what data the responses that you get from a commercial AI tool might actually be using. Are these datasets from people who consented to their data being used in this manner? If you are generating your own tools, did people consent for their data to be used as you intend? 
Data privacy is a major issue all on its own: 98% of Americans still feel they should have more control over the sharing of their data (Pearce (2021)) It is important to follow legal and ethical guidance around the collection of data and to use tools that also abide by these guidelines. Tips for reducing security and privacy issues For decision makers about AI use: Check that no sensitive data, such as Personally Identifiable Information (PII) or proprietary information becomes public through prompts to consumer AI systems or systems not designed or set up with the right legal agreements in place for sensitive data. Consider purchasing a license for a private AI system if needed or create your own if you wish to work with sensitive data (seek expert guidance to determine if the AI systems are secure enough). Ask AI tools for help with security when using consumer tools, but do not rely on them alone. In some cases, consumer AI tools will even provide little guidance about who developed the tool and what data it was trained on, regardless of what happens to the prompts and if they are collected and maintained in a secure way. Promote regulation of AI tools by voting for standards where possible. Possible Generative AI Prompt: Are there any methods that could be implemented to make this code more secure? For decision makers about AI development: Consult with an expert about data security if you want to design or use an AI tool that will regularly use private or proprietary data. Be clear with users about the limitations and security risks associated with tools that you develop. Promote regulation of AI tools by voting for standards where possible. Possible Generative AI Prompt: Are there any possible data security or privacy issues associated with the plan you proposed? Climate Impact AI can help humans to innovate ways to improve efficiency and to devise strategies to help mitigate climate issues (Jansen et al. (2023); Cowls et al. (2023)). 
Importantly this needs to be done in a manner with social justice in mind, as often those that have the least resources to deal with climate issues are also the most likely to be impacted (Jansen et al. (2023); Bender et al. (2021)). A few organizations are working on supporting the use of AI for climate crises mitigation uses such as: AI for the Planet: https://www.aifortheplanet.org/en Climate Change AI (CCAI): https://www.climatechange.ai/about However, AI also poses a number of climate risks (Bender et al. (2021); Hulick (2021); Jansen et al. (2023); Cowls et al. (2023)). The data storage and computing resources needed for the development of AI tools could exacerbate climate challenges (Bender et al. (2021)) If not designed carefully, AI could also spread false solutions for climate crises or promote inefficient practices (Jansen et al. (2023)). Differences in access to AI technologies may exacerbate social inequities related to climate (Hulick (2021)) Tips for reducing climate impact For decision makers about AI use: Where possible use tools that are transparent about resource usage and that identify how they have attempted to improve efficiency For decision makers about AI development: Modify existing models as opposed to unnecessarily creating new models from scratch where possible. Avoid using models with datasets that are unnecessarily large (Bender et al. (2021)) Solutions such as federated learning, where AI models are iteratively trained in multiple locations using data at those locations, instead of collectively sharing the data to create more massive datasets can help reduce the required resources and also help preserve data privacy and security. Use emerging tools and guidelines to estimate and monitor the resource usage involved in training models (Castaño Fernández (2023)). Be transparent about resources used to train models (Castaño Fernández (2023)). 
Utilize data storage and computing options that are designed to be more environmentally conscious options, such as solar or wind power generated electricity. Transparency In the United States Blueprint for the AI Bill of Rights, it states: You should know that an automated system is being used and understand how and why it contributes to outcomes that impact you. This transparency is important for people to understand how decisions are made using AI, which can be especially vital to allow people to contest decisions. It also better helps us to understand what AI systems may need to be fixed or adapted if there are issues. Tips for being transparent For decision makers about AI use: Where possible include the AI tool and version that you may be using and why so people can trace back where decisions or content came from Use tools that are transparent about what data was used where possible For decision makers about AI development: Providing information about what training data or methods were used to develop new AI models can help people to better understand why a model is working in a particular way. Summary Here is a summary of all the tips we suggested: Be mindful of how content created with AI or AI tools may be used for unintended purposes. Be aware that humans are still better at generalizing concepts to other contexts (Sinz et al. (2019)). Always have expert humans review content created by AI and value human contributions and thoughts. Carefully consider if an AI solution is appropriate for your context. Be aware that AI systems are biased and their responses are likely biased. Any content generated by an AI system should be evaluated for potential bias. Be aware that AI systems may behave in unexpected ways. Implement new AI solutions slowly to account for the unexpected. Test those systems and try to better understand how they work in different contexts. 
Be aware of the security and privacy concerns for AI, be sure to use the right tool for the job and train those at your institute appropriately. Consider the climate impact of your AI usage and proceed in a manner that makes efficient use of resources. Be transparent about your use of AI. Overall, we hope that awareness of these concerns and the tips we shared will help us all use AI tools more responsibly. We recognize, however, that this is emerging technology and more ethical issues will emerge as we continue to use these tools in new ways. Staying up-to-date on the current ethical considerations will also help us all continue to use AI responsibly. References "],["algorithm-considerations.html", "Algorithm considerations Harmful or Toxic Responses Lack of Interpretability Misinformation and Faulty Responses Summary", " Algorithm considerations In this chapter we will discuss some of the major ethical considerations regarding the algorithms underlying AI tools. We will provide some tips for how to deal with these issues that may be useful for creating AI guidelines at your institution. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Toxic Responses - Currently it is not clear how well generative AI models restrict harmful responses in terms of ideas, code, text, etc. Lack of Interpretability - When complicated algorithms are used within AI systems, it can be unclear how it came up with a decision. In many circumstances it is necessary to understand how the AI system works to know how to proceed. Misinformation and Faulty Responses - Fake or manipulated data used to help design algorithms could be believed to be correct and this could be further propagated. Text, code, etc. 
provided to users may not be correct or optimal for a given situation, and may have at times severe downstream consequences. Note that this is an incomplete list; additional ethical concerns will become apparent as we continue to use these new technologies. We highly suggest that users of these tools be careful to learn more about the specific tools they are interested in and to be transparent about the use of these tools, so that as new ethical issues emerge, we will be better prepared to understand the implications. Harmful or Toxic Responses One major concern is the use of AI to generate malicious content. Secondly the AI itself may accidentally create harmful responses or suggestions. For instance, AI could start suggesting the creation of code that spreads malware or hacks into computer systems. Another issue is what is called “toxicity”, which refers to disrespectful, rude, or hateful responses (Nikulski (2021)). These responses can have very negative consequences for users. Ultimately these issues could cause severe damage to individuals and organizations, including data breaches and financial losses. AI systems need to be designed with safeguards to avoid harmful responses, to test for such responses, and to ensure that the system is not infiltrated by additional possibly harmful parties. Tips for avoiding the creation of harmful content For decision makers about AI use: Be careful about what commercial tools you employ, they should be transparent about what they do to avoid harm. Be careful about the context in which you might have people use AI - will they know how to use it responsibly? Be careful about what content you share publicly, as it could be used for malicious purposes. Consider how the content might be used by others for unintended purposes. Ask the AI tools to help you, but do not rely on them alone. Possible Generative AI Prompt: What are the possible downstream uses of this content? 
Possible Generative AI Prompt: What are some possible negative consequences of using this content? For decision makers about AI development: If designing a system, ensure that best practices are employed to avoid harmful responses. This should be done during the design process and the system should also be regularly evaluated. Some development systems such as Amazon Bedrock have tools for evaluating toxicity to test for harmful responses. Although such systems can be helpful to automatically test, evaluation should also be done directly by humans. Consider how the content from AI tools that you design might be used by others for unintended purposes. Monitor your tools for unusual and harmful responses. Lack of Interpretability There is risk in using AI tools, that we may encounter situations where it is unclear why the AI system came to a particular result. AI systems that use more complicated algorithms can make it difficult to trace back the decision process of the algorithm. Using content created or modified by AI, could make it difficult for others to understand if the content is adequate or appropriate, or to identify and fix any issues that may arise. This could result in negative consequences, such as for example reliance on a system that distinguishes consumers or patients based on an arbitrary factor that is actually not consequential. Decisions based on AI responses therefore need to be made extra carefully and with clarity about why the AI system may be indicating various trends or predictions. Tips for avoiding a lack of interpretability For decision makers about AI use: Content should be reviewed by those experienced in the given field. Ask AI tools to help you understand how it got to the response that it did, but get expert assistance where needed. Always consider how an AI system derived a decision if the decision is being used for something that impacts humans Possible Generative AI Prompt: Can you explain how you generated this response? 
For decision makers about AI developers: New AI tools should be designed with interpretability in mind, simpler models may make it easier to interpret results. Responses from new tools should be reviewed by those experienced in the given field. Provide transparency to users about how new AI tools generally create responses. Misinformation and Faulty Responses AI tools use data that may contain false or incorrect information and may therefore respond with content that is also false or incorrect. This is due to a number of reasons: AI tools may “hallucinate” fake responses based on artifacts of the algorithm AI tools may be trained on data that is out-of-date AI tools may be trained on data that has fake or incorrect information AI tools are not necessarily trained for every intended use and may therefore not reflect best practices for a given task or field AI tools may also report that fake data is real, when it is in fact not real. For example, currently at the time of the writing of this course, older versions of ChatGPT will report citations with links that are not always correct and it doesn’t seem to be able to correct itself very well when challenged. Furthermore, AI models can “hallucinate” incorrect responses based on artifacts of the algorithm underneath the tool. These responses are essentially made up by the tool. It is difficult to know when a tool is hallucinating especially if it is a tool that you did not create, therefore it is important to review and check responses from AI tools. There is also a risk that content written with AI tools, may be incorrect or inappropriate for the given context of intended use, or they may not reflect best practices for a given context or field. The tools are limited to the data they were trained on, which may not reflect your intended use. It is also important to remember that content generated by AI tools is not necessarily better than content written by humans. 
Additionally review and auditing of AI-generated content by humans is needed to ensure that they are working properly and giving expected results. Tips for reducing misinformation & faulty responses For decision makers about AI use: Be aware that some AI tools currently make up false information based on artifacts of the algorithm called hallucinations or based on false information in the training data. Do not assume that the content generated by AI is real or correct. Realize that AI is only as good or up-to-date as what it was trained on, the content may be generated using out-of-date data. Look up responses to ensure it is up-to-date. In many cases utilizing multiple AI tools can help you to cross-check the responses (however be careful about the privacy of each tool if you use any private or proprietary data in your prompts!). Ask the AI tools for extra information about if there are any potential limitations or weaknesses in the responses, but keep in mind that the tool may not be aware of issues and therefore human review is required. The information provided by the tool can however be a helpful starting point. Possible Generative AI Prompt: Are there any limitations associated with this response? Possible Generative AI Prompt: What assumptions were made in creating this content? For decision makers about AI development: Monitor newly developed tools for accuracy Be transparent with users about the limitations of the tool Consider training generative AI tools to have responses that are transparent about limitations of the tool. Example 3 Real World Example Stack Overflow, a popular community-based website where programmers help one another, has (at the time of writing this) temporarily banned users from answering questions with AI-generated code. This is because users were posting incorrect answers to questions. It is important to follow policies like this (as you may face removal from the community). 
This policy goes to show that you really need to check the code that you get from AI models. While they are currently helpful tools, they do not know everything. Summary Here is a summary of all the tips we suggested: Design new AI systems with interpretability in mind Don’t assume AI-generated content is real, accurate, consistent, current, or better than that of a human. Ask the AI tools to help you understand: Sources for the content that you can cite Any decision processes in how the content was created Potential limitations Potential security or privacy issues Potential downstream consequences of the use of the content Always have expert humans review/auditing and value your own contributions and thoughts. Overall, we hope that these guidelines and tips will help us all use AI tools more responsibly. We recognize however, that as this is emerging technology and more ethical issues will emerge as we continue to use these tools in new ways. AI tools can even help us to use them more responsibly when we ask the right additional questions, but remember that human review is always necessary. Staying up-to-date on the current ethical considerations will also help us all continue to use AI responsibly. References "],["adherence-practices.html", "Adherence practices Start Slow Check for Allowed Use Use Multiple AI Tools Educate Yourself and Others Summary", " Adherence practices Here we suggest some simple practices that can help you and others at your institution to better adhere to current proposed ethical guidelines. Start Slow - Starting slow can allow for time to better understand how AI systems work and any possible unexpected consequences. Check for Allowed Use - AI model responses are often not transparent about using code, text, images and other data types that may violate copyright. They are currently not necessarily trained to adequately credit those who contributed to the data that may help generate content. 
Use Multiple AI Tools - Using a variety of tools can help reduce the potential for ethical issues that may be specific to one tool, such as bias, misinformation, and security or privacy issues. Educate Yourself and Others - To actually comply with ethical standards, it is vital that users be educated about best practices for use. If you help set standards for an institution or group, it is strongly advised that you carefully consider how to educate individuals about those standards of use. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Start Slow Launching large projects using AI before you get a chance to test them could lead to disastrous consequences. Unforeseen challenges have already come up with many uses of AI, so it is wise to start small and do evaluation first before you roll out a system to more users. This also gives you time to correspond with legal, equity, security, etc. experts about the risks of your AI use. Tips for starting slow For decision makers about AI users: Consider an early adopters program to evaluate usage. Educate early users about the limitations of AI. Consider using AI first for more specific purposes. Consult with experts about potential unforeseen challenges. Continue to assess and evaluate AI systems over time. For decision makers about AI developers: Consider developing tools for more simple specific tasks, rather than broad difficult tasks. Consider giving potential users guidance about using the tool for simpler tasks at first. Continue to assess and evaluate AI systems over time. Example 4 Real-World Example IBM created Watson, an AI system that participated and won on the game show Jeopardy! and showed promise for advancing healthcare. 
However IBM had lofty goals for Watson to revolutionize cancer diagnosis, yet unexpected challenges resulted in unsafe and incorrect responses. IBM poured many millions of dollars in the next few years into promoting Watson as a benevolent digital assistant that would help hospitals and farms as well as offices and factories. The challenges turned out to be far more difficult and time-consuming than anticipated. IBM insists that its revised A.I. strategy — a pared-down, less world-changing ambition — is working ((lohr_what_2021?)). See here for additional info: https://ieeexplore.ieee.org/abstract/document/8678513 Check for Allowed Use When AI systems are trained on data, they may also learn and incorporate copyrighted information or protected intellectual property. This means that AI-generated content could potentially infringe on the copyright or protection of trademarks or patents of the original author. For more extreme examples, if an AI system is trained on an essay or art or in some cases even code written by a human, the AI system could generate responses that are identical to or very similar to that of the original author, which some AI tools have done. Regardless, even training AI tools on copyrighted information where the responses are still relatively different, if the AI system uses this content without permission from the original author, this could constitute copyright or trademark infringement (Brittain and Brittain (2023)). Example 5 Open AI is facing lawsuits about using writing from several authors to train ChatGPT without permission from the authors. While this poses legal questions, it also poses ethical questions about the use of these tools and what it means for the people who created content that helped train AI tools. How can we properly give credit to such individuals? 
The lawsuits are summarized by Brittain and Brittain (2023): The lawsuit is at least the third proposed copyright-infringement class action filed by authors against Microsoft-backed OpenAI. Companies, including Microsoft (MSFT.O), Meta Platforms (META.O) and Stability AI, have also been sued by copyright owners over the use of their work in AI training. The new San Francisco lawsuit said that works like books, plays and articles are particularly valuable for ChatGPT’s training as the “best examples of high-quality, long form writing.” OpenAI and other companies have argued that AI training makes fair use of copyrighted material scraped from the internet. The lawsuit requested an unspecified amount of money damages and an order blocking OpenAI’s “unlawful and unfair business practices.” AI poses questions about how we define art and if AI will reduce the opportunities for employment for human artists. See here for an interesting discussion, in which it is argued that AI may enhance our capacity to create art. This will be an important topic for society to consider. Tips for checking for allowed use For decision makers about AI use: Be transparent about what AI tools you use to create content. Ask the AI tools if the content it helped generate used any content that you can cite. Possible Generative AI Prompt: Did this content use any content from others that I can cite? For decision makers about AI development: Obtain permission from the copyright holders of any content that you use to train an AI system. Only use content that has been licensed for use. Cite all content that you can. Use Multiple AI Tools Only using one AI tool can increase the risk of the ethical issues discussed. For example, it may be easier to determine if a tool is incorrect about a response if we see that a variety of tools have different answers to the same prompt. Secondly, as our technology evolves, some tools may perform better than others at specific tasks. 
It is also necessary to check responses over time with the same tool, to verify that a result is even consistent from the same tool. Tips for using multiple AI tools For decision makers about AI use: Check that each tool you are using meets the privacy and security restrictions that you need. Utilize platforms that make it easier to use multiple AI tools, such as https://poe.com/, which has access to many tools, or Amazon Bedrock, which actually has a feature to send the same prompt to multiple tools automatically, including for more advanced usage in the development of models based on modifying existing foundation models. Evaluate the results of the same prompt multiple times with the same tool to see how consistent it is over time. Use slightly different prompts to see how the response may change with the same tool. Consider if using tools that work with different types of data may be helpful for answering the same question. For decision makers about AI development: Consider if using different types of data may be helpful for answering the same question. Consider promoting your tool on platforms that allow users to work with multiple AI tools. Educate Yourself and Others There are many studies indicating that individuals typically want to comply with ethical standards, but it becomes difficult when they do not know how (Giorgini et al. (2015)). Furthermore, individuals who receive training are much more likely to adhere to standards (Kowaleski, Sutherland, and Vetter (2019)). Properly educating those you wish to comply with standards can better ensure that compliance actually happens. It is especially helpful if training materials are developed to be especially relevant to the actual potential uses by the individuals receiving training and if the training includes enough fundamentals so that individuals understand why policies are in place. 
Example 6 Real-World Example A lack of proper training at Samsung led to a leak of proprietary data due to unauthorized use of ChatGPT by employees – see https://cybernews.com/news/chatgpt-samsung-data-leak for more details: “The information employees shared with the chatbot supposedly included the source code of software responsible for measuring semiconductor equipment. A Samsung worker allegedly discovered an error in the code and queried ChatGPT for a solution. OpenAI explicitly tells users not to share “any sensitive information in your conversations” in the company’s frequently asked questions (FAQ) section. Information that users directly provide to the chatbot is used to train the AI behind the bot. Samsung supposedly discovered three attempts during which confidential data was revealed. Workers revealed restricted equipment data to the chatbot on two separate occasions and once sent the chatbot an excerpt from a corporate meeting. Privacy concerns over ChatGPT’s security have been ramping up since OpenAI revealed that a flaw in its bot exposed parts of conversations users had with it, as well as their payment details in some cases. As a result, the Italian Data Protection Authority has banned ChatGPT, while German lawmakers have said they could follow in Italy’s footsteps.” Tips to educate yourself and others For decision makers about AI use: Emphasize the importance of training and education. Recognize that general AI literacy to better understand how AI works, can help individuals use AI more responsibly. Seek existing education content made by experts that can possibly be modified for your use case. Consider how often people will need to be reminded about best practices. Should training be required regularly? Should individuals receive reminders about best practices especially in contexts in which they might use AI tools? Make your best practices easily findable and help point people to the right individuals to ask for guidance. 
Recognize that best practices for AI will likely change frequently in the near future as the technology evolves, education content should be updated accordingly. For decision makers about AI development: Emphasize the importance of training and education. Recognize that more AI literacy to better understand security, privacy, bias, climate impact and more can help individuals develop AI more responsibly. Seek existing education content made by experts that can possibly be modified for your use case. Consider how often people will need to be reminded about best practices. Should training be required regularly? Should individuals receive reminders about best practices especially in contexts in which they might develop AI tools. Make your best practices easily findable and help point people to the right individuals to ask for guidance. Recognize that best practices for AI will likely change frequently in the near future as the technology evolves, education content should be updated accordingly. We have also included an optional section for new developers about considerations for testing and training data to ensure accurate assessment of performance. Effective use of Training and Testing data In the previous chapters, we started to think about the ethics of using representative data for building our AI model. In this chapter we will see that even if our data is inclusive and represents our population of interest, issues can still happen if the data is mishandled during the AI model building process. Let’s take a look at how that can happen. Population and sample The data we collect to train our model is typically a limited representation of what we want to study, and as we explored in the previous chapter, bias can arise through our choice of selection. Let us define two terms commonly used in artificial intelligence and statistics: the population is the entire group of entities we want to get information from, study, and describe. 
If we were building an artificial intelligence system to classify dog photographs based on their breeds, then the population is every dog photograph in the world. That’s prohibitively expensive and not easy data to acquire, so we use a sample, which is a subset of the population, to represent our desired population. Even if we are sure that the sample is representative of the population, a different type of bias, in this case statistical bias, can arise. It has to do with how we use the sample data for training and evaluating the model. If we do this poorly, it can result in a model that gives skewed or inaccurate results at times, and/or we may overestimate the performance of the model. This statistical bias can also result in the other type of bias we have already described, in which a model unfairly impacts different people, often called unfairness. There are many other sources of unfairness in model development - see Baker and Hawn (2022). Training data The above image depicts some of our samples for building an artificial intelligence model to classify dog photographs based on their breeds. Each dog photograph has a corresponding label that gives the correct dog breed, and the goal of the model training process is to have the artificial intelligence model learn the association between photograph and dog breed label. For now, we will use all of our samples for training the model. The data we use for model training is called the training data. Then, once the model is trained and has learned the association between photograph and dog breed, the model will be able to make new predictions: given a new dog image without its breed label, the model will make a prediction of what its breed label is. Testing data To evaluate how good this model is at predicting dog breeds from dog images, we need to use some of our samples to evaluate our model. The samples being used for model evaluation are called the testing data. 
For instance, suppose we used these four images to score our model. We give it to our trained model without the true breed label, and then the model makes a prediction on the breed. Then we compare the predicted breed label with the true label to compute the model accuracy. Evaluation Suppose we get 3 out of 4 breed predictions correct. That would be an accuracy of 75 percent. Proper separation of Training and Testing data However, we have inflated our model evaluation accuracy. The samples we used for model evaluation were also used for model training! Our training and testing data are not independent of each other. Why is this a problem? When we train a model, the model will naturally perform well on the training data, because the model has seen it before. This is called Overfitting. In real life, when the dog breed image labeling system is deployed, the model would not be seeing any dog images it has seen in the training data. Our model evaluation accuracy is likely too high because it is evaluated on data it was trained on. Let’s fix this. Given a sample, we split it into two independent groups for training and testing. We use the training data for training the model, and we use the testing data for evaluating the model. They don’t overlap. When we evaluate our model under this division of training and testing data, our accuracy will look a bit lower compared to our first scenario, but that is more realistic of what happens in the real world. Our model evaluation needs to be a simulation of what happens when we deploy our model into the real world! Validation Note that there should actually be an intermediate phase called validation, where we fine tune the model to be better at performing, in other words to improve the accuracy of predicting dog breeds, this should also ideally use a dataset that is independent from the training and testing set. 
You may also hear people use these two terms in a different order, where testing refers to the improvement phase and validation refers to the evaluation of the general performance of the model in other contexts. Sometimes the validation set for fine tuning is also called the development set. There are clever ways of taking advantage of more of the data for validation data, such as a method called “K-Fold cross validation”, in which many training and validation data subsets are trained and evaluated, both for more validation and to determine if performance is consistent across more of the data. This is especially beneficial if there is diversity within the dataset, to better ensure that the model performs well on some of the rarer data points (for example, a more rare dog breed) (“Training, Validation, and Test Data Sets” (2023)). Conclusions This seemingly small tweak in how data is partitioned during model training and evaluation can have a large impact on how artificial intelligence systems are evaluated. We always should have independence between training and testing data so that our model accuracy is not inflated. If we don’t have this independence of training and testing data, many real-life promotions of artificial intelligence efficacy may be exaggerated. Imagine that someone claimed that their cancer diagnostic model from a blood draw is 90% accurate. But their testing data is a subset of their training data. That would over-inflate their model accuracy, and it will be less accurate than advertised when used on new patient data. Doctors would make clinical decisions on inaccurate information and potentially create harm. Summary Here is a summary of all the tips we suggested: Disclose when you use AI tools to create content. Be aware that AI systems may behave in unexpected ways. Implement new AI solutions slowly to account for the unexpected. Test those systems and try to better understand how they work in different contexts. 
Adhere to restrictions for use of data and content created by AI systems where possible; citing the AI system itself and learning how the tool obtained permission for use can help reduce risk. Cross-check content from AI tools by using multiple AI tools and checking for consistent results over time. Check that each tool meets the privacy and security restrictions that you need. Emphasize training and education about AI and recognize that best practices will evolve as the technology evolves. Overall, we hope that these suggestions will help us all use AI tools more responsibly. We recognize, however, that as this is emerging technology, more ethical issues will emerge as we continue to use these tools in new ways. AI tools can even help us to use them more responsibly when we ask the right additional questions, but remember that human review is always necessary. Staying up-to-date on the current ethical considerations will also help us all continue to use AI responsibly. References "],["consent-and-ai.html", "Consent and AI Summary", " Consent and AI Much of the world is developing data privacy regulations, as many individuals value their right to better control how others can collect and store data about them (Chaaya (2021)). While data collection concerns have been increasing for years, AI systems present new challenges (Pearce (2021); Tucker (2018)): Accountability - It is more difficult to determine who is accountable at times when separate parties may collect versus redistribute data, versus use data (Hao (2021)) Data Persistence - Since data can rapidly be redistributed it can be difficult to remove data (Gangarapu (2022); Hao (2021)). Data reuse - Data collected for one purpose is getting reused for other purposes that may dramatically change over time. For example data collected on food purchases for food companies could get reused by insurance companies to determine health risk based on dietary behavior. 
Data spillover - Accidental data collection due to collection for a different purpose, for example a photo of someone with other individuals in the background Trickier Consent - Consent to allow data collection is trickier as it is less clear that users understand the potential risks (Andreotta, Kirkham, and Rizzi (2022)) Easier Data Collection/Translation - AI makes it really easy to collect and record new forms of data about individuals such as transcriptions of meetings. This is making it easier for people to record people without their consent and poses privacy risks (Elefant (2023)). Consent is especially a concern for healthcare research, where potential participants need to understand the potential risks of participation. Yet, the risks of data collection continue to evolve. See https://link.springer.com/article/10.1007/s00146-021-01262-5 for deeper discussions on the topic. Example 7 Real-World Example Facial Recognition technology has been an especially debated topic. There have been many instances of unethical practices including collecting and reusing data without consent, collecting data on particularly vulnerable populations that could easily be misused, and creating tools that perpetuate bias and harm, such as a tool that was aimed at predicting if someone was likely to be a criminal. A Berlin-based artist Adam Harvey created a project and website that flags questionable datasets and discusses ethical issues around facial recognition. I wanted to uncover the uncomfortable truth that many of the photos people posted online have an afterlife as training data (Van Noorden (2020)) See these articles for more information: https://www.nature.com/articles/d41586-020-03187-3 https://learn.g2.com/ethics-of-facial-recognition https://www.technologyreview.com/2021/08/13/1031836/ai-ethics-responsible-data-stewardship/ Another important consideration is consent or awareness that you are viewing AI generated content that may be fake. 
The EU AI act has many regulations regarding this, as well as notifying and consenting individuals about when AI tools are being used on them. Example 8 The EU AI act includes regulations for many things including banning predictive policing technology and requiring consent for emotional recognition technology in work and at school. Despite many likely very useful restrictions including around facial images, a major debate has been about a lack of restriction for live facial recognition. The potential for harm across different data types and advances in technology will continue to create new ethical challenges. It has been argued that there are possible uses that should be exempt, such as live facial recognition to locate human trafficking victims. See here to learn more about this debate. Tips to encourage responsible consent practices For decision makers about AI use: Emphasize education about consent practices Stay up-to-date on current issues related to consent. Encourage usage of tools that are transparent about using responsible consent practices. Encourage users to be careful what data they upload or allow AI tools to use. For decision makers about AI development: Emphasize education of AI developers about consent considerations, guidelines, and regulations. Stay up-to-date on current issues related to consent. Be considerate of the data that you use for AI tools and how it was collected and if individuals consented to the collection and distribution of the data Be transparent with users about what consent practices were used for the data utilized by the tool. Be transparent with users about what may happen with their responses if they are being collected. Summary Overall the consent process is particularly challenging and consideration should especially be centered on the rights of the individuals who may have data collected about them. 
We hope that awareness of some of the major challenges can help you to more responsibly implement any consenting processes that may be needed for AI tools that you employ or develop. We advise that you speak with ethical and legal experts. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["idare-and-ai.html", "IDARE and AI AI is biased Examples of AI Bias Be extremely careful using AI for decisions More inclusive teams means better models Access Summary", " IDARE and AI IDARE stands for Inclusion, Diversity, Anti-Racism, and Equity. It is an acronym used by some institutions (such as the Johns Hopkins Bloomberg School of Public Health, the University of California, Davis, and the University of Pennsylvania Perelman School of Medicine) to remind people about practices to improve social justice. As we strive to use AI responsibly, keeping the major principles of IDARE in mind will be helpful to better ensure that individuals of all backgrounds and life experiences more equally benefit from advances in technology and that technology is not used to perpetuate harm. AI is biased Humans are biased, therefore data from text written by humans is often also biased, which means AI systems built on human text are trained to be biased, even those created with the best intentions (Pethig and Kroenung (2023)). To better understand your own personal bias, consider taking a test at https://implicit.harvard.edu/. It is nearly impossible to create a training dataset that is free from all possible bias and include all possible example data, so by necessity the data used to train AI systems are generally biased in some way and lack data about people across the full spectrum of backgrounds and life experiences. 
This can lead to AI-created products that cause discrimination, abuse, or neglect for certain groups of people, such as those with certain ethnic or cultural backgrounds, genders, ages, sexuality, capabilities, religions or other group affiliations. Our goal is to create and use AI systems that are as inclusive and unbiased as possible while also keeping in mind that the system is not perfect. To learn more about how AI algorithms become biased, see https://www.criticalracedigitalstudies.com/peoplesguide. Algorithmic Fairness - The field of algorithmic fairness aims to mitigate the effects of bias in models or algorithms in AI. Importantly issues with bias can occur in all steps of model development. (J. Huang et al. (2022), Baker and Hawn (2022)). There are experts in fairness that can help you to avoid the potential harm caused by bias in your AI development. Examples of AI Bias There are many examples in which biased AI systems were used in a context with negative consequences. Amazon’s resume system was biased against women Amazon used an AI system to help filter candidates for jobs. They started using the system in 2014. In 2015, it was discovered that the system penalized resumes that included words like “women’s”, and also for graduates of two all-women’s colleges (Dastin (2018)). How did this happen? The model was trained on resumes of existing Amazon employees and most of their employees were male. Thus the training data for this system was not gender inclusive, which led to bias in the model. X-ray studies show AI surprises Algorithms used to evaluate medical images seem to be predicting the self-reported race of the individuals in the images from the images alone (Gichoya et al. (2022)). This is despite the fact that the radiologists examining those same images were not able to identify what aspect about the images helped the AI systems identify the race of the individuals. Why is this a problem? 
That information from models that evaluate medical images are being used to help suggest care. It is recognized that health disparities exist in the treatment of different racial groups. Therefore bias related to these disparities may be perpetuated by algorithms even when the AI system is trained in a manner that is “blind” to the self-reported race of the individuals. This example shows that AI systems can possibly amplify existing biases even when humans are unaware of the AI systems using those biases to make decisions. This is especially a problem, as some populations are under-diagnosed and therefore denied care or they receive poorer care because an AI system does not work as well for their population (Ricci Lara, Echeveste, and Ferrante (2022)). As an example, a study evaluating diagnosis of various diseases from chest X-ray images, found that certain groups of patients, such as females, those under 20, those who self report as Black or Hispanic, were more likely to be falsely flagged by AI system as healthy when they in fact had an issue (Seyyed-Kalantari et al. (2021)). Another example shows that processing of cardiac images from specific patient populations is much poorer using models where the training set was not diverse enough (Puyol-Anton et al. (2021)). However, there is promise for good AI systems to mitigate bias. For example, a team studying pain levels in osteoarthritis (a disease where under-served populations often have higher than expected levels of pain) found that using predictions of pain based on AI system examining images were much more accurate than predictions from radiologists examining those same images (Pierson et al. (2021)). A magazine article describing this work stated: In this case, researchers were training the models based on physician reports of pain, and since doctors are less likely to believe marginalized people when they report pain, this algorithm replicated this bias. 
When a team of computer scientists at the University of California, Berkeley, tweaked the algorithm to factor in patient pain reports rather than a physician’s, however, they eliminated that racial bias, paving the way for more equitable treatment of osteoarthritis.” (Arnold (2022)) Tips for Mitigating Bias AI tools with training data that lacks data about certain ethnic or gender groups or disabled individuals could result in responses that do not adequately consider these groups, ignores them altogether, or makes false associations. For decision makers about AI use: Where possible, use tools that are transparent about what training data was used and limitations of this data and actively evaluate the data for bias including: if the dataset includes any harmful data, such as discriminatory and false associations if training data is adequately inclusive for the given needs Where possible, users of commercial AI tools should ask prompts in a manner that includes concern for equity and inclusion Always question the responses from the tool for possible bias. Obtain expert review where possible. Start slowly if rolling out the usage of new AI tools and continue to monitor used AI tools for bias. Possible Generative AI Prompt: Why did you assume that the individual was male? For decision makers about AI development: Be careful to use datasets that do not contain harmful data, such as discriminatory and false associations. Use datasets that are adequately inclusive for the given needs. Evaluate the training data and the model for biases and false associations as it is being developed instead of waiting to test the product after creation is finished. Verify that the product works properly for potential use cases from a variety of ethnic, gender, ability, socioeconomic, language, and educational backgrounds. When possible, the developers should also augment the training dataset with data from groups that are missing or underrepresented in the original training dataset. 
Potentially consider creating different models for different populations to obtain better performance for different groups of people. However, be careful to be inclusive in the creation of such models. Seek expert evaluation of your tools for bias. Be transparent about possible bias or dataset limitations to users. Be extremely careful using AI for decisions There is a common misconception that AI tools might make better decisions for humans because they are believed to not be biased like humans (Pethig and Kroenung (2023)). However since they are built by humans and trained on human data, they are also biased. It is possible that AI systems specifically trained to avoid bias, to be inclusive, to be anti-racist, and for specific contexts may be helpful to enable a more neutral party, but that is generally not currently possible. We highly suggest caution with using AI to make or help make employment decisions about applicants or employees at this time. This includes recruitment, hiring, retention, promotions, transfers, performance monitoring, discipline, demotion, terminations, or other similar decisions. At a minimum, humans should be involved in testing the AI system, evaluating the results of the AI system, and monitoring the system’s behavior over time. Experts of algorithm fairness should be consulted. More inclusive teams means better models It is vital that teams hired for the development, auditing or testing of AI tools be as inclusive as possible and should follow the current best IDARE practices for hiring standards. This will help to ensure that different perspectives and concerns are considered. Access Improving access for all individuals holds the power to make the benefits of AI and other technology a reality to everyone. However expanding access should be done mindfully to empower others, rather than to exploit or create further vulnerability. 
The Bill and Melinda Gates Foundation has suggested principles (“The First Principles Guiding Our Work with AI” (n.d.)) for their work to expand AI access responsibly, including the following summarized here: Adhering to core values of helping all people reach their full potential Promoting co-design and inclusivity by including individuals in low-income settings to be collaborators and partners and acknowledging infrastructure limitations. Proceeding responsibly with continuous improvement in a step-wise fashion Addressing Privacy and security concerns by regularly performing assessments and ensuring compliance with relevant regulations and laws, as well as careful consent practices Building equitable access - focusing not just on access distribution but on equitable ownership and maintenance and development Committing to transparency - Sharing information for public good Summary In summary we suggest you consider the following to better promote the well-being of all individuals when approaching AI: Recognize that humans are biased and AI systems created by humans are therefore biased. They typically currently enhance bias, unless mindfully engineered for specific contexts with appropriate training data. Recognize that sometimes AI works in unexpected ways and systems can be biased in ways that are not fully understood Testing, auditing, and questioning AI systems about bias can help mitigate harm Using AI for decisions at this point in time could be very harmful towards vulnerable populations. AI should not be used for any important decisions without human oversight. 
More inclusive AI teams can help us build more responsible and more useful models Enhancing access to AI tools has the potential to improve the well-being of individuals in places with other limited technology or healthcare access, however this needs to be done in a collaborative manner to avoid harm and exploitation Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. References "],["ethical-process.html", "Ethical process Ethical Use Process Ethical Development Process Summary", " Ethical process The concepts for ethical AI use are still highly debated as this is a rapidly evolving field. However, it is becoming apparent based on real-world situations that ethical consideration should occur in every stage of the process of use and development. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Ethical Use Process Here is a proposed framework for using AI more ethically. This should involve active consideration at three stages: the inception of an idea, during usage, and after usage. Reflection during inception of the idea Consider if AI is actually needed and appropriate for the potential use. Consider the possible downstream consequences of use. Consider the following questions: What could happen if the AI system worked poorly? Are tools mature enough for your specific use? Should you start smaller? Do you need a tool that is designed for sensitive data? Are the tools you are considering well-made for the job with transparency about how the tool works? Are the training sets for the tools appropriate for your use to avoid issues like bias and faulty responses? 
Reflection during use While using AI tools consider the following: Ask the tool how it is making decisions Evaluate the validity of the results Test for bias by asking bias related prompts Test if the results are consistent across time Test if the results are consistent across tools Reflection after use After using AI tools consider the following questions: How can you be transparent about how you used the tool so that others can better understand how you created content or made decisions? What might the downstream consequences be of your use, should you actually use the responses or were they not accurate enough, are there remaining concerns of bias? Ethical Development Process Here is a proposed framework for developing AI tools more responsibly. This should involve active consideration at four stages: the inception of an idea, pre-development planning, during development, and after development. Reflection during inception of the idea Consider if AI is actually needed and appropriate for the potential use. Consider the possible downstream consequences of development. Consider the following questions: What could happen if the AI system worked poorly? How might people use the tool for other unintended uses? Can you start smaller and build on your idea over time? Planning Reflections While you are planning to develop consider the following: Do you have appropriate training data to avoid issues like bias and faulty responses? Are the rights of any individuals violated by you using that data? Do you need to develop a tool that is designed for sensitive data? How might you protect that data? How large does your data really need to be - how can you avoid using unnecessary resources to train your model? 
Development Reflection While actively developing an AI tool, consider the following: Make design decisions based on best practices for avoiding bias Make design decisions based on best practices for protecting data and securing the system Consider how interpretable the results might be given the methods you are trying Test the tool as you develop for bias, toxic or harmful responses, inaccuracy, or inconsistency Can you design your tool in a way that supports transparency, perhaps generate logs about usage for users Post-development Reflection Consider the following after developing an AI tool: Continual auditing is needed to make sure no unexpected behavior occurs, that the responses are adequately interpretable, accurate, and not harmful especially with new data, new uses, or updates Consider how others might use or be using the tool for alternative usage Deploy your tool with adequate transparency about how the tool works, how it was made, and who to contact if there are issues Summary In summary, to use and develop AI ethically, consideration for impact should occur across the entire process from the stage of forming an idea, to planning, to active use or development, and afterwards. We hope these frameworks help you to consider your AI use and development more responsibly. –> "],["introduction-to-determining-ai-needs.html", "Introduction to Determining AI Needs Motivation Target Audience Curriculum", " Introduction to Determining AI Needs Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Motivation There are an ever increasing number of options, strategies and solutions for integrating AI solutions into a project. It can feel overwhelming to understand what these options entail let alone understand how to decide what solution best fits a use case. 
In this course we aim to give individuals the basic info they need to make basic plans for integrating AI tools into their project. Target Audience The course is intended for individuals who have an AI related project in mind or think that they might need to incorporate AI into their project. They are likely the leader who is guiding others in an AI related project and not necessarily the person who will write code or carry out the technical aspects of the project. Curriculum What this course covers: What are the practical aspects of AI that need to be understood before endeavoring on an AI project? What makes an AI model good? How do you determine what kinds of custom AI solutions your project needs if any? What aspects of your resources and your project should you consider when evaluating AI strategies? What would better suit your needs: an “out of the box” AI product or building an AI model solution “from scratch”? Examples of currently existing AI solutions that may suit an individual’s AI needs. What this course does NOT cover: This is NOT a comprehensive survey of the AI tools and products in existence. Even if it was comprehensive at this time, there are new tools and developments constantly arriving. We merely give examples of solutions that show a possible AI solution. There may be competitors or similar solutions out there that would even better fit a project’s needs. This does NOT cover in depth aspects of algorithms, statistics or mathematics behind AI algorithms – these are numerous and not always necessary to understand in fine detail for making decisions about projects. This does NOT cover how to complete or write code for an AI project. This is not a tutorial for building an AI tool. Instead we merely give strategies you could employ but we do not give details on how you might employ them. There are too many ways that AI tools may be built – this is outside the scope of this course. 
–> "],["what-are-the-components-of-ai.html", "What are the components of AI? Learning objectives: Intro What makes an AI model accurate? What makes an AI model efficient? Putting it together", " What are the components of AI? Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Learning objectives: Understand what makes a good AI model Describe what makes a model accurate Understand fundamentals about what makes AI models computationally efficient Describe components of LLMs and other AI models and how training data is critical to their accuracy Intro What makes the AI chatbots’ performance today so vastly improved from previous chatbots? Like those that resembled office supplies and helped us write documents? In this chapter we’ll discuss some generalities of how AI works and what makes an AI tool good. A good AI model is accurate – you need it to give answers that are correct or at least useful. They are also computationally efficient because we need them to give the answer back in a reasonable amount of time. We also don’t want to spend tons of money on the computation it takes for the chatbot to work. What makes an AI model accurate? Let’s talk about the basics of what makes an AI model accurate. In order to understand this, we need to discuss some principles behind Machine learning. Picture you were teaching someone (like an AI model) to identify apples from bananas. The training data you might give them would be a series of apples and bananas and you would label which were bananas versus apples. You could then test the model’s ability to identify apples and bananas based on this training by giving them a fruit to identify. Assuming the fruit you gave them is reasonably identifiable from their training, they should accurately identify an apple. 
However, if the test you give the model is outside the kind of data they were trained on, they might not do well with it. For example, if you didn’t provide any green apples and then you test the model with a green apple, it may or may not succeed. To address this gap in the model’s knowledge, you might add supplementary training data and retrain it so that it understands that apples could also be green. However, this added training data may help for the identification of green apples, but if given something similar to an apple but not an apple – say a pear – it may incorrectly identify a pear as an apple if it hasn’t ALSO been trained on pears. This may feel silly to you – why couldn’t it identify a pear – but this is because you are a really well trained AI. (Actually just the I, you presumably aren’t artificial). You’ve seen lots of fruit in your life – you’ve collected a lot of training data on this task and have no problem identifying a pear from an apple. But we could throw you off too. When you look at this image of a hybrid apple-banana, if AI models could feel, this is how they would feel. What makes an AI model efficient? Let’s talk about the basics of what makes an AI model efficient. In order to understand this, we need to discuss some principles behind Machine learning. Let’s return to apples. With the above image, you don’t need much time to look at that picture and know that that is an apple. You don’t have to think about this for very long. With the above image, you don’t need much time to look at that picture and know that that is an apple. You don’t have to think about this for very long. You didn’t take in one piece of information at a time. This type of information processing is what neural networks are based on. Neural networks are when computers mimic how brains work to process information. Think about how you’d read the following paragraph: Did you read each word, in order from start to end? OR Did you pick out keywords by skimming and getting the gist? 
Maybe later going back to pick up context you missed? The old way AI models worked is that they would read sequentially – from start to finish. And as you may sense, that is a slower way to read. Alternatively, the new algorithms often use Attention mechanisms. These algorithms work analogous to skimming the input text. However, you could also probably sense that just because the new attention mechanisms are faster doesn’t mean that for all uses they are more accurate – by skimming you sometimes can miss important information. Regardless of that, let’s walk through more about this analogy to get a sense of how attention mechanisms can work. First we might highlight keywords in this paragraph. And meanwhile the words and phrases that are processed would be chunked into units called “tokens”; the most important tokens we would focus on first with those attention mechanisms we referred to. When we connect the relationships between these words we might already start pulling out some of the meaning of this paragraph. Grabbing these relational words will help us piece together more meaning. Lastly we might pull out some contextual information from the other words we left behind. Let’s hear it straight from an AI model. We asked Bard to tell us what phrases it would pull out as keywords with attention mechanisms if we gave it this paragraph. Without these recent advancements in attention mechanism algorithms, the large language models that we see today would not be possible. It’s these computationally efficient mechanisms that have allowed large language models to be possible in addition to the physical hardware improvements in computing. Putting it together In summary, a good AI model is accurate – this is largely determined by its training data being high quality, relevant and properly processed. A good AI tool is also computationally efficient. We need to use algorithms that can efficiently and properly process data. 
Let’s talk about the process of an AI query in a general sense. If we give input like an image of an apple, the AI tool will observe that input. It will use its prior experience of training data to digest that input. It will then formulate a response to return to us to tell us its conclusion. If it was trained properly, its returned response to us will be that it is indeed an apple. We can then visualize a “machine learning machine” to describe AI. AI models can take a lot of different forms and functions and this visual is merely a tool to understand generalities about components of AI. It is not meant to be a detailed representation of any given AI model. But we can discuss AI tools in terms of their: input – what is the user of the tool providing? processing (including algorithms) – what are we going to do with that input? training data - how was the model trained? what information was it trained on? output - what are we returning to the user of this AI tool? Each of these components can get very complicated very quickly. Although we won’t go through the details of these in this course, we will discuss practical aspects of these in terms of customization for AI needs. Large language models are one popular type of AI tool. So we can talk about the components of these models in the context of this visual. Large language models are one popular type of AI tool. So we can talk about the components of these models in the context of this visual. Tokens are units of a language (these might be words or phrases). Transformers are what organize tokens to find the meaning/context. Meanwhile to do this processing tokens are coded as Embeddings; these are numerical representations of tokens. Encoders are what process input text from a user. Meanwhile Decoders generate output text that is sent back to the user. In summary: One more important point about AI models. Their training and training data is critical. 
You have likely seen and heard about many biased things that large language models have said. This is because the language they were trained on – the language of human beings in our society – was also very biased. To summarize, AI models can only be as good as their training. So garbage training data in means biased garbage as output. –> "],["determining-your-ai-needs.html", "Determining your AI needs Learning objectives: Intro Generalized Custom AI Use Cases Customized Security Customized Interface The Whole Picture Example project strategies Conclusion", " Determining your AI needs Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Learning objectives: Establish your AI project goals Detail how these goals are not currently accomplished by a currently existing AI tool Identify what kinds of customizations your AI project requires. Evaluate the resources and staffing needs you will have for this project. Intro A project that is ill defined is doomed to fail or worse be a chronic headache and source of stress before it fails. In this chapter we will describe the questions and considerations you should contemplate while planning for an AI project. The first of such considerations is basic: What are your goals and use cases and how are these use cases something that is not currently achievable by currently existing other products? Please take a moment now to jot down answers to the above questions for your project’s goals. Let’s return to our oversimplified machine learning machine to discuss our possible case categories. Recall that we’ve described AI tools as having the following: input what is the user of the tool providing? processing (including algorithms) – what are we going to do with that input? training data - how was the model trained? what information was it trained on? 
output - what are we returning to the user of this AI tool? Please take a moment now to jot down the answers of what these items are for your impending AI tool project. Generalized Custom AI Use Cases Here we will discuss three bins of AI tool customization needs that we will discuss for the rest of the course. Keep in mind that these customization categories are for the purposes of discussion and not necessarily google-able terms. Note that these categories of AI customization needs are almost never mutually exclusive. It is possible (and probable) that your project may have multiple or all of these needs. The more needs you have the more complicated your project will likely be. So carefully consider what is truly needed for your project. However, these are increasingly doable needs to address. There are a growing number of helpful communities and developers who are experts in customizing interfaces, security, parameters, algorithms, data handling and more for AI tools. Customized Knowledge Perhaps the most common AI need is customized knowledge. This means that existing AI tools are not properly trained for the use case. Perhaps the input is domain specific and the training data or training methods have not adequately prepared existing models to provide useful output. Perhaps the output of existing AI models is incorrect, not useful or even harmful. This means some better training is needed in order to meet your AI needs. Customized Security Many fields have data that could benefit from AI tools but may be dealing with data that is private and needs protection. It is highly dangerous and probably illegal in many cases to share protected data with commercial AI platforms. So customized security solutions for AI tools are not an uncommon use case. This doesn’t mean protected data can’t be used with AI tools, but it does mean the AI solutions involved with projects with protected data need to be very carefully planned and constructed. 
And respective experts should be consulted about these solutions to make sure patients’ or customers’ data is being kept safe! Customized Interface Perhaps your project could benefit from the power of AI but you need to do this automatically or you need your users to access AI tools from a customized interface. This may be the most straightforward of the AI needs. An increasing number of AI tools have APIs available that can be used underneath the hood of your AI project. The upcoming chapters will discuss each of these customized AI needs and examples of existing options in more detail. Generalized strategies for these needs Take a moment to categorize which of these AI needs are the largest priorities for your AI project. Note that the more customized needs you have the more work you will have that will be required by your team. And if you don’t have a large amount of technical expertise on your team, this will be required if you have a lot of custom AI needs. Lastly, if none of the above describe your customized needs; you may want to consider whether you truly need a custom solution! It could be that commercially available AI platforms will fit your needs OR you may NOT need AI as a part of your project at all! Don’t let the glitter of AI commit you and your team to a project that is ill defined! Carefully consider what the project truly needs. Which of these needs are non negotiable versus “nice to have”? Note that if you are working with protected data, protecting this data is never negotiable, but other customized needs may be. Below is a very general breakdown of what types of solutions will likely be a part of your AI project based on what needs you’ve identified. For each type of need there is often a continuum of solutions that require less to more investment that we will discuss examples of in the upcoming chapters. For customized knowledge needs, you will likely be needing to train a model for your domain specific knowledge. 
For customized security needs, you may need to deploy an AI tool on a secure server or use some other type of security layering tool. For customized interface needs, you may need to use an API of an existing AI tool or use prepackaged AI tools that you can embed in your website/app. The Whole Picture As with most management decisions, it’s never as simple as deciding what the project needs, it’s also necessary to evaluate what expertise, resources, and time you have available to you and your team. You need to evaluate: 1. the technical expertise you have available to you 2. Your funding situation. 3. The quickness of the deadlines to which this AI tool needs to be operational. Technical expertise needs What technical expertise do you have available on your team? If you do not have the expertise needed for your strategy, will you be able to use funds to hire someone who does? Can you involve a collaborator who has a team with complementary technical expertise to what your team provides? You also need to consider possible staff turnover if you are in an academic institution or other system where this is expected. Staff turnovers will make software development projects take longer even if the knowledge transfer between staff is optimized. The more customization needs your project will need, the more you will need more technical expertise support on your team. Lone developer situations are not ideal; team work is better for development. In this table we describe what kinds of technical expertise you will likely need on your team based on what kinds of customization AI needs your project entails. Keep in mind you can likely minimize these staffing needs if you pay for products that are prepackaged. Prepackaged products (which we will discuss in future chapters) generally require less expertise but will not allow you the same freedom for more granular customization. For knowledge needs, you will likely require a team who is comfortable with data handling techniques. 
It will also be ideal if they have a certain knowledge of machine learning algorithms. For security needs, it’s likely you will need someone comfortable with back end development and secure computing. Depending on your strategies with this need, it would also be good if you have a front end developer’s help. For interface needs, you’ll likely need a front end developer, as well as someone who is comfortable with using APIs, which also means potentially a back end developer. Funding needs Funding needs for AI projects are not necessarily straightforward. There are a number of costs you will need to consider. Two major categories of costs include computing and staffing. Computing costs AI projects can be costly. And this is true whether you use a “prepackaged” AI solution or build one from scratch. It is a good idea to estimate your computing costs before you begin your project. How big are the data the users would be inputting? How much would your AI tool cost per query (on average)? How many queries might a user submit? Given the answers to the above, how many users would you be able to accommodate for a given day/month/year? – expect the best/worst case scenario of your tool being massively popular! Will users be paying for this service? Will the rate at which they pay cover your computing and staffing costs? Whether you build “from scratch” or borrow commercial AI tools, you will likely not be able to avoid computing costs. Keep in mind that for certain levels of usage it may not actually be more cost effective to run your own computing infrastructure. In this computing cost analysis graph from La Javaness R&D, they demonstrated how after a certain level of usage it is actually more cost effective to outsource infrastructure to ChatGPT’s API instead of building their own model and hosting it themselves. Staffing costs Custom deployments will require more technical expertise on hand as we discussed in the previous section – think salary costs. 
You will need to estimate whether it is more cost efficient for you to have in house developers work on this or borrow commercial computing infrastructure. It’s not just about developers. Ideally you would also have: A user experience designer to help you make sure the AI tool you build is actually useable by human beings! A project manager that will help everyone save time and meet deadlines Administration to actually help you hire the individuals you need, negotiate data use agreements, and all the other behind-the-scenes paperwork necessary to keep the ship sailing smoothly. Time needs Time is a resource. For the purposes of your AI tool project goals, you should assess how much time you have. One factor in determining how you will meet your AI strategy needs is how quickly you need these AI needs to be met. How quickly does this need to be ready? And what is determining that deadline? Can these deadlines be pushed? How long does this AI tool need to be maintained? Note that more customized deployments will require more development time as compared to “prepackaged” AI tools. If you rush development, technical debt will be incurred. Technical debt will need to be paid at some point for this project to be sustainable. Example project strategies Up until this point we’ve been discussing strategies in very vague terms. To bring this discussion to specifics, we will discuss some example AI tool project strategies you may employ based on what combination of customization AI needs you have. These example project strategies are in the order of least to most resource and time investment. In the left most column is described what kinds of customizations are able to be made given the described strategy. In the example column we have links to resources and platforms that would be a central point or product for this strategy. The technical expertise column describes vaguely how much technical expertise in house you would need to deploy the example strategy. 
The funding column describes approximately the funding costs that would be associated with the strategy (but note this is highly variable given specifics of a project). The time column describes how long it would take to deploy this solution. Cogniflow example Cogniflow is an example of an AI tool that meets customized interface needs. It is a service that does not require code but has prepackaged AI tool solutions like chatbots and receipt digesters that can be readily deployed to a website. It is a subscription service but does not take much time to set up or maintain. This would not allow for much customization but it is a ready to go solution that would not necessitate hiring more staff. OctoML is a similar type of premade machine learning tool that is ready for usage in your own tool. But it does allow for more customization and model training than Cogniflow. They have premade training models that are appropriate for a lot of common use cases. PrivateAI PrivateAI is an example of an AI tool that meets customized security needs but not really other customizations. This service has security layers that allow you to use other commercial AI platforms with PII and PHI. It is HIPAA compliant and is a pay-by-use service. It also would not take much time to use and would not require additional time to use. Of course, due to the importance of keeping protected data protected, it should be confirmed that PrivateAI is an acceptable use based on any legal agreements. ChatGPT API ChatGPT’s API services are an example of an AI tool that meets customized interface and knowledge needs. Using an API would allow you to use the power of chatGPT but from underneath the hood of a custom made app or website. Additionally chatGPT’s API does allow for training models which means you could make it domain specific. Using an API would require more technical expertise than the previous two example strategies, but would not require building from scratch. 
This strategy can be a very customizable, but not entirely from-the-ground-up, strategy. It does of course, involve paying chatGPT for computing costs, so that as well as the staffing needs should be considered when employing this strategy. Hugging Face Hugging Face is a community and repository of open source AI and machine learning models of all kinds of varieties. The resources available on Hugging Face would allow you to customize an AI tool to meet all the needs you might have. The open source nature of the AI models, datasets, and examples available on Hugging Face means that this is not completely from scratch either but would require more technical expertise to utilize the resources here. The tutorials and resources on Hugging Face would allow you to control and build an AI model that fits every need you might have. But remember computing and staffing are always costs, hence why we have not said that this is necessarily a less expensive strategy. Depending on the size and technical expertise of your team it will likely take more time than the other strategies. Conclusion In the upcoming chapters we will discuss the ins and outs of customized AI needs and propose other strategies and considerations you will need to grapple with. –> "],["customized-knowledge-for-ai.html", "Customized Knowledge for AI Learning objectives: Intro Summary of possible strategies Example strategies for Fine tuning", " Customized Knowledge for AI Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. 
Learning objectives: Understand the motivation behind customized knowledge needs for AI tools Discuss a variety of low to high investment strategies for meeting customized knowledge needs Define and be able to contrast the differences between prompt engineering, prompt tuning, fine tuning, and training a model from scratch. Intro Customized knowledge needs are perhaps the most common AI tool need. And intuitively many can understand this. Just as you wouldn’t ask your primary physician to fix your motorbike, neither should you depend on an AI model for something it isn’t trained for. When we are discussing customized knowledge needs, we are describing that existing AI models are not accurately addressing the needs we have. The output from the AI models is incorrect or not useful. Summary of possible strategies If the goal of customized knowledge is to get better output from an AI model, there are multiple strategies we can employ to achieve this goal. We will discuss them in order from lowest to highest investment. It’s generally best to see if the low investment strategies can meet your needs before turning to the higher investment strategies. In summary, we will cover 4 different strategies for obtaining better output from an AI model. prompt engineering is when the user asks a better question as opposed to retraining the model. prompt tuning is when we use an iterative prompt and feedback strategy to make the model work better. It is a lower investment attempt than fine tuning. fine tuning is when we give additional training to the model to have it perform better. So in our opening example, fine tuning is like sending the primary care physician to a fellowship to learn an additional skill set. training from scratch is when we quite literally build a whole new model from the beginning. For most use cases this is not necessary and it is prohibitively expensive. Prompt engineering Sometimes it’s not the model who needs training, it’s the user. 
AI models, just like humans, are not mind readers, and just as we all learned how to google, we also need to learn how to engineer prompts. Best practices for prompt engineering according to Google Know the model’s strengths and weaknesses Be as specific as possible Utilize contextual prompts Provide AI models with examples Experiment with prompts and personas Try chain-of-thought prompting In addition to prompt engineering it’s also vital to note that different AI models are trained on different things. So if one is not giving you the output you need, try another! For LLMs, you can use https://poe.com/ or https://gpt.h2o.ai/ to test out prompts on multiple AI platforms side by side. Prompt tuning or “P-tuning” Because prompt tuning sounds a lot like prompt engineering it would be easy to think these strategies are the same, but they are not. Prompt tuning is a lower stakes type of tuning where you use your prompts to help train an LLM. It’s more efficient than fine tuning. But it may or may not address your customization needs. Basically you can think of prompt tuning like giving the LLM more context and instructions around what you are trying to receive back from it. A good analogy from this IBM article is that prompt tuning is like crossword puzzle clues for the LLM. It guides the model toward the right answer. You can test out prompt tuning without doing software engineering by trying out the gpt.h2o platform Fine Tuning First some context around fine tuning. Let’s make a hiring analog. If you needed someone to fulfill a specialized education job, you wouldn’t train a baby who has almost no knowledge as a starting point. This would be unnecessarily time costly and inefficient. Instead, you find a person who has a lot of the training you need and then fine-tune their skills. ChatGPT cost ~$100 million to create Training models from scratch requires an insane amount of data and computing costs It’s almost never where you will want to start. 
So instead we will use the strategy of fine tuning. We aren’t going to create a model from scratch, instead we’re going to find one that has the training that most closely overlaps with our needs but we will provide them with additional training for our specific needs. But fine tuning also might cost money, so before we jump to this strategy we need to check one more time whether we’ve surveyed the available models for their fitness of our project’s needs. Are you sure no other model works? If you’ve only tried ChatGPT, go try other AI platforms. If an LLM is what you are looking for you can read our paper for a summary. Find a base model to start with For us to fine tune a model, we’ll first need to identify the base model that gets closest to what we are looking for. When looking for a base model we want to consider at least these items concurrently: Which is trained on data most similar to your application? Which models have performed the best based on your prior testing? No need to unnecessarily increase our computing costs, try to find the smallest one that performs the best. Note that bigger doesn’t mean it performs better – think jack of all trades master of none. You want a model that isn’t too general. Speaking of the size of models, here is a visual demonstrating the sizes of a lot of the LLMs in existence as of March 2023 It also might be worth considering how these models are related. Perhaps an earlier, smaller version would be easier for you to train than using the latest, biggest large language model that doesn’t contain better information for your purposes. Here are places you can learn about AI models that are out there: This repository has a nice summary of a lot of currently available open source LLMs. Practical Guide to LLMs HuggingFace has all of the AI models - multimodal and more that we could want Lastly, you should consider how you will evaluate the AI model’s performance. Where did the existing models you tried fall short? 
What information do you think would help close the knowledge gap of the existing models to meet your needs? Do you have data you might be able to use to fine tune a model to help it perform better? How much cleaning will this data need? Is this data unprotected and freely able to be shared or submitted to an open source repository? We’ll discuss strategies for evaluation and data privacy in the upcoming chapters. But now is a good time to keep this in mind. Example strategies for Fine tuning Just because you may have identified you require fine tuning for an AI project doesn’t necessarily mean you will need a lot of technical expertise. There are some solutions like MonsterAPI and H2o that allow for fine tuning without code. These might be good platforms to explore either as a way to meet needs or to experiment to determine a larger strategy. As described in the previous chapter, Hugging Face also has many tutorials on how to fine tune. This strategy would involve more code, frameworks, and software development. If you do decide to build using open source models from Hugging Face or elsewhere you should consider these stages for your project timelines: In this course we have already discussed defining the use case and selecting an existing model and adapting that model. But in the upcoming chapters we will discuss deployment and evaluation of models. –> "],["customized-security-for-ai.html", "Customized Security for AI Learning objectives: Intro Data security basics Secure AI solutions for protected data Data obscuring techniques Example Security Customization strategies Always double, triple, quadruple, check", " Customized Security for AI Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. 
Learning objectives: Understand the motivation behind customized security needs for AI tools Discuss a variety of low to high investment strategies for meeting customized security needs Define and be able to contrast the differences between secure AI services, deidentifying data, and deploying existing models on secure computing resources. Intro Customized security needs are perhaps one of the largest barriers keeping individuals in certain fields from using AI tools to their full potential. There are many legitimate reasons commercial AI tools cannot and should not be used with protected data. Commercial AI products do not have data use agreements. They do not have to tell you what they do with your data. And if you work with protected data types that generally means you can’t use them. Not all data types are safe to share for a variety of reasons. To protect patients or customers, Personal Identifiable Information (PII) and Protected Health Information (PHI) cannot be used with online AI services or shared with others. Controlled Unclassified Information is another type of protected data that may be related to national security matters. But protected data projects could highly benefit from AI! AI analysis tools as helpful diagnostic aids for use with health data, imaging data, genomic data. AI chatbots as an aid for financial or health guidance for patients or customers. AI to help detect when protected data has been leaked. Protected data might be useful as training data for a problem and/or patients or customers may want to input data into a secure AI tool. Protected data needs should always be taken seriously! Before employing any AI solutions that involve protected data consult your legal experts and IRB! Data security basics Before we dive into AI related solutions, it’s a good idea that we remind ourselves of some of the best practices for data privacy Fewest individuals have access to the data as possible Least privileges as needed to complete a task. 
This is known as the “principle of least privilege”. Individuals who do have access should have to provide authentication to make sure only authorized individuals can see the data. Data use agreements need to be used when individuals need to be added to the authorized list Secure AI solutions for protected data Solutions exist for AI tools for protected data – some require more careful planning, thought, and expertise. Here we have ordered these example strategies in order of least to most investment. Whenever possible consult data security and legal experts to be sure that you are not exposing patients or customers’ data and risking their privacy or finances. Use AI services that keep data private – some AI tools have specialized tools that allow you to keep data private. Be sure to carefully read their terms of use to gain an understanding of how they keep the data secure. Consult security Obscure protected data type by hand and use AI models. In some cases this is not possible to do and still have meaningful data. And, care must be taken to make sure that data is thoroughly and properly secured. Deploy existing model on secure servers. This takes the most technical expertise to carry out. Data obscuring techniques Whether you have an AI service perform this or you do it yourself (or both) there are multiple strategies for obscuring data. It is often not a bad idea to employ multiple safety nets to keep data safe. In summary, here’s just a few of the techniques that can help make data sharing HIPAA compliant. Data Aggregation - summarize values to a higher level of grouping Data Masking - Replace data with symbols Data Anonymization - Replace data with randomized, fake data. Data Redaction - Remove the sensitive data To further understand what this looks like, here’s an example of how these techniques might look with a toy dataset In this toy example we will illustrate roughly how a given technique may obscure the original data in the top row. 
This can give you a sense that some types of data are better for certain types of obscuring methods than others. But this also depends on what your goals for your AI project are. Keep in mind that data anonymization may be more difficult with smaller datasets because of a concept called K anonymity. This principle means that you need to make sure that k number of individuals share the same attributes so that it is nearly impossible to identify any specific subject. The strategy you choose should definitely include these two questions: What protected items are included in your data Your goals with said data with AI – what is the minimum amount of information you could include in the AI model or input in order to achieve the desired goals? Example Security Customization strategies In the table below we show three examples illustrating example strategies for using AI tools with protected data. These examples are in the order of least to most amount of technical expertise needed to implement. PrivateAI PrivateAI is a platform that allows you to use various AI models with private data. It works by detecting and redacting information that is likely protected like PII and PHI. It also has containerization options that allow you to run AI but not on their or others’ servers. It requires the least technical expertise to implement, but care must be taken to make sure that it will properly deal with your project’s particular type of protected data. deidentify deidentify is a Python package that can assist with deidentifying medical records using natural language processing. This is illustrative of one way you might attempt to deidentify data before using it with AI tools. Care must be taken to make sure that the deidentification process is thorough. You may also want to couple this with other tools that can detect PII or PHI data before you submit to an AI tool. 
As always you will want to make sure that it properly handles your project’s data and that before you submit the data you’ve deidentified you have other reputable sources double or triple check that no protected information is being leaked. AWS servers + HuggingFace Amazon web services (and its competitors) generally offer HIPAA compliant computing solutions. Whether you use this service, an institutional cluster, or some other server is a decision you will have to make on a case by case basis. But regardless, this is the most technically involved solution. This is most likely the strategy you’d need to employ if security is not your only customization you need. In this instance you would borrow a model and set up from HuggingFace and build your AI tool more or less from scratch (but don’t build the model from scratch) Always double, triple, quadruple, check As opposed to other types of AI customizations, the strategies we’ve discussed in this chapter are the most imperative that you get cleared through the proper channels before deploying (if you do this incorrectly it may be illegal). It is a matter of privacy and safety for patients/customers that you get this right. So it makes sense to check with your in-house experts like institutional review boards and data security experts! –> "],["customized-interfaces-for-ai.html", "Customized Interfaces for AI Learning objectives: Intro General strategies for custom interfaces Examples of AI customized interface strategies Premade AI tools AI tool APIs Custom builds", " Customized Interfaces for AI Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. 
Learning objectives: Understand the motivation behind customized interfaces needs for AI tools Discuss a variety of low to high investment strategies for meeting customized interface needs Define and be able to contrast the differences between graphical user interfaces and command line interfaces. Intro Sometimes you need the power of AI underneath the hood of your own website/app. There are multiple strategies you can employ to achieve this. In this scenario, you need to have your website or app that has its own interface. Interfaces are how people will use your app. There are two main interface types that are most common: GUIs and Command line - GUI - stands for “Graphical user interface” – it’s the most widely used. Here users point and click buttons to tell a computer and its software what they’d like to do. - Command line - generally for software that is for individuals with more technical expertise and comfort with programming. In this interface, users type commands in order to perform tasks In this scenario we could maintain all of the same machinery but merely have a different platform where our users arrive. General strategies for custom interfaces There are multiple strategies for empowering your website or app with an AI model or tool. These are arranged in order from lowest to highest investment. Embed premade AI tools in your app - There are prepackaged solutions for standard AI tools that you might want for your website. These are subscription services but require minimal expertise to employ. Use an API (Application programming interface) underneath the hood of your app – an increasing number of LLMs and other AI tools have APIs available. APIs allow one to access a website or tool programmatically. This means that you can build your tool in such a way that underneath the hood it is powered by an AI tool. 
Deploy existing model in your own app - this is the most technically intensive solution but would be necessary if you require other types of customizations for your AI tool. Usability experts are going to be really helpful for carrying out this kind of need. Interface designs can make or break a tool’s usability and hence popularity! Examples of AI customized interface strategies Premade AI tools Some services like Cogniflow and OctoML offer prepackaged AI services like chatbots that you can embed in your website or tool. This has the advantage of being minimal maintenance or software development knowledge needed. You generally don’t have the ability to highly customize these options. OctoML also allows for customization and fine tuning of models should you need customized knowledge as well. OctoML is a pay for what you use service whereas Cogniflow is a subscription. AI tool APIs Prepackaged tools may only have certain options… In contrast, APIs can be very powerful and allow you to incorporate all the power of an AI tool into your website/app. They also free your team from having to do as much back end development. Although not all AI tools have API access, an increasing number of them are developing this as an option. Currently ChatGPT’s API is the most well developed for LLM (it appears at the time of writing this) but of course it requires a higher cost subscription plan. Bard may have a beta version of their API being further developed and released. Other types of AI models (not LLMs) often have API access as well like Google Cloud’s speech to text API. Custom builds If you need more than a custom interface but also custom knowledge, security, or handling, you will likely need to build custom AI solutions – again this requires more staff expertise In the next chapter we will discuss custom AI builds. 
–> "],["evaluating-your-customized-ai-tool.html", "Evaluating your customized AI tool Learning objectives: Intro Evaluating Accuracy of an AI model Evaluating Computational Efficiency of an AI model Evaluating Usability of an AI model", " Evaluating your customized AI tool Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Learning objectives: Understand the motivation behind evaluating your customized AI tool Define your own goals for evaluating the accuracy, computational efficiency, and usability of your AI tool Recognize metrics that could be used for evaluation of accuracy, computational efficiency, and usability. Intro Evaluating a software tool is critical. This is for multiple reasons that feed into each other. Evaluating your AI tool will help identify areas for improvement Evaluating your AI tool will demonstrate value to funders so you can actually make those improvements It’s important to keep the pulse on your project as it is developing. Ideally, you should be monitoring your AI’s performance when it comes to bias and performance throughout the project. But once you have a stable AI tool, it is an especially good time to gather more evaluations. As a reminder, generally a good AI tool is accurate in that it gives output that is useful. It is also computationally efficient in that we won’t be able to actually deploy the tool if it is computationally costly or takes too long to run a query. But for the purposes of evaluation, we’re going to add one more point of evaluation which is a good AI tool is usable. Even if you do not have “users” in the traditional sense; you are designing your tool only for within your team or organization, you will still need it to be functionally usable by the individuals you have intended to use it. 
Otherwise the fact that it is accurate and computationally efficient will be irrelevant if no one can experience that accuracy and efficiency. Evaluating Accuracy of an AI model How you evaluate the accuracy of your AI model will be highly dependent on what kind of AI model you have – text to speech, text to image, large language model chatbot, a classifier, etc. This will determine what kinds of “ground truth” you have available. For a speech to text model for example, what was the speaker actually saying? What percentage of the words did the AI tool translate correctly to text? Secondly your evaluation strategies will depend on what your goals are – how do you define success for your AI tool? What were your original goals for this AI tool? Are they meeting those goals? LLM chatbots can be a bit tricky to evaluate accuracy – how do you know if the response it gave a user was what the user was looking for? But there are a number of options and groups who are working on establishing methods and standards for LLM evaluation. Some examples at this time: MOdel kNowledge relIabiliTy scORe (MONITOR) google/BIG-bench GLUE Benchmark Measuring Massive Multitask Language Understanding Evaluating Computational Efficiency of an AI model Evaluating computational efficiency is important not only for the amount of time it takes to get useful output from your AI tool, but also will influence your computing bills each month. As mentioned previously, you’ll want to strike a balance between having an efficient but also accurate AI tool. Besides being shocked by your computing bill each month, there’s more forward-thinking ways you can keep tabs on your computational efficiency. Examples of metrics you may consider collecting: Average time per job - How much time Capacity - Total jobs that can be run at once FLOPs (Floating Point Operations) - measure the computational cost or complexity of a model or calculation More about FLOPs. 
Evaluating Usability of an AI model Usability and user experience (UX) experts are highly valuable to have on staff. But whether or not you have the funds for an expert in UX to be on staff, more informal user testing is more helpful than no testing at all! Here’s a very quick overview of what a usability testing workflow might look like: Decide what features of your AI tool you’d like to get feedback on Recruit and compensate participants Write a script for usability testing - always need to emphasize that if the participant doesn’t know how to do something it is not their fault, it’s something that needs to be fixed with the tool! Watch 3 - 5 people try to do the task – often 3 is enough to illuminate a lot of problems to be fixed! Observe and take notes on what was tricky Ask participants questions! We recommend reading this great article about user testing or reading more from this Documentation and Usability course. There are many ways to obtain user feedback, including surveys and interface analytics. Some examples of metrics you may want to collect: Success rate - how many users were able to successfully complete the task? Task time - how long does it take them to do Net Promoter Score (NPS) - scale of 0 - 10 summarized stat to understand what percentage of users would actively recommend your tool to others. Qualitative data and surveys - don’t underestimate the power of asking people their thoughts! "],["introduction-to-developing-ai-policy.html", "Introduction to Developing AI Policy Motivation Target Audience Curriculum Learning Objectives", " Introduction to Developing AI Policy This course is intended to equip you with the knowledge you need to develop an effective AI policy for your organization. Motivation AI tools are already changing how we work, and they will continue to do so for years. Over the next few years, we’re likely going to see AI used in ways we’ve never imagined and are not anticipating. 
This course empowers you to make informed decisions and confidently create an AI policy that matches your organizational goals. Target Audience This course is targeted toward industry and non-profit leaders and decision makers. Curriculum In this course, you’ll learn why you need an AI policy, what an AI policy might include, who can help you create and develop a policy, the state of existing AI laws, other laws and regulations that can apply to AI systems and products, and considerations for creating a strong AI policy for your organization. Learning Objectives During this course, learners will: Understand the reasons why organizations need an AI policy. Identify the key elements of a good AI policy. Describe the roles and responsibilities of different team members involved in guiding AI use. Identify key regulations outlined in the EU AI Act, including risk classifications, transparency requirements, and prohibited applications. Understand how existing industry-specific AI policies can inform your organization’s policy. Identify key legal categories relevant to AI use, including intellectual property, data privacy, and liability. Understand the limitations of relying solely on an AI policy without supporting infrastructure and training. Recognize the importance of involving diverse stakeholders from various departments in AI policy creation. Identify strategies for building flexible and adaptable AI policies, such as living documents and separate best practices guidelines. Appreciate the role of effective training in promoting policy compliance and ethical AI use. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. "],["why-do-i-need-an-ai-policy.html", "Why do I need an AI policy?", " Why do I need an AI policy? 
Big technological shifts always trigger a period of explosive growth where the technology and what’s possible changes incredibly quickly. We’re in that stage right now with AI systems. Everyone is curious, scared, and interested in AI. Chat GPT accumulated 100 million users in 2 months, which is faster than many other major apps. The future workforce is already regularly using AI and bans will not be practical nor effective. Over 40% of university students use ChatGPT for coursework. Thirty-nine percent of prospective students say they wouldn’t consider going to a college that banned Chat GPT and other LLMs. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Each month regularly brings new opportunities and surprises, many of which we can’t anticipate and require organizations to adapt quickly. Everyone is using AI, but no one really knows how to use it properly. The rapid changes are making new capabilities feasible while also bringing to light new and unique concerns. However, adopting this new technology at the right time and in a way that minimizes mistakes and bad outcomes can make great things happen for your organization. It’s a bit like catching the tail of a rocket ship just being launched, but catching it in a way that doesn’t burn you to a crisp. New technology adoption is scary, and while caution is advisable, all-out bans are not practical. Thirty or so years ago, we had a similar technological shift with the advent of the Internet. At the time, using the Internet for common, everyday tasks was a big deal, and there was fear about how it would change how we work. Now, we have accepted the Internet as a way of life and it’s a normal experience to look things up on Google or shop on the internet. In 30 years, AI systems will be the same. 
Employees will use AI and this will make them more effective. Thoughtful AI policies can balance your employees’ use with safety and security measures. "],["elements-of-an-ai-policy.html", "Elements of an AI policy", " Elements of an AI policy A good AI policy should be a living document that evolves as your company adapts to AI use. As AI tools advance, so should the policy surrounding them. It should provide clear guidance and frameworks for developing, deploying, and using AI systems in a responsible and ethical manner. Having a policy in place that is well communicated can provide an extra level of security for your organization and employees. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. When writing an AI policy, you might consider whether asynchronous collaboration versus synchronous collaboration is right for you. With an asynchronous approach, people write their individual sections of a document by a deadline, after which the full policy is polished and edited. With a synchronous approach, an organization might convene a set of meetings with experts over a length of time to work on the document together. There are benefits and drawbacks to both approaches, and you will know which best fits your organization’s needs. In general, a policy might have sections devoted to the following topics: Purpose and Scope. In this section, you might define your organization’s goals and plans for AI use, as well as what types of AI systems the policy will cover. This section might also contain definitions of specific terms, like what your organization considers AI or generative AI. A purpose and scope section can ensure everyone is aligned and avoid ambiguity. Values and Principles. 
This section states how your organization’s core values and principles will guide your use and development of AI tools. Some possible principles might be fairness, transparency, accountability, safety, or privacy. Governance and Oversight. You may want to establish a clear governance strategy for overseeing AI initiatives. This includes the roles of those involved in decision-making, as well as their responsibilities. Data Management and Privacy. This section outlines data governance practices that ensure data quality, security, and responsible use in AI systems. You should make sure your guidelines are compliant with relevant data privacy regulations like GDPR, CCPA, and other industry-specific regulations. Fairness and Non-discrimination. In this section, you can lay out how you might monitor and audit AI systems for possible bias. This section can also include guidelines for developing or deploying AI in ways to avoid perpetuating or exacerbating bias or discrimination based on protected characteristics. Risk Management, Safety, and Oversight. A section like this might lay out robust testing procedures to monitor, identify, and mitigate potential risks associated with AI systems, including security vulnerabilities, safety hazards, and unintended consequences. It can also identify ways to ensure oversight and accountability for AI systems, ensuring humans remain ultimately responsible for AI-driven decisions. Education and Training. This section describes how your organization will provide training and education programs on AI systems on responsible AI development, deployment, and use. You can also detail how these training modules will be created and what topics are necessary for different groups of employees. Feedback and Review. In this section, you can establish a mechanism for regularly reviewing and updating the AI policy as technology and best practices evolve. 
You may also want to implement procedures for employees to give feedback about AI issues or concerns within your organization. "],["building-an-ai-advisory-team.html", "Building an AI advisory team", " Building an AI advisory team AI Policy is a teamwork endeavor. Experts from many different fields need to come together to bring the latest updates. As someone in charge of making sure your organization is using AI wisely and properly, staying up-to-date on the laws, regulations, and computational resources is vital. It’s also something that is really difficult to do alone. Building a team of individuals that can help you confidently navigate the evolving landscape should be one of your top priorities as a leader. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. There are multiple possible roles that you could fill, depending on your organization’s AI needs and uses. Having representation from technical, policy and social science backgrounds helps ensure a multidisciplinary, holistic approach to building and overseeing responsible AI. This list is a starting point for you when deciding what sort of roles you need for your own team and is not written in any particular order. This is not an exhaustive list of all possible experts that you can gather. You should consult with your legal counsel, board members, and other oversight staff in order to properly address your own specialized needs. Legal counsel that understands AI and the nuances of the rapidly changing laws can advise on regulations relevant to your organization. Policy and governance analysts can research and draft internal policies on transparency, auditability, harm mitigation, and appropriate AI uses. They can also advise and assist with compliance. 
Data protection officers who can aid with implementing privacy-by-design principles and handling personally identifiable information legally and securely are especially important for organizations that deal with personal data. Ethicists are experts who can provide guidance on ethical issues and review systems for potential biases, risks, and policy compliance. Trainers and educators can create and run programs aimed at keeping all employees aware of responsibilities in developing and using AI respectfully and in compliance with AI policies and regulations. Oversight committee members are experts who review research studies (both before a study begins and while it is ongoing). Their job is to make sure researchers are protecting the welfare, rights, and privacy of research subjects. Oversight committees like institutional review boards are especially important for organizations involved in any human research that uses AI. Technical experts understand how to design, build, and deploy AI models. They can also offer advice on the algorithms, data, and computational infrastructure your organization might need. They might be AI or machine learning experts, data scientists, DevOps engineers, cloud architects, or systems engineers. Information security architects are vital for identifying and mitigating the security risks associated with AI systems. They can provide advice on data privacy measures, security weak points, and incident response plans. The specific roles and their required skillsets will vary depending on the size, industry, and AI maturity of the company. Having a balanced team with both technical and strategic expertise is key to successfully implementing AI policies in your organization. Remember, effective communication and collaboration between these roles is crucial for a successful AI implementation. 
"],["considerations-for-creating-an-ai-policy.html", "Considerations for creating an AI Policy An AI policy alone is not enough Get lots of voices weighing in from the beginning Consider how to keep your guidance agile Make it easy for people to follow your policy through effective training", " Considerations for creating an AI Policy How you create an AI policy, as well as what you cover in it, is going to be highly dependent on what your organization’s needs are. Unfortunately, there is no “one size fits all” approach to AI policies, governance, and training. However, we can offer some considerations based on our experiences creating and implementing policies. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. An AI policy alone is not enough It is probably not enough to build only an AI policy. Building an AI support system that makes it possible for the people in your organization to adopt AI in safe and ethical ways is also important. An AI policy support system might include a governance plan describing how new AI use cases will be reviewed and guidance for implementation provided, a clear understanding of the necessary components of your infrastructure to plan to use AI tools on, and training materials that include specific best practices for different types of AI tools and use cases. Thinking about your AI policy as just the beginning, not the entire thing, can be a way to protect your employees, your organization, and the people you serve. Get lots of voices weighing in from the beginning AI systems are being integrated into every aspect of the work environment. Sometimes these tools are very obvious such as when an AI tool is applied to your data to do predictive work. 
Other times they are less obvious such as when they are integrated into common desktop software as co-pilots or “auto-fill”. You likely need a lot of different people with different perspectives to weigh in to get even close to what you want in terms of a comprehensive AI policy. Limiting policy and governance plan creation to just the Chief Data Officer’s office or the IT department or the legal department might make things faster, but the trade-off is that you are likely only covering a fraction of what you need. At minimum, most organizations probably need representatives from legal, compliance and governance, IT, offices of diversity, equity, inclusion, ethical review, and training. Creating a meaningful policy and getting the necessary supports put in place is easier when you have people with varied and broad expertise creating the policy from the beginning. Consider how to keep your guidance agile The speed at which AI technology is changing is fast enough that creating useful guidelines around its use is difficult. An AI policy requires you to get a diverse set of opinions together and make it cohesive and coherent, and that takes time. The last thing you want to do is create a policy that no longer applies in 3 months when AI systems have changed again. One possible approach is to think of your AI Policy as an ongoing living document as opposed to a one-time effort. Another way includes creating both an AI Policy and an AI Best Practices document, where the policy changes infrequently while the best practices evolve more quickly. For example, the policy document might say something like “you should use infrastructure that matches current best practices.” This allows you to create a policy that is still useful over time as your organization learns what AI practices and infrastructure are best for it. This still requires you to communicate frequently with your employees on the state of the best practices for AI use. 
However, the best practices can be tailored to fit specific departments and change as those departments need it to do so. This also allows an organization to communicate to specific departments and employees who might be affected by an update to their best practices guidelines. However you choose to do it, systematize the process of creating your policy so that you can easily update it when necessary. Make it easy for people to follow your policy through effective training Good AI policies are most effective when they are easy for people to follow. This can be particularly challenging in periods of explosive technological growth like we’re experiencing now with AI. What is possible with AI, and how to safely and ethically use AI, is changing quickly, making it a challenge for people to always know how to comply with an AI policy. This is an opportunity to make your AI governance plan include specific points about communication, training and guidance so users have regular updates from the governance group that enable them to use AI tools ethically and securely. In situations like these, one way to approach training is to focus on major points people should consider, clearly outline the steps people can take to do the right thing, and identify who people can approach when they have questions. Many people may not solidly know the answers to all questions, but the right people can help you find the answer. Training people how to loop in the proper people, and to ask for help from the very beginning, might save them stress later. "],["ai-acts-orders-and-regulations.html", "AI acts, orders, and regulations The EU AI Act Industry-specific policies", " AI acts, orders, and regulations A good AI policy should guide an organization on AI uses that adhere to the necessary laws and regulations. With generative AI still new in many fields, from medicine to law, regulations are rapidly evolving. 
A landmark provisional deal on AI regulation was reached by the European Parliament and Council on December 8, 2023, with the EU AI Act. The guidelines laid out in this document apply to AI regulation and use within the 27-member EU bloc, as well as to foreign companies that operate within the EU. It is likely the EU AI Act will serve as a model for AI laws around the globe, for both individual countries and industries. Countries outside of the EU are drafting their own laws, orders, and standards surrounding AI use, so you and your employees will need to do some research on what is and is not allowed in your local area. Always consult your legal counsel about the AI regulations that apply to you. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. The EU AI Act According to EU policymakers involved in writing the AI Act, the goal of the Act is to regulate AI in order to limit its capacity to cause harm. The political agreement covers the use of AI in biometric surveillance (such as facial recognition), as well as guidance on regulations for LLMs. The EU AI Act divides AI-based products into levels based on how much risk each product might pose to things like data privacy and protection. Higher-risk products with a greater capacity to cause harm face more stringent rules and regulations. Most current AI uses (like systems that make recommendations) will not fall into this higher-risk category. Final details are still being worked out, but we do know several important aspects of this Act. All content generated by AI must be clearly identified. Companies must also make it clear when customers are interacting with chatbots, emotion recognition systems, and models using biometric categorization. 
Developers of foundational models like GPT as well as general purpose AI systems (GPAI) must create technical documentation and detailed summaries about the training data before they can be released on the market. High-risk AI systems must undergo mandatory rights impact and mitigation assessments. Developers will also have to conduct model evaluations, assess and track possible cybersecurity risks, and report serious incidents and breaches to the European Commission. They will also have to use high-quality datasets and have better accompanying documentation. Open-source software is excluded from regulations, with some exceptions for software that is considered a high-risk system or is a prohibited application. AI software for manipulative strategies like deepfakes and automated disinformation campaigns, systems exploiting vulnerabilities, and indiscriminate scraping of facial images from the internet or security footage to create facial recognition databases are banned. Additional prohibited applications may be added later. There are exceptions to the facial scraping ban that allow law enforcement and intelligence agencies to use AI applications for facial recognition purposes. The AI Act also lays out financial penalties for companies that violate these regulations, which can be between 1.5% and 7% of a company’s global revenue. More severe violations result in greater financial penalties. This is still a provisional agreement. It must be approved by both the European Parliament and European countries before becoming law. After approval, tech companies will have two years to implement the rules, though bans on AI uses start six months after the EU AI Act is ratified. More information about the EU’s AI Act can be found in these sources. MIT Technology Review Reuters EURACTIV.com CIO Dive Industry-specific policies Some individual industries have already begun adopting policies about generative AI. 
They may also have long-standing policies in place about the use of other forms of AI, like machine learning. Some countries have also begun creating policies for specific industries and fields. When in doubt, always check with the experts within your organization about what AI policies exist for your industry. We’ll discuss some specific examples of how different industries are approaching AI regulation in the next section. "],["case-studies.html", "Case Studies Education Healthcare", " Case Studies AI regulations and policies are continuing to evolve as people adapt to the use of AI. Let’s look at some real-life examples. Education For students and educators, generative AI’s capacity in writing, problem solving, and conducting research has upended the goals and evaluations of our education system. For instance, ChatGPT 4 has been able to generate college-level essays to earn passing grades at Harvard with minimal prompting for various subjects (Yglesias (2023)). Many educational institutions reacted with various policies and adaptations; first to protect the current educational environment, then to consider adapting to generative AI’s capacity. In the first few months after ChatGPT was released, many schools and universities restricted the use of AI in education. The two largest public school systems in the United States, New York City Public Schools and Los Angeles Public Schools, banned the use of ChatGPT in any school work, declaring that any use of ChatGPT counted as plagiarism (Singer (2023b)). Many universities also followed with similar policies. However, educators soon realized that most students embraced generative AI despite the ban for most assignments (Terry (2023), Roberts (2023)). Furthermore, enforcement to bar AI from students, such as using AI detection software or banning AI from school networks, created disparities among students. Teachers noticed that AI detection software was biased against the writings of non-native English learners (Roberts (2023)). 
Children from wealthy families could also access AI through personal smartphones or computers (Singer (2023b)). With these lessons, some educational systems started to embrace the role of AI in students’ lives and are developing various less-restrictive policies. New York City Public Schools and Los Angeles Public Schools quietly rolled back their ban, as did many universities (Singer (2023b)). Groups of educators have come together to give guidelines and resources on how to teach with the use of AI, such as the Mississippi AI Institute, MIT’s Daily-AI curriculum, and Gettysburg College’s Center for Creative Teaching and Learning. Each educational institution and classroom is adapting to AI differently. The Mississippi AI Institute suggested that there are some common questions to consider (Donahue (2023)): How are we inviting students to demonstrate their knowledge, and is writing the only (or the best) way to do that? For instance, some universities have encouraged the use of in-class assignments, handwritten papers, group work and oral exams (K. Huang (2023)). What are our (new) assignment goals? And (how) might generative AI help or hinder students in reaching those goals? Some educators want to use AI to help students get over early brainstorming hurdles, and want students to focus on deeper critical thinking problems (Roberts (2023)). Many educators have started to develop AI literacy and “critical computing” curricula to teach students how to use AI effectively and critically (Singer (2023a)). If we’re asking students to do something that AI can do with equal facility, is it still worth asking students to do? And if so, why? Educators will need to think about what aspects of their lesson goals will be automated in the future, and what are critical and creative skills that students need to hone in on. If we think students will use AI to circumvent learning, why would they want to do that? 
How can we create conditions that motivate students to learn for themselves? Educators have started to teach young students the limits of AI creativity and what kind of bias is embedded in AI models, which has led students to think more critically about use of AI (Singer (2023a)). What structural conditions would need to change in order for AI to empower, rather than threaten, teachers and learners? How can we create those conditions? Some teachers have started to actively learn how their students use AI, and are using AI to assist with writing their teaching curriculum (Singer (2023b)). Healthcare The health care industry is an example of an industry where the speed of technology development has led to gaps in regulation, and the US recently released an Executive Order about creating healthcare-specific AI policies. The U.S. Food and Drug Administration (FDA) regulates AI-enabled medical devices and software used in disease prevention, diagnosis, and treatment. However, there are serious concerns about the adequacy of current regulation, and many other AI-enabled technologies that may have clinical applications fall out of the scope of FDA regulation (Habib and Gross (2023); Association (2023)). Other federal agencies, such as the Health and Human Services Office of Civil Rights, have important roles in the oversight of some aspects of AI use in health care, but their authority is limited. Additionally, there are existing federal and state laws and regulations, such as the Health Insurance Portability and Accountability Act (HIPAA), that impact the use and development of AI. This patchwork landscape of federal and state authority and existing laws has led the American Medical Association (AMA) to advocate for a “whole government” approach to implement a comprehensive set of policies to ensure that “the benefits of AI in health care are maximized while potential harms are minimized” (News (2023)). 
The AMA and health care leaders have highlighted the importance of specialized expertise in the oversight and adoption of AI products in health care delivery and operations. For example, Dr. Nigam Shah and colleagues call for the medical community to take the lead in defining how LLMs are trained and developed: By not asking how the intended medical use can shape the training of LLMs and the chatbots or other applications they power, technology companies are deciding what is right for medicine (Shah, Entwistle, and Pfeffer (2023)). The medical community should actively shape the development of AI-enabled technologies by advocating for clinically-informed standards for the training of AI, and for the evaluation of the value of AI in real-world health care settings. At an institutional level, specialized clinical expertise is required to create policies that align AI adoption with standards for health care delivery. And in-depth knowledge of the U.S. health insurance system is required to understand how complexity and lack of standardization in this landscape may impact AI adoption in clinical operations (schulman2023). In summary, health care leaders and the medical community need to play an active role in the development of new AI regulations and policy. References "],["other-laws-to-consider.html", "Other laws to consider Intellectual Property Data Privacy and Information Security Liability Who can tell you about your particular legal concerns", " Other laws to consider While countries and jurisdictions are developing and passing laws that specifically deal with AI, there are also existing laws around data that should be considered when creating an AI policy. Which ones you should consider will vary based on your organization and sector, but broadly include regulations about intellectual property, data privacy and protection, and liability. This is not an exhaustive list! 
This can give you a starting point of what sorts of laws and regulations you might need to consider, but you’ll have to apply your own domain knowledge to determine the specifics for your organization. Always confirm with your legal counsel whether a particular law or regulation applies to you. Disclaimer: The thoughts and ideas presented in this course are not to be substituted for legal or ethical advice and are only meant to give you a starting point for gathering information about AI policy and regulations to consider. Intellectual Property There are multiple concerns around generative AI and intellectual property rights, especially with regards to copyright and fair use or fair dealing laws. Copyright is “the exclusive legal right, given to an originator or an assignee to print, publish, perform, film, or record literary, artistic, or musical material, and to authorize others to do the same” (Oxford Languages). Fair use and fair dealing are legal doctrines that allow for limited use of copyrighted material without permission under certain circumstances. While fair use and fair dealing exceptions vary from country to country, they broadly allow for nonprofit, educational, commentary or criticism, satire, and highly creative works to sample copyrighted material. There are multiple concerns around generative AI and intellectual property rights, especially with regards to copyright and fair use or fair dealing laws. In order for generative AI models to work, they must be trained on vast amounts of data. This might include images, in the case of image generators like DALL-E, Stable Diffusion, and Midjourney. It might also include human writing and speech, in the case of LLMs like ChatGPT and Bard. Information about the training data sets for these tools is limited, but they likely include text and images scraped from the internet. 
There is concern that the text and images gathered for training data included copyrighted and trademarked books, articles, photographs, and artwork. In fact, the CEO of Midjourney has previously confirmed that copyrighted images were included in the Midjourney training data without the consent of the artists. Artists and authors have brought legal action against several AI companies, claiming their rights have been violated by the inclusion of their works in the training data. Some AI companies have argued this use case is covered by fair use agreements. As of November 30, the legal situation is still being decided. There is also ongoing debate as to whether AI-generated images and text can be copyrighted. While many current copyright laws do not protect works created by machines, how these laws might apply to work that is a collaboration between humans and machine (such as art that includes some AI-generated content) is an area of active discussion. Data Privacy and Information Security An estimated 4.2 billion individuals share some form of data about themselves online. This might be information like what they’re interested in or information that can be used for identification, like their birth date or where they live, or even financial information. With vast amounts of data about us out there, privacy laws protecting the digital information of internet users are becoming increasingly common. More than 100 countries have some sort of privacy laws in place. Initial concerns around AI and information security focused on bad actors using LLMs to generate malicious code that could be used for cyberattacks. While commercially available chatbots have guardrails in place that are meant to prevent them from being used to create such code, users were able to come up with workarounds to bypass these safety checks. More recently people have begun to worry about privacy concerns related to the AI systems themselves. 
AI systems are trained on vast amounts of data, including data that is covered by existing privacy laws, and many systems also collect and store data from their users, potentially for use as additional training data. Data privacy is especially important to consider when working in fields like healthcare, biomedical research, and education, where personally identifiable data and personal health information is afforded special protections. Special consideration should also be taken when dealing with biometric data, or data involving human characteristics gathered from physical or behavioral traits that can be used to identify a single person. This might include things like fingerprints, palm prints, iris scans, facial scans, and voice recognition. DNA can also be considered biometric data when used for forensics. Liability As AI systems become more and more common in everyday life, it is inevitable that some of these systems will fail at some point. Who is liable when AI fails, especially when it fails in a catastrophic manner? The issue of whose fault it is when an AI system fails (and thus who is responsible for the damage) depends greatly on how and why it failed. Blame might lie with the user (if the AI was not being used according to instructions, or if limitations were known but ignored), the software developer (if the AI product was distributed before being tested thoroughly or before the algorithm was properly tuned), or the designer or manufacturer (if the AI design or production was inherently flawed). Who can tell you about your particular legal concerns As a general rule of thumb: when in doubt, talk to your legal counsel! They can offer you the best advice for your organization and your situation. The information in this course is ONLY meant as a starting point for you as you create AI guidelines for your organization. You can also seek guidance from your governance and compliance experts. 
"],["about-the-authors.html", "About the Authors", " About the Authors These credits are based on our course contributors table guidelines.     Credits Names Pedagogy Lead Content Instructor(s) Ava Hoffman - Course 1: Exploring AI Possibilities Carrie Wright - Course 2: Avoiding AI Harm Candace Savonen - Course 3: Determining AI Needs Elizabeth Humphries - Course 4: Developing AI Policy Project Management Elizabeth Humphries, Shasta Nicholson Content Author Christopher Lo - Avoiding AI Harm - Effective Use of Training and Testing Data, Developing AI Policy - Education case study Monica Gerber - Developing AI Policy - Healthcare case study Content Editor(s)/Reviewer(s) Sitapriya Moorthi, Jeffrey Leek, Amy Paguirigan, Jennifer Weddle, Christopher Lo Content Director(s) Jeffrey Leek , Elizabeth Humphries Content Consultants Robert McDermott, Jennifer Weddle, Adina Mueller Production Content Publisher(s) Shasta Nicholson Content Publishing Reviewer(s) Ava Hoffman, Carrie Wright, Candace Savonen,Elizabeth Humphries Technical Template Publishing Engineers Candace Savonen, Carrie Wright, Ava Hoffman Publishing Maintenance Engineer Candace Savonen Technical Publishing Stylists Carrie Wright, Ava Hoffman, Candace Savonen Package Developers (ottrpal) Candace Savonen, John Muschelli, Carrie Wright Art and Design Illustrator(s) Ava Hoffman, Candace Savonen, Carrie Wright, Elizabeth Humphries, Sitapriya Moorthi Figure Artist(s) Ava Hoffman Candace Savonen, Carrie Wright, Elizabeth Humphries, Sitapriya Moorthi Funding Funder(s) The development of this course was supported by the National Cancer Institute (NCI) under Grant UE5CA254170. 
Funding Staff Shasta Nicholson, Maleah O’Conner, Sandy Ombrek   Tools used to create this course: ## ─ Session info ─────────────────────────────────────────────────────────────── ## setting value ## version R version 4.0.2 (2020-06-22) ## os Ubuntu 20.04.5 LTS ## system x86_64, linux-gnu ## ui X11 ## language (EN) ## collate en_US.UTF-8 ## ctype en_US.UTF-8 ## tz Etc/UTC ## date 2024-03-04 ## ## ─ Packages ─────────────────────────────────────────────────────────────────── ## package * version date lib source ## assertthat 0.2.1 2019-03-21 [1] RSPM (R 4.0.5) ## bookdown 0.24 2023-03-28 [1] Github (rstudio/bookdown@88bc4ea) ## bslib 0.4.2 2022-12-16 [1] CRAN (R 4.0.2) ## cachem 1.0.7 2023-02-24 [1] CRAN (R 4.0.2) ## callr 3.5.0 2020-10-08 [1] RSPM (R 4.0.2) ## cli 3.6.1 2023-03-23 [1] CRAN (R 4.0.2) ## crayon 1.3.4 2017-09-16 [1] RSPM (R 4.0.0) ## desc 1.2.0 2018-05-01 [1] RSPM (R 4.0.3) ## devtools 2.3.2 2020-09-18 [1] RSPM (R 4.0.3) ## digest 0.6.25 2020-02-23 [1] RSPM (R 4.0.0) ## ellipsis 0.3.1 2020-05-15 [1] RSPM (R 4.0.3) ## evaluate 0.20 2023-01-17 [1] CRAN (R 4.0.2) ## fansi 0.4.1 2020-01-08 [1] RSPM (R 4.0.0) ## fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.0.2) ## fs 1.5.0 2020-07-31 [1] RSPM (R 4.0.3) ## glue 1.4.2 2020-08-27 [1] RSPM (R 4.0.5) ## hms 0.5.3 2020-01-08 [1] RSPM (R 4.0.0) ## htmltools 0.5.5 2023-03-23 [1] CRAN (R 4.0.2) ## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.0.2) ## jsonlite 1.7.1 2020-09-07 [1] RSPM (R 4.0.2) ## knitr 1.33 2023-03-28 [1] Github (yihui/knitr@a1052d1) ## lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.0.2) ## magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.0.2) ## memoise 2.0.1 2021-11-26 [1] CRAN (R 4.0.2) ## ottrpal 1.0.1 2023-03-28 [1] Github (jhudsl/ottrpal@151e412) ## pillar 1.9.0 2023-03-22 [1] CRAN (R 4.0.2) ## pkgbuild 1.1.0 2020-07-13 [1] RSPM (R 4.0.2) ## pkgconfig 2.0.3 2019-09-22 [1] RSPM (R 4.0.3) ## pkgload 1.1.0 2020-05-29 [1] RSPM (R 4.0.3) ## prettyunits 1.1.1 2020-01-24 [1] RSPM (R 4.0.3) ## processx 3.4.4 2020-09-03 [1] 
RSPM (R 4.0.2) ## ps 1.4.0 2020-10-07 [1] RSPM (R 4.0.2) ## R6 2.4.1 2019-11-12 [1] RSPM (R 4.0.0) ## readr 1.4.0 2020-10-05 [1] RSPM (R 4.0.2) ## remotes 2.2.0 2020-07-21 [1] RSPM (R 4.0.3) ## rlang 1.1.0 2023-03-14 [1] CRAN (R 4.0.2) ## rmarkdown 2.10 2023-03-28 [1] Github (rstudio/rmarkdown@02d3c25) ## rprojroot 2.0.3 2022-04-02 [1] CRAN (R 4.0.2) ## sass 0.4.5 2023-01-24 [1] CRAN (R 4.0.2) ## sessioninfo 1.1.1 2018-11-05 [1] RSPM (R 4.0.3) ## stringi 1.5.3 2020-09-09 [1] RSPM (R 4.0.3) ## stringr 1.4.0 2019-02-10 [1] RSPM (R 4.0.3) ## testthat 3.0.1 2023-03-28 [1] Github (R-lib/testthat@e99155a) ## tibble 3.2.1 2023-03-20 [1] CRAN (R 4.0.2) ## usethis 1.6.3 2020-09-17 [1] RSPM (R 4.0.2) ## utf8 1.1.4 2018-05-24 [1] RSPM (R 4.0.3) ## vctrs 0.6.1 2023-03-22 [1] CRAN (R 4.0.2) ## withr 2.3.0 2020-09-22 [1] RSPM (R 4.0.2) ## xfun 0.26 2023-03-28 [1] Github (yihui/xfun@74c2a66) ## yaml 2.2.1 2020-02-01 [1] RSPM (R 4.0.3) ## ## [1] /usr/local/lib/R/site-library ## [2] /usr/local/lib/R/library "],["references.html", "References", " References "],["404.html", "Page not found", " Page not found The page you requested cannot be found (perhaps it was moved or renamed). You may want to try searching to find the page's new location, or use the table of contents to find the page you are looking for. "]] diff --git a/docs/no_toc/societal-impact.html b/docs/no_toc/societal-impact.html index 3d613aef..32cbba78 100644 --- a/docs/no_toc/societal-impact.html +++ b/docs/no_toc/societal-impact.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

+ diff --git a/docs/no_toc/what-ai-makes-possible.html b/docs/no_toc/what-ai-makes-possible.html index 9d7f2986..7701a89f 100644 --- a/docs/no_toc/what-ai-makes-possible.html +++ b/docs/no_toc/what-ai-makes-possible.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

+ diff --git a/docs/no_toc/what-are-the-components-of-ai.html b/docs/no_toc/what-are-the-components-of-ai.html index 6b37527b..6e2f914e 100644 --- a/docs/no_toc/what-are-the-components-of-ai.html +++ b/docs/no_toc/what-are-the-components-of-ai.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

+ diff --git a/docs/no_toc/what-is-artificial-intelligence.html b/docs/no_toc/what-is-artificial-intelligence.html index 135353d6..f12c60ff 100644 --- a/docs/no_toc/what-is-artificial-intelligence.html +++ b/docs/no_toc/what-is-artificial-intelligence.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

+ diff --git a/docs/no_toc/why-do-i-need-an-ai-policy.html b/docs/no_toc/why-do-i-need-an-ai-policy.html index 4806b66b..36644aa3 100644 --- a/docs/no_toc/why-do-i-need-an-ai-policy.html +++ b/docs/no_toc/why-do-i-need-an-ai-policy.html @@ -28,7 +28,7 @@ - + @@ -471,6 +471,7 @@

The Fred Hutch Data Science Lab

Style adapted from: rstudio4edu-book (CC-BY 2.0)

+ diff --git a/manuscript/resources/chapt_screen_images/index.png b/manuscript/resources/chapt_screen_images/index.png index 6eb479ab..9cf55513 100644 Binary files a/manuscript/resources/chapt_screen_images/index.png and b/manuscript/resources/chapt_screen_images/index.png differ diff --git a/resources/chapt_screen_images/index.png b/resources/chapt_screen_images/index.png index 6eb479ab..9cf55513 100644 Binary files a/resources/chapt_screen_images/index.png and b/resources/chapt_screen_images/index.png differ