---
This commit is contained in:
@@ -0,0 +1 @@
|
|||||||
|
.env
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
OPENAI_API_KEY=
|
||||||
|
PINECONE_API_KEY=
|
||||||
|
PINECONE_INDEX=
|
||||||
|
PINECONE_NAMESPACE=
|
||||||
|
API_PORT=
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
node_modules
|
||||||
|
.env
|
||||||
|
.env.production
|
||||||
|
redeploy.sh
|
||||||
+15
@@ -0,0 +1,15 @@
|
|||||||
|
# Image for the FoxGPT backend; the container starts the API through `npm run start`.

# Pin the Node.js major version so rebuilds are reproducible. The npm scripts pass
# `--env-file`, which needs Node 20.6 or newer.
FROM node:20

WORKDIR /usr/src/app

# Copy only the manifests first so the dependency layer stays cached when sources change.
COPY package.json package-lock.json ./
# `npm ci` installs exactly what package-lock.json records and fails if it is out of sync.
RUN npm ci

COPY . .
# NOTE(review): this copies the production env file into an image layer, so anyone who
# can pull the image can read its values. The start script expects `.env` to exist, so
# the copy is kept; consider injecting the variables at runtime instead — confirm.
COPY .env.production .env
ENV NODE_ENV=production

EXPOSE 4000

CMD ["npm", "run", "start"]
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
This backend server doesn't store any user data. Data sent to the server is only used temporarily to fetch the related news articles. Chat content might be logged by OpenAI, which is not the creator of this GPT.
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
# OpenAPI description of the FoxGPT backend, consumed by the GPT as the `getArticles` action.
openapi: 3.1.0
info:
  title: Fox News API for GPT
  description: This API allows the GPT to retrieve the latest Fox News articles stored in a vector database.
  version: 1.0.0
servers:
  - url: https://foxgpt-backend.elliot-at-zuri.ch
    description: Server to query Fox News articles
paths:
  /query:
    post:
      operationId: getArticles
      summary: Get newest news articles from Fox News.
      description: This endpoint retrieves relevant news articles based on keywords to retrieve the top `k` similar items.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                keywords:
                  type: array
                  items:
                    type: string
                  description: The keywords to search for relevant news articles.
                topK:
                  type: integer
                  default: 1
                  minimum: 1
                  description: The number of top articles to retrieve.
      responses:
        '200':
          description: A list of news articles.
          content:
            application/json:
              schema:
                # The server responds with a bare JSON array of articles, not a wrapper object.
                type: array
                items:
                  type: object
                  properties:
                    headline:
                      type: string
                      description: Title of the news article.
                    articleBody:
                      type: string
                      description: Content of the news article.
                    datePublished:
                      type: string
                      description: The time the article was published.
                    dateModified:
                      type: string
                      description: The time the article was last modified.
                    description:
                      type: string
                      description: A short summary of the news article.
                    url:
                      type: string
                      description: The URL at which the article is published.
        '400':
          description: Invalid request
        '500':
          description: Internal server error
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
// Ambient typings for the environment variables read through `process.env`.
// NOTE(review): presumably produced by the `env:generate` npm script (gen-env-types) —
// confirm before editing by hand, since a regeneration would overwrite manual changes.
declare global {
  namespace NodeJS {
    // Merges into Node's `ProcessEnv` so each variable below is typed as `string`.
    interface ProcessEnv {
      OPENAI_API_KEY: string;
      PINECONE_API_KEY: string;
      PINECONE_INDEX: string;
      PINECONE_NAMESPACE: string;
      API_PORT: string;
    }
  }
}

// The empty export turns this file into a module, which `declare global` requires.
export {}
|
||||||
Executable
+13
@@ -0,0 +1,13 @@
|
|||||||
|
#!/bin/bash
#
# Builds the FoxGPT image, pushes it to the registry and (re)deploys it on the
# Dokku host over SSH.

# Stop at the first failing command so a broken build is never pushed or deployed.
set -euo pipefail

docker build --no-cache -t elliotathelsinki/foxgpt:latest .
docker push elliotathelsinki/foxgpt:latest

# The remote steps are chained with && so any failure halts the rollout. The app is
# created only when it does not exist yet; an unconditional apps:create fails on every
# run after the first one and would abort the rest of the chain.
ssh -i /home/elliot/.ssh/id_rsa ubuntu@elliot-at-zuri.ch "
    sudo docker pull elliotathelsinki/foxgpt:latest &&
    (sudo dokku apps:exists foxgpt || sudo dokku apps:create foxgpt) &&
    sudo dokku domains:set foxgpt foxgpt-backend.elliot-at-zuri.ch &&
    sudo dokku ports:set foxgpt http:80:4000 &&
    sudo dokku letsencrypt:enable foxgpt &&
    sudo dokku git:from-image foxgpt elliotathelsinki/foxgpt:latest &&
    sudo dokku ps:rebuild foxgpt
"
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
FoxGPT is your go-to news host from Fox News, designed to keep you informed about the latest events and stories. With access to the latest news articles through the getArticles action, FoxGPT provides timely updates and answers your questions with detailed summaries of the latest news. Whether you're looking for a quick headline, an in-depth summary, or need specific information about current events, FoxGPT is here to deliver reliable and up-to-date news, keeping the tone professional, informative, and engaging. FoxGPT can also retrieve and summarize the latest articles for you upon request. FoxGPT should automatically correct any spelling or grammatical errors in the retrieved articles. FoxGPT should start the conversation by asking the user what they would like to know about.
|
||||||
Generated
+2498
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,30 @@
|
|||||||
|
{
|
||||||
|
"name": "foxgpt-backend",
|
||||||
|
"type": "module",
|
||||||
|
"scripts": {
|
||||||
|
"dev": "tsx watch --env-file=.env --require reflect-metadata --require tsconfig-paths/register --require dotenv-safe/config src/index.ts",
|
||||||
|
"start": "tsx --env-file=.env --require reflect-metadata --require tsconfig-paths/register --require dotenv-safe/config src/index.ts",
|
||||||
|
"fetch": "tsx --env-file=.env --require reflect-metadata --require tsconfig-paths/register --require dotenv-safe/config src/fetch.ts",
|
||||||
|
"env:generate": "gen-env-types .env -o env.d.ts -e ."
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@langchain/openai": "^0.2.6",
|
||||||
|
"@pinecone-database/pinecone": "^3.0.0",
|
||||||
|
"cheerio": "^1.0.0",
|
||||||
|
"dotenv-safe": "^9.1.0",
|
||||||
|
"express": "^4.19.2",
|
||||||
|
"node-cron": "^3.0.3",
|
||||||
|
"reflect-metadata": "^0.2.2",
|
||||||
|
"tsconfig-paths": "^4.2.0",
|
||||||
|
"tsx": "^4.17.0",
|
||||||
|
"uuid": "^10.0.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/dotenv-safe": "^8.1.6",
|
||||||
|
"@types/express": "^4.17.21",
|
||||||
|
"@types/node": "^20.14.10",
|
||||||
|
"@types/node-cron": "^3.0.11",
|
||||||
|
"gen-env-types": "^1.3.4",
|
||||||
|
"typescript": "^5.5.3"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
/** `true` only when the process runs with `NODE_ENV` set to exactly "production". */
export const __prod__ = (
  process.env.NODE_ENV === 'production'
)
|
||||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,82 @@
|
|||||||
|
import * as cheerio from 'cheerio'
|
||||||
|
import { embeddings, ns, pc } from '@/src/libs'
|
||||||
|
import * as fs from 'fs/promises'
|
||||||
|
import path, { dirname } from 'path'
|
||||||
|
import { fileURLToPath } from 'url'
|
||||||
|
|
||||||
|
// Seeding script: scrapes the Fox News "World" section, embeds every article that
// exposes complete JSON-LD metadata, and upserts the vectors into Pinecone.

const __filename = fileURLToPath(import.meta.url)
const __dirname = dirname(__filename)

const FOX_NEWS_ORIGIN = 'https://www.foxnews.com'

/**
 * Downloads `url` and returns the response body as text.
 * Throws on a non-2xx status so that error pages are never parsed or embedded.
 */
const fetchText = async (url: string): Promise<string> => {
  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(`Request to ${url} failed with status ${response.status}`)
  }
  return response.text()
}

/** Returns the de-duplicated absolute URLs of the non-video articles linked from a section page. */
const collectArticleURLs = (html: string): string[] => {
  const $ = cheerio.load(html)
  const urls = new Set<string>()
  $('article a').each((_, element) => {
    const href = $(element).attr('href')
    // Fox News links its own articles with relative URLs; anything else is skipped.
    if (!href || !href.startsWith('/')) return
    const url = `${FOX_NEWS_ORIGIN}${href}`
    if (!url.startsWith(`${FOX_NEWS_ORIGIN}/video/`)) urls.add(url)
  })
  return [...urls]
}

/**
 * Serialises the article fields found in the first JSON-LD block that carries all of
 * them. Returns `null` when no block qualifies. Stopping at the first match keeps one
 * record per URL, which is what the URL-keyed upsert below needs.
 */
const extractArticleJSON = (html: string): string | null => {
  const $ = cheerio.load(html)
  for (const element of $('script[type="application/ld+json"]').toArray()) {
    const raw = $(element).html()
    if (!raw) continue
    try {
      const { headline, articleBody, datePublished, dateModified, description } = JSON.parse(raw)
      if (headline && articleBody && datePublished && dateModified && description) {
        return JSON.stringify({ headline, articleBody, datePublished, dateModified, description })
      }
    } catch (error) {
      console.error('Failed to parse JSON:', error)
    }
  }
  return null
}

const articleURLs = collectArticleURLs(await fetchText(`${FOX_NEWS_ORIGIN}/world`))

const objects: { url: string, json: string }[] = []

for (const url of articleURLs) {
  console.log(`Fetching ${url}`)
  try {
    const json = extractArticleJSON(await fetchText(url))
    if (json) objects.push({ url, json })
  } catch (error) {
    // One unreachable article must not abort the whole run.
    console.error(`Failed to fetch ${url}:`, error)
  }
}

if (objects.length === 0) {
  // Nothing to embed; skip the embedding and upsert calls entirely.
  console.log('No articles with complete metadata were found; nothing to upsert.')
} else {
  const vectorEmbeddings = await embeddings.embedDocuments(objects.map(o => o.json))
  await fs.writeFile(path.join(__dirname, './embeddings.json'), JSON.stringify(vectorEmbeddings))

  const indexes = (await pc.listIndexes()).indexes
  if (!indexes?.some(i => i.name === process.env.PINECONE_INDEX)) {
    console.log(await pc.createIndex({
      name: process.env.PINECONE_INDEX,
      metric: 'cosine',
      dimension: 1536,
      spec: {
        serverless: {
          cloud: 'aws',
          region: 'us-east-1'
        }
      },
      // Block until the new index can accept writes; the upsert follows immediately.
      waitUntilReady: true
    }))
  }

  await ns.upsert(objects.map((o, i) => ({ id: o.url, values: vectorEmbeddings[i], metadata: { payload: o.json } })))
}
|
||||||
+119
@@ -0,0 +1,119 @@
|
|||||||
|
import { __prod__ } from '@/src/constants'
|
||||||
|
import { embeddings, ns } from '@/src/libs'
|
||||||
|
import * as cheerio from 'cheerio'
|
||||||
|
import express from 'express'
|
||||||
|
import http from 'http'
|
||||||
|
import cron from 'node-cron'
|
||||||
|
|
||||||
|
// Express application that serves the FoxGPT backend.
const app = express()

// Parse JSON request bodies so the handlers can read `req.body`.
app.use(express.json())

// The port arrives as a string from the environment; fail fast with a clear message
// instead of handing NaN or an out-of-range value to `listen`.
const port = Number.parseInt(process.env.API_PORT, 10)
if (!Number.isInteger(port) || port < 0 || port > 65535) {
  throw new Error(`API_PORT must be a valid port number, received "${process.env.API_PORT}".`)
}

app.listen(port, () => {
  if (!__prod__) {
    console.log(`Server started on localhost:${port}.`)
  }
  else {
    // BACKEND_ORIGIN may be unset; fall back to the port rather than logging "undefined".
    console.log(`Server started at ${process.env.BACKEND_ORIGIN ?? `port ${port}`}.`)
  }
})

// Plain-text landing route.
app.get('/', (_, res) => {
  res.send('Welcome to Express.')
})
|
||||||
|
|
||||||
|
/**
 * POST /query — semantic search over the stored articles.
 *
 * Body fields:
 *  - `keywords`: optional; when missing or empty the query falls back to "Latest news".
 *  - `topK`: optional positive integer, defaults to 1.
 *
 * Responds with a JSON array of the stored article payloads, each extended with its
 * `url`. Answers 400 when `topK` is invalid and 500 when embedding or the vector
 * query fails.
 */
app.post('/query', async (req, res) => {
  const requestedKeywords = req.body.keywords
  const keywords = !requestedKeywords || requestedKeywords.length === 0
    ? ['Latest news']
    : requestedKeywords

  // The vector query needs an explicit `topK`, so apply the default here.
  const topK = req.body.topK ?? 1
  if (!Number.isInteger(topK) || topK < 1) {
    res.status(400).json({ error: '`topK` must be a positive integer.' })
    return
  }

  try {
    const embedding = await embeddings.embedQuery(JSON.stringify(keywords))

    const result = await ns.query({
      topK,
      vector: embedding,
      // Only the metadata is returned to the client, so skip transferring raw vectors.
      includeValues: false,
      includeMetadata: true
    })

    const payloads = result.matches.map(m => {
      const obj = JSON.parse(m.metadata?.payload as string)
      return { ...obj, url: m.id }
    })

    res.json(payloads)
  } catch (error) {
    // Express 4 does not catch rejections from async handlers; answer explicitly so
    // the request cannot hang.
    console.error('Failed to query articles:', error)
    res.status(500).json({ error: 'Internal server error' })
  }
})
|
||||||
|
|
||||||
|
const FOX_NEWS_ORIGIN = 'https://www.foxnews.com'

/**
 * Downloads `url` and returns the response body as text.
 * Throws on a non-2xx status so that error pages are never parsed or embedded.
 */
const fetchText = async (url: string): Promise<string> => {
  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(`Request to ${url} failed with status ${response.status}`)
  }
  return response.text()
}

/**
 * Serialises the article fields found in the first JSON-LD block that carries all of
 * them. Returns `null` when no block qualifies. Stopping at the first match keeps one
 * record per URL, which is what the URL-keyed upsert needs.
 */
const extractArticleJSON = (html: string): string | null => {
  const $ = cheerio.load(html)
  for (const element of $('script[type="application/ld+json"]').toArray()) {
    const raw = $(element).html()
    if (!raw) continue
    try {
      const { headline, articleBody, datePublished, dateModified, description } = JSON.parse(raw)
      if (headline && articleBody && datePublished && dateModified && description) {
        return JSON.stringify({ headline, articleBody, datePublished, dateModified, description })
      }
    } catch (error) {
      console.error('Failed to parse JSON:', error)
    }
  }
  return null
}

// Scheduled refresh (cron expression '0 0 * * *'): scrape the "World" section and
// index only the articles that are not stored yet.
cron.schedule('0 0 * * *', async () => {
  try {
    const $ = cheerio.load(await fetchText(`${FOX_NEWS_ORIGIN}/world`))

    const articleURLs = new Set<string>()
    $('article a').each((_, element) => {
      const href = $(element).attr('href')
      // Fox News links its own articles with relative URLs; anything else is skipped.
      if (!href || !href.startsWith('/')) return
      const url = `${FOX_NEWS_ORIGIN}${href}`
      if (!url.startsWith(`${FOX_NEWS_ORIGIN}/video/`)) articleURLs.add(url)
    })

    // Vectors are keyed by article URL; an empty result for a query by id is treated
    // as "not stored yet".
    const newArticleURLs: string[] = []
    for (const url of articleURLs) {
      const result = await ns.query({ id: url, topK: 1 })
      if (result.matches.length === 0) {
        newArticleURLs.push(url)
      }
    }

    const objects: { url: string, json: string }[] = []
    for (const url of newArticleURLs) {
      console.log(`Fetching ${url}`)
      try {
        const json = extractArticleJSON(await fetchText(url))
        if (json) objects.push({ url, json })
      } catch (error) {
        // One unreachable article must not abort the whole refresh.
        console.error(`Failed to fetch ${url}:`, error)
      }
    }

    // Nothing new: skip the embedding and upsert calls entirely.
    if (objects.length === 0) return

    const vectorEmbeddings = await embeddings.embedDocuments(objects.map(o => o.json))

    await ns.upsert(objects.map((o, i) => ({ id: o.url, values: vectorEmbeddings[i], metadata: { payload: o.json } })))
  } catch (error) {
    // A rejection escaping this callback would be unhandled and could take down the
    // API process, so log it and wait for the next scheduled run.
    console.error('Scheduled article refresh failed:', error)
  }
})
|
||||||
+14
@@ -0,0 +1,14 @@
|
|||||||
|
import { OpenAIEmbeddings } from '@langchain/openai'
|
||||||
|
import { Pinecone } from '@pinecone-database/pinecone'
|
||||||
|
|
||||||
|
// Shared service clients, configured once from environment variables.
const { OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_INDEX, PINECONE_NAMESPACE } = process.env

/** OpenAI embeddings client using `text-embedding-3-small` with 1536 dimensions. */
export const embeddings = new OpenAIEmbeddings({
  dimensions: 1536,
  model: 'text-embedding-3-small',
  apiKey: OPENAI_API_KEY
})

/** Pinecone client authenticated with `PINECONE_API_KEY`. */
export const pc = new Pinecone({ apiKey: PINECONE_API_KEY })

// Resolve the index first, then narrow it to the configured namespace.
const configuredIndex = pc.index(PINECONE_INDEX)

/** Handle on the `PINECONE_NAMESPACE` namespace of the `PINECONE_INDEX` index. */
export const ns = configuredIndex.namespace(PINECONE_NAMESPACE)
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
# OpenAPI description of the FoxGPT backend, consumed by the GPT as the `getArticles` action.
openapi: 3.1.0
info:
  title: Fox News API for GPT
  description: This API allows the GPT to retrieve the latest Fox News articles stored in a vector database.
  version: 1.0.0
servers:
  - url: https://foxgpt-backend.elliot-at-zuri.ch
    description: Server to query Fox News articles
paths:
  /query:
    post:
      operationId: getArticles
      summary: Get newest news articles from Fox News.
      description: This endpoint retrieves relevant news articles based on keywords to retrieve the top `k` similar items.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                keywords:
                  type: array
                  items:
                    type: string
                  description: The keywords to search for relevant news articles.
                topK:
                  type: integer
                  default: 1
                  minimum: 1
                  description: The number of top articles to retrieve.
      responses:
        "200":
          description: A list of news articles.
          content:
            application/json:
              schema:
                # The server responds with a bare JSON array of articles, not a wrapper object.
                type: array
                items:
                  type: object
                  properties:
                    headline:
                      type: string
                      description: Title of the news article.
                    articleBody:
                      type: string
                      description: Content of the news article.
                    datePublished:
                      type: string
                      description: The time the article was published.
                    dateModified:
                      type: string
                      description: The time the article was last modified.
                    description:
                      type: string
                      description: A short summary of the news article.
                    url:
                      type: string
                      description: The URL at which the article is published.
        "400":
          description: Invalid request
        "500":
          description: Internal server error
|
||||||
@@ -0,0 +1,39 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ESNext",
|
||||||
|
"module": "ESNext",
|
||||||
|
"lib": [
|
||||||
|
"ESNext"
|
||||||
|
],
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"sourceMap": true,
|
||||||
|
"moduleResolution": "node",
|
||||||
|
"removeComments": true,
|
||||||
|
"noImplicitAny": true,
|
||||||
|
"strictNullChecks": true,
|
||||||
|
"strictFunctionTypes": true,
|
||||||
|
"noImplicitThis": true,
|
||||||
|
"noUnusedLocals": false,
|
||||||
|
"noUnusedParameters": false,
|
||||||
|
"noImplicitReturns": true,
|
||||||
|
"noFallthroughCasesInSwitch": true,
|
||||||
|
"allowSyntheticDefaultImports": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"emitDecoratorMetadata": true,
|
||||||
|
"experimentalDecorators": true,
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"baseUrl": ".",
|
||||||
|
"paths": {
|
||||||
|
"@/*": [
|
||||||
|
"./*"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"exclude": [
|
||||||
|
"node_modules"
|
||||||
|
],
|
||||||
|
"include": [
|
||||||
|
"env.d.ts",
|
||||||
|
"./src/**/*.ts"
|
||||||
|
]
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user