This commit is contained in:
2026-06-24 16:12:16 +02:00
commit 9fed7b2cbc
19 changed files with 2968 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
.env
+5
View File
@@ -0,0 +1,5 @@
OPENAI_API_KEY=
PINECONE_API_KEY=
PINECONE_INDEX=
PINECONE_NAMESPACE=
API_PORT=
+4
View File
@@ -0,0 +1,4 @@
node_modules
.env
.env.production
redeploy.sh
+15
View File
@@ -0,0 +1,15 @@
# Container image for the FoxGPT backend; the app is started through `npm run start`.
# Pinned to a fixed Node major instead of the moving `latest` tag so rebuilds are
# reproducible. The npm scripts pass `--env-file`, which needs Node >= 20.6.
FROM node:22
WORKDIR /usr/src/app
# Copy the manifests first so the dependency layer stays cached until they change.
COPY package.json package-lock.json ./
# `npm ci` installs exactly what package-lock.json records and fails on drift.
RUN npm ci
COPY . .
# NOTE(review): this bakes the production secrets into an image layer, and the
# deploy script pushes the image to a registry. Anyone who can pull the image can
# read them; consider injecting the variables at runtime instead.
COPY .env.production .env
ENV NODE_ENV=production
# Port the deploy script's Dokku mapping forwards to (http:80:4000).
EXPOSE 4000
CMD ["npm", "run", "start"]
+1
View File
@@ -0,0 +1 @@
This backend server doesn't store any user data. Data sent to the server is only used temporarily to fetch the related news articles. Chat content might be logged by OpenAI, which is not the creator of this GPT.
+1
View File
@@ -0,0 +1 @@
# Backend for FoxGPT 🦊
+65
View File
@@ -0,0 +1,65 @@
# OpenAPI description of the backend's POST /query endpoint (operation getArticles).
openapi: 3.1.0
info:
  title: Fox News API for GPT
  description: This API allows the GPT to retrieve the latest Fox News articles stored in a vector database.
  version: 1.0.0
servers:
  - url: https://foxgpt-backend.elliot-at-zuri.ch
    description: Server to query Fox News articles
paths:
  /query:
    post:
      operationId: getArticles
      summary: Get newest news articles from Fox News.
      description: This endpoint retrieves relevant news articles based on keywords to retrieve the top `k` similar items.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                keywords:
                  type: array
                  items:
                    type: string
                  description: The keywords to search for relevant news articles.
                topK:
                  type: integer
                  default: 1
                  description: The number of top articles to retrieve.
      responses:
        '200':
          description: A list of news articles.
          content:
            application/json:
              schema:
                # The /query handler responds with the article objects as a bare
                # JSON array (no wrapping object), so the schema is an array.
                type: array
                items:
                  type: object
                  properties:
                    headline:
                      type: string
                      description: Title of the news article.
                    articleBody:
                      type: string
                      description: Content of the news article.
                    datePublished:
                      type: string
                      description: The time the article was published.
                    dateModified:
                      type: string
                      description: The time the article was last modified.
                    description:
                      type: string
                      description: A short summary of the news article.
                    url:
                      type: string
                      description: The URL at which the article is published.
        '400':
          description: Invalid request
        '500':
          description: Internal server error
Vendored
+13
View File
@@ -0,0 +1,13 @@
/**
 * Ambient typing for the environment variables the backend reads from
 * `process.env`.
 *
 * NOTE(review): package.json has an `env:generate` script that writes an
 * `env.d.ts`; if this file is that output, regenerating it will drop manual
 * additions such as `BACKEND_ORIGIN` — confirm before re-running it.
 */
declare global {
  namespace NodeJS {
    interface ProcessEnv {
      /** API key handed to the OpenAI embeddings client. */
      OPENAI_API_KEY: string;
      /** API key handed to the Pinecone client. */
      PINECONE_API_KEY: string;
      /** Name of the Pinecone index that holds the article vectors. */
      PINECONE_INDEX: string;
      /** Namespace inside the Pinecone index. */
      PINECONE_NAMESPACE: string;
      /** Port the HTTP server listens on; parsed to an integer at startup. */
      API_PORT: string;
      /** Origin printed in the production startup log; may be unset. */
      BACKEND_ORIGIN?: string;
    }
  }
}

// Keeps this file a module, which `declare global` requires.
export {}
+13
View File
@@ -0,0 +1,13 @@
#!/bin/bash
# Builds the backend image, pushes it to the registry, and (re)deploys it on the
# Dokku host over SSH.

# Abort on the first failing command so a broken build is never pushed or deployed.
set -euo pipefail

docker build --no-cache -t elliotathelsinki/foxgpt:latest .
docker push elliotathelsinki/foxgpt:latest

# The remote steps are chained with && so a failure stops the rollout.
# apps:create is guarded by apps:exists: on every run after the first the app
# already exists, and an unguarded create would fail and skip the whole deploy.
ssh -i /home/elliot/.ssh/id_rsa ubuntu@elliot-at-zuri.ch "
sudo docker pull elliotathelsinki/foxgpt:latest &&
{ sudo dokku apps:exists foxgpt || sudo dokku apps:create foxgpt; } &&
sudo dokku domains:set foxgpt foxgpt-backend.elliot-at-zuri.ch &&
sudo dokku ports:set foxgpt http:80:4000 &&
sudo dokku letsencrypt:enable foxgpt &&
sudo dokku git:from-image foxgpt elliotathelsinki/foxgpt:latest &&
sudo dokku ps:rebuild foxgpt
"
+1
View File
@@ -0,0 +1 @@
FoxGPT is your go-to news host from Fox News, designed to keep you informed about the latest events and stories. With access to the latest news articles through the getArticles action, FoxGPT provides timely updates and answers your questions with detailed summaries of the latest news. Whether you're looking for a quick headline, an in-depth summary, or need specific information about current events, FoxGPT is here to deliver reliable and up-to-date news, keeping the tone professional, informative, and engaging. FoxGPT can also retrieve and summarize the latest articles for you upon request. FoxGPT should automatically correct any spelling or grammatical error in the retrieved articles. FoxGPT should start the conversation by asking the user what they would like to know about.
+2498
View File
File diff suppressed because it is too large Load Diff
+30
View File
@@ -0,0 +1,30 @@
{
"name": "foxgpt-backend",
"type": "module",
"scripts": {
"dev": "tsx watch --env-file=.env --require reflect-metadata --require tsconfig-paths/register --require dotenv-safe/config src/index.ts",
"start": "tsx --env-file=.env --require reflect-metadata --require tsconfig-paths/register --require dotenv-safe/config src/index.ts",
"fetch": "tsx --env-file=.env --require reflect-metadata --require tsconfig-paths/register --require dotenv-safe/config src/fetch.ts",
"env:generate": "gen-env-types .env -o env.d.ts -e ."
},
"dependencies": {
"@langchain/openai": "^0.2.6",
"@pinecone-database/pinecone": "^3.0.0",
"cheerio": "^1.0.0",
"dotenv-safe": "^9.1.0",
"express": "^4.19.2",
"node-cron": "^3.0.3",
"reflect-metadata": "^0.2.2",
"tsconfig-paths": "^4.2.0",
"tsx": "^4.17.0",
"uuid": "^10.0.0"
},
"devDependencies": {
"@types/dotenv-safe": "^8.1.6",
"@types/express": "^4.17.21",
"@types/node": "^20.14.10",
"@types/node-cron": "^3.0.11",
"gen-env-types": "^1.3.4",
"typescript": "^5.5.3"
}
}
+1
View File
@@ -0,0 +1 @@
/** `true` only when the process runs with `NODE_ENV` set to `"production"`. */
export const __prod__ = 'production' === process.env.NODE_ENV
File diff suppressed because one or more lines are too long
+82
View File
@@ -0,0 +1,82 @@
import * as cheerio from 'cheerio'
import { embeddings, ns, pc } from '@/src/libs'
import * as fs from 'fs/promises'
import path, { dirname } from 'path'
import { fileURLToPath } from 'url'
// ES modules have no built-in __filename/__dirname; derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url)
const __dirname = dirname(__filename)

// Download and parse the Fox News "World" section page.
const htmlContent = await (await fetch('https://www.foxnews.com/world')).text()
const $ = cheerio.load(htmlContent)

// Unique absolute article URLs, in the order their links appear on the page.
const articleURLs: string[] = []
$('article a').each((_, anchor) => {
  const href = $(anchor).attr('href')
  // Only site-relative links are considered.
  if (!href || !href.startsWith('/')) {
    return
  }
  // Ensure it is a full URL, Fox News uses relative URLs
  const absoluteURL = `https://www.foxnews.com${href}`
  // Skip video pages and links that were already collected.
  const isVideo = absoluteURL.startsWith('https://www.foxnews.com/video/')
  if (isVideo || articleURLs.includes(absoluteURL)) {
    return
  }
  articleURLs.push(absoluteURL)
})
// `objects` pairs each serialized article with its URL; `contentStrings` holds the
// same serialized articles in the same order, as input for the embedding call.
const objects: { url: string, json: string }[] = []
const contentStrings: string[] = []

for (const url of articleURLs) {
  console.log(`Fetching ${url}`)
  const response = await fetch(url)
  const html = await response.text()
  const $ = cheerio.load(html)

  // Article data is read from the page's JSON-LD <script> blocks.
  $('script[type="application/ld+json"]').each((_, element) => {
    const rawJson = $(element).html()
    if (!rawJson) {
      return
    }
    try {
      const ld = JSON.parse(rawJson)
      // Only blocks carrying every required field are kept.
      const complete = ld.headline && ld.articleBody && ld.datePublished && ld.dateModified && ld.description
      if (!complete) {
        return
      }
      const serialized = JSON.stringify({
        headline: ld.headline,
        articleBody: ld.articleBody,
        datePublished: ld.datePublished,
        dateModified: ld.dateModified,
        description: ld.description
      })
      contentStrings.push(serialized)
      objects.push({ json: serialized, url })
    } catch (error) {
      console.error('Failed to parse JSON:', error)
    }
  })
}
// Embed every serialized article and keep a local copy of the vectors next to
// this script.
const vectorEmbeddings = await embeddings.embedDocuments(contentStrings)
await fs.writeFile(path.join(__dirname, './embeddings.json'), JSON.stringify(vectorEmbeddings))

// Create the target index on first use.
const indexes = (await pc.listIndexes()).indexes
if (!indexes || !indexes.find(i => i.name === process.env.PINECONE_INDEX)) {
  console.log(await pc.createIndex({
    name: process.env.PINECONE_INDEX,
    metric: 'cosine',
    // Must match the `dimensions` the embeddings client is configured with.
    dimension: 1536,
    spec: {
      serverless: {
        cloud: 'aws',
        region: 'us-east-1'
      }
    },
    // Block until the new index can accept writes; otherwise the upsert below
    // can run against an index that is still initializing.
    waitUntilReady: true
  }))
}

// Store one record per article: the URL is the id, the serialized article the payload.
if (objects.length > 0) {
  await ns.upsert(objects.map((o, i) => ({ id: o.url, values: vectorEmbeddings[i], metadata: { payload: o.json } })))
}
else {
  // Avoid sending an empty upsert request when nothing was scraped.
  console.log('No articles extracted; nothing to upsert.')
}
+119
View File
@@ -0,0 +1,119 @@
import { __prod__ } from '@/src/constants'
import { embeddings, ns } from '@/src/libs'
import * as cheerio from 'cheerio'
import express from 'express'
import http from 'http'
import cron from 'node-cron'
// Express application that serves the article-search API.
const app = express()

// Parse JSON request bodies so handlers can read `req.body`.
app.use(express.json())

// API_PORT arrives as a string; parse it explicitly as base 10.
const port = parseInt(process.env.API_PORT, 10)

app.listen(port, () => {
  if (!__prod__) {
    console.log(`Server started on localhost:${process.env.API_PORT}.`)
  }
  else {
    // BACKEND_ORIGIN may be unset; fall back to the port instead of printing "undefined".
    console.log(`Server started at ${process.env.BACKEND_ORIGIN ?? `port ${port}`}.`)
  }
})

// Plain-text landing route.
app.get('/', (_, res) => {
  res.send('Welcome to Express.')
})
/**
 * POST /query — semantic search over the indexed articles.
 *
 * Request body (JSON):
 *  - `keywords`: optional array of strings; when missing or empty the query
 *    falls back to "Latest news".
 *  - `topK`: optional positive integer, defaults to 1.
 *
 * Responds with a JSON array of article objects (the stored payload plus its
 * `url`), with 400 for a malformed body, and with 500 when the embedding or
 * vector query fails.
 */
app.post('/query', async (req, res) => {
  const body = req.body ?? {}

  // Resolve the keywords, rejecting anything that is not an array of strings.
  let keywords: string[] = ['Latest news']
  if (body.keywords) {
    const isStringArray = Array.isArray(body.keywords) && body.keywords.every((k: unknown) => typeof k === 'string')
    if (!isStringArray) {
      res.status(400).json({ error: '`keywords` must be an array of strings.' })
      return
    }
    if (body.keywords.length > 0) {
      keywords = body.keywords
    }
  }

  // The vector query needs a concrete topK; default to a single match.
  const topK: unknown = body.topK ?? 1
  if (typeof topK !== 'number' || !Number.isInteger(topK) || topK < 1) {
    res.status(400).json({ error: '`topK` must be a positive integer.' })
    return
  }

  try {
    const embedding = await embeddings.embedQuery(JSON.stringify(keywords))
    // Vector values are not part of the response, so they are not requested.
    const result = await ns.query({
      topK,
      vector: embedding,
      includeMetadata: true
    })

    const payloads: unknown[] = []
    for (const match of result.matches) {
      const raw = match.metadata?.payload
      // Skip records whose payload is missing or not valid JSON instead of failing the request.
      if (typeof raw !== 'string') {
        continue
      }
      try {
        payloads.push({ ...JSON.parse(raw), url: match.id })
      } catch (error) {
        console.error(`Skipping match ${match.id} with unparsable payload:`, error)
      }
    }
    res.json(payloads)
  } catch (error) {
    // Without this, a rejected promise in the async handler would leave the request unanswered.
    console.error('Query failed:', error)
    res.status(500).json({ error: 'Internal server error' })
  }
})
/**
 * Scheduled ingestion job on the `0 0 * * *` cron expression (daily at midnight).
 *
 * Scrapes the Fox News "World" page for article links, keeps the URLs that have
 * no record in the vector namespace yet, extracts each article's JSON-LD data,
 * embeds it, and upserts one record per article (id = URL, metadata.payload =
 * serialized article). Failures are logged rather than thrown, so a bad run
 * cannot take the HTTP server down with an unhandled rejection.
 */
cron.schedule('0 0 * * *', async () => {
  try {
    const htmlContent = await (await fetch('https://www.foxnews.com/world')).text()
    const $ = cheerio.load(htmlContent)

    // Unique absolute article URLs found on the section page.
    const articleURLs: string[] = []
    $('article a').each((_, element) => {
      let url = $(element).attr('href')
      if (url && url.startsWith('/')) {
        // Links on the page are site-relative; make them absolute.
        url = `https://www.foxnews.com${url}`
        if (!url.startsWith('https://www.foxnews.com/video/') && !articleURLs.includes(url)) {
          articleURLs.push(url)
        }
      }
    })

    // A URL counts as new when an id query for it returns no matches.
    const newArticleURLs: string[] = []
    for (const url of articleURLs) {
      const result = await ns.query({ id: url, topK: 1 })
      if (result.matches.length === 0) {
        newArticleURLs.push(url)
      }
    }

    const objects: { url: string, json: string }[] = []
    const contentStrings: string[] = []
    for (const url of newArticleURLs) {
      console.log(`Fetching ${url}`)
      let html: string
      try {
        html = await (await fetch(url)).text()
      } catch (error) {
        // One unreachable article should not discard the rest of the batch.
        console.error(`Failed to fetch ${url}:`, error)
        continue
      }
      const $article = cheerio.load(html)
      // Article data is read from the page's JSON-LD <script> blocks.
      $article('script[type="application/ld+json"]').each((_, element) => {
        const jsonScriptTag = $article(element).html()
        if (jsonScriptTag) {
          try {
            const jsonData = JSON.parse(jsonScriptTag)
            // Only blocks carrying every required field are kept.
            if (jsonData.headline && jsonData.articleBody && jsonData.datePublished && jsonData.dateModified && jsonData.description) {
              const serialized = JSON.stringify({
                headline: jsonData.headline,
                articleBody: jsonData.articleBody,
                datePublished: jsonData.datePublished,
                dateModified: jsonData.dateModified,
                description: jsonData.description
              })
              contentStrings.push(serialized)
              objects.push({ json: serialized, url })
            }
          } catch (error) {
            console.error('Failed to parse JSON:', error)
          }
        }
      })
    }

    // Nothing new: skip the embedding call and avoid sending an empty upsert.
    if (objects.length === 0) {
      console.log('No new articles to index.')
      return
    }

    const vectorEmbeddings = await embeddings.embedDocuments(contentStrings)
    await ns.upsert(objects.map((o, i) => ({ id: o.url, values: vectorEmbeddings[i], metadata: { payload: o.json } })))
  } catch (error) {
    console.error('Scheduled article ingestion failed:', error)
  }
})
+14
View File
@@ -0,0 +1,14 @@
import { OpenAIEmbeddings } from '@langchain/openai'
import { Pinecone } from '@pinecone-database/pinecone'
const { OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_INDEX, PINECONE_NAMESPACE } = process.env

/** Embeddings client: `text-embedding-3-small` producing 1536-dimensional vectors. */
export const embeddings = new OpenAIEmbeddings({
  model: 'text-embedding-3-small',
  dimensions: 1536,
  apiKey: OPENAI_API_KEY
})

/** Pinecone client authenticated with the configured API key. */
export const pc = new Pinecone({ apiKey: PINECONE_API_KEY })

/** Handle on the configured namespace of the configured index. */
const articleIndex = pc.index(PINECONE_INDEX)
export const ns = articleIndex.namespace(PINECONE_NAMESPACE)
+65
View File
@@ -0,0 +1,65 @@
# OpenAPI description of the backend's POST /query endpoint (operation getArticles).
openapi: 3.1.0
info:
  title: Fox News API for GPT
  description: This API allows the GPT to retrieve the latest Fox News articles stored in a vector database.
  version: 1.0.0
servers:
  - url: https://foxgpt-backend.elliot-at-zuri.ch
    description: Server to query Fox News articles
paths:
  /query:
    post:
      operationId: getArticles
      summary: Get newest news articles from Fox News.
      description: This endpoint retrieves relevant news articles based on keywords to retrieve the top `k` similar items.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                keywords:
                  type: array
                  items:
                    type: string
                  description: The keywords to search for relevant news articles.
                topK:
                  type: integer
                  default: 1
                  description: The number of top articles to retrieve.
      responses:
        "200":
          description: A list of news articles.
          content:
            application/json:
              schema:
                # The /query handler responds with the article objects as a bare
                # JSON array (no wrapping object), so the schema is an array.
                type: array
                items:
                  type: object
                  properties:
                    headline:
                      type: string
                      description: Title of the news article.
                    articleBody:
                      type: string
                      description: Content of the news article.
                    datePublished:
                      type: string
                      description: The time the article was published.
                    dateModified:
                      type: string
                      description: The time the article was last modified.
                    description:
                      type: string
                      description: A short summary of the news article.
                    url:
                      type: string
                      description: The URL at which the article is published.
        "400":
          description: Invalid request
        "500":
          description: Internal server error
+39
View File
@@ -0,0 +1,39 @@
{
"compilerOptions": {
"target": "ESNext",
"module": "ESNext",
"lib": [
"ESNext"
],
"skipLibCheck": true,
"sourceMap": true,
"moduleResolution": "node",
"removeComments": true,
"noImplicitAny": true,
"strictNullChecks": true,
"strictFunctionTypes": true,
"noImplicitThis": true,
"noUnusedLocals": false,
"noUnusedParameters": false,
"noImplicitReturns": true,
"noFallthroughCasesInSwitch": true,
"allowSyntheticDefaultImports": true,
"esModuleInterop": true,
"emitDecoratorMetadata": true,
"experimentalDecorators": true,
"resolveJsonModule": true,
"baseUrl": ".",
"paths": {
"@/*": [
"./*"
]
}
},
"exclude": [
"node_modules"
],
"include": [
"env.d.ts",
"./src/**/*.ts"
]
}