mirror of
https://github.com/dariusk/rss-to-activitypub.git
synced 2024-12-22 21:53:32 +02:00
Merge branch 'beanstalk'
This commit is contained in:
commit
2b4479fbe8
4 changed files with 59 additions and 25 deletions
|
@ -8,6 +8,8 @@ This is based on my [Express ActivityPub Server](https://github.com/dariusk/expr
|
|||
|
||||
This requires Node.js v10.10.0 or above.
|
||||
|
||||
You also need `beanstalkd` running. This is a simple and fast queueing system we use to manage polling RSS feeds. [Here are installation instructions](https://beanstalkd.github.io/download.html). On a production server you'll want to [install it as a background process](https://github.com/beanstalkd/beanstalkd/tree/master/adm).
|
||||
|
||||
## Installation
|
||||
|
||||
Clone the repository, then `cd` into its root directory. Install dependencies:
|
||||
|
@ -44,7 +46,7 @@ Go to `https://whateveryourdomainis.com:3000/convert` or whatever port you selec
|
|||
|
||||
## Sending out updates to followers
|
||||
|
||||
There is also a file called `updateFeeds.js` that needs to be run on a cron job or similar scheduler. I like to run mine once a minute. It queries every RSS feed in the database to see if there has been a change to the feed. If there is a new post, it sends out the new post to everyone subscribed to its corresponding ActivityPub Actor.
|
||||
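There is also a file called `queueFeeds.js` that needs to be run on a cron job or similar scheduler. I like to run mine once a minute. It puts every RSS feed in the database onto the `beanstalkd` queue. The feeds are then taken off the queue one at a time by `updateFeeds.js`, a long-running worker that has to be kept running alongside it (for example as a background process); it fetches each queued RSS feed to see if there has been a change to the feed. If there is a new post, it sends out the new post to everyone subscribed to its corresponding ActivityPub Actor.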
|
||||
|
||||
## Local testing
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
"cors": "^2.8.4",
|
||||
"express": "^4.16.3",
|
||||
"generate-rsa-keypair": "^0.1.2",
|
||||
"jackd": "^1.2.4",
|
||||
"parse-favicon": "^2.0.0",
|
||||
"pug": "^2.0.3",
|
||||
"request": "^2.87.0",
|
||||
|
|
26
queueFeeds.js
Normal file
26
queueFeeds.js
Normal file
|
@ -0,0 +1,26 @@
|
|||
const Database = require('better-sqlite3');
|
||||
const db = new Database('bot-node.db');
|
||||
const Jackd = require('jackd');
|
||||
const beanstalkd = new Jackd();
|
||||
|
||||
|
||||
/**
 * Queue every known feed URL for polling.
 *
 * Reads the `feed` column of every row in the `feeds` table and puts each
 * value onto the beanstalkd queue as its own job, one at a time, in the
 * order the database returned them.
 *
 * Relies on the module-level `db` and `beanstalkd` instances.
 *
 * @returns {Promise<void>} Resolves once every feed has been queued and the
 *   queue connection has been closed; rejects if connecting, queueing or
 *   disconnecting fails.
 */
async function foo() {
  // get all feeds from DB
  const feeds = db.prepare('select feed from feeds').all();

  console.log('!!!', feeds.length);

  await beanstalkd.connect();

  try {
    // `const` keeps the loop variable local: a bare `feed` would become an
    // implicit global (and a ReferenceError in strict mode).
    for (const { feed } of feeds) {
      await beanstalkd.put(feed);
    }
  } finally {
    // Always close the connection, even when a put fails part-way through.
    await beanstalkd.disconnect();
  }
}
|
||||
|
||||
// Kick off the run. Report a failure explicitly instead of leaving the
// rejection unhandled, and signal it through the exit code so the scheduler
// that launched this script can notice.
foo().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
|
@ -7,29 +7,37 @@ const db = new Database('bot-node.db'),
|
|||
crypto = require('crypto'),
|
||||
parser = new Parser({timeout: 2000});
|
||||
|
||||
// get all feeds from DB
|
||||
let feeds = db.prepare('select * from feeds').all();
|
||||
const Jackd = require('jackd');
|
||||
const beanstalkd = new Jackd();
|
||||
|
||||
console.log('!!!',feeds.length);
|
||||
beanstalkd.connect()
|
||||
|
||||
let count = 0;
|
||||
|
||||
doFeed();
|
||||
|
||||
function doFeed() {
|
||||
let feed = feeds[count];
|
||||
console.log(count, feed.feed);
|
||||
if (feed === undefined) {
|
||||
return;
|
||||
/**
 * Worker loop: keeps pulling feed URLs off the beanstalkd queue and
 * processing them, forever.
 *
 * For each reserved job the payload is logged, the job is deleted from the
 * queue, and only then is the payload handed to `doFeed`. Any error raised
 * along the way is logged and the loop moves on to the next job.
 *
 * Relies on the module-level `beanstalkd` instance and on `doFeed`.
 *
 * @returns {Promise<never>} The loop has no exit, so the promise never settles.
 */
async function foo() {
  // Handle exactly one job: reserve, log, delete, then process.
  const handleNextJob = async () => {
    const job = await beanstalkd.reserve();
    const feedUrl = job.payload;
    console.log(feedUrl);
    // The job is removed from the queue before it is processed.
    await beanstalkd.delete(job.id);
    await doFeed(feedUrl);
  };

  for (;;) {
    try {
      await handleNextJob();
    } catch (problem) {
      // A failed job must not stop the worker; log and carry on.
      console.error(problem);
    }
  }
}
|
||||
|
||||
foo()
|
||||
|
||||
function doFeed(feedUrl) {
|
||||
return new Promise((resolve, reject) => {
|
||||
// fetch new RSS for each feed
|
||||
parser.parseURL(feed.feed, function(err, feedData) {
|
||||
parser.parseURL(feedUrl, function(err, feedData) {
|
||||
if (err) {
|
||||
console.log('error fetching', feed.feed, err);
|
||||
doFeed(++count);
|
||||
reject('error fetching ' + feedUrl + '; ' + err);
|
||||
}
|
||||
else {
|
||||
//console.log(feedData);
|
||||
let feed = db.prepare('select * from feeds where feed = ?').get(feedUrl);
|
||||
// get the old feed data from the database
|
||||
let oldFeed = JSON.parse(feed.content);
|
||||
|
||||
|
@ -72,15 +80,14 @@ function doFeed() {
|
|||
// update the DB with new contents
|
||||
let content = JSON.stringify(feedData);
|
||||
db.prepare('insert or replace into feeds(feed, username, content) values(?, ?, ?)').run(feed.feed, acct, content);
|
||||
count = count + 1;
|
||||
setTimeout(doFeed, 100);
|
||||
return resolve('done with ' + feedUrl)
|
||||
}
|
||||
else {
|
||||
count = count + 1;
|
||||
setTimeout(doFeed, 100);
|
||||
return resolve('done with ' + feedUrl + ', no change')
|
||||
}
|
||||
}
|
||||
});
|
||||
}).catch((e) => console.log(e));
|
||||
}
|
||||
|
||||
// TODO: update the display name of a feed if the feed title has changed
|
||||
|
@ -139,10 +146,8 @@ function transformContent(item) {
|
|||
});
|
||||
|
||||
// couple of hacky regexes to make sure we clean up everything
|
||||
item.content = $('body').html().replace(/^(\n|\r)/,'').replace(/>\r+</,' ').replace(/ +/g, '');
|
||||
item.content = item.content.replace(/^(\n|\r)/,'').replace(/>\r+</,' ').replace(/ +/g, '');
|
||||
// remove whitespace nodes from in between paragraphs
|
||||
item.content = item.content.replace(/p>\s<p/g,'p><p');
|
||||
item.content = $('body').html().replace(/^(\n|\r)/,'').replace(/>\r+</,'><').replace(/ +/g, '');
|
||||
item.content = item.content.replace(/^(\n|\r)/,'').replace(/>\r+</,'><').replace(/>\s*</g,'><').replace(/>\u200B+</g,'><').replace(/ +/g, '').replace(/<p><\/p>/g,'');
|
||||
return item;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue