WIP, sync

remotes/1711448277524916210/master
Sven Slootweg 2 years ago
parent 3f67beaaa6
commit 8537fb91e0

@ -0,0 +1,22 @@
"use strict";
const Promise = require("bluebird");
/**
 * Migration (up): creates the `datasheets_products` table and afterwards adds
 * an index on `lower(name)`.
 *
 * @param {object} knex - Knex instance supplied by the migration runner.
 * @returns {Promise} Settles once the table creation and the index creation have both completed.
 */
module.exports.up = function(knex) {
	// Column layout: `id` is the primary key; `name`, `source` and `url` are required.
	const defineColumns = (columns) => {
		columns.text("id").primary();
		columns.text("manufacturer");
		columns.text("name").notNull();
		columns.text("description");
		columns.text("source").notNull();
		columns.text("url").notNull();
	};

	const createProductsTable = () => knex.schema.createTable("datasheets_products", defineColumns);

	// Expression index on the lowercased name, using the `text_pattern_ops` operator class.
	// NOTE(review): presumably this exists to serve the `lower(name) LIKE ...` search query -- confirm.
	const createSearchIndex = () => knex.raw("CREATE INDEX search_index ON datasheets_products ((lower(name)) text_pattern_ops);");

	return Promise.try(createProductsTable).then(createSearchIndex);
};
/**
 * Migration (down): drops the `datasheets_products` table.
 *
 * @param {object} knex - Knex instance supplied by the migration runner.
 * @returns The value returned by Knex's `dropTable` call.
 */
module.exports.down = function(knex) {
	const schema = knex.schema;

	return schema.dropTable("datasheets_products");
};

@ -31,13 +31,27 @@
},
"dependencies": {
"@joepie91/express-react-views": "^1.0.1",
"@promistream/combine-sequential-streaming": "^0.1.0",
"@promistream/decode-string": "^0.1.0",
"@promistream/from-node-stream": "^0.1.3",
"@promistream/map": "^0.1.1",
"@promistream/pipe": "^0.1.6",
"@promistream/simple-sink": "^0.2.2",
"@promistream/simple-source": "^0.1.4",
"@promistream/split-lines": "^0.1.0",
"@promistream/spy": "^0.1.0",
"axios": "^0.21.1",
"bhttp": "^1.2.8",
"bluebird": "^3.7.2",
"classnames": "^2.2.6",
"default-value": "^1.0.0",
"express": "^4.17.1",
"express-promise-router": "^4.1.0",
"global": "^4.4.0",
"hyperphone": "^1.0.0",
"knex": "^0.95.2",
"match-value": "^1.1.0",
"moize": "^6.1.0",
"pg": "^8.5.1",
"react": "^17.0.1",
"react-dom": "^17.0.1",

@ -60,3 +60,9 @@ html, body {
font-size: 1.3em;
}
/* Right-aligned, italic, slightly smaller caption with a little space below it */
.counter {
	text-align: right;
	font-size: 0.9em;
	font-style: italic;
	margin-bottom: 0.5em;
}

@ -3,9 +3,37 @@
const Promise = require("bluebird");
const express = require("express");
const path = require("path");
const defaultValue = require("default-value");
const moize = require("moize");
const knex = require("knex")(require("../knexfile"));
const createSynchronizer = require("./sync")({ knex: knex });
// Keep the `datasheets_products` table in sync with all items whose ID starts with `datasheet:`.
// The mapper turns an item into a table row; items without a URL are skipped with a warning.
// NOTE(review): the value returned by createSynchronizer is discarded here, so a failure of the
// sync pipeline is not handled at this call site -- confirm whether that is intended.
createSynchronizer("datasheets_products", "datasheet:", (item) => {
	const { data } = item;

	if (data.url == null) {
		console.warn(`[warn] Item does not have a URL: ${item.id}`);
		return null;
	}

	return {
		id: item.id,
		manufacturer: data.manufacturer,
		name: data.name,
		description: data.description,
		// FIXME: Temporary workaround for old data
		source: defaultValue(data.source, "unknown"),
		url: data.url
	};
});
/**
 * Counts the `url` values in the `datasheets_products` table. The result is
 * memoized for one minute so that page views do not each trigger a COUNT query.
 *
 * @returns {Promise} Resolves to the `count` field of the first result row, as reported by the database driver.
 */
const getDatasheetCount = moize(() => {
	return Promise.try(() => {
		return knex("datasheets_products").count("url");
	}).then((result) => {
		return result[0].count;
	});
}, {
	// The memoized function returns a promise; telling moize so makes it drop a rejected
	// result from the cache instead of replaying the failure until the entry expires.
	isPromise: true,
	maxAge: 1000 * 60 * 1 // 1 minute cache
});
// Create the Express application and look up view templates in the `views` directory next to this file.
let app = express();
app.set("views", path.join(__dirname, "views"));
@ -19,21 +47,35 @@ app.get("/", (req, res) => {
});
// Datasheet search page: renders `datasheets/index` with the (memoized) datasheet count.
// Errors are passed to `next`, because Express does not observe the promise returned
// from a route handler by itself and the request would otherwise never be answered.
app.get("/datasheets", (req, res, next) => {
	return Promise.try(() => {
		return getDatasheetCount();
	}).then((datasheetCount) => {
		// This is the only render for this route; rendering anything before the count
		// is known would result in a second response being sent for the same request.
		res.render("datasheets/index", {
			datasheetCount: datasheetCount
		});
	}).catch(next);
});
app.post("/search", (req, res) => {
return Promise.try(() => {
// return knex.raw(`
// SELECT
// data->>'manufacturer' AS manufacturer,
// data->>'name' AS name,
// data->>'url' AS url,
// data->>'description' AS description,
// data->>'source' AS source
// FROM srap_items WHERE
// id LIKE 'datasheet:%'
// AND lower(data->>'name') LIKE :query
// ORDER BY name
// LIMIT 20
// `, {
// query: req.query.query.toLowerCase() + "%"
// });
return knex.raw(`
SELECT
data->>'manufacturer' AS manufacturer,
data->>'name' AS name,
data->>'url' AS url,
data->>'description' AS description,
data->>'source' AS source
FROM items WHERE
id LIKE 'datasheet:%'
AND lower(data->>'name') LIKE :query
SELECT * FROM datasheets_products WHERE
lower(name) LIKE :query
ORDER BY name
LIMIT 20
`, {

@ -63,3 +63,9 @@ html, body {
}
}
/* Right-aligned, italic, slightly smaller caption with a little space below it */
.counter {
	text-align: right;
	font-size: 0.9em;
	font-style: italic;
	margin-bottom: 0.5em;
}

@ -20,6 +20,7 @@ function getSources(results) {
tme: "TME",
lcsc: "LCSC",
st: "STMicroelectronics",
farnell: "Farnell",
unknown: "Unknown"
});
})
@ -98,7 +99,7 @@ module.exports = function DatasheetSearch({}) {
return (
<div className="search">
<input type="text" className={style.query} placeholder="Start typing..." onChange={updateQuery} />
<input type="text" className={style.query} placeholder="Start typing a part number..." onChange={updateQuery} />
<div className={style.sources}>
{(results.length > 0)
? <>

@ -0,0 +1,41 @@
"use strict";
const Promise = require("bluebird");
const matchValue = require("match-value");
const pipe = require("@promistream/pipe");
const simpleSink = require("@promistream/simple-sink");
const updateStream = require("./update-stream");
module.exports = function ({ knex }) {
return function createSynchronizer(tableName, prefix, mapper) {
return pipe([
updateStream({ prefix }),
simpleSink((item) => {
return Promise.try(() => {
console.log("[sync] processing item", item);
return matchValue(item.type, {
item: () => {
let result = mapper(item);
if (result != null) {
return knex(tableName)
.insert(result)
.onConflict("id").merge();
}
},
alias: () => {
return knex(tableName)
.delete()
.where({ id: item.alias });
},
taskResult: () => {
// Ignore these for now
}
});
}).then(() => {
// FIXME: This placeholder `.then` is necessary to make this work *at all*. Investigate why this isn't working otherwise, and whether that's a bug in simple-sink
});
})
]).read();
};
};

@ -0,0 +1,14 @@
"use strict";
const pipe = require("@promistream/pipe");
const splitLines = require("@promistream/split-lines");
const map = require("@promistream/map");
const decodeString = require("@promistream/decode-string");
module.exports = function createNDJSONParseStream() {
return pipe([
decodeString("utf8"),
splitLines(),
map((line) => JSON.parse(line))
]);
};

@ -0,0 +1,18 @@
"use strict";
const Promise = require("bluebird");
const pipe = require("@promistream/pipe");
const simpleSink = require("@promistream/simple-sink");
const updateStream = require("./update-stream");
// Manual smoke test: reads the update stream for the `datasheet:` prefix, logs every
// item it yields, and prints "Done!" if the pipeline ever finishes reading.
// (No top-level `return` here: it is only tolerated inside the CommonJS module wrapper
// and is a syntax error for any other parser or module type.)
Promise.try(() => {
	return pipe([
		updateStream({ prefix: "datasheet:" }),
		simpleSink((item) => {
			console.log(item);
		})
	]).read();
}).then(() => {
	console.log("Done!");
});

@ -0,0 +1,63 @@
"use strict";
const Promise = require("bluebird");
const bhttp = require("bhttp");
const pipe = require("@promistream/pipe");
const simpleSource = require("@promistream/simple-source");
const createCombineSequentialStream = require("@promistream/combine-sequential-streaming");
const createSpyStream = require("@promistream/spy");
const fromNodeStream = require("@promistream/from-node-stream");
const createNDJSONParseStream = require("./ndjson-parse-stream");
module.exports = function createUpdateStream({ prefix } = {}) {
let lastTimestamp = new Date(0);
// let lastTimestamp = new Date();
return pipe([
simpleSource(() => {
function attempt() {
return Promise.try(() => {
// To ensure that we don't hammer the srap instance
return Promise.delay(5 * 1000);
}).then(() => {
// console.log({ lastTimestamp });
// console.log(`http://localhost:3000/updates?prefix=${encodeURIComponent(prefix)}&since=${Math.floor(lastTimestamp.getTime())}`);
return bhttp.get(`http://localhost:3000/updates?prefix=${encodeURIComponent(prefix)}&since=${Math.floor(lastTimestamp.getTime())}`, {
stream: true
});
}).then((response) => {
if (response.statusCode === 200) {
return fromNodeStream.fromReadable(response);
} else {
throw new Error(`Got unexpected status code ${response.statusCode}`);
}
}).catch({ code: "ECONNREFUSED" }, (_error) => {
// Scraping server is down, try again in a minute or so
console.warn("WARNING: Scraping server is not reachable! Retrying in a minute...");
return Promise.try(() => {
return Promise.delay(60 * 1000);
}).then(() => {
return attempt();
});
});
}
return attempt();
}),
createCombineSequentialStream(),
createNDJSONParseStream(),
createSpyStream((item) => {
if (item.updatedAt != null) {
// TODO: Can this be made significantly more performant by string-sorting the timestamps in ISO format directly, instead of going through a parsing cycle?
let itemDate = new Date(item.updatedAt);
if (itemDate > lastTimestamp) {
lastTimestamp = itemDate;
}
}
})
]);
};

@ -2,7 +2,7 @@
const React = require("react");
module.exports = function Index() {
module.exports = function Index({ datasheetCount }) {
return (
<html lang="en">
<head>
@ -23,6 +23,9 @@ module.exports = function Index() {
</div>
<div className="contents">
<div className="wrapper">
<div className="counter">
Searching {datasheetCount} datasheets!
</div>
<div id="datasheetSearch">
Loading, please wait...
</div>

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save