v1.0.0
parent
30a9f41d72
commit
13fcb0a7cc
@ -1,3 +1,4 @@
|
|||||||
# https://git-scm.com/docs/gitignore
|
# https://git-scm.com/docs/gitignore
|
||||||
# https://help.github.com/articles/ignoring-files
|
# https://help.github.com/articles/ignoring-files
|
||||||
# Example .gitignore files: https://github.com/github/gitignore
|
# Example .gitignore files: https://github.com/github/gitignore
|
||||||
|
/node_modules/
|
||||||
|
@ -0,0 +1,66 @@
|
|||||||
|
# hma-proxy-parse
|
||||||
|
|
||||||
|
Good game, HideMyAss, but I win :)
|
||||||
|
|
||||||
|
This module extracts (parses) proxy details including the IP address from [http://proxylist.hidemyass.com/](http://proxylist.hidemyass.com/) and anything else using the same system, despite their (rather heavy) CSS-based obfuscation.
|
||||||
|
|
||||||
|
Note that this module only does the parsing; you're responsible for fetching the source HTML yourself.
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
[WTFPL](http://www.wtfpl.net/txt/copying/) or [CC0](https://creativecommons.org/publicdomain/zero/1.0/), whichever you prefer. A donation and/or attribution are appreciated, but not required.
|
||||||
|
|
||||||
|
## Donate
|
||||||
|
|
||||||
|
My income consists entirely of donations for my projects. If this module is useful to you, consider [making a donation](http://cryto.net/~joepie91/donate.html)!
|
||||||
|
|
||||||
|
You can donate using Bitcoin, PayPal, Gratipay, Flattr, cash-in-mail, SEPA transfers, and pretty much anything else.
|
||||||
|
|
||||||
|
Bitcoins can also be sent to `1KafMHn6YEDFkUSoHK6pKkqqmfJUF5Wd1C` directly :)
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
Pull requests welcome. Please make sure your modifications are in line with the overall code style, and ensure that you're editing the `.coffee` files, not the `.js` files.
|
||||||
|
|
||||||
|
Build tool of choice is `gulp`; simply run `gulp` while developing, and it will watch for changes.
|
||||||
|
|
||||||
|
Be aware that by making a pull request, you agree to release your modifications under the licenses stated above.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
You can input HTML from any source, but this example uses [`bhttp`](https://www.npmjs.com/package/bhttp) in Promises mode.
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
var hmaProxyParse = require("hma-proxy-parse");
|
||||||
|
var bhttp = require("bhttp");
|
||||||
|
var Promise = require("bluebird");
|
||||||
|
|
||||||
|
Promise.try(function(){
|
||||||
|
return bhttp.get("http://proxylist.hidemyass.com/");
|
||||||
|
}).then(function(response){
|
||||||
|
console.log(hmaProxyParse(response.body.toString()));
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## API
|
||||||
|
|
||||||
|
### hmaProxyParse(html)
|
||||||
|
|
||||||
|
Parses the specified `html`, and returns an array of objects with proxy information. The objects look something like this:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
{
|
||||||
|
updateTimestamp: '1422645602',
|
||||||
|
ip: '187.108.223.204',
|
||||||
|
port: '8080',
|
||||||
|
country: 'br',
|
||||||
|
speed: '2441',
|
||||||
|
connectionTime: '235',
|
||||||
|
protocol: 'HTTP',
|
||||||
|
anonymity: 'Low'
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The module makes no attempt to interpret the data it extracts - every value is returned as a string, exactly as it appears in the list, and it's up to you to figure out what to do with it. I have no idea, for example, what the bounds on `speed` or `connectionTime` are, or what the possible options for `anonymity` are.
|
||||||
|
|
||||||
|
If this helped you, don't forget to donate ;)
|
@ -0,0 +1,28 @@
|
|||||||
|
var gulp = require('gulp');
|
||||||
|
|
||||||
|
/* CoffeeScript compile deps */
|
||||||
|
var path = require('path');
|
||||||
|
var gutil = require('gulp-util');
|
||||||
|
var concat = require('gulp-concat');
|
||||||
|
var rename = require('gulp-rename');
|
||||||
|
var coffee = require('gulp-coffee');
|
||||||
|
var cache = require('gulp-cached');
|
||||||
|
var remember = require('gulp-remember');
|
||||||
|
var plumber = require('gulp-plumber');
|
||||||
|
|
||||||
|
/* Glob patterns for the CoffeeScript sources; used by both the 'coffee' and the 'watch' task. */
var source = ["lib/**/*.coffee", "index.coffee"];
|
||||||
|
|
||||||
|
/* Compile the CoffeeScript sources and write the results relative to the project root. */
gulp.task('coffee', function() {
	/* `bare: true` compiles without the top-level function wrapper. */
	var compiler = coffee({bare: true});
	compiler.on('error', gutil.log);

	var compiled = gulp.src(source, {base: "."})
		.pipe(plumber())
		.pipe(cache("coffee"))
		.pipe(compiler);

	/* Log every chunk the compiler emits. */
	compiled.on('data', gutil.log);

	return compiled
		.pipe(remember("coffee"))
		.pipe(gulp.dest("."));
});
|
||||||
|
|
||||||
|
/* Re-run the 'coffee' task whenever one of the source files changes. */
gulp.task('watch', function() {
	var tasksOnChange = ['coffee'];

	gulp.watch(source, tasksOnChange);
});
|
||||||
|
|
||||||
|
gulp.task('default', ['coffee', 'watch']);
|
@ -0,0 +1 @@
|
|||||||
|
# Package entry point: re-exports the parser function from lib/hma-proxy-parse.
module.exports = require "./lib/hma-proxy-parse"
|
@ -0,0 +1 @@
|
|||||||
|
// Package entry point, compiled from index.coffee - edit the .coffee source rather than this file.
module.exports = require("./lib/hma-proxy-parse");
|
@ -0,0 +1,67 @@
|
|||||||
|
cheerio = require "cheerio"
|
||||||
|
|
||||||
|
# Parses the HTML source of a HideMyAss-style proxy list.
#
# html - the page source, as a string. Fetching it is the caller's job.
#
# Returns an array containing one object per `tbody > tr` row, with the keys
# updateTimestamp, ip, port, country, speed, connectionTime, protocol and
# anonymity. All values are taken from the page as-is; nothing is interpreted.
module.exports = (html) ->
	# Matches the CSS rules in the page that hide a class via `display:none`.
	# Elements carrying such a class are decoys, not part of the real IP.
	junkRegex = /\.([a-zA-Z0-9_-]+){display:none}/g

	junk = []
	proxies = []

	# The regex is global, so every exec call continues after the previous match.
	while match = junkRegex.exec html
		junk.push match[1]

	$ = cheerio.load html

	rows = $("tbody > tr")

	rows.each ->
		element = $(this)
		timestamp = element.children(".timestamp").attr("rel")
		# Strip every newline; a string pattern would only remove the first one.
		port = element.children("td:nth-of-type(3)").text().replace(/\n/g, "")
		country = element.find("td .country").parent().attr("rel")
		speed = element.find("td .progress-indicator").eq(0).attr("value")
		connectionTime = element.find("td .progress-indicator").eq(1).attr("value")
		protocol = element.children("td:nth-of-type(7)").text()
		anonymity = element.children("td:nth-of-type(8)").text()

		ipSegments = []

		# Appends every non-empty, dot-separated piece of `text` as a segment.
		# Splitting (rather than removing a single dot) keeps text such as
		# ".45." from leaving a stray dot behind in the joined address.
		addSegments = (text) ->
			for part in text.split(".")
				part = part.trim()
				ipSegments.push part if part isnt ""
			return

		ipBlock = element.find("td:nth-of-type(2) > span")

		ipBlock.contents().each ->
			ipElement = $(this)

			if this.tagName == null
				# Nodes without a tag name are treated as bare, visible text.
				addSegments ipElement.text()
			else if this.tagName in ["div", "span"]
				isJunk = false

				classNames = ipElement.attr("class")?.split(" ")

				# Hidden through one of the classes collected from the stylesheet?
				if classNames?
					for className in classNames
						if className in junk
							isJunk = true

				# Hidden through an inline style?
				if ipElement.attr("style")?
					if ipElement.css("display") == "none"
						isJunk = true

				addSegments ipElement.text() unless isJunk

		proxies.push
			updateTimestamp: timestamp
			ip: ipSegments.join(".")
			port: port
			country: country
			speed: speed
			connectionTime: connectionTime
			protocol: protocol
			anonymity: anonymity

	return proxies
|
@ -0,0 +1,74 @@
|
|||||||
|
var cheerio,
|
||||||
|
__indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; };
|
||||||
|
|
||||||
|
cheerio = require("cheerio");
|
||||||
|
|
||||||
|
module.exports = function(html) {
|
||||||
|
var $, junk, junkRegex, match, proxies, rows;
|
||||||
|
junkRegex = /\.([a-zA-Z0-9_-]+){display:none}/g;
|
||||||
|
junk = [];
|
||||||
|
proxies = [];
|
||||||
|
match = true;
|
||||||
|
while (match) {
|
||||||
|
match = junkRegex.exec(html);
|
||||||
|
if (match) {
|
||||||
|
junk.push(match[1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$ = cheerio.load(html);
|
||||||
|
rows = $("tbody > tr");
|
||||||
|
rows.each(function() {
|
||||||
|
var anonymity, connectionTime, country, element, ipBlock, ipSegments, port, protocol, speed, timestamp;
|
||||||
|
element = $(this);
|
||||||
|
timestamp = element.children(".timestamp").attr("rel");
|
||||||
|
port = element.children("td:nth-of-type(3)").text().replace("\n", "");
|
||||||
|
country = element.find("td .country").parent().attr("rel");
|
||||||
|
speed = element.find("td .progress-indicator").eq(0).attr("value");
|
||||||
|
connectionTime = element.find("td .progress-indicator").eq(1).attr("value");
|
||||||
|
protocol = element.children("td:nth-of-type(7)").text();
|
||||||
|
anonymity = element.children("td:nth-of-type(8)").text();
|
||||||
|
ipSegments = [];
|
||||||
|
ipBlock = element.find("td:nth-of-type(2) > span");
|
||||||
|
ipBlock.contents().each(function() {
|
||||||
|
var className, classNames, ipElement, isJunk, _i, _len, _ref, _ref1, _ref2, _ref3;
|
||||||
|
ipElement = $(this);
|
||||||
|
if (this.tagName === null) {
|
||||||
|
if ((_ref = ipElement.text().trim()) !== "." && _ref !== "") {
|
||||||
|
return ipSegments.push(ipElement.text().trim().replace(".", ""));
|
||||||
|
}
|
||||||
|
} else if ((_ref1 = this.tagName) === "div" || _ref1 === "span") {
|
||||||
|
isJunk = false;
|
||||||
|
classNames = (_ref2 = ipElement.attr("class")) != null ? _ref2.split(" ") : void 0;
|
||||||
|
if (classNames != null) {
|
||||||
|
for (_i = 0, _len = classNames.length; _i < _len; _i++) {
|
||||||
|
className = classNames[_i];
|
||||||
|
if (__indexOf.call(junk, className) >= 0) {
|
||||||
|
isJunk = true;
|
||||||
|
} else {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ipElement.attr("style") != null) {
|
||||||
|
if (ipElement.css("display") === "none") {
|
||||||
|
isJunk = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!isJunk && ((_ref3 = ipElement.text().trim()) !== "." && _ref3 !== "")) {
|
||||||
|
return ipSegments.push(ipElement.text().trim().replace(".", ""));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return proxies.push({
|
||||||
|
updateTimestamp: timestamp,
|
||||||
|
ip: ipSegments.join("."),
|
||||||
|
port: port,
|
||||||
|
country: country,
|
||||||
|
speed: speed,
|
||||||
|
connectionTime: connectionTime,
|
||||||
|
protocol: protocol,
|
||||||
|
anonymity: anonymity
|
||||||
|
});
|
||||||
|
});
|
||||||
|
return proxies;
|
||||||
|
};
|
@ -0,0 +1,37 @@
|
|||||||
|
{
|
||||||
|
"name": "hma-proxy-parse",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "Parses proxies out of HideMyAss' public proxy list.",
|
||||||
|
"main": "index.js",
|
||||||
|
"scripts": {
|
||||||
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
|
},
|
||||||
|
"repository": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "git://github.com/joepie91/hma-proxy-parse"
|
||||||
|
},
|
||||||
|
"keywords": [
|
||||||
|
"hidemyass",
|
||||||
|
"proxy",
|
||||||
|
"parser"
|
||||||
|
],
|
||||||
|
"author": "Sven Slootweg",
|
||||||
|
"license": "WTFPL",
|
||||||
|
"dependencies": {
|
||||||
|
"cheerio": "^0.18.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"bhttp": "^1.0.2",
|
||||||
|
"bluebird": "^2.9.4",
|
||||||
|
"gulp": "~3.8.0",
|
||||||
|
"gulp-cached": "~0.0.3",
|
||||||
|
"gulp-coffee": "~2.0.1",
|
||||||
|
"gulp-concat": "~2.2.0",
|
||||||
|
"gulp-livereload": "~2.1.0",
|
||||||
|
"gulp-nodemon": "~1.0.4",
|
||||||
|
"gulp-plumber": "~0.6.3",
|
||||||
|
"gulp-remember": "~0.2.0",
|
||||||
|
"gulp-rename": "~1.2.0",
|
||||||
|
"gulp-util": "~2.2.17"
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,8 @@
|
|||||||
|
hmaProxyParse = require "./"
|
||||||
|
bhttp = require "bhttp"
|
||||||
|
Promise = require "bluebird"
|
||||||
|
|
||||||
|
# Manual check: fetch the proxy list page and print whatever the parser extracts from it.
Promise.try ->
	bhttp.get "http://proxylist.hidemyass.com/"
.then (response) ->
	pageSource = response.body.toString()
	console.log hmaProxyParse(pageSource)
|
Loading…
Reference in New Issue