master
Sven Slootweg 9 years ago
parent 30a9f41d72
commit 13fcb0a7cc

3
.gitignore vendored

@ -1,3 +1,4 @@
# https://git-scm.com/docs/gitignore
# https://help.github.com/articles/ignoring-files
# Example .gitignore files: https://github.com/github/gitignore
# Example .gitignore files: https://github.com/github/gitignore
/node_modules/

@ -0,0 +1,66 @@
# hma-proxy-parse
Good game, HideMyAss, but I win :)
This module extracts (parses) proxy details including the IP address from [http://proxylist.hidemyass.com/](http://proxylist.hidemyass.com/) and anything else using the same system, despite their (rather heavy) CSS-based obfuscation.
Note that this module only does the parsing; you're responsible for fetching the source HTML yourself.
## License
[WTFPL](http://www.wtfpl.net/txt/copying/) or [CC0](https://creativecommons.org/publicdomain/zero/1.0/), whichever you prefer. A donation and/or attribution are appreciated, but not required.
## Donate
My income consists entirely of donations for my projects. If this module is useful to you, consider [making a donation](http://cryto.net/~joepie91/donate.html)!
You can donate using Bitcoin, PayPal, Gratipay, Flattr, cash-in-mail, SEPA transfers, and pretty much anything else.
Bitcoins can also be sent to `1KafMHn6YEDFkUSoHK6pKkqqmfJUF5Wd1C` directly :)
## Contributing
Pull requests welcome. Please make sure your modifications are in line with the overall code style, and ensure that you're editing the `.coffee` files, not the `.js` files.
Build tool of choice is `gulp`; simply run `gulp` while developing, and it will watch for changes.
Be aware that by making a pull request, you agree to release your modifications under the licenses stated above.
## Usage
You can input HTML from any source, but this example uses [`bhttp`](https://www.npmjs.com/package/bhttp) in Promises mode.
```javascript
var hmaProxyParse = require("hma-proxy-parse");
var bhttp = require("bhttp");
var Promise = require("bluebird");
Promise.try(function(){
return bhttp.get("http://proxylist.hidemyass.com/");
}).then(function(response){
console.log(hmaProxyParse(response.body.toString()));
});
```
## API
### hmaProxyParse(html)
Parses the specified `html`, and returns an array of objects with proxy information. The objects look something like this:
```javascript
{
updateTimestamp: '1422645602',
ip: '187.108.223.204',
port: '8080',
country: 'br',
speed: '2441',
connectionTime: '235',
protocol: 'HTTP',
anonymity: 'Low'
}
```
It doesn't attempt to interpret the data - every value is returned as a string, exactly as it appears in the list, and you'll have to figure out what to do with it. I have no idea, for example, what the bounds on `speed` or `connectionTime` are, or what the possible values for `anonymity` are.
If this helped you, don't forget to donate ;)

@ -0,0 +1,28 @@
var gulp = require('gulp');
/* CoffeeScript compile deps */
var path = require('path');
var gutil = require('gulp-util');
var concat = require('gulp-concat');
var rename = require('gulp-rename');
var coffee = require('gulp-coffee');
var cache = require('gulp-cached');
var remember = require('gulp-remember');
var plumber = require('gulp-plumber');
// Globs for the CoffeeScript sources; shared by the compile and watch tasks.
var source = ["lib/**/*.coffee", "index.coffee"];

/**
 * Compiles the CoffeeScript sources and writes the resulting files back
 * next to their originals (base and destination are both ".").
 *
 * @returns {Stream} the gulp pipeline, so gulp can tell when the task is done
 */
function compileCoffee() {
	// Compiler errors are logged through gutil.log.
	var compiler = coffee({bare: true}).on('error', gutil.log);

	return gulp.src(source, {base: "."})
		.pipe(plumber())
		.pipe(cache("coffee"))
		.pipe(compiler)
		// Every chunk emitted by the compiler stage is logged as well.
		.on('data', gutil.log)
		.pipe(remember("coffee"))
		.pipe(gulp.dest("."));
}

gulp.task('coffee', compileCoffee);
/**
 * Re-runs the 'coffee' task whenever a file matching `source` changes.
 */
function watchSources() {
	gulp.watch(source, ['coffee']);
}

gulp.task('watch', watchSources);

// The default task depends on both 'coffee' and 'watch'.
gulp.task('default', ['coffee', 'watch']);

@ -0,0 +1 @@
# CoffeeScript source of the package entry point: re-exports the parser from lib/hma-proxy-parse.
module.exports = require "./lib/hma-proxy-parse"

@ -0,0 +1 @@
// Package entry point: re-exports the parser from lib/hma-proxy-parse.
module.exports = require("./lib/hma-proxy-parse");

@ -0,0 +1,67 @@
# Parser for the obfuscated proxy table.
# NOTE(review): leading indentation is not visible in this view of the file;
# the comments below describe the flow as far as the statements themselves show it.
cheerio = require "cheerio"
# Exported function: takes the page HTML and returns an array of objects with
# the keys updateTimestamp, ip, port, country, speed, connectionTime, protocol
# and anonymity, one object per `tbody > tr` row.
module.exports = (html) ->
# Matches CSS rules of the form `.name{display:none}` and captures the class name.
junkRegex = /\.([a-zA-Z0-9_-]+){display:none}/g
# Class names captured by junkRegex; elements carrying one of them are left out of the IP.
junk = []
proxies = []
match = true # Kickstarting the loop...
# The regex has the `g` flag, so each exec call continues after the previous match
# and returns null once there are no more matches, which ends the loop.
while match
match = junkRegex.exec html
if match
junk.push match[1]
$ = cheerio.load html
rows = $("tbody > tr")
rows.each ->
element = $(this)
# Plain fields are read directly from attributes / cell text, without conversion.
timestamp = element.children(".timestamp").attr("rel")
# Only the first "\n" in the port cell's text is removed (string pattern, not a global regex).
port = element.children("td:nth-of-type(3)").text().replace("\n", "")
country = element.find("td .country").parent().attr("rel")
# The first progress indicator supplies the speed, the second the connection time.
speed = element.find("td .progress-indicator").eq(0).attr("value")
connectionTime = element.find("td .progress-indicator").eq(1).attr("value")
protocol = element.children("td:nth-of-type(7)").text()
anonymity = element.children("td:nth-of-type(8)").text()
# The IP is assembled from the child nodes of the span in the second cell.
ipSegments = []
ipBlock = element.find("td:nth-of-type(2) > span")
ipBlock.contents().each ->
ipElement = $(this)
# Nodes whose tagName is null are handled as bare text.
if this.tagName == null
# Empty text and lone dots are skipped; otherwise the first "." is stripped from the segment.
if ipElement.text().trim() not in [".", ""]
ipSegments.push ipElement.text().trim().replace(".", "")
# Only div and span elements are considered; any other tag is ignored.
else if this.tagName in ["div", "span"]
isJunk = false
classNames = ipElement.attr("class")?.split(" ")
if classNames?
for className in classNames
# A single class listed in `junk` is enough to mark the element as junk.
if className in junk
isJunk = true
# NOTE(review): with the indentation lost it is unclear what this `else` encloses;
# the compiled lib/hma-proxy-parse.js has an empty else branch here and runs the
# style check below for every div/span - confirm against the real file.
else
# An inline style with display "none" also marks the element as junk.
if ipElement.attr("style")?
if ipElement.css("display") == "none"
isJunk = true
if not isJunk and ipElement.text().trim() not in [".", ""]
ipSegments.push ipElement.text().trim().replace(".", "")
proxies.push
updateTimestamp: timestamp
# The collected segments are joined with "." to form the address.
ip: ipSegments.join(".")
port: port
country: country
speed: speed
connectionTime: connectionTime
protocol: protocol
anonymity: anonymity
return proxies

@ -0,0 +1,74 @@
var cheerio,
__indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; };
cheerio = require("cheerio");
module.exports = function(html) {
var $, junk, junkRegex, match, proxies, rows;
junkRegex = /\.([a-zA-Z0-9_-]+){display:none}/g;
junk = [];
proxies = [];
match = true;
while (match) {
match = junkRegex.exec(html);
if (match) {
junk.push(match[1]);
}
}
$ = cheerio.load(html);
rows = $("tbody > tr");
rows.each(function() {
var anonymity, connectionTime, country, element, ipBlock, ipSegments, port, protocol, speed, timestamp;
element = $(this);
timestamp = element.children(".timestamp").attr("rel");
port = element.children("td:nth-of-type(3)").text().replace("\n", "");
country = element.find("td .country").parent().attr("rel");
speed = element.find("td .progress-indicator").eq(0).attr("value");
connectionTime = element.find("td .progress-indicator").eq(1).attr("value");
protocol = element.children("td:nth-of-type(7)").text();
anonymity = element.children("td:nth-of-type(8)").text();
ipSegments = [];
ipBlock = element.find("td:nth-of-type(2) > span");
ipBlock.contents().each(function() {
var className, classNames, ipElement, isJunk, _i, _len, _ref, _ref1, _ref2, _ref3;
ipElement = $(this);
if (this.tagName === null) {
if ((_ref = ipElement.text().trim()) !== "." && _ref !== "") {
return ipSegments.push(ipElement.text().trim().replace(".", ""));
}
} else if ((_ref1 = this.tagName) === "div" || _ref1 === "span") {
isJunk = false;
classNames = (_ref2 = ipElement.attr("class")) != null ? _ref2.split(" ") : void 0;
if (classNames != null) {
for (_i = 0, _len = classNames.length; _i < _len; _i++) {
className = classNames[_i];
if (__indexOf.call(junk, className) >= 0) {
isJunk = true;
} else {
}
}
}
if (ipElement.attr("style") != null) {
if (ipElement.css("display") === "none") {
isJunk = true;
}
}
if (!isJunk && ((_ref3 = ipElement.text().trim()) !== "." && _ref3 !== "")) {
return ipSegments.push(ipElement.text().trim().replace(".", ""));
}
}
});
return proxies.push({
updateTimestamp: timestamp,
ip: ipSegments.join("."),
port: port,
country: country,
speed: speed,
connectionTime: connectionTime,
protocol: protocol,
anonymity: anonymity
});
});
return proxies;
};

@ -0,0 +1,37 @@
{
"name": "hma-proxy-parse",
"version": "1.0.0",
"description": "Parses proxies out of HideMyAss' public proxy list.",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "git://github.com/joepie91/hma-proxy-parse"
},
"keywords": [
"hidemyass",
"proxy",
"parser"
],
"author": "Sven Slootweg",
"license": "WTFPL",
"dependencies": {
"cheerio": "^0.18.0"
},
"devDependencies": {
"bhttp": "^1.0.2",
"bluebird": "^2.9.4",
"gulp": "~3.8.0",
"gulp-cached": "~0.0.3",
"gulp-coffee": "~2.0.1",
"gulp-concat": "~2.2.0",
"gulp-livereload": "~2.1.0",
"gulp-nodemon": "~1.0.4",
"gulp-plumber": "~0.6.3",
"gulp-remember": "~0.2.0",
"gulp-rename": "~1.2.0",
"gulp-util": "~2.2.17"
}
}

@ -0,0 +1,8 @@
hmaProxyParse = require "./"
bhttp = require "bhttp"
Promise = require "bluebird"
# Manual check: requests the live proxy list page and prints the parsed result.
Promise.try ->
bhttp.get "http://proxylist.hidemyass.com/"
.then (response) ->
# The response body is converted to a string before it is handed to the parser.
console.log hmaProxyParse(response.body.toString())
Loading…
Cancel
Save