v1.0.0
This commit is contained in:
parent
30a9f41d72
commit
13fcb0a7cc
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -1,3 +1,4 @@
|
|||
# https://git-scm.com/docs/gitignore
|
||||
# https://help.github.com/articles/ignoring-files
|
||||
# Example .gitignore files: https://github.com/github/gitignore
|
||||
# Example .gitignore files: https://github.com/github/gitignore
|
||||
/node_modules/
|
||||
|
|
66
README.md
Normal file
66
README.md
Normal file
|
@ -0,0 +1,66 @@
|
|||
# hma-proxy-parse
|
||||
|
||||
Good game, HideMyAss, but I win :)
|
||||
|
||||
This module extracts (parses) proxy details including the IP address from [http://proxylist.hidemyass.com/](http://proxylist.hidemyass.com/) and anything else using the same system, despite their (rather heavy) CSS-based obfuscation.
|
||||
|
||||
Note that this module only does the parsing; you're responsible for fetching the source HTML yourself.
|
||||
|
||||
## License
|
||||
|
||||
[WTFPL](http://www.wtfpl.net/txt/copying/) or [CC0](https://creativecommons.org/publicdomain/zero/1.0/), whichever you prefer. A donation and/or attribution are appreciated, but not required.
|
||||
|
||||
## Donate
|
||||
|
||||
My income consists entirely of donations for my projects. If this module is useful to you, consider [making a donation](http://cryto.net/~joepie91/donate.html)!
|
||||
|
||||
You can donate using Bitcoin, PayPal, Gratipay, Flattr, cash-in-mail, SEPA transfers, and pretty much anything else.
|
||||
|
||||
Bitcoins can also be sent to `1KafMHn6YEDFkUSoHK6pKkqqmfJUF5Wd1C` directly :)
|
||||
|
||||
## Contributing
|
||||
|
||||
Pull requests welcome. Please make sure your modifications are in line with the overall code style, and ensure that you're editing the `.coffee` files, not the `.js` files.
|
||||
|
||||
Build tool of choice is `gulp`; simply run `gulp` while developing, and it will watch for changes.
|
||||
|
||||
Be aware that by making a pull request, you agree to release your modifications under the licenses stated above.
|
||||
|
||||
## Usage
|
||||
|
||||
You can input HTML from any source, but this example uses [`bhttp`](https://www.npmjs.com/package/bhttp) in Promises mode.
|
||||
|
||||
```javascript
|
||||
var hmaProxyParse = require("hma-proxy-parse");
|
||||
var bhttp = require("bhttp");
|
||||
var Promise = require("bluebird");
|
||||
|
||||
Promise.try(function(){
|
||||
return bhttp.get("http://proxylist.hidemyass.com/");
|
||||
}).then(function(response){
|
||||
console.log(hmaProxyParse(response.body.toString()));
|
||||
});
|
||||
```
|
||||
|
||||
## API
|
||||
|
||||
### hmaProxyParse(html)
|
||||
|
||||
Parses the specified `html`, and returns an array of objects with proxy information. The objects look something like this:
|
||||
|
||||
```javascript
|
||||
{
|
||||
updateTimestamp: '1422645602',
|
||||
ip: '187.108.223.204',
|
||||
port: '8080',
|
||||
country: 'br',
|
||||
speed: '2441',
|
||||
connectionTime: '235',
|
||||
protocol: 'HTTP',
|
||||
anonymity: 'Low'
|
||||
}
|
||||
```
|
||||
|
||||
It doesn't attempt to interpret the data it extracts: every value is returned as it appears in the list, and you'll have to figure out what to do with it. I have no idea, for example, what the bounds on `speed` or `connectionTime` are, or what the possible options for `anonymity` are.
|
||||
|
||||
If this helped you, don't forget to donate ;)
|
28
gulpfile.js
Normal file
28
gulpfile.js
Normal file
|
@ -0,0 +1,28 @@
|
|||
var gulp = require('gulp');
|
||||
|
||||
/* CoffeeScript compile deps */
|
||||
var path = require('path');
|
||||
var gutil = require('gulp-util');
|
||||
var concat = require('gulp-concat');
|
||||
var rename = require('gulp-rename');
|
||||
var coffee = require('gulp-coffee');
|
||||
var cache = require('gulp-cached');
|
||||
var remember = require('gulp-remember');
|
||||
var plumber = require('gulp-plumber');
|
||||
|
||||
// Glob patterns for every CoffeeScript file that is compiled in place.
var source = ["lib/**/*.coffee", "index.coffee"];

/**
 * Compiles the CoffeeScript sources and writes each resulting file next to
 * its source (base "." combined with dest ".").
 *
 * Returns the final stream so gulp can tell when the task has finished.
 */
function compileCoffee() {
  var stream = gulp.src(source, {base: "."});

  stream = stream.pipe(plumber());
  stream = stream.pipe(cache("coffee"));

  // Compile errors are logged through gutil.log.
  var compiler = coffee({bare: true});
  compiler.on('error', gutil.log);
  stream = stream.pipe(compiler);

  // Log everything that comes out of the compile step.
  stream.on('data', gutil.log);

  stream = stream.pipe(remember("coffee"));
  return stream.pipe(gulp.dest("."));
}

/**
 * Re-runs the 'coffee' task whenever one of the source files changes.
 */
function watchSources() {
  gulp.watch(source, ['coffee']);
}

gulp.task('coffee', compileCoffee);
gulp.task('watch', watchSources);

// Running plain `gulp` compiles once and then keeps watching.
gulp.task('default', ['coffee', 'watch']);
|
1
index.coffee
Normal file
1
index.coffee
Normal file
|
@ -0,0 +1 @@
|
|||
# Package entry point: re-exports whatever ./lib/hma-proxy-parse exports.
module.exports = require "./lib/hma-proxy-parse"
|
67
lib/hma-proxy-parse.coffee
Normal file
67
lib/hma-proxy-parse.coffee
Normal file
|
@ -0,0 +1,67 @@
|
|||
cheerio = require "cheerio"
|
||||
|
||||
# Parses the HTML of a HideMyAss-style proxy list page.
#
# The IP column is obfuscated with decoy fragments that are hidden either by a
# class declared as `.name{display:none}` somewhere in the HTML, or by an
# inline `style` attribute. Hidden fragments are dropped; the remaining ones
# are joined with dots to form the address. All other fields are read from
# the markup and passed on as found.
#
# html    - the source HTML of the proxy list page.
# Returns an array with one object per `tbody > tr` row, holding the keys
# updateTimestamp, ip, port, country, speed, connectionTime, protocol and
# anonymity.
module.exports = (html) ->
  junkRegex = /\.([a-zA-Z0-9_-]+){display:none}/g

  junk = []
  proxies = []

  # Collect every class name that a `.name{display:none}` rule hides.
  while match = junkRegex.exec html
    junk.push match[1]

  $ = cheerio.load html

  $("tbody > tr").each ->
    element = $(this)
    indicators = element.find("td .progress-indicator")
    ipSegments = []

    element.find("td:nth-of-type(2) > span").contents().each ->
      ipElement = $(this)

      if this.tagName == null
        # Bare text node: it has no class or style that could hide it.
        isJunk = false
      else if this.tagName in ["div", "span"]
        classNames = ipElement.attr("class")?.split(" ") ? []
        isJunk = classNames.some (className) -> className in junk

        # A fragment can also be hidden through its inline style attribute.
        if ipElement.attr("style")? and ipElement.css("display") == "none"
          isJunk = true
      else
        # Any other node type is ignored.
        return

      # A regex is required here: replace() with a string pattern removes only
      # the first dot, so a fragment like ".108." would leave a stray dot in
      # the joined address.
      segment = ipElement.text().trim().replace(/\./g, "")
      ipSegments.push segment if not isJunk and segment isnt ""

      # Explicit return so the callback never hands `false` to cheerio's
      # each(), which would stop the iteration early.
      return

    proxies.push
      updateTimestamp: element.children(".timestamp").attr("rel")
      ip: ipSegments.join(".")
      # Strip all whitespace, not just the first newline.
      port: element.children("td:nth-of-type(3)").text().replace(/\s+/g, "")
      country: element.find("td .country").parent().attr("rel")
      speed: indicators.eq(0).attr("value")
      connectionTime: indicators.eq(1).attr("value")
      protocol: element.children("td:nth-of-type(7)").text()
      anonymity: element.children("td:nth-of-type(8)").text()

    return

  return proxies
|
74
lib/hma-proxy-parse.js
Normal file
74
lib/hma-proxy-parse.js
Normal file
|
@ -0,0 +1,74 @@
|
|||
var cheerio,
|
||||
__indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; };
|
||||
|
||||
cheerio = require("cheerio");
|
||||
|
||||
module.exports = function(html) {
|
||||
var $, junk, junkRegex, match, proxies, rows;
|
||||
junkRegex = /\.([a-zA-Z0-9_-]+){display:none}/g;
|
||||
junk = [];
|
||||
proxies = [];
|
||||
match = true;
|
||||
while (match) {
|
||||
match = junkRegex.exec(html);
|
||||
if (match) {
|
||||
junk.push(match[1]);
|
||||
}
|
||||
}
|
||||
$ = cheerio.load(html);
|
||||
rows = $("tbody > tr");
|
||||
rows.each(function() {
|
||||
var anonymity, connectionTime, country, element, ipBlock, ipSegments, port, protocol, speed, timestamp;
|
||||
element = $(this);
|
||||
timestamp = element.children(".timestamp").attr("rel");
|
||||
port = element.children("td:nth-of-type(3)").text().replace("\n", "");
|
||||
country = element.find("td .country").parent().attr("rel");
|
||||
speed = element.find("td .progress-indicator").eq(0).attr("value");
|
||||
connectionTime = element.find("td .progress-indicator").eq(1).attr("value");
|
||||
protocol = element.children("td:nth-of-type(7)").text();
|
||||
anonymity = element.children("td:nth-of-type(8)").text();
|
||||
ipSegments = [];
|
||||
ipBlock = element.find("td:nth-of-type(2) > span");
|
||||
ipBlock.contents().each(function() {
|
||||
var className, classNames, ipElement, isJunk, _i, _len, _ref, _ref1, _ref2, _ref3;
|
||||
ipElement = $(this);
|
||||
if (this.tagName === null) {
|
||||
if ((_ref = ipElement.text().trim()) !== "." && _ref !== "") {
|
||||
return ipSegments.push(ipElement.text().trim().replace(".", ""));
|
||||
}
|
||||
} else if ((_ref1 = this.tagName) === "div" || _ref1 === "span") {
|
||||
isJunk = false;
|
||||
classNames = (_ref2 = ipElement.attr("class")) != null ? _ref2.split(" ") : void 0;
|
||||
if (classNames != null) {
|
||||
for (_i = 0, _len = classNames.length; _i < _len; _i++) {
|
||||
className = classNames[_i];
|
||||
if (__indexOf.call(junk, className) >= 0) {
|
||||
isJunk = true;
|
||||
} else {
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ipElement.attr("style") != null) {
|
||||
if (ipElement.css("display") === "none") {
|
||||
isJunk = true;
|
||||
}
|
||||
}
|
||||
if (!isJunk && ((_ref3 = ipElement.text().trim()) !== "." && _ref3 !== "")) {
|
||||
return ipSegments.push(ipElement.text().trim().replace(".", ""));
|
||||
}
|
||||
}
|
||||
});
|
||||
return proxies.push({
|
||||
updateTimestamp: timestamp,
|
||||
ip: ipSegments.join("."),
|
||||
port: port,
|
||||
country: country,
|
||||
speed: speed,
|
||||
connectionTime: connectionTime,
|
||||
protocol: protocol,
|
||||
anonymity: anonymity
|
||||
});
|
||||
});
|
||||
return proxies;
|
||||
};
|
37
package.json
Normal file
37
package.json
Normal file
|
@ -0,0 +1,37 @@
|
|||
{
|
||||
"name": "hma-proxy-parse",
|
||||
"version": "1.0.0",
|
||||
"description": "Parses proxies out of HideMyAss' public proxy list.",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git://github.com/joepie91/hma-proxy-parse"
|
||||
},
|
||||
"keywords": [
|
||||
"hidemyass",
|
||||
"proxy",
|
||||
"parser"
|
||||
],
|
||||
"author": "Sven Slootweg",
|
||||
"license": "WTFPL",
|
||||
"dependencies": {
|
||||
"cheerio": "^0.18.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"bhttp": "^1.0.2",
|
||||
"bluebird": "^2.9.4",
|
||||
"gulp": "~3.8.0",
|
||||
"gulp-cached": "~0.0.3",
|
||||
"gulp-coffee": "~2.0.1",
|
||||
"gulp-concat": "~2.2.0",
|
||||
"gulp-livereload": "~2.1.0",
|
||||
"gulp-nodemon": "~1.0.4",
|
||||
"gulp-plumber": "~0.6.3",
|
||||
"gulp-remember": "~0.2.0",
|
||||
"gulp-rename": "~1.2.0",
|
||||
"gulp-util": "~2.2.17"
|
||||
}
|
||||
}
|
8
test.coffee
Normal file
8
test.coffee
Normal file
|
@ -0,0 +1,8 @@
|
|||
hmaProxyParse = require "./"
|
||||
bhttp = require "bhttp"
|
||||
Promise = require "bluebird"
|
||||
|
||||
# Manual smoke test: fetch the live proxy list and print the parsed result.

# Requests the proxy list page.
fetchList = ->
  bhttp.get "http://proxylist.hidemyass.com/"

# Parses the response body and writes the resulting proxy objects to stdout.
printProxies = (response) ->
  console.log hmaProxyParse(response.body.toString())

Promise.try(fetchList).then(printProxies)
|
Loading…
Reference in a new issue