This addon comes with subscription lists, which are basically collections of regular expressions compiled by others, which are designed to cover most of the advertisement sites and scripts and block them. However, none of these lists can cover all of the ads and ad scripts out there, so you'll occasionally have to add your own rules. This is easy: You click on the ABP icon, in my case it lives in the status bar, and then click "Open blockable items".
If there's anything in the list that you suspect should be blocked, right-click one of the addresses listed (these are the resources your browser loaded when you visited the web page you're looking at) and choose "Block this item". You can then define the custom rule you want to add and how broad it should be.
Recently, this caught my attention:
- Code: Select all
http://input.insights.gravity.com/pigeons/v2/moth_103_packed.js
So I selected it, and opted to block it, adding this broad regular expression, covering the entire domain:
- Code: Select all
gravity.com^
Yet, somehow, my curiosity was aroused and I looked at the source. I noticed it was obfuscated or minified, as most JavaScript ad scripts are. Still, it looked somewhat recognizable and, moreover, rather simple, so I decided to take it upon myself to analyze it. Here I present the results, and you'll soon find out why.
First, it appears that freeforums.org collaborates with this advertiser/data miner, by offering variables defined inline before the script is called. The way this works is that freeforums.org knows about these variables on the server side, so the PHP script dynamically inserts them into inline JavaScript in order to pass them to the ad/data-mining script from gravity.com.
An example, lifted from the source while viewing a random thread:
- Code: Select all
<script>
var gravityInsightsParams = new Object();
gravityInsightsParams.site_guid = 'f783045be20e5e1771b292d806ed6e1a';
gravityInsightsParams.thread_id = 416;
gravityInsightsParams.post_id = 0;
gravityInsightsParams.forum_id = 53;
gravityInsightsParams.user_id = 132;
gravityInsightsParams.username = 'snowcrash';
gravityInsightsParams.post_title = '';
gravityInsightsParams.thread_title = 'WTC1+Impact+-+Explosion+at+75%2F76+MER';
gravityInsightsParams.forum_title = 'WTC1+and+WTC2+-+Impacts';
gravityInsightsParams.board = 'the911forum';
document.write(unescape('%3Cscript src=\'http://input.insights.gravity.com/pigeons/v2/moth_103_packed.js\' type=\'text/javascript\'%3E%3C/script%3E'));
</script>
The last line writes the following to the HTML DOM source, right behind the last line above:
- Code: Select all
<script src="http://input.insights.gravity.com/pigeons/v2/moth_103_packed.js" type="text/javascript"></script>
This is the script I blocked. My first step was to de-minify/deobfuscate this script so I could properly study the source:
- Code: Select all
// De-minified Gravity Insights tracking snippet.
// An anonymous function is invoked immediately with the page's
// `gravityInsightsParams` object (bound to `p`). Its return value, an object
// exposing a single method `cc`, is assigned to the global `GravityInsights`.
var GravityInsights = (
function(p){
// Assigned without a declaration, so in non-strict code this creates a global.
endpoint = 'http://input.insights.gravity.com';
// NOTE(review): the next four lines parse as labeled expression statements
// (`label : expression;`), not assignments, so they set nothing.
// Presumably remnants of an object literal in the original source - unconfirmed.
referrer : '';
site_guid : '';
thread_id : 0;
user_guid : '';
// Entry point: look up the tracking cookie and, if a value comes back,
// send the beacon with it. Does nothing further when checkForCookie()
// returns false (the cookie was missing).
function go_amir_go(){
// `user_guid` is not declared anywhere (the label above is not a variable),
// so this is another implicit global.
user_guid = checkForCookie();
if(user_guid){
remote(user_guid);
}
}
// Builds the query string for user GUID `ug` and requests capture.php by
// assigning the URL to the `src` of a new Image object.
// `q` and `i` are undeclared and therefore implicit globals.
function remote(ug){
q = getParams(ug);
i = new Image();
i.src = endpoint + "/pigeons/v2/capture.php?" + q
}
// Returns the beacon query string built from `ug`, the page referrer and URL,
// the detected OS, and the fields of `p`.
// Only some values are passed through escape(); site_guid, ug, thread_id,
// post_id, forum_id and user_id are concatenated as-is.
function getParams(ug){
var params='';
params += "site_guid=" + p.site_guid;
params += "&action=beacon";
params += "&user_guid=" + ug;
params += "&referrer=" + escape(document.referrer);
params += "&thread_id=" + p.thread_id;
params += "&href=" + escape(location.href);
params += "&OS=" + escape(getOs());
params += "&post_id=" + p.post_id;
params += "&forum_id=" + p.forum_id;
params += "&user_id=" + p.user_id;
params += "&user_name=" + escape(p.username);
params += "&post_title=" + escape(p.post_title);
params += "&thread_title=" + escape(p.thread_title);
params += "&forum_title=" + escape(p.forum_title);
if (p.board !== undefined) {
params += "&board=" + escape(p.board);
}
else {
// When p.board is undefined the value sent is two literal single-quote
// characters, not an empty string.
params += "&board=''";
}
return params;
}
// Reads the 'grvinsights' cookie. Returns its value when non-empty;
// otherwise calls remoteSetCookie() and returns false.
// `val` is an implicit global.
function checkForCookie(){
val = readCookie('grvinsights');
if (val == ''){
remoteSetCookie();
return false;
}
else {
return val;
}
}
// Writes a <script> tag into the document that loads moth_setter.php from
// `endpoint`, with the user id and site GUID in the query string.
function remoteSetCookie(){
// NOTE(review): this assigns to `p`, the enclosing function's parameter,
// replacing the parameter object with the query string. Any getParams() call
// made afterwards (e.g. via cc() -> remote()) reads p.site_guid, p.thread_id,
// etc. from a string and gets `undefined`.
p = '?u=' + p.user_id + '&sg=' + p.site_guid
document.write(unescape('%3Cscript src=\'' + endpoint + '/pigeons/v2/moth_setter.php' +
p + '\' type=\'text/javascript\'%3E%3C/script%3E'));
}
// Returns the unescaped value stored under `cookieName` in document.cookie,
// or "" when the name is empty or not found.
// The name is located with a plain substring search over the whole cookie
// string, so it would also match inside another cookie's name or value.
function readCookie(cookieName){
var theCookie = "" + document.cookie;
var ind = theCookie.indexOf(cookieName);
if (ind == -1 || cookieName == "") return "";
// The value runs up to the next ';' or, if there is none, to the end.
var ind1 = theCookie.indexOf(';',ind);
if (ind1==-1) ind1 = theCookie.length;
// +1 skips the single character after the name (presumably the '=').
return unescape(theCookie.substring(ind+cookieName.length+1, ind1));
}
// Returns a coarse OS name derived from navigator.appVersion.
// The checks are independent `if`s, so when several substrings are present
// the last matching one wins (e.g. "Linux" over "UNIX").
function getOs(){
var OSName = "Unknown OS";
if (navigator.appVersion.indexOf("Win") != -1) OSName="Windows";
if (navigator.appVersion.indexOf("Mac") != -1) OSName="MacOS";
if (navigator.appVersion.indexOf("X11") != -1) OSName="UNIX";
if (navigator.appVersion.indexOf("Linux") != -1) OSName="Linux";
return OSName;
}
// Runs once, immediately, while the outer function is being evaluated.
go_amir_go();
// Public interface: the object that becomes `GravityInsights`.
return {
// cc(n, v): when `n` is not an empty string, stores cookie `n` with value `v`
// on path "/" with an expiry 500 days from now, then sends the beacon using
// `v` as the user GUID.
cc : function(n,v){
if (n != '') {
var date = new Date();
// 500 days expressed in milliseconds.
date.setTime(date.getTime() + (500*24*60*60*1000));
var expires = "; expires = " + date.toGMTString();
document.cookie = n + "=" + v + expires + "; path=/";
remote(v);
}
}
}
}
)(gravityInsightsParams);
I used various addons to help me out, such as Web Developer and HttpFox.
This is my code flow analysis, in pseudocode & commentary:
- Code: Select all
* Board php inserts parameters (e.g. username, forum_title) into generated inline JavaScript
* Inline JavaScript writes <script src='http://input.insights.gravity.com/pigeons/v2/moth_103_packed.js' type='text/javascript'></script>
to HTML
* moth_103_packed.js runs, passing parameter block 'gravityInsightsParams' into anonymous function as variable 'p'
* constants 'endpoint', 'referrer', 'site_guid', 'thread_id' and 'user_guid' are set
* functions are defined
* go_amir_go() is invoked:
* function checkForCookie() is invoked:
* Cookie 'grvinsights' is read (helper function readCookie(cookieName), left out for conciseness)
* if cookie isn't found, follow (A), if it is, skip to (B)
(A)
* remoteSetCookie() is invoked:
* Query string is constructed from object p with p.user_id and p.site_guid
* JavaScript writes (for clarity, I didn't bother to get the quote escaping functionally right):
<script src='endpoint + '/pigeons/v2/moth_setter.php' + query string + ' type='text/javascript'></script>
Which amounts to:
<script src='http://input.insights.gravity.com/pigeons/v2/moth_setter.php?u=132&sg=f783045be20e5e1771b292d806ed6e1a' type='text/javascript'></script>
...to HTML, right behind the insert of the <script src='... moth_103_packed.js' ...></script>
When this script is loaded, the remote server sets another cookie (line break added after each semicolon for clarity):
Set-Cookie vaguid=d477b4ef51c0f863ed8aa876897a0f24;
expires=Sat, 21-Nov-2020 00:24:05 GMT;
path=/;
domain=.gravity.com
* remoteSetCookie returns
* checkForCookie returns with return value set to boolean 'false'
* Go to (C)
(B)
* checkForCookie returns with return value set to cookie 'grvinsights'
(C)
... inside go_amir_go() ....
* If there is a cookie 'grvinsights' invoke remote() with constant 'user_guid' as parameter, if not, skip to (D):
(E)
* Function remote() gets constant 'user_guid' as parameter 'ug' and invokes function getParams() with this parameter:
* Function getParams constructs a query string from the following (strings are all escaped by the algorithm):
- site_guid = p.site_guid = ..see top, not the constant, and is 'f783045be20e5e1771b292d806ed6e1a'
- action = beacon (a small 1px image designed not to be visible and to track user data)
- user_guid = ug
- referrer = document.referrer (JavaScript API call)
- thread_id = p.thread_id = ..see top, not the constant, and is '0'
- href = location.href (JavaScript API call, return value is escaped here)
- OS = result from function getOs() = Windows
- post_id = p.post_id = ..see top, and is '0'
- forum_id = p.forum_id = ..see top, and is '36'
- user_id = p.user_id = ..see top, and is '132' (Me, snowcrash)
- user_name = p.username = ..see top, and is 'snowcrash'
- post_title = p.post_title = ..see top, and is ''
- thread_title = p.thread_title = ..see top, and is ''
- forum_title = p.forum_title = ..see top, and is 'Scientific+and+Technical+Forums'
- board = checks if p.board is defined, if so, ..see top, and is 'the911forum', if not, is ''
- Returns this query string to remote()
... inside remote() again ...
* remote() catches query string as variable q
BEACON GIF
* Creates a new image with url 'endpoint' + "/pigeons/v2/capture.php?" + q
This ultimately results in, for example, a GET request such as:
http://input.insights.gravity.com/pigeons/v2/capture.php?site_guid=f783045be20e5e1771b292d806ed6e1a&action=beacon&user_guid=8737e654f09745b245d9774e9a6d7c36&referrer=http%3A//the911forum.freeforums.org/index.php%3Fsid%3D3bdd679562ee7f31095d40555ec5707c&thread_id=145&href=http%3A//the911forum.freeforums.org/post13750.html%23p13750&OS=Windows&post_id=0&forum_id=56&user_id=132&user_name=snowcrash&post_title=&thread_title=Crush-down+models&forum_title=WTC1+and+WTC2+-+Collapse+Progression&board=the911forum
I.e.:
site_guid f783045be20e5e1771b292d806ed6e1a
action beacon
user_guid 8737e654f09745b245d9774e9a6d7c36
referrer http://the911forum.freeforums.org/index.php?sid=3bdd679562ee7f31095d40555ec5707c
thread_id 145
href http://the911forum.freeforums.org/post13750.html#p13750
OS Windows
post_id 0
forum_id 56
user_id 132
user_name snowcrash
post_title
thread_title Crush-down models
forum_title WTC1 and WTC2 - Collapse Progression
board the911forum
* remote() returns
(D)
* go_amir_go() returns
* anonymous function returns, defining function cc(n,v) ... to call it, use n = cookie var name, like 'grvinsights', v = guid, like '8737e654f09745b245d9774e9a6d7c36'
--- END OF CODE FLOW ----
* Starting from scratch, with no cookies:
* Chronologically, first the moth_setter script is seen by the browser, which seems to have the function of setting a cookie. It is loaded, and returns: (Second parameter is a GUID and may/will vary)
GravityInsights.cc('grvinsights', '8737e654f09745b245d9774e9a6d7c36');
* This command is executed, running the function cc, an acronym which may mean "Create Cookie".
* The cookie is named 'grvinsights', created for the domain the911forum.freeforums.org and is set to expire in 500 days.
E.g.:
Name grvinsights
Value 35d88931687108d983822eae46db139e
Host the911forum.freeforums.org
Path /
Secure No
Expires Fri, 06 Apr 2012 23:55:56 GMT
The other cookie, 'vaguid', bound to the domain .gravity.com expires in 10 years...
* cc then essentially jumps to (E) by invoking remote() with the user GUID
* Then, chronologically, the beacon GIF is loaded. See above, 'BEACON GIF'
The way this works is pretty perverse: the image never shows in the HTML DOM source, i.e.
Web Developer -> View Source -> View Generated Source
This is called 'image preloading' and the image is invisible until it is explicitly 'shown' by code action in the DOM source. Obviously, in this case this never happens, so you are oblivious to all this activity unless you run a sniffer, like I did.
This is a sniffer trace, edited for privacy (Browser user agent) and patched together from two sniffs, so GUIDs and query strings won't match. This is just to give you a general idea of the traffic back and forth:
- Code: Select all
00:19:07.431 0.386 419 2343 GET 200 application/javascript http://input.insights.gravity.com/pigeons/v2/moth_103_packed.js
Request Header:
(Request-Line) GET /pigeons/v2/moth_103_packed.js HTTP/1.1
Host input.insights.gravity.com
User-Agent Mozilla/5.0 (Windows; U; Windows NT x.x; en-US; rv:x.x.x.x) Gecko/xxxxxxxx Firefox/x.x.x
Accept */*
Accept-Language en-us,en;q=0.5
Accept-Encoding gzip,deflate
Accept-Charset ISO-8859-1,utf-8;q=0.7,*;q=0.7
Keep-Alive 115
Connection keep-alive
Referer http://the911forum.freeforums.org/viewtopic.php?f=4&t=439
Response Header:
(Status-Line) HTTP/1.1 200 OK
Date Wed, 24 Nov 2010 00:24:05 GMT
Server Apache
Last-Modified Mon, 25 Oct 2010 19:10:29 GMT
Etag "92c121-820-49375bed61340"
Accept-Ranges bytes
Content-Length 2080
Cache-Control max-age=86400
Expires Thu, 25 Nov 2010 00:24:05 GMT
Keep-Alive timeout=15, max=34
Connection Keep-Alive
Content-Type application/javascript
Cookie:
None
Query String:
None
Post Data:
None
Content:
...The script...
***
00:19:07.818 0.178 456 282 GET 200 text/html http://input.insights.gravity.com/pigeons/v2/moth_setter.php?u=1&sg=f783045be20e5e1771b292d806ed6e1a
Request Header:
(Request-Line) GET /pigeons/v2/moth_setter.php?u=1&sg=f783045be20e5e1771b292d806ed6e1a HTTP/1.1
Host input.insights.gravity.com
User-Agent Mozilla/5.0 (Windows; U; Windows NT x.x; en-US; rv:x.x.x.x) Gecko/xxxxxxxx Firefox/x.x.x
Accept */*
Accept-Language en-us,en;q=0.5
Accept-Encoding gzip,deflate
Accept-Charset ISO-8859-1,utf-8;q=0.7,*;q=0.7
Keep-Alive 115
Connection keep-alive
Referer http://the911forum.freeforums.org/viewtopic.php?f=4&t=439
Response Header:
(Status-Line) HTTP/1.1 200 OK
Date Wed, 24 Nov 2010 00:24:05 GMT
Server Apache
X-Powered-By PHP/5.3.3-0.dotdeb.1
Set-Cookie vaguid=d477b4ef51c0f863ed8aa876897a0f24; expires=Sat, 21-Nov-2020 00:24:05 GMT; path=/; domain=.gravity.com
P3P CP="NOI DSP COR ADMa OUR NOR"
Content-Length 70
Keep-Alive timeout=15, max=34
Connection Keep-Alive
Content-Type text/html
Cookie:
vaguid d477b4ef51c0f863ed8aa876897a0f24 / .gravity.com Sat, 21-Nov-2020 00:24:05 GMT
Query String:
u 1
sg f783045be20e5e1771b292d806ed6e1a
Post Data:
None:
Content:
GravityInsights.cc('grvinsights', 'd477b4ef51c0f863ed8aa876897a0f24');
***
00:19:07.996 0.206 882 215 GET 200 image/gif http://input.insights.gravity.com/pigeons/v2/capture.php?site_guid=undefined&action=beacon&user_guid=d477b4ef51c0f863ed8aa876897a0f24&referrer=http%3A//the911forum.freeforums.org/portal.php&thread_id=undefined&href=http%3A//the911forum.freeforums.org/viewtopic.php%3Ff%3D4%26t%3D439&OS=Windows&post_id=undefined&forum_id=undefined&user_id=undefined&user_name=undefined&post_title=undefined&thread_title=undefined&forum_title=undefined&board=%27%27
Request Header:
(Request-Line) GET /pigeons/v2/capture.php?site_guid=undefined&action=beacon&user_guid=d477b4ef51c0f863ed8aa876897a0f24&referrer=http%3A//the911forum.freeforums.org/portal.php&thread_id=undefined&href=http%3A//the911forum.freeforums.org/viewtopic.php%3Ff%3D4%26t%3D439&OS=Windows&post_id=undefined&forum_id=undefined&user_id=undefined&user_name=undefined&post_title=undefined&thread_title=undefined&forum_title=undefined&board=%27%27 HTTP/1.1
Host input.insights.gravity.com
User-Agent Mozilla/5.0 (Windows; U; Windows NT x.x; en-US; rv:x.x.x.x) Gecko/xxxxxxxx Firefox/x.x.x
Accept image/png,image/*;q=0.8,*/*;q=0.5
Accept-Language en-us,en;q=0.5
Accept-Encoding gzip,deflate
Accept-Charset ISO-8859-1,utf-8;q=0.7,*;q=0.7
Keep-Alive 115
Connection keep-alive
Referer http://the911forum.freeforums.org/viewtopic.php?f=4&t=439
Cookie vaguid=d477b4ef51c0f863ed8aa876897a0f24
Response Header:
(Status-Line) HTTP/1.1 200 OK
Date Wed, 24 Nov 2010 00:24:06 GMT
Server Apache
X-Powered-By PHP/5.3.3-0.dotdeb.1
Content-Length 42
Keep-Alive timeout=15, max=31
Connection Keep-Alive
Content-Type image/gif
Cookie:
vaguid d477b4ef51c0f863ed8aa876897a0f24 / .gravity.com zaterdag 21 november 2020 01:23:55
Query String:
site_guid f783045be20e5e1771b292d806ed6e1a
action beacon
user_guid 455d494f86f2ea28b2ad3062a1d136f9
referrer
thread_id 439
href http://the911forum.freeforums.org/viewtopic.php?f=4&t=439
OS Windows
post_id 0
forum_id 4
user_id 132
user_name snowcrash
post_title
thread_title Enik and the Debate with JREF
forum_title WTC1 and WTC2 - General
board the911forum
Post Data:
None
Content:
...The beacon GIF..
So.. what information is actually being sent to gravity dot com?
This:
(Excuse me for the tabulation, couldn't seem to get that right with BBCode)
- Code: Select all
site_guid f783045be20e5e1771b292d806ed6e1a
action beacon
user_guid 455d494f86f2ea28b2ad3062a1d136f9
referrer
thread_id 439
href http://the911forum.freeforums.org/viewtopic.php?f=4&t=439
OS Windows
post_id 0
forum_id 4
user_id 132
user_name snowcrash
post_title
thread_title Enik and the Debate with JREF
forum_title WTC1 and WTC2 - General
board the911forum
So.. what this script does is send every thread I visit, along with the forum title, the forum and the board, my user name, user id, operating system and a hyperlink to the data miners at gravity dot com, including, of course, my IP address.
This will happen for everybody who doesn't have an ad blocker installed, but it will also happen for everybody who does, unless you're alert and you add a specific rule to block this script, as I did.
So... What does the data mining company "GRAVITY" do exactly? What are their products? From their website:
--------------------------------------------------------------------------------------------------------------
Overview
If you want to geek out on the inner workings of Gravity, read on. If you’re looking for a more simple description of what Gravity is, head to About Gravity.
Your experience on the web should reflect who you are. We call this the personalized web and we’re developing technology to make it happen.
In theory, it’s a simple idea. In practice it’s challenging to implement, a pain in the neck to collect the right data, and near-impossible to do something useful and fun with the data… but that’s our plan. Why embark on such a challenging endeavor? We think personalizing the web will make user experiences more interesting and more fun.
Two big things are needed to deliver a personalized web experience. First, you need to index the web and tag everything. You need to know what websites, content, media, products, ads, etc. are out there and what topics and interests they cover. Second, you need to create an interest graph for each person. You need to understand what people are interested in and how interested they are. Combine a rich web index and an interest graph and you can do some pretty interesting things, like filter the web based on a person’s interest.
At the heart of Gravity is a semantic engine that extracts interests from any source of information. The engine is used to understand the web and the people interacting with it. It enables us to classify information with our web ontology, to build interest graphs and to deliver a variety of personalized web experiences. We’ve filed several patents related to our technology and its application to building interest graphs and enabling the personalized web.
Semantic Engine
The basic function of the semantic engine is to take a blob of text and figure out what it’s about — like what you’re brain does when you read a newspaper article.
The first step to is to analyze the blob. We use natural language processing (NLP) extensively. Instead of simply identifying keywords, our semantic engine analyzes a variety of linguistic and statistical factors. The output of our analysis is highly structured data that contain the key characteristics of the blob — sort of like a sample of the blob’s DNA.
The next step is to compare the blob’s DNA to a DNA database; our DNA database is our dynamic web ontology. The goal is to match the DNA of the blob to DNA signatures of topics in our ontology. Your standard blob of text (a sentence) will typically generate several matches. A paragraph or page of content may generate hundreds of topic matches.
Finally, we use a modified convergence algorithm to convert a list of topic matches to a handful of interests. Our algorithm prioritizes interests based on the number of topics related to an interest and the average distance and the strength of the relationship between each topic and the interests.
That’s it. Like we said, the engine takes a blob of text and figures our what it’s about.
Web Ontology
The web ontology is our DNA database. It’s actually a data graph (not a database), which does an excellent job at representing relationships between data and lends itself to graph traversal algorithms. Our ontology has 7+ million topical nodes, further augmented by metadata we’ve created or sourced from other web services. Gravity’s semantic engine uses the dynamic web ontology to match the signature of unidentified text blobs to the signatures of known topics.
Our web ontology is built on top of Wikipedia, DBpedia, Yago and OpenCyc. These resources give our ontology its basic structure, including relationships between topics. Next, we mine the open web for topical content and related social data and map this content to the ontology — thus creating a web ontology. The web ontology is different from a normal ontology in that it’s augmented with linguistic, statistical and other metadata extracted from the web. This metadata provides the basis for the DNA signatures of topics in our database. We’ve crawled terabytes of data from major publishers, blogs, forums and other public sources of information to create the most accurate DNA signatures possible.
We built our web ontology to be adaptive over time. One of the most powerful aspects of open sources resources like Wikipedia is crowd-sourced human curation. We used a combination of human and algorithmic curation to enable our ontology to grow and adapt as the world evolves.
The Theory Behind the Interest Graph
The interest graph is an online representation of your real-world interests and a new lens through which you can play with the web. We want your interest graph to become your personal filter for the web and to help you to discover content, ideas, people, events, products and services that you’ll like.
Building an interest graph is an iterative process. Instead of just asking you what your interested in, we start by pull in information about you from the web (this might include your tweets, status updates, things you’ve liked or shared, blog articles you’ve written, or information form your profile on social networks). We use our semantic engine and dynamic web ontology to comb through your information and to identify potential interests. Our convergence algorithms reduce all your potential interests to the most meaningful ones and enable us to establish preliminary interest intensity.
Once we’ve created your initial interest graph, we give it a life of its own. Your interest graph changes dynamically over time as you interact with Gravity powered services, change your presence on the web, or makes changes to your interest graph directly. For example, we might add legos to your interest graph because of several recent tweets related, but maybe you don’t really like legos. You’ll always have an opportunity to remove interests or reduce your level of attachment.
--------------------------------------------------------------------------------------------------------------
Remember, this includes such subforums as the "Members Area" and "Administrative Issues". No exceptions. The fix, obviously, is to add a rule in ABP to block this script. Myself, I've left some snarky comments in their DNA thought database during testing.
A whois search for the IP address of gravity dot com returned this:
OrgName: Project Rover
OrgId: PROJE-28
Address: PO Box 1438
City: Venice
StateProv: CA
PostalCode: 90294
Country: US
RegDate: 2009-10-11
Updated: 2010-03-05
Ref: http://whois.arin.net/rest/org/PROJE-28
What is Project Rover?
Project Rover was the name of the United States program to develop the technology to build a nuclear thermal rocket. It took place at the Los Alamos Scientific Laboratory from 1955 through 1972. The program jointly involved the Laboratory, the Atomic Energy Commission, and NASA, and it was managed by the Space Nuclear Propulsion Office.
http://en.wikipedia.org/wiki/Project_Rover
This ties in nicely with the name of the company, I guess.
I can tell you, and you'll have to trust me on this one, this is the sort of data mining that intelligence agencies like to do as well. Think Able Danger. (Which by now, is infinitely more advanced).
And I haven't even discussed the dozens of other data mining ad scripts that are included in the HTML source for this forum. They're just too complex, too obfuscated and would be too time-consuming.
Is this forum interesting? Are its members and their posts interesting? According to other, unrelated recent research of mine, possibly yes. You will hear more about that later.
That is not to say people in an official government capacity actually need "GRAVITY" to help them find posts that are interesting; they need only browse the forum.
"GRAVITY" is in a whole other line of work: they're using artificial intelligence to profile our interests, and if they can, (and they will), they'd use every single tidbit of personal information they get their oily data mining hands on, combining raw data from all places you visit and share your thoughts, blogs, social networks, websites, into enriched information that paints a perfect picture... of who you are, what you like, ... and what you think.
Their database is obviously a potential goldmine for intelligence agencies.
Jon Gold was already spied on.
Just thought you'd like to know.
End of case study.
