Skip to content

Commit

Permalink
POST query urlkey fix (#20)
Browse files Browse the repository at this point in the history
* indexing: post-request fixes
- ensure requestBody + __wb_method added to urlkey
- don't include method in requestBody field
- bump to 1.4.1

* Update badge
  • Loading branch information
ikreymer authored Feb 26, 2021
1 parent f865b19 commit 92c5b55
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 14 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Streaming web archive (WARC) file support for modern browsers and Node.

This package represents an approximate JavaScript port of the Python [warcio](https://github.com/webrecorder/warcio) module.

[![Build Status](https://travis-ci.com/webrecorder/warcio.js.svg?branch=master)](https://travis-ci.com/webrecorder/warcio.js)
[![Node.js CI](https://github.com/webrecorder/warcio.js/actions/workflows/ci.yaml/badge.svg)](https://github.com/webrecorder/warcio.js/actions/workflows/ci.yaml)
[![codecov](https://codecov.io/gh/webrecorder/warcio.js/branch/master/graph/badge.svg)](https://codecov.io/gh/webrecorder/warcio.js)


Expand Down
2 changes: 1 addition & 1 deletion main.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ export { WARCRecord } from './src/warcrecord';

export { Indexer, CDXIndexer } from './src/indexer';

export { postToGetUrl, getSurt } from './src/utils';
export { postToGetUrl, getSurt, appendRequestQuery } from './src/utils';
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "warcio",
"version": "1.4.0",
"version": "1.4.1",
"main": "index.js",
"module": "main.js",
"license": "Apache-2.0",
Expand Down
8 changes: 5 additions & 3 deletions src/indexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { WARCParser } from './warcparser';

const DEFAULT_FIELDS = 'offset,warc-type,warc-target-uri'.split(',');

import { postToGetUrl, getSurt } from './utils';
import { postToGetUrl, getSurt, appendRequestQuery } from './utils';


// ===========================================================================
Expand Down Expand Up @@ -238,7 +238,9 @@ class CDXIndexer extends Indexer
method = request.method;

if (postToGetUrl(request)) {
requestBody = request.url.slice(record.warcTargetURI.length);
requestBody = request.requestBody;
record.method = method;
record.requestBody = requestBody;
}
}

Expand Down Expand Up @@ -279,7 +281,7 @@ class CDXIndexer extends Indexer

switch (field) {
case "urlkey":
return getSurt(record.updatedURL ? record.updatedURL : record.warcTargetURI);
return getSurt(appendRequestQuery(record.warcTargetURI, record.requestBody, record.method));

case "timestamp":
value = record.warcDate;
Expand Down
16 changes: 13 additions & 3 deletions src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,25 @@ function postToGetUrl(request) {
}

if (query) {
const start = (url.indexOf("?") > 0 ? "&" : "?");
request.url += `${start}__wb_method=${method}&${query}`;
request.url = appendRequestQuery(request.url, query, request.method);
request.method = "GET";
request.requestBody = query;
return true;
}

return false;
}

/**
 * Build a URL that carries the original request method and an extra
 * query string as trailing query parameters.
 *
 * @param {string} url - URL to extend.
 * @param {string} query - Query string appended after the method marker.
 * @param {string} method - Request method written as `__wb_method=<method>`.
 *   When falsy, nothing is appended.
 * @returns {string} `url` unchanged when `method` is falsy; otherwise `url`
 *   followed by a separator, `__wb_method=<method>`, `&`, and `query`.
 */
function appendRequestQuery(url, query, method) {
  if (method) {
    // Continue an existing query string with "&"; otherwise open one with "?".
    // A "?" at index 0 is deliberately treated the same as no "?" at all.
    const hasQuery = url.indexOf("?") > 0;
    const separator = hasQuery ? "&" : "?";

    return `${url}${separator}__wb_method=${method}&${query}`;
  }

  return url;
}

function jsonToQueryString(json) {
if (typeof(json) === "string") {
try {
Expand Down Expand Up @@ -117,4 +127,4 @@ function mfdToQueryString(mfd, contentType) {
return params.toString();
}

export { postToGetUrl, getSurt };
export { postToGetUrl, getSurt, appendRequestQuery };
10 changes: 5 additions & 5 deletions test/testIndexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -102,17 +102,17 @@ test('cdx json warc.gz all', index,
test('post append', index,
['cdx-index', './test/data/post-test.warc.gz'],
`\
org,httpbin)/post 20140610000859 {"url":"http://httpbin.org/post","mime":"application/json","status":200,"digest":"M532K5WS4GY2H4OVZO6HRPOP47A7KDWU","length":720,"offset":0,"filename":"post-test.warc.gz","method":"POST","requestBody":"?__wb_method=POST&foo=bar&test=abc"}
org,httpbin)/post 20140610001151 {"url":"http://httpbin.org/post","mime":"application/json","status":200,"digest":"M7YCTM7HS3YKYQTAWQVMQSQZBNEOXGU2","length":723,"offset":1196,"filename":"post-test.warc.gz","method":"POST","requestBody":"?__wb_method=POST&A=1&B=[]&C=3"}
org,httpbin)/post?foo=bar 20140610001255 {"url":"http://httpbin.org/post?foo=bar","mime":"application/json","status":200,"digest":"B6E5P6JUZI6UPDTNO4L2BCHMGLTNCUAJ","length":723,"offset":2395,"filename":"post-test.warc.gz","method":"POST","requestBody":"&__wb_method=POST&data=^"}
org,httpbin)/post?__wb_method=post&foo=bar&test=abc 20140610000859 {"url":"http://httpbin.org/post","mime":"application/json","status":200,"digest":"M532K5WS4GY2H4OVZO6HRPOP47A7KDWU","length":720,"offset":0,"filename":"post-test.warc.gz","method":"POST","requestBody":"foo=bar&test=abc"}
org,httpbin)/post?__wb_method=post&a=1&b=%5B%5D&c=3 20140610001151 {"url":"http://httpbin.org/post","mime":"application/json","status":200,"digest":"M7YCTM7HS3YKYQTAWQVMQSQZBNEOXGU2","length":723,"offset":1196,"filename":"post-test.warc.gz","method":"POST","requestBody":"A=1&B=[]&C=3"}
org,httpbin)/post?__wb_method=post&data=%5E&foo=bar 20140610001255 {"url":"http://httpbin.org/post?foo=bar","mime":"application/json","status":200,"digest":"B6E5P6JUZI6UPDTNO4L2BCHMGLTNCUAJ","length":723,"offset":2395,"filename":"post-test.warc.gz","method":"POST","requestBody":"data=^"}
`);


test('post append 2', index,
['cdx-index', './test/data/post-test-more.warc'],
`\
org,httpbin)/post 20200809195334 {"url":"https://httpbin.org/post","mime":"application/json","status":200,"digest":"7AWVEIPQMCA4KTCNDXWSZ465FITB7LSK","length":688,"offset":0,"filename":"post-test-more.warc","method":"POST","requestBody":"?__wb_method=POST&test=some+data&another=more%5Edata"}
org,httpbin)/post 20200809195334 {"url":"https://httpbin.org/post","mime":"application/json","status":200,"digest":"BYOQWRSQFW3A5SNUBDSASHFLXGL4FNGB","length":655,"offset":1227,"filename":"post-test-more.warc","method":"POST","requestBody":"?__wb_method=POST&a=json-data"}
org,httpbin)/post?__wb_method=post&another=more%5Edata&test=some+data 20200809195334 {"url":"https://httpbin.org/post","mime":"application/json","status":200,"digest":"7AWVEIPQMCA4KTCNDXWSZ465FITB7LSK","length":688,"offset":0,"filename":"post-test-more.warc","method":"POST","requestBody":"test=some+data&another=more%5Edata"}
org,httpbin)/post?__wb_method=post&a=json-data 20200809195334 {"url":"https://httpbin.org/post","mime":"application/json","status":200,"digest":"BYOQWRSQFW3A5SNUBDSASHFLXGL4FNGB","length":655,"offset":1227,"filename":"post-test-more.warc","method":"POST","requestBody":"a=json-data"}
org,httpbin)/post 20200810055049 {"url":"https://httpbin.org/post","mime":"application/json","status":200,"digest":"34LEADQD3MOBQ42FCO2WA5TUSEL5QOKP","length":628,"offset":2338,"filename":"post-test-more.warc","method":"POST"}
`);

Expand Down

0 comments on commit 92c5b55

Please sign in to comment.