Skip to content

Commit

Permalink
add dist
Browse files Browse the repository at this point in the history
  • Loading branch information
ikreymer committed Nov 6, 2024
1 parent 9687ec3 commit 97bb136
Show file tree
Hide file tree
Showing 21 changed files with 766 additions and 0 deletions.
11 changes: 11 additions & 0 deletions dist/cli.cjs

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions dist/cli.d.cts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#!/usr/bin/env node
1 change: 1 addition & 0 deletions dist/cli.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#!/usr/bin/env node
11 changes: 11 additions & 0 deletions dist/cli.js

Large diffs are not rendered by default.

34 changes: 34 additions & 0 deletions dist/index.all.js

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions dist/index.all.js.map

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions dist/index.cjs

Large diffs are not rendered by default.

82 changes: 82 additions & 0 deletions dist/index.d.cts
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import { W as WARCRecord, A as AsyncIterReader, S as StatusAndHeaders, L as LimitReader, a as StatusAndHeadersParser } from './warcserializer-0b661154.js';
export { A as AsyncIterReader, i as AsyncIterReaderOpts, B as BaseAsyncIterReader, c as BaseSerializerBuffer, L as LimitReader, N as NoConcatInflator, S as StatusAndHeaders, a as StatusAndHeadersParser, W as WARCRecord, g as WARCRecordOpts, b as WARCSerializer, f as WARCSerializerOpts, h as WARCType, d as WARC_1_0, e as WARC_1_1 } from './warcserializer-0b661154.js';
import { I as IndexerOffsetLength, S as Source, a as StreamResults } from './types-bcbdd303.js';
export { I as IndexerOffsetLength, R as Request, S as Source, c as SourceReadable, b as SourceReader, d as StreamResult, a as StreamResults } from './types-bcbdd303.js';
import { WritableStreamBuffer } from 'stream-buffers';
export { a as appendRequestQuery, d as concatChunks, g as getSurt, j as jsonToQueryParams, b as jsonToQueryString, m as mfdToQueryParams, c as mfdToQueryString, p as postToGetUrl, s as splitChunk } from './utils-17b80bf1.js';
import 'hash-wasm/dist/lib/WASMInterface.js';
import 'pako';

/**
 * Options bag accepted by `WARCParser` (its constructor and its static helpers).
 * Every field is optional.
 */
type WARCParserOpts = {
/** NOTE(review): presumably keeps header names in their original case when true; confirm in the implementation. */
keepHeadersCase?: boolean;
/** NOTE(review): presumably toggles parsing of HTTP headers carried inside records; confirm in the implementation. */
parseHttp?: boolean;
};
/**
 * Type declaration for the WARC parser. It is constructed from a `Source` and
 * hands back `WARCRecord` instances, either one at a time (`parse`) or through
 * async iteration. The `offset` / `recordLength` getters satisfy
 * `IndexerOffsetLength`.
 *
 * Underscore-prefixed members appear to be internal state that is still part of
 * the emitted type surface.
 */
declare class WARCParser implements IndexerOffsetLength {
/** Static entry point: takes a source plus optional options and resolves to a record or `null`. */
static parse(source: Source, options?: WARCParserOpts): Promise<WARCRecord | null>;
/** Static entry point: takes a source plus optional options and yields `WARCRecord`s. */
static iterRecords(source: Source, options?: WARCParserOpts): AsyncGenerator<WARCRecord, void, unknown>;
_offset: number;
_warcHeadersLength: number;
/** Constructor used for header collections: either `Map` or `Headers`. */
_headersClass: typeof Map | typeof Headers;
_parseHttp: boolean;
_reader: AsyncIterReader;
/** NOTE(review): presumably the record most recently produced by `parse()`; confirm. */
_record: WARCRecord | null;
/** The options argument is optional and is destructured into `keepHeadersCase` and `parseHttp`. */
constructor(source: Source, { keepHeadersCase, parseHttp }?: WARCParserOpts);
/** Resolves to a `Uint8Array` or `null`. NOTE(review): what the returned bytes represent is not visible from this declaration. */
readToNextRecord(): Promise<Uint8Array | null>;
/** Takes WARC headers (`StatusAndHeaders`) and returns a `LimitReader`. */
_initRecordReader(warcHeaders: StatusAndHeaders): LimitReader;
/** Resolves to a `WARCRecord` or `null`. NOTE(review): `null` presumably signals end of input; confirm. */
parse(): Promise<WARCRecord | null>;
/** `offset` member required by `IndexerOffsetLength`. */
get offset(): number;
/** `recordLength` member required by `IndexerOffsetLength`. */
get recordLength(): number;
/** Makes the parser usable with `for await`, yielding `WARCRecord`s. */
[Symbol.asyncIterator](): AsyncGenerator<WARCRecord, void, unknown>;
/** Takes a record and a `StatusAndHeadersParser`; resolves with no value. NOTE(review): presumably attaches parsed HTTP headers to the record; confirm. */
_addHttpHeaders(record: WARCRecord, headersParser: StatusAndHeadersParser): Promise<void>;
}

/**
 * Argument types for the index commands. Both are declared as `any`, so
 * `Partial<IndexCommandArgs>` / `Partial<CdxIndexCommandArgs>` on the indexer
 * classes give no compile-time checking of option names or values.
 * NOTE(review): presumably the concrete CLI argument types were erased when the
 * declarations were bundled; confirm against the source.
 */
type IndexCommandArgs = any;
type CdxIndexCommandArgs = any;

/**
 * Exported list of field names; the actual values are not visible in this declaration.
 * NOTE(review): presumably the fields used by `Indexer` when none are supplied; confirm.
 */
declare const DEFAULT_FIELDS: string[];
/**
 * Abstract base for indexers. The declared surface covers three things:
 * turning a `WARCRecord` into a plain result object (`indexRecord`, `setField`,
 * `getField`), turning a result object into a string (`serialize`), and
 * writing results to an output stream (`write`, `writeAll`).
 *
 * Result objects are typed `Record<string, any>`, so their keys and values are
 * not checked at compile time.
 */
declare abstract class BaseIndexer {
/** Options as passed to the constructor; effectively `any` because `IndexCommandArgs` is `any`. */
opts: Partial<IndexCommandArgs>;
/** Field names handled by this indexer. */
fields: string[];
/** NOTE(review): presumably the subset of `fields` that refer to request data; confirm. */
reqFields: string[];
parseHttp: boolean;
/** Both arguments are optional. */
constructor(opts?: Partial<IndexCommandArgs>, defaultFields?: string[]);
/** Converts one result object into its string form. */
serialize(result: Record<string, any>): string;
/** Takes one result and an output that is either a `WritableStreamBuffer` or a Node.js write stream. */
write(result: Record<string, any>, out: WritableStreamBuffer | NodeJS.WriteStream): void;
/** Async counterpart of `write` that takes a list of `{ filename, reader }` inputs. */
writeAll(files: StreamResults, out: WritableStreamBuffer | NodeJS.WriteStream): Promise<void>;
/** Yields result objects for a list of `{ filename, reader }` inputs. */
iterIndex(files: StreamResults): AsyncGenerator<Record<string, any>, void, unknown>;
/** Yields result objects for one parser/filename pair. */
iterRecords(parser: WARCParser, filename: string): AsyncGenerator<Record<string, any>, void, unknown>;
/** Optional hook: declared with `?`, so it may be absent on an instance. NOTE(review): presumably returning false skips the record; confirm. */
filterRecord?(record: WARCRecord): boolean;
/** Returns a result object for the record, or `null`. */
indexRecord(record: WARCRecord, indexerOffset: IndexerOffsetLength, filename: string): Record<string, any> | null;
/** Takes a field name, a record and the result object; returns nothing. NOTE(review): presumably stores the field's value on `result`; confirm. */
setField(field: string, record: WARCRecord, result: Record<string, any>): void;
/** Looks up one field for a record; the value may be a string, a number, `null` or `undefined`. */
getField(field: string, record: WARCRecord): string | number | null | undefined;
}
/**
 * Concrete (non-abstract) subclass of `BaseIndexer`. This declaration adds no
 * members beyond a constructor with the same parameter list as the base class.
 */
declare class Indexer extends BaseIndexer {
constructor(opts?: Partial<IndexCommandArgs>, defaultFields?: string[]);
}
/**
 * Exported lists of CDX field names; the actual values are not visible in this declaration.
 * NOTE(review): the "legacy" list presumably corresponds to the output of
 * `CDXIndexer.serializeCDX11`; confirm against the implementation.
 */
declare const DEFAULT_CDX_FIELDS: string[];
declare const DEFAULT_LEGACY_CDX_FIELDS: string[];
/**
 * Return shape of `CDXAndRecordIndexer.indexRecordPair`: the index entry
 * together with the record(s) it was computed from.
 */
interface CDXAndRecord {
/** The index entry, as an untyped key/value object. */
cdx: Record<string, any>;
record: WARCRecord;
/** The paired request record; `null` is allowed. */
reqRecord: WARCRecord | null;
}
/**
 * CDX-flavoured indexer built on `Indexer`. Compared with the base class it
 * redeclares `iterRecords`, `indexRecord` and `getField`, makes `filterRecord`
 * non-optional, and adds `indexRecordPair` plus two serializers
 * (`serializeCDXJ`, `serializeCDX11`).
 */
declare class CDXIndexer extends Indexer {
includeAll: boolean;
overrideIndexForAll: boolean;
/** NOTE(review): presumably disables SURT key generation when true; confirm. */
noSurt: boolean;
/** NOTE(review): presumably holds a record kept back for pairing with the next one; confirm. */
_lastRecord: WARCRecord | null;
/** Takes only the options; there is no `defaultFields` parameter at this level. */
constructor(opts?: Partial<CdxIndexCommandArgs>);
iterRecords(parser: WARCParser, filename: string): AsyncGenerator<Record<string, any>, void, unknown>;
/** Declared as required here, unlike the optional hook on `BaseIndexer`. */
filterRecord(record: WARCRecord): boolean;
/** Unlike the base signature, `record` may be `null` here. */
indexRecord(record: WARCRecord | null, indexOffset: IndexerOffsetLength, filename: string): Record<string, any> | null;
/** Takes a record together with an optional (nullable) request record; returns a result object or `null`. */
indexRecordPair(record: WARCRecord, reqRecord: WARCRecord | null, indexOffset: IndexerOffsetLength, filename: string): Record<string, any> | null;
/** Converts a result object to a string. NOTE(review): presumably one CDXJ line; confirm. */
serializeCDXJ(result: Record<string, any>): string;
/** Converts a result object to a string. NOTE(review): presumably one 11-field CDX line; confirm. */
serializeCDX11(result: Record<string, any>): string;
getField(field: string, record: WARCRecord): string | number | null | undefined;
}
/**
 * Variant of `CDXIndexer` whose `indexRecordPair` returns a `CDXAndRecord`
 * (index entry plus the records) instead of a bare result object.
 */
declare class CDXAndRecordIndexer extends CDXIndexer {
constructor(opts?: Partial<CdxIndexCommandArgs>);
/** Same parameters as the parent method; the return type is narrowed to `CDXAndRecord | null`. */
indexRecordPair(record: WARCRecord, reqRecord: WARCRecord | null, indexOffset: IndexerOffsetLength, filename: string): CDXAndRecord | null;
}

export { CDXAndRecordIndexer, CDXIndexer, DEFAULT_CDX_FIELDS, DEFAULT_FIELDS, DEFAULT_LEGACY_CDX_FIELDS, Indexer, WARCParser, WARCParserOpts };
304 changes: 304 additions & 0 deletions dist/index.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,304 @@
import pako from 'pako';
import { IHasher } from 'hash-wasm/dist/lib/WASMInterface.js';
import { WritableStreamBuffer } from 'stream-buffers';

/**
 * Minimal reader shape: any object exposing a `read` member.
 * `read` is typed as the bare `Function` type, so neither its arguments nor its
 * result are checked by the compiler.
 */
type SourceReader = {
read: Function;
};
/**
 * Minimal "readable" shape: any object whose `getReader(...)` returns something
 * with a `read` member.
 * NOTE(review): this looks like the structural subset of a web `ReadableStream`
 * that the library relies on; confirm.
 */
type SourceReadable = {
getReader: (...args: any) => {
read: Function;
};
};
/**
 * Everything accepted as an input source: a reader, a readable, or a sync or
 * async iterable of `Uint8Array` chunks.
 */
type Source = SourceReader | SourceReadable | AsyncIterable<Uint8Array> | Iterable<Uint8Array>;
/** One input for the indexers: a file name paired with an async iterable of its bytes. */
type StreamResult = {
filename: string;
reader: AsyncIterable<Uint8Array>;
};
/** List of `StreamResult` inputs, as taken by `BaseIndexer.writeAll` and `BaseIndexer.iterIndex`. */
type StreamResults = StreamResult[];
/**
 * Position information handed to the indexers; `WARCParser` implements this shape.
 * NOTE(review): units are presumably bytes within the input; confirm.
 */
type IndexerOffsetLength = {
offset: number;
recordLength: number;
};
/**
 * Library-specific description of an HTTP request, used by `postToGetUrl`.
 * Because this module declares its own `Request`, the name refers to this shape
 * inside the module rather than to the global Fetch API `Request`.
 */
type Request = {
method: string;
url: string;
/** Either a plain `Map` of header values or a `Headers` object. */
headers: Map<string, string> | Headers;
/** Optional body as bytes or text; `null` and `undefined` are both allowed. */
postData?: Uint8Array | string | undefined | null;
/** Untyped (`any`). NOTE(review): relationship to `postData` is not visible from this declaration. */
requestBody?: any;
};

/**
 * Subclass of `pako.Inflate` that additionally carries a reference to a reader
 * of type `T`, an `ended` flag and a list of `Uint8Array` chunks.
 * NOTE(review): the name suggests inflated chunks are collected without being
 * concatenated; confirm in the implementation.
 */
declare class NoConcatInflator<T extends BaseAsyncIterReader> extends pako.Inflate {
reader: T;
ended: boolean;
chunks: Uint8Array[];
/** Takes pako inflate options plus the reader stored in `reader`. */
constructor(options: pako.InflateOptions, reader: T);
/** Receives a pako return code. NOTE(review): presumably invoked by pako when a stream ends; see the pako documentation. */
onEnd(status: pako.ReturnCodes): void;
}
/**
 * Abstract base for byte readers. Subclasses must supply async iteration over
 * `Uint8Array` chunks and `readlineRaw`; the remaining members are declared
 * concretely on this class.
 */
declare abstract class BaseAsyncIterReader {
/** Static helper: takes a sync or async iterable of chunks and resolves to a single `Uint8Array`. */
static readFully(iter: AsyncIterable<Uint8Array> | Iterable<Uint8Array>): Promise<Uint8Array>;
/** Must be implemented by subclasses. */
abstract [Symbol.asyncIterator](): AsyncIterator<Uint8Array>;
/** Exposes the reader as a `ReadableStream`; the chunk type is `any`. */
getReadableStream(): ReadableStream<any>;
/** Instance counterpart of the static helper; resolves to a single `Uint8Array`. */
readFully(): Promise<Uint8Array>;
/** Must be implemented by subclasses. Resolves to raw bytes or `null`; `maxLength` is optional. */
abstract readlineRaw(maxLength?: number): Promise<Uint8Array | null>;
/** String-returning counterpart of `readlineRaw`. */
readline(maxLength?: number): Promise<string>;
/** Yields strings. NOTE(review): presumably one per line, via `readline`; confirm. */
iterLines(maxLength?: number): AsyncGenerator<string, void, unknown>;
}
/**
 * Options stored on `AsyncIterReader.opts`.
 * NOTE(review): the effect of `raw` is not visible from this declaration.
 */
type AsyncIterReaderOpts = {
raw: boolean;
};
/**
 * Concrete `BaseAsyncIterReader` built over any `Source`. Besides the inherited
 * read/iterate API it declares size-bounded reads (`readSize`, `skipSize`),
 * an `unread` operation, offset accessors, and static adapters that turn a
 * reader object or a sync iterable into an async iterable.
 *
 * Underscore-prefixed members appear to be internal state that is still part of
 * the emitted type surface.
 */
declare class AsyncIterReader extends BaseAsyncIterReader {
/** NOTE(review): presumably names the compression format being decoded, `null` when none; confirm. */
compressed: string | null;
opts: AsyncIterReaderOpts;
/** Inflator bound to this reader, or `null`. */
inflator: NoConcatInflator<this> | null;
_sourceIter: AsyncIterator<Uint8Array | null>;
lastValue: Uint8Array | null;
errored: boolean;
_savedChunk: Uint8Array | null;
_rawOffset: number;
_readOffset: number;
numChunks: number;
/** Only the source is required; `compressed` and `dechunk` are optional. */
constructor(streamOrIter: Source, compressed?: string | null, dechunk?: boolean);
_loadNext(): Promise<Uint8Array | null>;
/** Wraps an async iterable of chunks in another async iterator. NOTE(review): presumably decodes HTTP chunked transfer encoding; confirm. */
dechunk(source: AsyncIterable<Uint8Array>): AsyncIterator<Uint8Array | null>;
/** Takes a chunk and returns nothing. NOTE(review): presumably pushes it back so the next read sees it again; confirm. */
unread(chunk: Uint8Array): void;
_next(): Promise<Uint8Array | null>;
_push(value: Uint8Array): void;
_getNextChunk(original?: Uint8Array): Uint8Array | null | undefined;
[Symbol.asyncIterator](): AsyncGenerator<Uint8Array, void, unknown>;
readlineRaw(maxLength?: number): Promise<Uint8Array | null>;
readFully(): Promise<Uint8Array>;
/** Resolves to bytes for the given `sizeLimit`. */
readSize(sizeLimit: number): Promise<Uint8Array>;
/** Resolves to a number for the given `sizeLimit`. NOTE(review): presumably the count of bytes skipped; confirm. */
skipSize(sizeLimit: number): Promise<number>;
/** Resolves to a read-only `[number, Uint8Array]` pair. NOTE(review): presumably the shared implementation behind `readSize` and `skipSize`; confirm. */
_readOrSkip(sizeLimit?: number, skip?: boolean): Promise<readonly [number, Uint8Array]>;
getReadOffset(): number;
getRawOffset(): number;
/** Takes a previous offset and returns a length. NOTE(review): presumably the raw offset delta since `prevOffset`; confirm. */
getRawLength(prevOffset: number): number;
/** Adapts an object with a `read` member into an async iterable of `Uint8Array` chunks. */
static fromReadable<Readable extends SourceReader>(source: Readable): {
[Symbol.asyncIterator](): AsyncGenerator<Uint8Array, void, unknown>;
};
/** Adapts a synchronous iterable of chunks into an async iterable. */
static fromIter(source: Iterable<Uint8Array>): {
[Symbol.asyncIterator](): AsyncGenerator<Uint8Array, void, unknown>;
};
}
/**
 * `BaseAsyncIterReader` layered on an existing `AsyncIterReader` and configured
 * with a numeric `limit` and an optional `skip`.
 * NOTE(review): presumably restricts reads to `limit` bytes after skipping
 * `skip` bytes of the underlying reader; confirm in the implementation.
 */
declare class LimitReader extends BaseAsyncIterReader {
/** The wrapped reader. */
sourceIter: AsyncIterReader;
length: number;
limit: number;
skip: number;
/** `skip` is optional. */
constructor(streamIter: AsyncIterReader, limit: number, skip?: number);
/** Takes a new `limit` and an optional `skip` after construction. */
setLimitSkip(limit: number, skip?: number): void;
[Symbol.asyncIterator](): AsyncGenerator<Uint8Array, void, unknown>;
readlineRaw(maxLength?: number): Promise<Uint8Array | null>;
/** Resolves to a number. NOTE(review): presumably the count of bytes skipped; confirm. */
skipFully(): Promise<number>;
}

/**
 * A status line together with a header collection. The same class is used for
 * both WARC headers and HTTP headers on `WARCRecord`. Accessors are declared
 * for the response-style parts (`protocol`, `statusCode`, `statusText`) and the
 * request-style parts (`method`, `requestPath`).
 */
declare class StatusAndHeaders {
/** The unparsed first line. */
statusline: string;
/** Either a plain `Map` of header values or a `Headers` object. */
headers: Map<string, string> | Headers;
/** Takes a single object carrying both required fields. */
constructor({ statusline, headers, }: {
statusline: string;
headers: Map<string, string> | Headers;
});
toString(): string;
/** Yields `Uint8Array` chunks; takes the `TextEncoder` to use. */
iterSerialize(encoder: TextEncoder): AsyncGenerator<Uint8Array, void, unknown>;
// Backing fields for the response-style getters below; all may be undefined.
_protocol: string | undefined;
_statusCode: number | string | undefined;
_statusText: string | undefined;
/** NOTE(review): presumably fills `_protocol`, `_statusCode` and `_statusText` from `statusline`; confirm. */
_parseResponseStatusLine(): void;
/** May be a string or a number, so callers must handle both. */
get statusCode(): string | number | undefined;
get protocol(): string | undefined;
get statusText(): string | undefined;
// Backing fields for the request-style getters below; both may be undefined.
_method: string | undefined;
_requestPath: string | undefined;
/** NOTE(review): presumably fills `_method` and `_requestPath` from `statusline`; confirm. */
_parseRequestStatusLine(): void;
get method(): string | undefined;
get requestPath(): string | undefined;
}
/** Parser that reads from an `AsyncIterReader` and produces a `StatusAndHeaders`. */
declare class StatusAndHeadersParser {
/**
 * Resolves to the parsed `StatusAndHeaders`, or `null`.
 * The options object as a whole is optional, but when it is supplied
 * `headersClass` is required and only `firstLine` may be omitted.
 */
parse(reader: AsyncIterReader, { headersClass, firstLine, }?: {
firstLine?: string;
headersClass: typeof Map | typeof Headers;
}): Promise<StatusAndHeaders | null>;
}

/**
 * WARC version strings, declared with literal types so they can be used as the
 * allowed values of `WARCRecordOpts.warcVersion`.
 */
declare const WARC_1_1 = "WARC/1.1";
declare const WARC_1_0 = "WARC/1.0";
/** Closed set of record type names accepted for `WARCRecordOpts.type`. */
type WARCType = "warcinfo" | "response" | "resource" | "request" | "metadata" | "revisit" | "conversion" | "continuation";
/**
 * Options for `WARCRecord.create` and `WARCRecord.createWARCInfo`.
 * Every field is optional.
 */
type WARCRecordOpts = {
url?: string;
date?: string;
/** One of the record type names in `WARCType`. */
type?: WARCType;
/** Additional WARC headers as a plain string-to-string object. */
warcHeaders?: Record<string, string>;
filename?: string;
/** HTTP headers in any form accepted by the standard `HeadersInit` type. */
httpHeaders?: HeadersInit;
statusline?: string;
/** Restricted to the two declared version literals, "WARC/1.0" and "WARC/1.1". */
warcVersion?: typeof WARC_1_0 | typeof WARC_1_1;
/** NOTE(review): presumably keeps header names in their original case when true; confirm. */
keepHeadersCase?: boolean;
/** NOTE(review): `refersToUrl` / `refersToDate` presumably populate the refers-to headers of revisit records; confirm. */
refersToUrl?: string;
refersToDate?: string;
};
/**
 * A single WARC record: WARC headers, optional HTTP headers, and a body exposed
 * through the `BaseAsyncIterReader` API. Records come from the static factories
 * (`create`, `createWARCInfo`) or directly from the constructor.
 *
 * Underscore-prefixed members appear to be internal state that is still part of
 * the emitted type surface.
 */
declare class WARCRecord extends BaseAsyncIterReader {
/** Factory taking an optional options object and an optional sync or async iterable of body chunks. */
static create({ url, date, type, warcHeaders, filename, httpHeaders, statusline, warcVersion, keepHeadersCase, refersToUrl, refersToDate, }?: WARCRecordOpts, reader?: AsyncIterable<Uint8Array> | Iterable<Uint8Array>): WARCRecord;
/** Factory taking options (may be `undefined`) and a string-to-string `info` object. NOTE(review): presumably builds a "warcinfo" record; confirm. */
static createWARCInfo(opts: WARCRecordOpts | undefined, info: Record<string, string>): WARCRecord;
warcHeaders: StatusAndHeaders;
_reader: AsyncIterable<Uint8Array> | Iterable<Uint8Array>;
_contentReader: BaseAsyncIterReader | null;
/** Body bytes when available, otherwise `null`. */
payload: Uint8Array | null;
/** Parsed HTTP headers, or `null`. */
httpHeaders: StatusAndHeaders | null;
/** One of "content", "raw", "skipped" or the empty string. NOTE(review): presumably records how the body was read, with "" meaning not yet read; confirm. */
consumed: "content" | "raw" | "skipped" | "";
_offset: number | undefined;
_length: number;
_urlkey: string;
/** Takes a single object carrying the WARC headers and the body reader; both are required. */
constructor({ warcHeaders, reader, }: {
warcHeaders: StatusAndHeaders;
reader: AsyncIterable<Uint8Array> | Iterable<Uint8Array>;
});
/** Returns headers, status and status text, or `null`. */
getResponseInfo(): {
headers: Map<string, string> | Headers;
status: string | number | undefined;
statusText: string | undefined;
} | null;
fixUp(): void;
/** Resolves to the body bytes; `isContent` is optional. NOTE(review): presumably selects decoded content versus raw bytes; confirm. */
readFully(isContent?: boolean): Promise<Uint8Array>;
get reader(): AsyncIterable<Uint8Array> | Iterable<Uint8Array>;
get contentReader(): AsyncIterable<Uint8Array> | Iterable<Uint8Array>;
_createDecodingReader(source: Source): AsyncIterReader;
readlineRaw(maxLength?: number): Promise<Uint8Array | null>;
/** Resolves to the body as a string. */
contentText(): Promise<string>;
[Symbol.asyncIterator](): AsyncGenerator<Uint8Array, void, unknown>;
/** Resolves to a number or `undefined`. */
skipFully(): Promise<number | undefined>;
/** Looks up one WARC header by name; the result may be `null` or `undefined`. */
warcHeader(name: string): string | null | undefined;
// Convenience getters: each returns a string, null or undefined, except warcContentLength, which always returns a number.
get warcType(): string | null | undefined;
get warcTargetURI(): string | null | undefined;
get warcDate(): string | null | undefined;
get warcRefersToTargetURI(): string | null | undefined;
get warcRefersToDate(): string | null | undefined;
get warcPayloadDigest(): string | null | undefined;
get warcBlockDigest(): string | null | undefined;
get warcContentType(): string | null | undefined;
get warcContentLength(): number;
}

/**
 * Options bag accepted by `WARCParser` (its constructor and its static helpers).
 * Every field is optional.
 */
type WARCParserOpts = {
/** NOTE(review): presumably keeps header names in their original case when true; confirm in the implementation. */
keepHeadersCase?: boolean;
/** NOTE(review): presumably toggles parsing of HTTP headers carried inside records; confirm in the implementation. */
parseHttp?: boolean;
};
/**
 * Type declaration for the WARC parser. It is constructed from a `Source` and
 * hands back `WARCRecord` instances, either one at a time (`parse`) or through
 * async iteration. The `offset` / `recordLength` getters satisfy
 * `IndexerOffsetLength`.
 *
 * Underscore-prefixed members appear to be internal state that is still part of
 * the emitted type surface.
 */
declare class WARCParser implements IndexerOffsetLength {
/** Static entry point: takes a source plus optional options and resolves to a record or `null`. */
static parse(source: Source, options?: WARCParserOpts): Promise<WARCRecord | null>;
/** Static entry point: takes a source plus optional options and yields `WARCRecord`s. */
static iterRecords(source: Source, options?: WARCParserOpts): AsyncGenerator<WARCRecord, void, unknown>;
_offset: number;
_warcHeadersLength: number;
/** Constructor used for header collections: either `Map` or `Headers`. */
_headersClass: typeof Map | typeof Headers;
_parseHttp: boolean;
_reader: AsyncIterReader;
/** NOTE(review): presumably the record most recently produced by `parse()`; confirm. */
_record: WARCRecord | null;
/** The options argument is optional and is destructured into `keepHeadersCase` and `parseHttp`. */
constructor(source: Source, { keepHeadersCase, parseHttp }?: WARCParserOpts);
/** Resolves to a `Uint8Array` or `null`. NOTE(review): what the returned bytes represent is not visible from this declaration. */
readToNextRecord(): Promise<Uint8Array | null>;
/** Takes WARC headers (`StatusAndHeaders`) and returns a `LimitReader`. */
_initRecordReader(warcHeaders: StatusAndHeaders): LimitReader;
/** Resolves to a `WARCRecord` or `null`. NOTE(review): `null` presumably signals end of input; confirm. */
parse(): Promise<WARCRecord | null>;
/** `offset` member required by `IndexerOffsetLength`. */
get offset(): number;
/** `recordLength` member required by `IndexerOffsetLength`. */
get recordLength(): number;
/** Makes the parser usable with `for await`, yielding `WARCRecord`s. */
[Symbol.asyncIterator](): AsyncGenerator<WARCRecord, void, unknown>;
/** Takes a record and a `StatusAndHeadersParser`; resolves with no value. NOTE(review): presumably attaches parsed HTTP headers to the record; confirm. */
_addHttpHeaders(record: WARCRecord, headersParser: StatusAndHeadersParser): Promise<void>;
}

/**
 * Options for `WARCSerializer`. Every field, including each digest sub-field,
 * is optional.
 */
type WARCSerializerOpts = {
/** NOTE(review): presumably gzip-compresses the serialized record when true; confirm. */
gzip?: boolean;
/** Digest settings: algorithm identifier, a string prefix, and a base32 flag. */
digest?: {
algo?: AlgorithmIdentifier;
prefix?: string;
base32?: boolean;
};
/** NOTE(review): presumably forces the pako compressor over a platform `CompressionStream`; confirm. */
preferPako?: boolean;
};
/**
 * Abstract buffer that can be passed to `WARCSerializer` as its
 * `externalBuffer`. Implementations must accept chunks via `write` and hand
 * back an async iterable of chunks via `readAll`.
 */
declare abstract class BaseSerializerBuffer {
abstract write(chunk: Uint8Array): void;
abstract readAll(): AsyncIterable<Uint8Array>;
}
/**
 * Serializer for a single `WARCRecord`. It is itself a `BaseAsyncIterReader`,
 * so the serialized form can be consumed through the inherited read/iterate
 * API, or obtained in one call through the static `serialize`.
 */
declare class WARCSerializer extends BaseAsyncIterReader {
// Settings mirrored from WARCSerializerOpts.
gzip: boolean;
digestAlgo: AlgorithmIdentifier;
digestAlgoPrefix: string;
digestBase32: boolean;
preferPako: boolean;
/** The record being serialized. */
record: WARCRecord;
/** Caller-supplied buffer, if any. */
externalBuffer: BaseSerializerBuffer | undefined;
_alreadyDigested: boolean;
/** hash-wasm hashers; either may be `null`. */
blockHasher: IHasher | null;
payloadHasher: IHasher | null;
httpHeadersBuff: Uint8Array | null;
warcHeadersBuff: Uint8Array | null;
/** One-shot helper: resolves to the serialized bytes of `record`. */
static serialize(record: WARCRecord, opts?: WARCSerializerOpts, externalBuffer?: BaseSerializerBuffer): Promise<Uint8Array>;
/** Only `record` is required. */
constructor(record: WARCRecord, opts?: WARCSerializerOpts, externalBuffer?: BaseSerializerBuffer);
/** Returns a string, `true`, `null` or `undefined`. NOTE(review): presumably truthy when digest computation should be skipped for the record; confirm. */
static noComputeDigest(record: WARCRecord): string | true | null | undefined;
/** The yielded chunk type is `any` in this declaration. */
[Symbol.asyncIterator](): AsyncGenerator<any, void, unknown>;
readlineRaw(maxLength?: number): Promise<Uint8Array | null>;
/** Yields chunks typed as `any`. NOTE(review): presumably the pako-based compression path; confirm. */
pakoCompress(): AsyncGenerator<any, void, unknown>;
/** Yields `Uint8Array` chunks using the supplied `CompressionStream`. */
streamCompress(cs: CompressionStream): AsyncGenerator<Uint8Array, void, unknown>;
/** Resolves to a hasher, or `null`. */
newHasher(): Promise<IHasher | null>;
/** Returns a string for the given hasher. NOTE(review): presumably formatted using `digestAlgoPrefix` / `digestBase32`; confirm. */
getDigest(hasher: IHasher): string;
/** Resolves to a number. NOTE(review): the meaning of the number is not visible from this declaration. */
digestRecord(): Promise<number>;
/** Yields the `Uint8Array` chunks of the record. */
generateRecord(): AsyncGenerator<Uint8Array, void, unknown>;
}

/**
 * Argument types for the index commands. Both are declared as `any`, so
 * `Partial<IndexCommandArgs>` / `Partial<CdxIndexCommandArgs>` on the indexer
 * classes give no compile-time checking of option names or values.
 * NOTE(review): presumably the concrete CLI argument types were erased when the
 * declarations were bundled; confirm against the source.
 */
type IndexCommandArgs = any;
type CdxIndexCommandArgs = any;

/**
 * Exported list of field names; the actual values are not visible in this declaration.
 * NOTE(review): presumably the fields used by `Indexer` when none are supplied; confirm.
 */
declare const DEFAULT_FIELDS: string[];
/**
 * Abstract base for indexers. The declared surface covers three things:
 * turning a `WARCRecord` into a plain result object (`indexRecord`, `setField`,
 * `getField`), turning a result object into a string (`serialize`), and
 * writing results to an output stream (`write`, `writeAll`).
 *
 * Result objects are typed `Record<string, any>`, so their keys and values are
 * not checked at compile time.
 */
declare abstract class BaseIndexer {
/** Options as passed to the constructor; effectively `any` because `IndexCommandArgs` is `any`. */
opts: Partial<IndexCommandArgs>;
/** Field names handled by this indexer. */
fields: string[];
/** NOTE(review): presumably the subset of `fields` that refer to request data; confirm. */
reqFields: string[];
parseHttp: boolean;
/** Both arguments are optional. */
constructor(opts?: Partial<IndexCommandArgs>, defaultFields?: string[]);
/** Converts one result object into its string form. */
serialize(result: Record<string, any>): string;
/** Takes one result and an output that is either a `WritableStreamBuffer` or a Node.js write stream. */
write(result: Record<string, any>, out: WritableStreamBuffer | NodeJS.WriteStream): void;
/** Async counterpart of `write` that takes a list of `{ filename, reader }` inputs. */
writeAll(files: StreamResults, out: WritableStreamBuffer | NodeJS.WriteStream): Promise<void>;
/** Yields result objects for a list of `{ filename, reader }` inputs. */
iterIndex(files: StreamResults): AsyncGenerator<Record<string, any>, void, unknown>;
/** Yields result objects for one parser/filename pair. */
iterRecords(parser: WARCParser, filename: string): AsyncGenerator<Record<string, any>, void, unknown>;
/** Optional hook: declared with `?`, so it may be absent on an instance. NOTE(review): presumably returning false skips the record; confirm. */
filterRecord?(record: WARCRecord): boolean;
/** Returns a result object for the record, or `null`. */
indexRecord(record: WARCRecord, indexerOffset: IndexerOffsetLength, filename: string): Record<string, any> | null;
/** Takes a field name, a record and the result object; returns nothing. NOTE(review): presumably stores the field's value on `result`; confirm. */
setField(field: string, record: WARCRecord, result: Record<string, any>): void;
/** Looks up one field for a record; the value may be a string, a number, `null` or `undefined`. */
getField(field: string, record: WARCRecord): string | number | null | undefined;
}
/**
 * Concrete (non-abstract) subclass of `BaseIndexer`. This declaration adds no
 * members beyond a constructor with the same parameter list as the base class.
 */
declare class Indexer extends BaseIndexer {
constructor(opts?: Partial<IndexCommandArgs>, defaultFields?: string[]);
}
/**
 * Exported lists of CDX field names; the actual values are not visible in this declaration.
 * NOTE(review): the "legacy" list presumably corresponds to the output of
 * `CDXIndexer.serializeCDX11`; confirm against the implementation.
 */
declare const DEFAULT_CDX_FIELDS: string[];
declare const DEFAULT_LEGACY_CDX_FIELDS: string[];
/**
 * Return shape of `CDXAndRecordIndexer.indexRecordPair`: the index entry
 * together with the record(s) it was computed from.
 */
interface CDXAndRecord {
/** The index entry, as an untyped key/value object. */
cdx: Record<string, any>;
record: WARCRecord;
/** The paired request record; `null` is allowed. */
reqRecord: WARCRecord | null;
}
/**
 * CDX-flavoured indexer built on `Indexer`. Compared with the base class it
 * redeclares `iterRecords`, `indexRecord` and `getField`, makes `filterRecord`
 * non-optional, and adds `indexRecordPair` plus two serializers
 * (`serializeCDXJ`, `serializeCDX11`).
 */
declare class CDXIndexer extends Indexer {
includeAll: boolean;
overrideIndexForAll: boolean;
/** NOTE(review): presumably disables SURT key generation (see `getSurt`) when true; confirm. */
noSurt: boolean;
/** NOTE(review): presumably holds a record kept back for pairing with the next one; confirm. */
_lastRecord: WARCRecord | null;
/** Takes only the options; there is no `defaultFields` parameter at this level. */
constructor(opts?: Partial<CdxIndexCommandArgs>);
iterRecords(parser: WARCParser, filename: string): AsyncGenerator<Record<string, any>, void, unknown>;
/** Declared as required here, unlike the optional hook on `BaseIndexer`. */
filterRecord(record: WARCRecord): boolean;
/** Unlike the base signature, `record` may be `null` here. */
indexRecord(record: WARCRecord | null, indexOffset: IndexerOffsetLength, filename: string): Record<string, any> | null;
/** Takes a record together with an optional (nullable) request record; returns a result object or `null`. */
indexRecordPair(record: WARCRecord, reqRecord: WARCRecord | null, indexOffset: IndexerOffsetLength, filename: string): Record<string, any> | null;
/** Converts a result object to a string. NOTE(review): presumably one CDXJ line; confirm. */
serializeCDXJ(result: Record<string, any>): string;
/** Converts a result object to a string. NOTE(review): presumably one 11-field CDX line; confirm. */
serializeCDX11(result: Record<string, any>): string;
getField(field: string, record: WARCRecord): string | number | null | undefined;
}
/**
 * Variant of `CDXIndexer` whose `indexRecordPair` returns a `CDXAndRecord`
 * (index entry plus the records) instead of a bare result object.
 */
declare class CDXAndRecordIndexer extends CDXIndexer {
constructor(opts?: Partial<CdxIndexCommandArgs>);
/** Same parameters as the parent method; the return type is narrowed to `CDXAndRecord | null`. */
indexRecordPair(record: WARCRecord, reqRecord: WARCRecord | null, indexOffset: IndexerOffsetLength, filename: string): CDXAndRecord | null;
}

/**
 * Maps a URL string to another string.
 * NOTE(review): the name suggests the result is the URL's SURT
 * (Sort-friendly URI Reordering Transform) key; confirm in the implementation.
 */
declare function getSurt(url: string): string;
/**
 * Takes this module's `Request` shape (not the Fetch API `Request`) and returns a boolean.
 * NOTE(review): presumably rewrites `request` in place so a POST-style request
 * is expressed as a GET URL, returning whether a conversion happened; confirm.
 */
declare function postToGetUrl(request: Request): boolean;
/**
 * Builds a string from a URL, a query string and an HTTP method name.
 * NOTE(review): the exact format of the combined URL is not visible from this declaration.
 */
declare function appendRequestQuery(url: string, query: string, method: string): string;
/**
 * Converts a value of unknown shape into `URLSearchParams`.
 * `ignoreInvalid` is optional. NOTE(review): presumably suppresses errors for
 * input that is not valid JSON; confirm.
 */
declare function jsonToQueryParams(json: unknown, ignoreInvalid?: boolean): URLSearchParams;
/**
 * Converts a body given as text or bytes (`null`/`undefined` allowed), together
 * with its content type, into `URLSearchParams`.
 * NOTE(review): "mfd" presumably stands for multipart/form-data; confirm.
 */
declare function mfdToQueryParams(mfd: string | Uint8Array | null | undefined, contentType: string): URLSearchParams;
/**
 * String-returning counterpart of `jsonToQueryParams`. The input may be a JSON
 * string, a plain object, `null` or `undefined`; both parameters are optional.
 */
declare function jsonToQueryString(json?: string | Record<string, unknown> | undefined | null, ignoreInvalid?: boolean): string;
/**
 * String-returning counterpart of `mfdToQueryParams`: takes the body
 * (text, bytes, `null` or `undefined`) and its content type.
 */
declare function mfdToQueryString(mfd: string | Uint8Array | undefined | null, contentType: string): string;
/**
 * Combines a list of `Uint8Array` chunks into a single `Uint8Array`.
 * NOTE(review): `size` is presumably the total byte length of the result; confirm.
 */
declare function concatChunks(chunks: Uint8Array[], size: number): Uint8Array;
/**
 * Splits one chunk into a pair of `Uint8Array`s.
 * NOTE(review): `inx` is presumably the index at which the split happens; confirm.
 */
declare function splitChunk(chunk: Uint8Array, inx: number): [Uint8Array, Uint8Array];

export { AsyncIterReader, AsyncIterReaderOpts, BaseAsyncIterReader, BaseSerializerBuffer, CDXAndRecordIndexer, CDXIndexer, DEFAULT_CDX_FIELDS, DEFAULT_FIELDS, DEFAULT_LEGACY_CDX_FIELDS, Indexer, IndexerOffsetLength, LimitReader, NoConcatInflator, Request, Source, SourceReadable, SourceReader, StatusAndHeaders, StatusAndHeadersParser, StreamResult, StreamResults, WARCParser, WARCParserOpts, WARCRecord, WARCRecordOpts, WARCSerializer, WARCSerializerOpts, WARCType, WARC_1_0, WARC_1_1, appendRequestQuery, concatChunks, getSurt, jsonToQueryParams, jsonToQueryString, mfdToQueryParams, mfdToQueryString, postToGetUrl, splitChunk };
Loading

0 comments on commit 97bb136

Please sign in to comment.