|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||
java.lang.Object
org.archive.modules.Processor
com.powerset.heritrix.writer.HBaseWriterProcessor
public class HBaseWriterProcessor
A Heritrix2 processor that writes to Hadoop HBase.
| Field Summary | |
|---|---|
static org.archive.state.Key<java.lang.String> |
MASTER
Location of hbase master. |
static org.archive.state.Key<java.lang.Integer> |
POOL_MAX_ACTIVE
Maximum active files in pool. |
static org.archive.state.Key<java.lang.Integer> |
POOL_MAX_WAIT
Maximum time to wait on pool element (milliseconds). |
static org.archive.state.Key<org.archive.modules.net.ServerCache> |
SERVER_CACHE
|
static org.archive.state.Key<java.lang.String> |
TABLE
HBase table to crawl into. |
static org.archive.state.Key<java.lang.Long> |
TOTAL_BYTES_TO_WRITE
Total file bytes to write to disk. |
| Fields inherited from class org.archive.modules.Processor |
|---|
DECIDE_RULES, ENABLED |
| Constructor Summary | |
|---|---|
HBaseWriterProcessor()
|
|
| Method Summary | |
|---|---|
protected org.archive.modules.ProcessResult |
checkBytesWritten(org.archive.state.StateProvider context)
|
void |
close()
|
protected java.lang.String |
getHostAddress(org.archive.modules.ProcessorURI curi)
Return IP address of given URI suitable for recording (as in a classic ARC 5-field header line). |
protected java.lang.String |
getMaster()
|
protected int |
getMaxActive()
|
protected int |
getMaxWait()
|
protected org.archive.io.WriterPool |
getPool()
|
protected java.lang.String |
getTable()
|
protected long |
getTotalBytesWritten()
|
void |
initialTasks(org.archive.state.StateProvider context)
|
protected void |
innerProcess(org.archive.modules.ProcessorURI puri)
|
protected org.archive.modules.ProcessResult |
innerProcessResult(org.archive.modules.ProcessorURI puri)
|
protected void |
setPool(org.archive.io.WriterPool pool)
|
protected void |
setTotalBytesWritten(long b)
|
protected void |
setupPool()
|
protected boolean |
shouldProcess(org.archive.modules.ProcessorURI uri)
|
protected boolean |
shouldWrite(org.archive.modules.ProcessorURI curi)
Whether the given ProcessorURI should be written to archive files. |
protected org.archive.modules.ProcessResult |
write(org.archive.modules.ProcessorURI curi,
long recordLength,
java.io.InputStream in,
java.lang.String ip)
|
| Methods inherited from class org.archive.modules.Processor |
|---|
flattenVia, getRecordedSize, getURICount, hasRfc2617CredentialAvatar, innerRejectProcess, isSuccess, process, report |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
@Immutable public static final org.archive.state.Key<java.lang.String> MASTER
@Immutable public static final org.archive.state.Key<java.lang.String> TABLE
@Immutable public static final org.archive.state.Key<java.lang.Integer> POOL_MAX_ACTIVE
@Immutable public static final org.archive.state.Key<java.lang.Integer> POOL_MAX_WAIT
@Immutable public static final org.archive.state.Key<org.archive.modules.net.ServerCache> SERVER_CACHE
@Immutable @Expert public static final org.archive.state.Key<java.lang.Long> TOTAL_BYTES_TO_WRITE
| Constructor Detail |
|---|
public HBaseWriterProcessor()
| Method Detail |
|---|
public void initialTasks(org.archive.state.StateProvider context)
initialTasks in interface org.archive.state.Initializable
protected java.lang.String getMaster()
protected java.lang.String getTable()
protected void setupPool()
protected int getMaxActive()
protected int getMaxWait()
protected void setPool(org.archive.io.WriterPool pool)
protected org.archive.io.WriterPool getPool()
protected long getTotalBytesWritten()
protected void setTotalBytesWritten(long b)
protected org.archive.modules.ProcessResult innerProcessResult(org.archive.modules.ProcessorURI puri)
innerProcessResult in class org.archive.modules.Processor
protected java.lang.String getHostAddress(org.archive.modules.ProcessorURI curi)
curi - ProcessorURI
protected boolean shouldWrite(org.archive.modules.ProcessorURI curi)
curi - ProcessorURI
protected org.archive.modules.ProcessResult write(org.archive.modules.ProcessorURI curi,
long recordLength,
java.io.InputStream in,
java.lang.String ip)
throws java.io.IOException
java.io.IOException
protected org.archive.modules.ProcessResult checkBytesWritten(org.archive.state.StateProvider context)
protected void innerProcess(org.archive.modules.ProcessorURI puri)
innerProcess in class org.archive.modules.Processor
public void close()
close in interface java.io.Closeable
protected boolean shouldProcess(org.archive.modules.ProcessorURI uri)
shouldProcess in class org.archive.modules.Processor
|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||