public class HBaseParameters extends Object
<bean id="hbaseParameterSettings" class="org.archive.io.hbase.HBaseParameters">
<!-- These settings are required -->
<property name="zkQuorum" value="localhost" />
<property name="hbaseTableName" value="crawl" />
<!-- This should reflect your installation, but 2181 is the default -->
<property name="zkPort" value="2181" />
<!-- All other settings are optional -->
<property name="onlyProcessNewRecords" value="false" />
<property name="onlyWriteNewRecords" value="false" />
<property name="contentColumnFamily" value="newcontent" />
<!-- Override more options here -->
</bean>
<bean id="hbaseWriterProcessor" class="org.archive.modules.writer.HBaseWriterProcessor">
<property name="hbaseParameters">
<ref bean="hbaseParameterSettings"/>
</property>
</bean>
<bean id="dispositionProcessors" class="org.archive.modules.DispositionChain">
<property name="processors">
<list>
<ref bean="hbaseWriterProcessor"/>
<!-- other references -->
</list>
</property>
</bean>
| Modifier and Type | Field and Description |
|---|---|
static String |
CONTENT_COLUMN_FAMILY
The Constant CONTENT_COLUMN_FAMILY.
|
static String |
CONTENT_COLUMN_NAME
The Constant CONTENT_COLUMN_NAME.
|
static String |
CONTENT_LENGTH_COLUMN_NAME |
static String |
CONTENT_SIZE_COLUMN_NAME |
static String |
CONTENT_TYPE_COLUMN_NAME |
static String |
CURI_COLUMN_FAMILY
The Constant CURI_COLUMN_FAMILY.
|
static long |
DEFAULT_MAX_FILE_SIZE_IN_BYTES
The Constant DEFAULT_MAX_FILE_SIZE_IN_BYTES.
|
static String |
defaultHbaseTableNameSpace |
static String |
FETCH_ANNOTATIONS_COLUMN_NAME |
static String |
FETCH_ANNOTATIONS_VALUE_DELIMITER |
static String |
FETCH_ATTEMPTS_COLUMN_NAME |
static String |
FETCH_DURATION_COLUMN_NAME |
static String |
IP_COLUMN_NAME
The Constant IP_COLUMN_NAME.
|
static String |
IS_SEED_COLUMN_NAME
The Constant IS_SEED_COLUMN_NAME.
|
static String |
PATH_FROM_SEED_COLUMN_NAME
The Constant PATH_FROM_SEED_COLUMN_NAME.
|
static String |
REQUEST_COLUMN_NAME
The Constant REQUEST_COLUMN_NAME.
|
static String |
URL_COLUMN_NAME
The Constant URL_COLUMN_NAME.
|
static String |
VIA_COLUMN_NAME
The Constant VIA_COLUMN_NAME.
|
static int |
ZK_PORT
Default ZooKeeper port (one of the default option constants).
|
static String |
ZOOKEEPER_CLIENT_PORT
The ZOOKEEPER client port.
|
| Constructor and Description |
|---|
HBaseParameters() |
| Modifier and Type | Method and Description |
|---|---|
String |
getContentColumnFamily()
Gets the content column family.
|
String |
getContentColumnName()
Gets the content column name.
|
String |
getContentLengthColumnName() |
String |
getContentSizeColumnName() |
String |
getContentTypeColumnName() |
String |
getCuriColumnFamily()
Gets the curi column family.
|
long |
getDefaultMaxFileSizeInBytes()
Gets the default max file size in bytes.
|
String |
getFetchAnnotationsColumnName() |
String |
getFetchAnnotationsValueDelimiter() |
String |
getFetchAttmptsColumnName() |
String |
getFetchDurationColumnName() |
String |
getHbaseTableName()
Gets the hbase table name.
|
String |
getIpColumnName()
Gets the ip column name.
|
String |
getIsSeedColumnName()
Gets the is-seed column name.
|
String |
getPathFromSeedColumnName()
Gets the path from seed column name.
|
String |
getRequestColumnName()
Gets the request column name.
|
Serializer |
getSerializer()
Gets the serializer.
|
String |
getUrlColumnName()
Gets the url column name.
|
String |
getViaColumnName()
Gets the via column name.
|
int |
getZkPort()
Gets the zk port.
|
String |
getZkQuorum()
Gets the zk quorum.
|
String |
getZookeeperClientPortKey()
Gets the zookeeper client port key.
|
boolean |
isMd5Key()
Checks whether the md5 key option is enabled.
|
boolean |
isOnlyProcessNewRecords()
Checks whether only new records are processed.
|
boolean |
isOnlyWriteNewRecords()
Checks whether only new records are written.
|
void |
setContentColumnFamily(String contentColumnFamily)
Sets the content column family.
|
void |
setContentColumnName(String contentColumnName)
Sets the content column name.
|
void |
setContentLengthColumnName(String contentLengthColumnName) |
void |
setContentSizeColumnName(String contentSizeColumnName) |
void |
setContentTypeColumnName(String contentTypeColumnName) |
void |
setCuriColumnFamily(String curiColumnFamily)
Sets the curi column family.
|
void |
setDefaultMaxFileSizeInBytes(long defaultMaxFileSizeInBytes)
Sets the default max file size in bytes.
|
void |
setFetchAnnotationsColumnName(String fetchAnnotationsColumnName) |
void |
setFetchAnnotationsValueDelimiter(String fetchAnnotationsValueDelimiter) |
void |
setFetchAttmptsColumnName(String fetchAttmptsColumnName) |
void |
setFetchDurationColumnName(String fetchDurationColumnName) |
void |
setHbaseTableName(String tableName)
Sets the hbase table name.
|
void |
setIpColumnName(String ipColumnName)
Sets the ip column name.
|
void |
setIsSeedColumnName(String isSeedColumnName)
Sets the is-seed column name.
|
void |
setMd5Key(boolean md5Key)
Sets the md5 key.
|
void |
setOnlyProcessNewRecords(boolean onlyProcessNewRecords)
Sets whether only new records are processed.
|
void |
setOnlyWriteNewRecords(boolean onlyWriteNewRecords)
Sets whether only new records are written.
|
void |
setPathFromSeedColumnName(String pathFromSeedColumnName)
Sets the path from seed column name.
|
void |
setRequestColumnName(String requestColumnName)
Sets the request column name.
|
void |
setSerializer(Serializer serializer)
Sets the serializer.
|
void |
setUrlColumnName(String urlColumnName)
Sets the url column name.
|
void |
setViaColumnName(String viaColumnName)
Sets the via column name.
|
void |
setZkPort(int port)
Sets the zk port.
|
void |
setZkQuorum(String quorum)
Sets the zk quorum.
|
public static final int ZK_PORT
public static final String defaultHbaseTableNameSpace
public static final String CONTENT_COLUMN_FAMILY
public static final String CONTENT_COLUMN_NAME
public static final String CURI_COLUMN_FAMILY
public static final String IP_COLUMN_NAME
public static final String CONTENT_TYPE_COLUMN_NAME
public static final String CONTENT_SIZE_COLUMN_NAME
public static final String CONTENT_LENGTH_COLUMN_NAME
public static final String FETCH_ATTEMPTS_COLUMN_NAME
public static final String FETCH_DURATION_COLUMN_NAME
public static final String FETCH_ANNOTATIONS_COLUMN_NAME
public static final String FETCH_ANNOTATIONS_VALUE_DELIMITER
public static final String PATH_FROM_SEED_COLUMN_NAME
public static final String IS_SEED_COLUMN_NAME
public static final String VIA_COLUMN_NAME
public static final String URL_COLUMN_NAME
public static final String REQUEST_COLUMN_NAME
public static final long DEFAULT_MAX_FILE_SIZE_IN_BYTES
public static final String ZOOKEEPER_CLIENT_PORT
public String getZkQuorum()
public void setZkQuorum(String quorum)
quorum - the new zk quorum
public int getZkPort()
public void setZkPort(int port)
port - the new zk port
public String getHbaseTableName()
public void setHbaseTableName(String tableName)
tableName - the new hbase table name
public String getContentColumnFamily()
public void setContentColumnFamily(String contentColumnFamily)
contentColumnFamily - the new content column family
public String getContentColumnName()
public void setContentColumnName(String contentColumnName)
contentColumnName - the new content column name
public String getCuriColumnFamily()
public void setCuriColumnFamily(String curiColumnFamily)
curiColumnFamily - the new curi column family
public String getIpColumnName()
public void setIpColumnName(String ipColumnName)
ipColumnName - the new ip column name
public String getPathFromSeedColumnName()
public void setPathFromSeedColumnName(String pathFromSeedColumnName)
pathFromSeedColumnName - the new path from seed column name
public String getIsSeedColumnName()
public void setIsSeedColumnName(String isSeedColumnName)
isSeedColumnName - the new is-seed column name
public String getViaColumnName()
public void setViaColumnName(String viaColumnName)
viaColumnName - the new via column name
public String getUrlColumnName()
public void setUrlColumnName(String urlColumnName)
urlColumnName - the new url column name
public String getRequestColumnName()
public void setRequestColumnName(String requestColumnName)
requestColumnName - the new request column name
public String getZookeeperClientPortKey()
public Serializer getSerializer()
public void setSerializer(Serializer serializer)
serializer - the new serializer
public boolean isMd5Key()
public void setMd5Key(boolean md5Key)
md5Key - the new md5 key
public boolean isOnlyWriteNewRecords()
public void setOnlyWriteNewRecords(boolean onlyWriteNewRecords)
onlyWriteNewRecords - the new only write new records
public boolean isOnlyProcessNewRecords()
public void setOnlyProcessNewRecords(boolean onlyProcessNewRecords)
onlyProcessNewRecords - the new only process new records
public long getDefaultMaxFileSizeInBytes()
public void setDefaultMaxFileSizeInBytes(long defaultMaxFileSizeInBytes)
defaultMaxFileSizeInBytes - the new default max file size in bytes
public String getContentTypeColumnName()
public void setContentTypeColumnName(String contentTypeColumnName)
public String getContentSizeColumnName()
public void setContentSizeColumnName(String contentSizeColumnName)
public String getFetchAttmptsColumnName()
public void setFetchAttmptsColumnName(String fetchAttmptsColumnName)
public String getFetchDurationColumnName()
public void setFetchDurationColumnName(String fetchDurationColumnName)
public String getFetchAnnotationsColumnName()
public void setFetchAnnotationsColumnName(String fetchAnnotationsColumnName)
public String getContentLengthColumnName()
public void setContentLengthColumnName(String contentLengthColumnName)
public String getFetchAnnotationsValueDelimiter()
public void setFetchAnnotationsValueDelimiter(String fetchAnnotationsValueDelimiter)
Copyright © 2007–2014. All rights reserved.