Amazon DynamoDB 是一種完全託管的 NoSQL 數據庫服務,提供快速且可預測的性能,同時還能夠實現無縫擴展。使用 DynamoDB,您可以免除操作和擴展分佈式數據庫的管理工作負擔,因而無需擔心硬件預置、設置和配置、複製、軟件修補或集羣擴展等問題。DynamoDB 還提供靜態加密,這消除了在保護敏感數據時涉及的操作負擔和複雜性。
背景
有時需要將DynamoDB中的數據完全拷貝下來。需要用到掃描操作。
Amazon DynamoDB 中的 Scan 操作讀取表或二級索引中的每個項目。默認情況下,Scan 操作返回表或索引中每個項目的全部數據屬性。但是,單個 Scan 請求最多可檢索 1 MB 數據,因此對於大表需要進行多次掃描操作:每次把上一次返回的 LastEvaluatedKey 作爲下一次請求的起始鍵,直到不再返回 LastEvaluatedKey 爲止。另外需要注意表的讀取限制,如果掃描過快而超出限制,那麼就會告警。
我的表格結構如下
JAVA環境的配置
(1)JDK和maven環境就不說了。
(2)將用戶的認證相關內容放入配置文件當中,也可以直接寫在代碼當中(後面解決方法二就是直接寫的,僅爲演示方便,實際使用時不建議把密鑰硬編碼在代碼裏)
(3)maven配置
因爲用到了寫入csv文件,因此引入了csv相關工具包。
<dependencyManagement>
<dependencies>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-bom</artifactId>
<version>1.11.327</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-dynamodb</artifactId>
</dependency>
<dependency>
<groupId>net.sourceforge.javacsv</groupId>
<artifactId>javacsv</artifactId>
<version>2.0</version>
</dependency>
</dependencies>
解決方法(一)小容量讀取
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.PropertiesCredentials;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.amazonaws.services.dynamodbv2.model.AttributeValue;
import com.amazonaws.services.dynamodbv2.model.ScanRequest;
import com.amazonaws.services.dynamodbv2.model.ScanResult;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * Small-volume example: issues a single DynamoDB Scan against the "yucheng" table and
 * prints every returned item to standard output.
 *
 * <p>Only one Scan request is sent, so only the first page of results is printed. This is
 * intended for tables small enough to fit in a single Scan response.
 */
public class DynamoDBUtils {
    /**
     * Reads credentials from {@code src/main/resources/key.properties}, scans the table once
     * and prints each item, its "query" string and the strings in its "asin_list".
     *
     * @param args unused
     * @throws IOException if the credentials properties file cannot be read
     */
    public static void main(String[] args) throws IOException {
        AWSCredentials credentials = new PropertiesCredentials(new File("src/main/resources/key.properties"));
        AmazonDynamoDB client = AmazonDynamoDBClientBuilder.standard()
                .withCredentials(new AWSStaticCredentialsProvider(credentials))
                .withRegion("us-east-1")
                .build();
        try {
            ScanRequest scanRequest = new ScanRequest().withTableName("yucheng");
            ScanResult result = client.scan(scanRequest);
            for (Map<String, AttributeValue> item : result.getItems()) {
                System.out.println(item);

                // An item may lack the attribute, or hold it with a different type;
                // print "null" for it instead of aborting the whole run.
                AttributeValue queryAttribute = item.get("query");
                String query = (queryAttribute == null) ? null : queryAttribute.getS();
                System.out.println(query);

                AttributeValue asinAttribute = item.get("asin_list");
                List<AttributeValue> asinList = (asinAttribute == null) ? null : asinAttribute.getL();
                if (asinList != null) {
                    for (AttributeValue value : asinList) {
                        System.out.print(value.getS() + " ");
                    }
                }
                System.out.println();
            }
        } finally {
            // Release the client's underlying resources even if the scan fails.
            client.shutdown();
        }
    }
}
讀取結果如下。
解決方法(二)大容量讀取
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.amazonaws.services.dynamodbv2.model.AttributeValue;
import com.amazonaws.services.dynamodbv2.model.ScanRequest;
import com.amazonaws.services.dynamodbv2.model.ScanResult;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.csvreader.CsvWriter;
/**
 * Large-volume example: scans the whole "yucheng" DynamoDB table page by page and writes
 * one tab-separated record per item (query, comma-joined asin list) to a local file.
 */
public class DynamoDBUtils {
    // NOTE(review): credentials are hard-coded for demonstration only; do not commit real
    // keys. Prefer a properties file or another credentials provider.
    private static final String region = "us-east-1"; // replace with your own
    private static final String AWS_ACCESS_KEY_ID = "XXXXXX"; // replace with your own
    private static final String AWS_SECRET_ACCESS_KEY = "XXXXXXXXXXX"; // replace with your own

    /** Pause between consecutive Scan pages, in milliseconds, to throttle the read rate. */
    private static final long PAGE_PAUSE_MILLIS = 40000L;

    /**
     * Runs the export and prints the stack trace of any failure.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            f();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the interruption is not lost.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Scans the table page by page, feeding each response's last evaluated key into the
     * next request, and writes every item as one record to the output file. Progress (the
     * page count) is printed every 10 pages.
     *
     * @throws IOException if writing the output file fails
     * @throws InterruptedException if the thread is interrupted while pausing between pages
     */
    public static void f() throws IOException, InterruptedException {
        String filePath = "XXXX.tsv"; // replace with your own
        AWSCredentials credentials = new BasicAWSCredentials(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY);
        AmazonDynamoDB client = AmazonDynamoDBClientBuilder.standard()
                .withCredentials(new AWSStaticCredentialsProvider(credentials))
                .withRegion(region)
                .build();
        try {
            CsvWriter csvWriter = new CsvWriter(filePath, '\t', Charset.forName("UTF-8"));
            try {
                int count = 0;
                Map<String, AttributeValue> lastKeyEvaluated = null;
                do {
                    count++;
                    if (count % 10 == 0) {
                        System.out.println(count);
                    }
                    ScanRequest scanRequest = new ScanRequest()
                            .withTableName("yucheng")
                            .withExclusiveStartKey(lastKeyEvaluated);
                    ScanResult result = client.scan(scanRequest);
                    for (Map<String, AttributeValue> item : result.getItems()) {
                        csvWriter.writeRecord(toLine(item));
                    }
                    lastKeyEvaluated = result.getLastEvaluatedKey();
                    // Pause between pages to limit the read rate; otherwise DynamoDB is
                    // overloaded and raises alarms. No pause is needed after the final page.
                    if (lastKeyEvaluated != null) {
                        Thread.sleep(PAGE_PAUSE_MILLIS);
                    }
                } while (lastKeyEvaluated != null);
            } finally {
                // Always close the writer so the file handle is released and rows already
                // written are not lost when a later page fails.
                csvWriter.close();
            }
        } finally {
            client.shutdown();
        }
    }

    /**
     * Converts one item into a two-column record: the "query" string and the "asin_list"
     * strings joined with commas. A missing or differently typed attribute yields an empty
     * column, and list elements without a string value are skipped.
     */
    private static String[] toLine(Map<String, AttributeValue> item) {
        AttributeValue queryAttribute = item.get("query");
        String query = (queryAttribute == null) ? null : queryAttribute.getS();

        AttributeValue asinAttribute = item.get("asin_list");
        List<AttributeValue> asinList = (asinAttribute == null) ? null : asinAttribute.getL();
        List<String> asins = new ArrayList<>();
        if (asinList != null) {
            for (AttributeValue value : asinList) {
                String asin = value.getS();
                if (asin != null) {
                    asins.add(asin);
                }
            }
        }

        String[] line = new String[2];
        line[0] = (query == null) ? "" : query;
        line[1] = String.join(",", asins);
        return line;
    }
}