【AWS】DynamoDB掃描操作獲取表全部數據

Amazon DynamoDB 是一種完全託管的 NoSQL 數據庫服務,提供快速且可預測的性能,同時還能夠實現無縫擴展。使用 DynamoDB,您可以免除操作和擴展分佈式數據庫的管理工作負擔,因而無需擔心硬件預置、設置和配置、複製、軟件修補或集羣擴展等問題。DynamoDB 還提供靜態加密,這消除了在保護敏感數據時涉及的操作負擔和複雜性。

背景

有時需要將 DynamoDB 表中的數據完整地拷貝下來,這時就需要用到掃描(Scan)操作。

Amazon DynamoDB 中的 Scan 操作讀取表或二級索引中的每個項目。默認情況下,Scan 操作返回表或索引中每個項目的全部數據屬性。但是,單個 Scan 請求最多可檢索 1 MB 數據,因此對於大表需要分頁進行多次掃描:將上一次響應中的 LastEvaluatedKey 作爲下一次請求的 ExclusiveStartKey,直到響應中不再返回 LastEvaluatedKey 爲止。此外還需要注意表的讀取容量限制,如果掃描過於頻繁而超出限制,就會觸發告警。

我的表格結構如下(原文此處爲截圖;從後文代碼可以看出,表名爲 yucheng,每個項目包含字符串屬性 query 和列表屬性 asin_list,列表中的元素爲字符串)

JAVA環境的配置 

(1)JDK和maven環境就不說了。

(2)將用戶的認證信息放入配置文件當中(解決方法一讀取的是 src/main/resources/key.properties,文件中需要包含 accessKey 和 secretKey 兩項),也可以直接寫在代碼當中(後面的解決方法二就是直接寫的,但請注意不要把真實密鑰提交到代碼倉庫)

(3)maven配置

因爲用到了寫入csv文件,因此引入了csv相關工具包。

<!-- Import the AWS SDK bill of materials (BOM) so that the versions of the
     individual AWS SDK modules are managed in one place. -->
<dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>com.amazonaws</groupId>
                <artifactId>aws-java-sdk-bom</artifactId>
                <version>1.11.327</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <dependencies>
        <!-- DynamoDB client module; no version is given here because it is
             supplied by the imported BOM. -->
        <dependency>
            <groupId>com.amazonaws</groupId>
            <artifactId>aws-java-sdk-dynamodb</artifactId>
        </dependency>
        <!-- javacsv: provides com.csvreader.CsvWriter, used to write the
             exported rows to a delimited text file. -->
        <dependency>
            <groupId>net.sourceforge.javacsv</groupId>
            <artifactId>javacsv</artifactId>
            <version>2.0</version>
        </dependency>
    </dependencies>

解決方法(一)小容量讀取

import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.PropertiesCredentials;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.amazonaws.services.dynamodbv2.model.AttributeValue;
import com.amazonaws.services.dynamodbv2.model.ScanRequest;
import com.amazonaws.services.dynamodbv2.model.ScanResult;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;


/**
 * Small-table variant: issues a single Scan request against the {@code yucheng}
 * table and prints every returned item to standard output.
 *
 * <p>Only the items contained in that one Scan response are printed; no
 * follow-up requests are made.
 */
public class DynamoDBUtils {

    /**
     * Loads credentials from {@code src/main/resources/key.properties}, scans the
     * table once and prints, for each item, the raw item map, its {@code query}
     * string and the space-separated strings of its {@code asin_list}.
     *
     * @param args unused
     * @throws IOException if the credentials file cannot be read
     */
    public static void main(String[] args) throws IOException {
        File keyFile = new File("src/main/resources/key.properties");
        AWSCredentials fileCredentials = new PropertiesCredentials(keyFile);

        AmazonDynamoDB dynamo = AmazonDynamoDBClientBuilder.standard()
                .withCredentials(new AWSStaticCredentialsProvider(fileCredentials))
                .withRegion("us-east-1")
                .build();

        ScanResult page = dynamo.scan(new ScanRequest().withTableName("yucheng"));
        List<Map<String, AttributeValue>> rows = page.getItems();

        for (Map<String, AttributeValue> row : rows) {
            // Raw attribute map first, then the individual fields.
            System.out.println(row);

            AttributeValue queryAttr = row.get("query");
            System.out.println(queryAttr.getS());

            List<AttributeValue> asins = row.get("asin_list").getL();
            for (int i = 0; i < asins.size(); i++) {
                System.out.print(asins.get(i).getS() + " ");
            }
            // Terminate the line of space-separated ASINs.
            System.out.println();
        }
    }
}

讀取結果如下。

 解決方法(二)大容量讀取


import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.amazonaws.services.dynamodbv2.model.AttributeValue;
import com.amazonaws.services.dynamodbv2.model.ScanRequest;
import com.amazonaws.services.dynamodbv2.model.ScanResult;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;

import java.util.List;
import java.util.Map;

import com.csvreader.CsvWriter;


/**
 * Large-table variant: pages through the whole {@code yucheng} table with
 * repeated Scan requests and writes one tab-separated record per item.
 *
 * <p>Each output record has two fields: the item's {@code query} string and the
 * strings of its {@code asin_list} joined with commas. Every item is assumed to
 * carry both attributes; an item without them fails with a
 * {@link NullPointerException}, as in the original code.
 */
public class DynamoDBUtils {

    // NOTE(review): hard-coded credentials are kept for parity with the article;
    // do not commit real keys to source control.
    private static final String region = "us-east-1";// replace with your own
    private static final String AWS_ACCESS_KEY_ID = "XXXXXX";// replace with your own
    private static final String AWS_SECRET_ACCESS_KEY = "XXXXXXXXXXX";// replace with your own

    /** Pause between two consecutive Scan requests, to keep the read rate low. */
    private static final long PAUSE_BETWEEN_PAGES_MILLIS = 40000L;


    /**
     * Runs the export and prints the stack trace of any failure.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            f();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the interruption is not lost.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }


    /**
     * Scans the table page by page and writes every item to the output file.
     *
     * <p>The {@code LastEvaluatedKey} of each response is passed as the
     * {@code ExclusiveStartKey} of the next request; the loop ends when a response
     * carries no such key. The writer and the client are released even when a
     * request or a write fails.
     *
     * @throws IOException          if writing a record fails
     * @throws InterruptedException if the thread is interrupted while pausing
     *                              between pages
     */
    public static void f() throws IOException, InterruptedException {
        String filePath = "XXXX.tsv";// replace with your own
        AWSCredentials credentials = new BasicAWSCredentials(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY);
        AmazonDynamoDB client = AmazonDynamoDBClientBuilder.standard()
                .withCredentials(new AWSStaticCredentialsProvider(credentials))
                .withRegion(region)
                .build();
        try {
            CsvWriter csvWriter = new CsvWriter(filePath, '\t', Charset.forName("UTF-8"));
            try {
                int count = 0;
                Map<String, AttributeValue> lastKeyEvaluated = null;
                do {
                    // Progress indicator: print the page number every tenth page.
                    count++;
                    if (count % 10 == 0) {
                        System.out.println(count);
                    }
                    ScanRequest scanRequest = new ScanRequest()
                            .withTableName("yucheng")
                            .withExclusiveStartKey(lastKeyEvaluated);

                    ScanResult result = client.scan(scanRequest);
                    for (Map<String, AttributeValue> item : result.getItems()) {
                        csvWriter.writeRecord(toRecord(item));
                    }
                    lastKeyEvaluated = result.getLastEvaluatedKey();

                    // Throttle only when another page will actually be requested;
                    // sleeping after the final page would just delay the exit.
                    if (lastKeyEvaluated != null) {
                        Thread.sleep(PAUSE_BETWEEN_PAGES_MILLIS);
                    }
                } while (lastKeyEvaluated != null);
            } finally {
                // Always close the writer so buffered rows reach the file.
                csvWriter.close();
            }
        } finally {
            client.shutdown();
        }
    }

    /** Converts one item into the two output fields: query and comma-joined ASINs. */
    private static String[] toRecord(Map<String, AttributeValue> item) {
        String query = item.get("query").getS();

        List<AttributeValue> asinList = item.get("asin_list").getL();
        List<String> asins = new ArrayList<>(asinList.size());
        for (AttributeValue value : asinList) {
            asins.add(value.getS());
        }

        return new String[] {query, String.join(",", asins)};
    }

}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章