37.9d026956.js

(window.webpackJsonp=window.webpackJsonp||[]).push([[37],{465:function(e,t,a){"use strict";a.r(t);var n=a(56),i=Object(n.a)({},(function(){var e=this,t=e.$createElement,a=e._self._c||t;return a("ContentSlotsDistributor",{attrs:{"slot-key":e.$parent.slotKey}},[a("h1",{attrs:{id:"spring-batch-introduction"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#spring-batch-introduction"}},[e._v("#")]),e._v(" Spring Batch Introduction")]),e._v(" "),a("h2",{attrs:{id:"spring-batch-introduction-2"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#spring-batch-introduction-2"}},[e._v("#")]),e._v(" Spring Batch Introduction")]),e._v(" "),a("p",[e._v("Many applications within the enterprise domain require bulk processing to perform\nbusiness operations in mission critical environments. These business operations include:")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Automated, complex processing of large volumes of information that is most efficiently\nprocessed without user interaction. These operations typically include time-based events\n(such as month-end calculations, notices, or correspondence).")])]),e._v(" "),a("li",[a("p",[e._v("Periodic application of complex business rules processed repetitively across very large\ndata sets (for example, insurance benefit determination or rate adjustments).")])]),e._v(" "),a("li",[a("p",[e._v("Integration of information that is received from internal and external systems that\ntypically requires formatting, validation, and processing in a transactional manner into\nthe system of record. Batch processing is used to process billions of transactions every\nday for enterprises.")])])]),e._v(" "),a("p",[e._v("Spring Batch is a lightweight, comprehensive batch framework designed to enable the\ndevelopment of robust batch applications vital for the daily operations of enterprise\nsystems. Spring Batch builds upon the characteristics of the Spring Framework that people\nhave come to expect (productivity, POJO-based development approach, and general ease of\nuse), while making it easy for developers to access and leverage more advance enterprise\nservices when necessary. Spring Batch is not a scheduling framework. There are many good\nenterprise schedulers (such as Quartz, Tivoli, Control-M, etc.) available in both the\ncommercial and open source spaces. It is intended to work in conjunction with a\nscheduler, not replace a scheduler.")]),e._v(" "),a("p",[e._v("Spring Batch provides reusable functions that are essential in processing large volumes\nof records, including logging/tracing, transaction management, job processing statistics,\njob restart, skip, and resource management. It also provides more advanced technical\nservices and features that enable extremely high-volume and high performance batch jobs\nthrough optimization and partitioning techniques. Spring Batch can be used in both simple\nuse cases (such as reading a file into a database or running a stored procedure) as well\nas complex, high volume use cases (such as moving high volumes of data between databases,\ntransforming it, and so on). High-volume batch jobs can leverage the framework in a\nhighly scalable manner to process significant volumes of information.")]),e._v(" "),a("h3",{attrs:{id:"background"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#background"}},[e._v("#")]),e._v(" Background")]),e._v(" "),a("p",[e._v("While open source software projects and associated communities have focused greater\nattention on web-based and microservices-based architecture frameworks, there has been a\nnotable lack of focus on reusable architecture frameworks to accommodate Java-based batch\nprocessing needs, despite continued needs to handle such processing within enterprise IT\nenvironments. The lack of a standard, reusable batch architecture has resulted in the\nproliferation of many one-off, in-house solutions developed within client enterprise IT\nfunctions.")]),e._v(" "),a("p",[e._v("SpringSource (now Pivotal) and Accenture collaborated to change this. Accenture’s\nhands-on industry and technical experience in implementing batch architectures,\nSpringSource’s depth of technical experience, and Spring’s proven programming model\ntogether made a natural and powerful partnership to create high-quality, market-relevant\nsoftware aimed at filling an important gap in enterprise Java. Both companies worked with\na number of clients who were solving similar problems by developing Spring-based batch\narchitecture solutions. This provided some useful additional detail and real-life\nconstraints that helped to ensure the solution can be applied to the real-world problems\nposed by clients.")]),e._v(" "),a("p",[e._v("Accenture contributed previously proprietary batch processing architecture frameworks to\nthe Spring Batch project, along with committer resources to drive support, enhancements,\nand the existing feature set. Accenture’s contribution was based upon decades of\nexperience in building batch architectures with the last several generations of\nplatforms: COBOL/Mainframe, C++/Unix, and now Java/anywhere.")]),e._v(" "),a("p",[e._v("The collaborative effort between Accenture and SpringSource aimed to promote the\nstandardization of software processing approaches, frameworks, and tools that can be\nconsistently leveraged by enterprise users when creating batch applications. Companies\nand government agencies desiring to deliver standard, proven solutions to their\nenterprise IT environments can benefit from Spring Batch.")]),e._v(" "),a("h3",{attrs:{id:"usage-scenarios"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#usage-scenarios"}},[e._v("#")]),e._v(" Usage Scenarios")]),e._v(" "),a("p",[e._v("A typical batch program generally:")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Reads a large number of records from a database, file, or queue.")])]),e._v(" "),a("li",[a("p",[e._v("Processes the data in some fashion.")])]),e._v(" "),a("li",[a("p",[e._v("Writes back data in a modified form.")])])]),e._v(" "),a("p",[e._v("Spring Batch automates this basic batch iteration, providing the capability to process\nsimilar transactions as a set, typically in an offline environment without any user\ninteraction. Batch jobs are part of most IT projects, and Spring Batch is the only open\nsource framework that provides a robust, enterprise-scale solution.")]),e._v(" "),a("p",[e._v("Business Scenarios")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Commit batch process periodically")])]),e._v(" "),a("li",[a("p",[e._v("Concurrent batch processing: parallel processing of a job")])]),e._v(" "),a("li",[a("p",[e._v("Staged, enterprise message-driven processing")])]),e._v(" "),a("li",[a("p",[e._v("Massively parallel batch processing")])]),e._v(" "),a("li",[a("p",[e._v("Manual or scheduled restart after failure")])]),e._v(" "),a("li",[a("p",[e._v("Sequential processing of dependent steps (with extensions to workflow-driven batches)")])]),e._v(" "),a("li",[a("p",[e._v("Partial processing: skip records (for example, on rollback)")])]),e._v(" "),a("li",[a("p",[e._v("Whole-batch transaction, for cases with a small batch size or existing stored\nprocedures/scripts")])])]),e._v(" "),a("p",[e._v("Technical Objectives")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Batch developers use the Spring programming model: Concentrate on business logic and\nlet the framework take care of infrastructure.")])]),e._v(" "),a("li",[a("p",[e._v("Clear separation of concerns between the infrastructure, the batch execution\nenvironment, and the batch application.")])]),e._v(" "),a("li",[a("p",[e._v("Provide common, core execution services as interfaces that all projects can implement.")])]),e._v(" "),a("li",[a("p",[e._v("Provide simple and default implementations of the core execution interfaces that can be\nused 'out of the box'.")])]),e._v(" "),a("li",[a("p",[e._v("Easy to configure, customize, and extend services, by leveraging the spring framework\nin all layers.")])]),e._v(" "),a("li",[a("p",[e._v("All existing core services should be easy to replace or extend, without any impact to\nthe infrastructure layer.")])]),e._v(" "),a("li",[a("p",[e._v("Provide a simple deployment model, with the architecture JARs completely separate from\nthe application, built using Maven.")])])]),e._v(" "),a("h3",{attrs:{id:"spring-batch-architecture"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#spring-batch-architecture"}},[e._v("#")]),e._v(" Spring Batch Architecture")]),e._v(" "),a("p",[e._v("Spring Batch is designed with extensibility and a diverse group of end users in mind. The\nfigure below shows the layered architecture that supports the extensibility and ease of\nuse for end-user developers.")]),e._v(" "),a("p",[a("img",{attrs:{src:"https://docs.spring.io/spring-batch/docs/current/reference/html/images/spring-batch-layers.png",alt:"Figure 1.1: Spring Batch Layered Architecture"}})]),e._v(" "),a("p",[e._v("Figure 1. Spring Batch Layered Architecture")]),e._v(" "),a("p",[e._v("This layered architecture highlights three major high-level components: Application,\nCore, and Infrastructure. The application contains all batch jobs and custom code written\nby developers using Spring Batch. The Batch Core contains the core runtime classes\nnecessary to launch and control a batch job. It includes implementations for"),a("code",[e._v("JobLauncher")]),e._v(", "),a("code",[e._v("Job")]),e._v(", and "),a("code",[e._v("Step")]),e._v(". Both Application and Core are built on top of a common\ninfrastructure. This infrastructure contains common readers and writers and services\n(such as the "),a("code",[e._v("RetryTemplate")]),e._v("), which are used both by application developers(readers and\nwriters, such as "),a("code",[e._v("ItemReader")]),e._v(" and "),a("code",[e._v("ItemWriter")]),e._v(") and the core framework itself (retry,\nwhich is its own library).")]),e._v(" "),a("h3",{attrs:{id:"general-batch-principles-and-guidelines"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#general-batch-principles-and-guidelines"}},[e._v("#")]),e._v(" General Batch Principles and Guidelines")]),e._v(" "),a("p",[e._v("The following key principles, guidelines, and general considerations should be considered\nwhen building a batch solution.")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Remember that a batch architecture typically affects on-line architecture and vice\nversa. Design with both architectures and environments in mind using common building\nblocks when possible.")])]),e._v(" "),a("li",[a("p",[e._v("Simplify as much as possible and avoid building complex logical structures in single\nbatch applications.")])]),e._v(" "),a("li",[a("p",[e._v("Keep the processing and storage of data physically close together (in other words, keep\nyour data where your processing occurs).")])]),e._v(" "),a("li",[a("p",[e._v("Minimize system resource use, especially I/O. Perform as many operations as possible in\ninternal memory.")])]),e._v(" "),a("li",[a("p",[e._v("Review application I/O (analyze SQL statements) to ensure that unnecessary physical I/O\nis avoided. In particular, the following four common flaws need to be looked for:")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Reading data for every transaction when the data could be read once and cached or kept\nin the working storage.")])]),e._v(" "),a("li",[a("p",[e._v("Rereading data for a transaction where the data was read earlier in the same\ntransaction.")])]),e._v(" "),a("li",[a("p",[e._v("Causing unnecessary table or index scans.")])]),e._v(" "),a("li",[a("p",[e._v("Not specifying key values in the WHERE clause of an SQL statement.")])])])]),e._v(" "),a("li",[a("p",[e._v("Do not do things twice in a batch run. For instance, if you need data summarization for\nreporting purposes, you should (if possible) increment stored totals when data is being\ninitially processed, so your reporting application does not have to reprocess the same\ndata.")])]),e._v(" "),a("li",[a("p",[e._v("Allocate enough memory at the beginning of a batch application to avoid time-consuming\nreallocation during the process.")])]),e._v(" "),a("li",[a("p",[e._v("Always assume the worst with regard to data integrity. Insert adequate checks and\nrecord validation to maintain data integrity.")])]),e._v(" "),a("li",[a("p",[e._v("Implement checksums for internal validation where possible. For example, flat files\nshould have a trailer record telling the total of records in the file and an aggregate of\nthe key fields.")])]),e._v(" "),a("li",[a("p",[e._v("Plan and execute stress tests as early as possible in a production-like environment\nwith realistic data volumes.")])]),e._v(" "),a("li",[a("p",[e._v("In large batch systems, backups can be challenging, especially if the system is running\nconcurrent with on-line on a 24-7 basis. Database backups are typically well taken care\nof in the on-line design, but file backups should be considered to be just as important.\nIf the system depends on flat files, file backup procedures should not only be in place\nand documented but be regularly tested as well.")])])]),e._v(" "),a("h3",{attrs:{id:"batch-processing-strategies"}},[a("a",{staticClass:"header-anchor",attrs:{href:"#batch-processing-strategies"}},[e._v("#")]),e._v(" Batch Processing Strategies")]),e._v(" "),a("p",[e._v("To help design and implement batch systems, basic batch application building blocks and\npatterns should be provided to the designers and programmers in the form of sample\nstructure charts and code shells. When starting to design a batch job, the business logic\nshould be decomposed into a series of steps that can be implemented using the following\nstandard building blocks:")]),e._v(" "),a("ul",[a("li",[a("p",[a("em",[e._v("Conversion Applications:")]),e._v(" For each type of file supplied by or generated to an\nexternal system, a conversion application must be created to convert the transaction\nrecords supplied into a standard format required for processing. This type of batch\napplication can partly or entirely consist of translation utility modules (see Basic\nBatch Services).")])]),e._v(" "),a("li",[a("p",[a("em",[e._v("Validation Applications:")]),e._v(" Validation applications ensure that all input/output\nrecords are correct and consistent. Validation is typically based on file headers and\ntrailers, checksums and validation algorithms, and record level cross-checks.")])]),e._v(" "),a("li",[a("p",[a("em",[e._v("Extract Applications:")]),e._v(" An application that reads a set of records from a database or\ninput file, selects records based on predefined rules, and writes the records to an\noutput file.")])]),e._v(" "),a("li",[a("p",[a("em",[e._v("Extract/Update Applications:")]),e._v(" An application that reads records from a database or\nan input file and makes changes to a database or an output file driven by the data found\nin each input record.")])]),e._v(" "),a("li",[a("p",[a("em",[e._v("Processing and Updating Applications:")]),e._v(" An application that performs processing on\ninput transactions from an extract or a validation application. The processing usually\ninvolves reading a database to obtain data required for processing, potentially updating\nthe database and creating records for output processing.")])]),e._v(" "),a("li",[a("p",[a("em",[e._v("Output/Format Applications:")]),e._v(" Applications that read an input file, restructure data\nfrom this record according to a standard format, and produce an output file for printing\nor transmission to another program or system.")])])]),e._v(" "),a("p",[e._v("Additionally, a basic application shell should be provided for business logic that cannot\nbe built using the previously mentioned building blocks.")]),e._v(" "),a("p",[e._v("In addition to the main building blocks, each application may use one or more of standard\nutility steps, such as:")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Sort: A program that reads an input file and produces an output file where records\nhave been re-sequenced according to a sort key field in the records. Sorts are usually\nperformed by standard system utilities.")])]),e._v(" "),a("li",[a("p",[e._v("Split: A program that reads a single input file and writes each record to one of\nseveral output files based on a field value. Splits can be tailored or performed by\nparameter-driven standard system utilities.")])]),e._v(" "),a("li",[a("p",[e._v("Merge: A program that reads records from multiple input files and produces one output\nfile with combined data from the input files. Merges can be tailored or performed by\nparameter-driven standard system utilities.")])])]),e._v(" "),a("p",[e._v("Batch applications can additionally be categorized by their input source:")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Database-driven applications are driven by rows or values retrieved from the database.")])]),e._v(" "),a("li",[a("p",[e._v("File-driven applications are driven by records or values retrieved from a file.")])]),e._v(" "),a("li",[a("p",[e._v("Message-driven applications are driven by messages retrieved from a message queue.")])])]),e._v(" "),a("p",[e._v("The foundation of any batch system is the processing strategy. Factors affecting the\nselection of the strategy include: estimated batch system volume, concurrency with\non-line systems or with other batch systems, available batch windows. (Note that, with\nmore enterprises wanting to be up and running 24x7, clear batch windows are\ndisappearing).")]),e._v(" "),a("p",[e._v("Typical processing options for batch are (in increasing order of implementation\ncomplexity):")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Normal processing during a batch window in off-line mode.")])]),e._v(" "),a("li",[a("p",[e._v("Concurrent batch or on-line processing.")])]),e._v(" "),a("li",[a("p",[e._v("Parallel processing of many different batch runs or jobs at the same time.")])]),e._v(" "),a("li",[a("p",[e._v("Partitioning (processing of many instances of the same job at the same time).")])]),e._v(" "),a("li",[a("p",[e._v("A combination of the preceding options.")])])]),e._v(" "),a("p",[e._v("Some or all of these options may be supported by a commercial scheduler.")]),e._v(" "),a("p",[e._v("The following section discusses these processing options in more detail. It is important\nto notice that, as a rule of thumb, the commit and locking strategy adopted by batch\nprocesses depends on the type of processing performed and that the on-line locking\nstrategy should also use the same principles. Therefore, the batch architecture cannot be\nsimply an afterthought when designing an overall architecture.")]),e._v(" "),a("p",[e._v("The locking strategy can be to use only normal database locks or to implement an\nadditional custom locking service in the architecture. The locking service would track\ndatabase locking (for example, by storing the necessary information in a dedicated\ndb-table) and give or deny permissions to the application programs requesting a db\noperation. Retry logic could also be implemented by this architecture to avoid aborting a\nbatch job in case of a lock situation.")]),e._v(" "),a("p",[a("strong",[e._v("1. Normal processing in a batch window")]),e._v(" For simple batch processes running in a separate\nbatch window where the data being updated is not required by on-line users or other batch\nprocesses, concurrency is not an issue and a single commit can be done at the end of the\nbatch run.")]),e._v(" "),a("p",[e._v("In most cases, a more robust approach is more appropriate. Keep in mind that batch\nsystems have a tendency to grow as time goes by, both in terms of complexity and the data\nvolumes they handle. If no locking strategy is in place and the system still relies on a\nsingle commit point, modifying the batch programs can be painful. Therefore, even with\nthe simplest batch systems, consider the need for commit logic for restart-recovery\noptions as well as the information concerning the more complex cases described later in\nthis section.")]),e._v(" "),a("p",[a("strong",[e._v("2. Concurrent batch or on-line processing")]),e._v(" Batch applications processing data that can\nbe simultaneously updated by on-line users should not lock any data (either in the\ndatabase or in files) which could be required by on-line users for more than a few\nseconds. Also, updates should be committed to the database at the end of every few\ntransactions. This minimizes the portion of data that is unavailable to other processes\nand the elapsed time the data is unavailable.")]),e._v(" "),a("p",[e._v("Another option to minimize physical locking is to have logical row-level locking\nimplemented with either an Optimistic Locking Pattern or a Pessimistic Locking Pattern.")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Optimistic locking assumes a low likelihood of record contention. It typically means\ninserting a timestamp column in each database table used concurrently by both batch and\non-line processing. When an application fetches a row for processing, it also fetches the\ntimestamp. As the application then tries to update the processed row, the update uses the\noriginal timestamp in the WHERE clause. If the timestamp matches, the data and the\ntimestamp are updated. If the timestamp does not match, this indicates that another\napplication has updated the same row between the fetch and the update attempt. Therefore,\nthe update cannot be performed.")])]),e._v(" "),a("li",[a("p",[e._v("Pessimistic locking is any locking strategy that assumes there is a high likelihood of\nrecord contention and therefore either a physical or logical lock needs to be obtained at\nretrieval time. One type of pessimistic logical locking uses a dedicated lock-column in\nthe database table. When an application retrieves the row for update, it sets a flag in\nthe lock column. With the flag in place, other applications attempting to retrieve the\nsame row logically fail. When the application that sets the flag updates the row, it also\nclears the flag, enabling the row to be retrieved by other applications. Please note that\nthe integrity of data must be maintained also between the initial fetch and the setting\nof the flag, for example by using db locks (such as "),a("code",[e._v("SELECT FOR UPDATE")]),e._v("). Note also that\nthis method suffers from the same downside as physical locking except that it is somewhat\neasier to manage building a time-out mechanism that gets the lock released if the user\ngoes to lunch while the record is locked.")])])]),e._v(" "),a("p",[e._v("These patterns are not necessarily suitable for batch processing, but they might be used\nfor concurrent batch and on-line processing (such as in cases where the database does not\nsupport row-level locking). As a general rule, optimistic locking is more suitable for\non-line applications, while pessimistic locking is more suitable for batch applications.\nWhenever logical locking is used, the same scheme must be used for all applications\naccessing data entities protected by logical locks.")]),e._v(" "),a("p",[e._v("Note that both of these solutions only address locking a single record. Often, we may\nneed to lock a logically related group of records. With physical locks, you have to\nmanage these very carefully in order to avoid potential deadlocks. With logical locks, it\nis usually best to build a logical lock manager that understands the logical record\ngroups you want to protect and that can ensure that locks are coherent and\nnon-deadlocking. This logical lock manager usually uses its own tables for lock\nmanagement, contention reporting, time-out mechanism, and other concerns.")]),e._v(" "),a("p",[a("strong",[e._v("3. Parallel Processing")]),e._v(" Parallel processing allows multiple batch runs or jobs to run in\nparallel to minimize the total elapsed batch processing time. This is not a problem as\nlong as the jobs are not sharing the same files, db-tables, or index spaces. If they do,\nthis service should be implemented using partitioned data. Another option is to build an\narchitecture module for maintaining interdependencies by using a control table. A control\ntable should contain a row for each shared resource and whether it is in use by an\napplication or not. The batch architecture or the application in a parallel job would\nthen retrieve information from that table to determine if it can get access to the\nresource it needs or not.")]),e._v(" "),a("p",[e._v("If the data access is not a problem, parallel processing can be implemented through the\nuse of additional threads to process in parallel. In the mainframe environment, parallel\njob classes have traditionally been used, in order to ensure adequate CPU time for all\nthe processes. Regardless, the solution has to be robust enough to ensure time slices for\nall the running processes.")]),e._v(" "),a("p",[e._v("Other key issues in parallel processing include load balancing and the availability of\ngeneral system resources such as files, database buffer pools, and so on. Also note that\nthe control table itself can easily become a critical resource.")]),e._v(" "),a("p",[a("strong",[e._v("4. Partitioning")]),e._v(" Using partitioning allows multiple versions of large batch applications\nto run concurrently. The purpose of this is to reduce the elapsed time required to\nprocess long batch jobs. Processes that can be successfully partitioned are those where\nthe input file can be split and/or the main database tables partitioned to allow the\napplication to run against different sets of data.")]),e._v(" "),a("p",[e._v("In addition, processes which are partitioned must be designed to only process their\nassigned data set. A partitioning architecture has to be closely tied to the database\ndesign and the database partitioning strategy. Note that database partitioning does not\nnecessarily mean physical partitioning of the database, although in most cases this is\nadvisable. The following picture illustrates the partitioning approach:")]),e._v(" "),a("p",[a("img",{attrs:{src:"https://docs.spring.io/spring-batch/docs/current/reference/html/images/partitioned.png",alt:"Figure 1.2: Partitioned Process"}})]),e._v(" "),a("p",[e._v("Figure 2. Partitioned Process")]),e._v(" "),a("p",[e._v("The architecture should be flexible enough to allow dynamic configuration of the number\nof partitions. Both automatic and user controlled configuration should be considered.\nAutomatic configuration may be based on parameters such as the input file size and the\nnumber of input records.")]),e._v(" "),a("p",[a("strong",[e._v("4.1 Partitioning Approaches")]),e._v(" Selecting a partitioning approach has to be done on a\ncase-by-case basis. The following list describes some of the possible partitioning\napproaches:")]),e._v(" "),a("p",[a("em",[e._v("1. Fixed and Even Break-Up of Record Set")])]),e._v(" "),a("p",[e._v("This involves breaking the input record set into an even number of portions (for example,\n10, where each portion has exactly 1/10th of the entire record set). Each portion is then\nprocessed by one instance of the batch/extract application.")]),e._v(" "),a("p",[e._v("In order to use this approach, preprocessing is required to split the record set up. The\nresult of this split will be a lower and upper bound placement number which can be used\nas input to the batch/extract application in order to restrict its processing to only its\nportion.")]),e._v(" "),a("p",[e._v("Preprocessing could be a large overhead, as it has to calculate and determine the bounds\nof each portion of the record set.")]),e._v(" "),a("p",[a("em",[e._v("2. Break up by a Key Column")])]),e._v(" "),a("p",[e._v("This involves breaking up the input record set by a key column, such as a location code,\nand assigning data from each key to a batch instance. In order to achieve this, column\nvalues can be either:")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Assigned to a batch instance by a partitioning table (described later in this\nsection).")])]),e._v(" "),a("li",[a("p",[e._v("Assigned to a batch instance by a portion of the value (such as 0000-0999, 1000 - 1999,\nand so on).")])])]),e._v(" "),a("p",[e._v("Under option 1, adding new values means a manual reconfiguration of the batch/extract to\nensure that the new value is added to a particular instance.")]),e._v(" "),a("p",[e._v("Under option 2, this ensures that all values are covered via an instance of the batch\njob. However, the number of values processed by one instance is dependent on the\ndistribution of column values (there may be a large number of locations in the 0000-0999\nrange, and few in the 1000-1999 range). Under this option, the data range should be\ndesigned with partitioning in mind.")]),e._v(" "),a("p",[e._v("Under both options, the optimal even distribution of records to batch instances cannot be\nrealized. There is no dynamic configuration of the number of batch instances used.")]),e._v(" "),a("p",[a("em",[e._v("3. Breakup by Views")])]),e._v(" "),a("p",[e._v("This approach is basically breakup by a key column but on the database level. It involves\nbreaking up the record set into views. These views are used by each instance of the batch\napplication during its processing. The breakup is done by grouping the data.")]),e._v(" "),a("p",[e._v("With this option, each instance of a batch application has to be configured to hit a\nparticular view (instead of the master table). Also, with the addition of new data\nvalues, this new group of data has to be included into a view. There is no dynamic\nconfiguration capability, as a change in the number of instances results in a change to\nthe views.")]),e._v(" "),a("p",[a("em",[e._v("4. Addition of a Processing Indicator")])]),e._v(" "),a("p",[e._v("This involves the addition of a new column to the input table, which acts as an\nindicator. As a preprocessing step, all indicators are marked as being non-processed.\nDuring the record fetch stage of the batch application, records are read on the condition\nthat that record is marked as being non-processed, and once they are read (with lock),\nthey are marked as being in processing. When that record is completed, the indicator is\nupdated to either complete or error. Many instances of a batch application can be started\nwithout a change, as the additional column ensures that a record is only processed once.")]),e._v(" "),a("p",[e._v("With this option, I/O on the table increases dynamically. In the case of an updating\nbatch application, this impact is reduced, as a write must occur anyway.")]),e._v(" "),a("p",[a("em",[e._v("5. Extract Table to a Flat File")])]),e._v(" "),a("p",[e._v("This involves the extraction of the table into a file. This file can then be split into\nmultiple segments and used as input to the batch instances.")]),e._v(" "),a("p",[e._v("With this option, the additional overhead of extracting the table into a file and\nsplitting it may cancel out the effect of multi-partitioning. Dynamic configuration can\nbe achieved by changing the file splitting script.")]),e._v(" "),a("p",[a("em",[e._v("6. Use of a Hashing Column")])]),e._v(" "),a("p",[e._v("This scheme involves the addition of a hash column (key/index) to the database tables\nused to retrieve the driver record. This hash column has an indicator to determine which\ninstance of the batch application processes this particular row. For example, if there\nare three batch instances to be started, then an indicator of 'A' marks a row for\nprocessing by instance 1, an indicator of 'B' marks a row for processing by instance 2,\nand an indicator of 'C' marks a row for processing by instance 3.")]),e._v(" "),a("p",[e._v("The procedure used to retrieve the records would then have an additional "),a("code",[e._v("WHERE")]),e._v(" clause\nto select all rows marked by a particular indicator. The inserts in this table would\ninvolve the addition of the marker field, which would be defaulted to one of the\ninstances (such as 'A').")]),e._v(" "),a("p",[e._v("A simple batch application would be used to update the indicators, such as to\nredistribute the load between the different instances. When a sufficiently large number\nof new rows have been added, this batch can be run (anytime, except in the batch window)\nto redistribute the new rows to other instances.")]),e._v(" "),a("p",[e._v("Additional instances of the batch application only require the running of the batch\napplication as described in the preceding paragraphs to redistribute the indicators to\nwork with a new number of instances.")]),e._v(" "),a("p",[a("strong",[e._v("4.2 Database and Application Design Principles")])]),e._v(" "),a("p",[e._v("An architecture that supports multi-partitioned applications which run against\npartitioned database tables using the key column approach should include a central\npartition repository for storing partition parameters. This provides flexibility and\nensures maintainability. The repository generally consists of a single table, known as\nthe partition table.")]),e._v(" "),a("p",[e._v("Information stored in the partition table is static and, in general, should be maintained\nby the DBA. The table should consist of one row of information for each partition of a\nmulti-partitioned application. The table should have columns for Program ID Code,\nPartition Number (logical ID of the partition), Low Value of the db key column for this\npartition, and High Value of the db key column for this partition.")]),e._v(" "),a("p",[e._v("On program start-up, the program "),a("code",[e._v("id")]),e._v(" and partition number should be passed to the\napplication from the architecture (specifically, from the Control Processing Tasklet). If\na key column approach is used, these variables are used to read the partition table in\norder to determine what range of data the application is to process. In addition the\npartition number must be used throughout the processing to:")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Add to the output files/database updates in order for the merge process to work\nproperly.")])]),e._v(" "),a("li",[a("p",[e._v("Report normal processing to the batch log and any errors to the architecture error\nhandler.")])])]),e._v(" "),a("p",[a("strong",[e._v("4.3 Minimizing Deadlocks")])]),e._v(" "),a("p",[e._v("When applications run in parallel or are partitioned, contention in database resources\nand deadlocks may occur. It is critical that the database design team eliminates\npotential contention situations as much as possible as part of the database design.")]),e._v(" "),a("p",[e._v("Also, the developers must ensure that the database index tables are designed with\ndeadlock prevention and performance in mind.")]),e._v(" "),a("p",[e._v("Deadlocks or hot spots often occur in administration or architecture tables, such as log\ntables, control tables, and lock tables. The implications of these should be taken into\naccount as well. A realistic stress test is crucial for identifying the possible\nbottlenecks in the architecture.")]),e._v(" "),a("p",[e._v("To minimize the impact of conflicts on data, the architecture should provide services\nsuch as wait-and-retry intervals when attaching to a database or when encountering a\ndeadlock. This means a built-in mechanism to react to certain database return codes and,\ninstead of issuing an immediate error, waiting a predetermined amount of time and\nretrying the database operation.")]),e._v(" "),a("p",[a("strong",[e._v("4.4 Parameter Passing and Validation")])]),e._v(" "),a("p",[e._v("The partition architecture should be relatively transparent to application developers.\nThe architecture should perform all tasks associated with running the application in a\npartitioned mode, including:")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Retrieving partition parameters before application start-up.")])]),e._v(" "),a("li",[a("p",[e._v("Validating partition parameters before application start-up.")])]),e._v(" "),a("li",[a("p",[e._v("Passing parameters to the application at start-up.")])])]),e._v(" "),a("p",[e._v("The validation should include checks to ensure that:")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("The application has sufficient partitions to cover the whole data range.")])]),e._v(" "),a("li",[a("p",[e._v("There are no gaps between partitions.")])])]),e._v(" "),a("p",[e._v("If the database is partitioned, some additional validation may be necessary to ensure\nthat a single partition does not span database partitions.")]),e._v(" "),a("p",[e._v("Also, the architecture should take into consideration the consolidation of partitions.\nKey questions include:")]),e._v(" "),a("ul",[a("li",[a("p",[e._v("Must all the partitions be finished before going into the next job step?")])]),e._v(" "),a("li",[a("p",[e._v("What happens if one of the partitions aborts?")])])])])}),[],!1,null,null,null);t.default=i.exports}}]);